Skip to content

Commit

Permalink
feature: support string column validation for pandas 2.1.3 (#1425)
Browse files Browse the repository at this point in the history
* fix: always cast series to bool dtype

Signed-off-by: karlma821 <[email protected]>

* test: add test_pandas_data_type_check

Signed-off-by: karlma821 <[email protected]>

---------

Signed-off-by: karlma821 <[email protected]>
  • Loading branch information
karlma821 authored Dec 4, 2023
1 parent d89ddaf commit a318e48
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def check(
)
else:
is_python_string = data_container.map(lambda x: isinstance(x, str)) # type: ignore[operator]
return is_python_string | data_container.isna()
return is_python_string.astype(bool) | data_container.isna()


Engine.register_dtype(
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hypothesis
import hypothesis.extra.pandas as pd_st
import hypothesis.strategies as st
import numpy as np
import pandas as pd
import pytest
import pytz
Expand Down Expand Up @@ -58,6 +59,35 @@ def test_pandas_data_type_coerce(data_type_cls):
assert exc.failure_cases.shape[0] > 0


@pytest.mark.parametrize(
    "data_type_cls", list(pandas_engine.Engine.get_registered_dtypes())
)
def test_pandas_data_type_check(data_type_cls):
    """
    Verify that a registered data type's ``check`` output reduces to a boolean.

    Each registered dtype class is instantiated without arguments and its
    ``check`` method is run against an empty Series of the matching dtype.
    The outcome must either be a plain ``bool`` or expose an ``.all()``
    reduction that yields ``bool`` / ``np.bool_``.
    """
    # Data types whose constructor needs parameters raise TypeError here;
    # those are not exercised by this test.
    try:
        dtype_instance = data_type_cls()
    except TypeError:
        return

    # Complex data types (e.g. PythonDict, PythonTuple, etc) cannot be used
    # as a Series dtype and are likewise left out.
    try:
        empty_series = pd.Series([], dtype=dtype_instance.type)
    except TypeError:
        return

    expected_dtype = pandas_engine.Engine.dtype(empty_series.dtype)
    outcome = dtype_instance.check(expected_dtype, empty_series)

    # A scalar bool is already reduced; nothing further to verify.
    if isinstance(outcome, bool):
        return

    reduced = outcome.all()
    assert isinstance(reduced, (bool, np.bool_))


CATEGORIES = ["A", "B", "C"]


Expand Down

0 comments on commit a318e48

Please sign in to comment.