Skip to content

Commit

Permalink
Allow numeric_only=True for reduction operations on numeric types (#…
Browse files Browse the repository at this point in the history
…14111)

Fixes: #14090 
This PR allows passing `numeric_only=True` to reduction operations on numeric columns.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #14111
  • Loading branch information
galipremsagar authored Sep 14, 2023
1 parent 664dfc3 commit 89557bb
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 24 deletions.
6 changes: 4 additions & 2 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,11 @@ def _reduce(
if level is not None:
raise NotImplementedError("level parameter is not implemented yet")

if numeric_only:
if numeric_only and not isinstance(
self._column, cudf.core.column.numerical_base.NumericalBaseColumn
):
raise NotImplementedError(
f"Series.{op} does not implement numeric_only"
f"Series.{op} does not implement numeric_only."
)
try:
return getattr(self._column, op)(**kwargs)
Expand Down
44 changes: 22 additions & 22 deletions python/cudf/cudf/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,30 +247,37 @@ def test_misc_quantiles(data, q):
],
)
@pytest.mark.parametrize("null_flag", [False, True])
def test_kurtosis_series(data, null_flag):
@pytest.mark.parametrize("numeric_only", [False, True])
def test_kurtosis_series(data, null_flag, numeric_only):
pdata = data.to_pandas()

if null_flag and len(data) > 2:
data.iloc[[0, 2]] = None
pdata.iloc[[0, 2]] = None

got = data.kurtosis()
got = data.kurtosis(numeric_only=numeric_only)
got = got if np.isscalar(got) else got.to_numpy()
expected = pdata.kurtosis()
expected = pdata.kurtosis(numeric_only=numeric_only)
np.testing.assert_array_almost_equal(got, expected)

got = data.kurt()
got = data.kurt(numeric_only=numeric_only)
got = got if np.isscalar(got) else got.to_numpy()
expected = pdata.kurt()
expected = pdata.kurt(numeric_only=numeric_only)
np.testing.assert_array_almost_equal(got, expected)

got = data.kurt(numeric_only=False)
got = got if np.isscalar(got) else got.to_numpy()
expected = pdata.kurt(numeric_only=False)
np.testing.assert_array_almost_equal(got, expected)

with pytest.raises(NotImplementedError):
data.kurt(numeric_only=True)
@pytest.mark.parametrize("op", ["skew", "kurt"])
def test_kurt_skew_error(op):
    """Exercise ``skew``/``kurt`` with ``numeric_only=True`` on a string Series.

    The reduction named by ``op`` is looked up on a cudf string Series and on
    its pandas counterpart, and both callables are handed to
    ``assert_exceptions_equal`` with ``numeric_only=True`` as the only keyword
    argument. The whole comparison is expected to end in a ``FutureWarning``
    being raised.
    """
    gpu_series = cudf.Series(["ab", "cd"])
    pandas_series = gpu_series.to_pandas()

    # NOTE(review): presumably the FutureWarning originates from pandas and is
    # promoted to an exception by the test suite's warning filters — confirm.
    with pytest.raises(FutureWarning):
        cudf_reduction = getattr(gpu_series, op)
        pandas_reduction = getattr(pandas_series, op)
        assert_exceptions_equal(
            cudf_reduction,
            pandas_reduction,
            lfunc_args_and_kwargs=([], {"numeric_only": True}),
            rfunc_args_and_kwargs=([], {"numeric_only": True}),
        )


@pytest.mark.parametrize(
Expand All @@ -290,26 +297,19 @@ def test_kurtosis_series(data, null_flag):
],
)
@pytest.mark.parametrize("null_flag", [False, True])
def test_skew_series(data, null_flag):
@pytest.mark.parametrize("numeric_only", [False, True])
def test_skew_series(data, null_flag, numeric_only):
pdata = data.to_pandas()

if null_flag and len(data) > 2:
data.iloc[[0, 2]] = None
pdata.iloc[[0, 2]] = None

got = data.skew()
expected = pdata.skew()
got = data.skew(numeric_only=numeric_only)
expected = pdata.skew(numeric_only=numeric_only)
got = got if np.isscalar(got) else got.to_numpy()
np.testing.assert_array_almost_equal(got, expected)

got = data.skew(numeric_only=False)
expected = pdata.skew(numeric_only=False)
got = got if np.isscalar(got) else got.to_numpy()
np.testing.assert_array_almost_equal(got, expected)

with pytest.raises(NotImplementedError):
data.skew(numeric_only=True)


@pytest.mark.parametrize("dtype", params_dtypes)
@pytest.mark.parametrize("num_na", [0, 1, 50, 99, 100])
Expand Down

0 comments on commit 89557bb

Please sign in to comment.