From dbb38a5107c5e00f57e90a3b5c0370df921480e6 Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Mon, 11 Sep 2023 16:21:29 -0700
Subject: [PATCH] Fix rename API

Use the `no_default` sentinel from `cudf.api.extensions` instead of
`None` as the default `name` argument of `_index_from_data`,
`_index_from_columns` and the `_from_data` classmethods touched in
index.py and series.py. `out.name` is now left untouched only when
`name` is that sentinel, so an explicitly passed `name=None` is
assigned like any other name.

`as_index` now passes its keyword arguments through `_setdefault_name`
before dispatching on the input type.

Tests: the list of candidate names formerly private to test_binops.py
moves to `cudf.testing._utils.SERIES_OR_INDEX_NAMES`; `test_index_rename`
is parametrized over it (initial name x new name) and a matching
`test_series_rename` is added.

---
 python/cudf/cudf/core/index.py        | 10 +++++----
 python/cudf/cudf/core/series.py       |  4 ++--
 python/cudf/cudf/testing/_utils.py    | 27 ++++++++++++++++++++++++
 python/cudf/cudf/tests/test_binops.py | 30 ++-------------------------
 python/cudf/cudf/tests/test_index.py  | 13 ++++++++----
 python/cudf/cudf/tests/test_series.py | 15 ++++++++++++++
 6 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 4bb5428838f..57c481db0d8 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -28,6 +28,7 @@
 from cudf._lib.filling import sequence
 from cudf._lib.search import search_sorted
 from cudf._lib.types import size_type_dtype
+from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     is_categorical_dtype,
@@ -95,7 +96,7 @@ def _lexsorted_equal_range(
     return lower_bound, upper_bound, sort_inds
 
 
-def _index_from_data(data: MutableMapping, name: Any = None):
+def _index_from_data(data: MutableMapping, name: Any = no_default):
     """Construct an index of the appropriate type from some data."""
 
     if len(data) == 0:
@@ -131,7 +132,7 @@ def _index_from_data(data: MutableMapping, name: Any = None):
 
 
 def _index_from_columns(
-    columns: List[cudf.core.column.ColumnBase], name: Any = None
+    columns: List[cudf.core.column.ColumnBase], name: Any = no_default
 ):
     """Construct an index from ``columns``, with levels named 0, 1, 2..."""
     return _index_from_data(dict(zip(range(len(columns)), columns)), name=name)
@@ -1032,10 +1033,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     @classmethod
     @_cudf_nvtx_annotate
     def _from_data(
-        cls, data: MutableMapping, name: Any = None
+        cls, data: MutableMapping, name: Any = no_default
     ) -> GenericIndex:
         out = super()._from_data(data=data)
-        if name is not None:
+        if name is not no_default:
             out.name = name
         return out
 
@@ -3334,6 +3335,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex:
         - DatetimeIndex for Datetime input.
         - GenericIndex for all other inputs.
     """
+    kwargs = _setdefault_name(arbitrary, **kwargs)
 
     if isinstance(arbitrary, cudf.MultiIndex):
         return arbitrary
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 78be3085754..f44a3123dd3 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -605,10 +605,10 @@ def _from_data(
         cls,
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
-        name: Any = None,
+        name: Any = no_default,
     ) -> Series:
         out = super()._from_data(data=data, index=index)
-        if name is not None:
+        if name is not no_default:
             out.name = name
         return out
 
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 0489329d801..e949f7d78e7 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -48,6 +48,33 @@
 OTHER_TYPES = sorted(list(dtypeutils.OTHER_TYPES))
 ALL_TYPES = sorted(list(dtypeutils.ALL_TYPES))
 
+SERIES_OR_INDEX_NAMES = [
+    None,
+    pd.NA,
+    cudf.NA,
+    np.nan,
+    float("NaN"),
+    "abc",
+    1,
+    pd.NaT,
+    np.datetime64("nat"),
+    np.timedelta64("NaT"),
+    np.timedelta64(10, "D"),
+    np.timedelta64(5, "D"),
+    np.datetime64("1970-01-01 00:00:00.000000001"),
+    np.datetime64("1970-01-01 00:00:00.000000002"),
+    pd.Timestamp(1),
+    pd.Timestamp(2),
+    pd.Timedelta(1),
+    pd.Timedelta(2),
+    Decimal("NaN"),
+    Decimal("1.2"),
+    np.int64(1),
+    np.int32(1),
+    np.float32(1),
+    pd.Timestamp(1),
+]
+
 
 def set_random_null_mask_inplace(series, null_probability=0.5, seed=None):
     """Randomly nullify elements in series with the provided probability."""
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 549cd8da78e..87d510927ae 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -150,32 +150,6 @@
     lambda x: cudf.Scalar(0) / x,
 ]
 
-_series_or_index_names = [
-    None,
-    pd.NA,
-    cudf.NA,
-    np.nan,
-    float("NaN"),
-    "abc",
-    1,
-    pd.NaT,
-    np.datetime64("nat"),
-    np.timedelta64("NaT"),
-    np.timedelta64(10, "D"),
-    np.timedelta64(5, "D"),
-    np.datetime64("1970-01-01 00:00:00.000000001"),
-    np.datetime64("1970-01-01 00:00:00.000000002"),
-    pd.Timestamp(1),
-    pd.Timestamp(2),
-    pd.Timedelta(1),
-    pd.Timedelta(2),
-    decimal.Decimal("NaN"),
-    decimal.Decimal("1.2"),
-    np.int64(1),
-    np.int32(1),
-    np.float32(1),
-    pd.Timestamp(1),
-]
 
 pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
@@ -3315,8 +3289,8 @@ def test_binop_index_series(op):
     utils.assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize("name1", _series_or_index_names)
-@pytest.mark.parametrize("name2", _series_or_index_names)
+@pytest.mark.parametrize("name1", utils.SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name2", utils.SERIES_OR_INDEX_NAMES)
 def test_binop_index_dt_td_series_with_names(name1, name2):
     gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]", name=name1)
     gs = cudf.Series([10, 11, 12], dtype="timedelta64[ns]", name=name2)
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 58dbc48e31e..f7f6e1f9114 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -24,6 +24,7 @@
     FLOAT_TYPES,
     NUMERIC_TYPES,
     OTHER_TYPES,
+    SERIES_OR_INDEX_NAMES,
     SIGNED_INTEGER_TYPES,
     SIGNED_TYPES,
     UNSIGNED_TYPES,
@@ -227,12 +228,16 @@ def test_pandas_as_index():
     )
 
 
-def test_index_rename():
-    pds = pd.Index([1, 2, 3], name="asdf")
+@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
+def test_index_rename(initial_name, name):
+    pds = pd.Index([1, 2, 3], name=initial_name)
     gds = as_index(pds)
 
-    expect = pds.rename("new_name")
-    got = gds.rename("new_name")
+    assert_eq(pds, gds)
+
+    expect = pds.rename(name)
+    got = gds.rename(name)
 
     assert_eq(expect, got)
     """
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 783d7d31d7f..8a652caa6e2 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -16,6 +16,7 @@
 from cudf.core._compat import PANDAS_LT_140
 from cudf.testing._utils import (
     NUMERIC_TYPES,
+    SERIES_OR_INDEX_NAMES,
     TIMEDELTA_TYPES,
     _create_pandas_series,
     assert_eq,
@@ -2267,3 +2268,17 @@ def test_series_unique_pandas_compatibility():
         actual = gs.unique()
     expected = ps.unique()
     assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
+def test_series_rename(initial_name, name):
+    gsr = cudf.Series([1, 2, 3], name=initial_name)
+    psr = pd.Series([1, 2, 3], name=initial_name)
+
+    assert_eq(gsr, psr)
+
+    actual = gsr.rename(name)
+    expected = psr.rename(name)
+
+    assert_eq(actual, expected)