Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix renaming Series and Index #14080

Merged
merged 2 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from cudf._lib.filling import sequence
from cudf._lib.search import search_sorted
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_categorical_dtype,
Expand Down Expand Up @@ -95,7 +96,7 @@ def _lexsorted_equal_range(
return lower_bound, upper_bound, sort_inds


def _index_from_data(data: MutableMapping, name: Any = None):
def _index_from_data(data: MutableMapping, name: Any = no_default):
"""Construct an index of the appropriate type from some data."""

if len(data) == 0:
Expand Down Expand Up @@ -131,7 +132,7 @@ def _index_from_data(data: MutableMapping, name: Any = None):


def _index_from_columns(
columns: List[cudf.core.column.ColumnBase], name: Any = None
columns: List[cudf.core.column.ColumnBase], name: Any = no_default
):
"""Construct an index from ``columns``, with levels named 0, 1, 2..."""
return _index_from_data(dict(zip(range(len(columns)), columns)), name=name)
Expand Down Expand Up @@ -1032,10 +1033,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
@classmethod
@_cudf_nvtx_annotate
def _from_data(
cls, data: MutableMapping, name: Any = None
cls, data: MutableMapping, name: Any = no_default
) -> GenericIndex:
out = super()._from_data(data=data)
if name is not None:
if name is not no_default:
out.name = name
return out

Expand Down Expand Up @@ -3334,6 +3335,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex:
- DatetimeIndex for Datetime input.
- GenericIndex for all other inputs.
"""

kwargs = _setdefault_name(arbitrary, **kwargs)
if isinstance(arbitrary, cudf.MultiIndex):
return arbitrary
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,10 +605,10 @@ def _from_data(
cls,
data: MutableMapping,
index: Optional[BaseIndex] = None,
name: Any = None,
name: Any = no_default,
) -> Series:
out = super()._from_data(data=data, index=index)
if name is not None:
if name is not no_default:
out.name = name
return out

Expand Down
27 changes: 27 additions & 0 deletions python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,33 @@
OTHER_TYPES = sorted(list(dtypeutils.OTHER_TYPES))
ALL_TYPES = sorted(list(dtypeutils.ALL_TYPES))

# Candidate ``name`` values for Series/Index tests. The list is fed to
# ``pytest.mark.parametrize`` by the binop and rename tests, so every entry
# becomes one test case (or one axis of a name-pair cross product).
# It mixes None, several missing-value sentinels (NA / NaN / NaT), a string,
# Python and NumPy numeric scalars, datetime / timedelta scalars from NumPy
# and pandas, and Decimal values.
SERIES_OR_INDEX_NAMES = [
None,
pd.NA,
cudf.NA,
np.nan,
float("NaN"),
"abc",
1,
pd.NaT,
np.datetime64("nat"),
np.timedelta64("NaT"),
np.timedelta64(10, "D"),
np.timedelta64(5, "D"),
np.datetime64("1970-01-01 00:00:00.000000001"),
np.datetime64("1970-01-01 00:00:00.000000002"),
pd.Timestamp(1),
pd.Timestamp(2),
pd.Timedelta(1),
pd.Timedelta(2),
Decimal("NaN"),
Decimal("1.2"),
np.int64(1),
np.int32(1),
np.float32(1),
# NOTE(review): pd.Timestamp(1) is already listed earlier in this list; this
# second entry looks like an accidental duplicate that only adds redundant
# parametrized cases -- confirm before removing.
pd.Timestamp(1),
]


def set_random_null_mask_inplace(series, null_probability=0.5, seed=None):
"""Randomly nullify elements in series with the provided probability."""
Expand Down
30 changes: 2 additions & 28 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,32 +150,6 @@
lambda x: cudf.Scalar(0) / x,
]

_series_or_index_names = [
None,
pd.NA,
cudf.NA,
np.nan,
float("NaN"),
"abc",
1,
pd.NaT,
np.datetime64("nat"),
np.timedelta64("NaT"),
np.timedelta64(10, "D"),
np.timedelta64(5, "D"),
np.datetime64("1970-01-01 00:00:00.000000001"),
np.datetime64("1970-01-01 00:00:00.000000002"),
pd.Timestamp(1),
pd.Timestamp(2),
pd.Timedelta(1),
pd.Timedelta(2),
decimal.Decimal("NaN"),
decimal.Decimal("1.2"),
np.int64(1),
np.int32(1),
np.float32(1),
pd.Timestamp(1),
]

pytest_xfail = pytest.mark.xfail
pytestmark = pytest.mark.spilling
Expand Down Expand Up @@ -3315,8 +3289,8 @@ def test_binop_index_series(op):
utils.assert_eq(expected, actual)


@pytest.mark.parametrize("name1", _series_or_index_names)
@pytest.mark.parametrize("name2", _series_or_index_names)
@pytest.mark.parametrize("name1", utils.SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name2", utils.SERIES_OR_INDEX_NAMES)
def test_binop_index_dt_td_series_with_names(name1, name2):
gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]", name=name1)
gs = cudf.Series([10, 11, 12], dtype="timedelta64[ns]", name=name2)
Expand Down
13 changes: 9 additions & 4 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
FLOAT_TYPES,
NUMERIC_TYPES,
OTHER_TYPES,
SERIES_OR_INDEX_NAMES,
SIGNED_INTEGER_TYPES,
SIGNED_TYPES,
UNSIGNED_TYPES,
Expand Down Expand Up @@ -227,12 +228,16 @@ def test_pandas_as_index():
)


def test_index_rename():
pds = pd.Index([1, 2, 3], name="asdf")
@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
def test_index_rename(initial_name, name):
pds = pd.Index([1, 2, 3], name=initial_name)
gds = as_index(pds)

expect = pds.rename("new_name")
got = gds.rename("new_name")
assert_eq(pds, gds)

expect = pds.rename(name)
got = gds.rename(name)

assert_eq(expect, got)
"""
Expand Down
15 changes: 15 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from cudf.core._compat import PANDAS_LT_140
from cudf.testing._utils import (
NUMERIC_TYPES,
SERIES_OR_INDEX_NAMES,
TIMEDELTA_TYPES,
_create_pandas_series,
assert_eq,
Expand Down Expand Up @@ -2267,3 +2268,17 @@ def test_series_unique_pandas_compatibility():
actual = gs.unique()
expected = ps.unique()
assert_eq(actual, expected)


# Check that cudf ``Series.rename`` agrees with pandas for every pairing of
# an initial name and a replacement name drawn from SERIES_OR_INDEX_NAMES
# (the two stacked parametrize decorators form the cross product).
@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
def test_series_rename(initial_name, name):
gsr = cudf.Series([1, 2, 3], name=initial_name)
psr = pd.Series([1, 2, 3], name=initial_name)

# Baseline: the two series must already match (including the initial name)
# before renaming, so a later mismatch can be attributed to ``rename``.
assert_eq(gsr, psr)

actual = gsr.rename(name)
expected = psr.rename(name)

# The renamed cudf series must match the renamed pandas series.
assert_eq(actual, expected)