diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index bec9c367ba9..cb79a30422e 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -197,8 +197,6 @@ def nlevels(self) -> int: @property def name(self) -> Any: - if len(self._data) == 0: - return None return self.level_names[-1] @property diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8a3dbe77787..ead2f182e2d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -665,7 +665,10 @@ def __init__( len(self), dtype="object", masked=True ) for k in columns - } + }, + level_names=tuple(columns.names) + if isinstance(columns, pd.Index) + else None, ) elif isinstance(data, ColumnAccessor): raise TypeError( @@ -712,6 +715,11 @@ def __init__( self._data = new_df._data self._index = new_df._index + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) elif len(data) > 0 and isinstance(data[0], Series): self._init_from_series_list( data=data, columns=columns, index=index @@ -834,6 +842,11 @@ def _init_from_series_list(self, data, columns, index): self._data[col_name] = column.column_empty( row_count=len(self), dtype=None, masked=True ) + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) self._data = self._data.select_by_label(columns) @_cudf_nvtx_annotate @@ -957,6 +970,11 @@ def _init_from_dict_like( data[col_name], nan_as_null=nan_as_null, ) + self._data._level_names = ( + tuple(columns.names) + if isinstance(columns, pd.Index) + else self._data._level_names + ) @classmethod def _from_data( @@ -5131,7 +5149,7 @@ def from_pandas(cls, dataframe, nan_as_null=None): index = cudf.from_pandas(dataframe.index, nan_as_null=nan_as_null) df = cls._from_data(data, index) - df._data._level_names = list(dataframe.columns.names) + df._data._level_names = tuple(dataframe.columns.names) # Set columns only if it is a MultiIndex if isinstance(dataframe.columns, pd.MultiIndex): @@ -5377,6 +5395,8 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): df = df.set_index(index) else: df._index = as_index(index) + if isinstance(columns, pd.Index): + df._data._level_names = tuple(columns.names) return df @classmethod @@ -5434,7 +5454,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): data, nan_as_null=nan_as_null ) if isinstance(columns, pd.Index): - df._data._level_names = list(columns.names) + df._data._level_names = tuple(columns.names) if index is None: df._index = RangeIndex(start=0, stop=len(data)) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index aacf1fa8dae..1008cbdb67f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2661,7 +2661,9 @@ def _reindex( data=cudf.core.column_accessor.ColumnAccessor( cols, multiindex=self._data.multiindex, - level_names=self._data.level_names, + level_names=tuple(column_names.names) + if isinstance(column_names, pd.Index) + else None, ), index=index, ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 67b63028fab..c297748f7e5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -6394,6 +6394,7 @@ def test_df_series_dataframe_astype_dtype_dict(copy): ([range(100), range(100)], ["range" + str(i) for i in range(100)]), (((1, 2, 3), (1, 2, 3)), ["tuple0", "tuple1", "tuple2"]), ([[1, 2, 3]], ["list col1", "list col2", "list col3"]), + ([[1, 2, 3]], pd.Index(["col1", "col2", "col3"], name="rapids")), ([range(100)], ["range" + str(i) for i in range(100)]), (((1, 2, 3),), ["k1", "k2", "k3"]), ], @@ -7969,6 +7970,7 @@ def test_series_empty(ps): @pytest.mark.parametrize( "data", [ + None, [], [1], {"a": [10, 11, 12]}, @@ -7979,7 +7981,10 @@ def test_series_empty(ps): }, ], ) -@pytest.mark.parametrize("columns", [["a"], ["another column name"], None]) +@pytest.mark.parametrize( + "columns", + [["a"], ["another column name"], None, pd.Index(["a"], name="index name")], +) def test_dataframe_init_with_columns(data, columns): pdf = pd.DataFrame(data, columns=columns) gdf = cudf.DataFrame(data, columns=columns) @@ -8047,7 +8052,16 @@ def test_dataframe_init_with_columns(data, columns): ], ) @pytest.mark.parametrize( - "columns", [None, ["0"], [0], ["abc"], [144, 13], [2, 1, 0]] + "columns", + [ + None, + ["0"], + [0], + ["abc"], + [144, 13], + [2, 1, 0], + pd.Index(["abc"], name="custom_name"), + ], ) def test_dataframe_init_from_series_list(data, ignore_dtype, columns): gd_data = [cudf.from_pandas(obj) for obj in data] @@ -10239,14 +10253,21 @@ def test_dataframe_binop_with_datetime_index(): @pytest.mark.parametrize( - "columns", ([], ["c", "a"], ["a", "d", "b", "e", "c"], ["a", "b", "c"]) + "columns", + ( + [], + ["c", "a"], + ["a", "d", "b", "e", "c"], + ["a", "b", "c"], + pd.Index(["b", "a", "c"], name="custom_name"), + ), ) @pytest.mark.parametrize("index", (None, [4, 5, 6])) def test_dataframe_dict_like_with_columns(columns, index): data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} expect = pd.DataFrame(data, columns=columns, index=index) actual = cudf.DataFrame(data, columns=columns, index=index) - if index is None and columns == []: + if index is None and len(columns) == 0: # We make an empty range index, pandas makes an empty index expect = expect.reset_index(drop=True) assert_eq(expect, actual)