# Patch summary (text above the first "diff --git" header is not part of any hunk).
#
# 1. python/cudf/cudf/core/indexed_frame.py -- _is_same_dtype
#    Adds a branch that returns True when both dtypes are categorical and
#    the dtype of their categories is equal. The categories themselves are
#    not compared, so two categorical dtypes with different category values
#    but the same underlying category dtype are treated as matching.
#
# 2. python/cudf/cudf/tests/test_dataframe.py
#    Adds test_init_from_2_categoricalindex_series_diff_categories, which
#    builds a cudf.DataFrame from two Series whose CategoricalIndex objects
#    have different categories and compares the result with the pandas
#    DataFrame built from the same Series converted via to_pandas().
#    The comparison is made with check_dtype=False.
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 62e091b29b5..aacf1fa8dae 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -5438,6 +5438,13 @@ def _is_same_dtype(lhs_dtype, rhs_dtype):
     # for matching column dtype.
     if lhs_dtype == rhs_dtype:
         return True
+    elif (
+        is_categorical_dtype(lhs_dtype)
+        and is_categorical_dtype(rhs_dtype)
+        and lhs_dtype.categories.dtype == rhs_dtype.categories.dtype
+    ):
+        # OK if categories are not all the same
+        return True
     elif (
         is_categorical_dtype(lhs_dtype)
         and not is_categorical_dtype(rhs_dtype)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 2f531afdeb7..67b63028fab 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10408,6 +10408,19 @@ def test_dataframe_init_from_nested_dict():
     assert_eq(pdf, gdf)
 
 
+def test_init_from_2_categoricalindex_series_diff_categories():
+    s1 = cudf.Series(
+        [39, 6, 4], index=cudf.CategoricalIndex(["female", "male", "unknown"])
+    )
+    s2 = cudf.Series(
+        [2, 152, 2, 242, 150],
+        index=cudf.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
+    )
+    result = cudf.DataFrame([s1, s2])
+    expected = pd.DataFrame([s1.to_pandas(), s2.to_pandas()])
+    assert_eq(result, expected, check_dtype=False)
+
+
 def test_data_frame_values_no_cols_but_index():
     result = cudf.DataFrame(index=range(5)).values
     expected = pd.DataFrame(index=range(5)).values