fix: Don't raise when converting from pandas if index contains duplicate names when `include_index=False` (the default) (#18133)
pola-rs · Aug 11, 2024 · 4233d61 · 4233d61
1 parent afd93b2
commit 4233d61
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 4 deletions.
diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
@@ -1044,13 +1044,17 @@ def to_frame_chunk(values: list[Any], schema: SchemaDefinition | None) -> DataFr
     return df._df
 
 
-def _check_pandas_columns(data: pd.DataFrame) -> None:
+def _check_pandas_columns(data: pd.DataFrame, *, include_index: bool) -> None:
     """Check pandas dataframe columns can be converted to polars."""
     stringified_cols: set[str] = {str(col) for col in data.columns}
-    stringified_index: set[str] = {str(idx) for idx in data.index.names}
+    stringified_index: set[str] = (
+        {str(idx) for idx in data.index.names} if include_index else set()
+    )
 
     non_unique_cols: bool = len(stringified_cols) < len(data.columns)
-    non_unique_indices: bool = len(stringified_index) < len(data.index.names)
+    non_unique_indices: bool = (
+        (len(stringified_index) < len(data.index.names)) if include_index else False
+    )
     if non_unique_cols or non_unique_indices:
         msg = (
             "Pandas dataframe contains non-unique indices and/or column names. "
@@ -1075,7 +1079,7 @@ def pandas_to_pydf(
     include_index: bool = False,
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
-    _check_pandas_columns(data)
+    _check_pandas_columns(data, include_index=include_index)
 
     convert_index = include_index and not _pandas_has_default_index(data)
     if not convert_index and all(

diff --git a/py-polars/tests/unit/interop/test_from_pandas.py b/py-polars/tests/unit/interop/test_from_pandas.py
@@ -22,6 +22,17 @@ def test_index_not_silently_excluded() -> None:
         pl.from_pandas(df, include_index=True)
 
 
+def test_nameless_multiindex_doesnt_raise_with_include_index_false_18130() -> None:
+    df = pd.DataFrame(
+        range(4),
+        columns=["A"],
+        index=pd.MultiIndex.from_product((["C", "D"], [3, 4])),
+    )
+    result = pl.from_pandas(df)
+    expected = pl.DataFrame({"A": [0, 1, 2, 3]})
+    assert_frame_equal(result, expected)
+
+
 def test_from_pandas() -> None:
     df = pd.DataFrame(
         {