Skip to content

Commit

Permalink
fix: Don't raise when converting from pandas if index contains duplicate names when `include_index=False` (the default) (#18133)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Aug 11, 2024
1 parent afd93b2 commit 4233d61
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
12 changes: 8 additions & 4 deletions py-polars/polars/_utils/construction/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,13 +1044,17 @@ def to_frame_chunk(values: list[Any], schema: SchemaDefinition | None) -> DataFr
return df._df


def _check_pandas_columns(data: pd.DataFrame) -> None:
def _check_pandas_columns(data: pd.DataFrame, *, include_index: bool) -> None:
"""Check pandas dataframe columns can be converted to polars."""
stringified_cols: set[str] = {str(col) for col in data.columns}
stringified_index: set[str] = {str(idx) for idx in data.index.names}
stringified_index: set[str] = (
{str(idx) for idx in data.index.names} if include_index else set()
)

non_unique_cols: bool = len(stringified_cols) < len(data.columns)
non_unique_indices: bool = len(stringified_index) < len(data.index.names)
non_unique_indices: bool = (
(len(stringified_index) < len(data.index.names)) if include_index else False
)
if non_unique_cols or non_unique_indices:
msg = (
"Pandas dataframe contains non-unique indices and/or column names. "
Expand All @@ -1075,7 +1079,7 @@ def pandas_to_pydf(
include_index: bool = False,
) -> PyDataFrame:
"""Construct a PyDataFrame from a pandas DataFrame."""
_check_pandas_columns(data)
_check_pandas_columns(data, include_index=include_index)

convert_index = include_index and not _pandas_has_default_index(data)
if not convert_index and all(
Expand Down
11 changes: 11 additions & 0 deletions py-polars/tests/unit/interop/test_from_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ def test_index_not_silently_excluded() -> None:
pl.from_pandas(df, include_index=True)


def test_nameless_multiindex_doesnt_raise_with_include_index_false_18130() -> None:
    """Convert a frame with an unnamed two-level MultiIndex using default arguments.

    The pandas frame has a single data column ``A`` and a MultiIndex whose
    levels carry no names. Calling ``pl.from_pandas`` without
    ``include_index`` must succeed and yield only the data column.
    """
    nameless_index = pd.MultiIndex.from_product((["C", "D"], [3, 4]))
    pandas_frame = pd.DataFrame(range(4), columns=["A"], index=nameless_index)

    assert_frame_equal(
        pl.from_pandas(pandas_frame),
        pl.DataFrame({"A": [0, 1, 2, 3]}),
    )


def test_from_pandas() -> None:
df = pd.DataFrame(
{
Expand Down

0 comments on commit 4233d61

Please sign in to comment.