Skip to content

Commit

Permalink
Merge #207
Browse files Browse the repository at this point in the history
207: Dequantify duplicate cols r=andrewgsavage a=MichaelTiemannOSC

Iterate over the columns by position (via `enumerate`) so that duplicate column names are handled.  This does not preserve default column names when dequantifying, but it no longer fails.  Should `pint.dequantify()` also preserve duplicated column names?

- [x] Closes #202 
- [x] Executed `pre-commit run --all-files` with no errors
- [x] The change is fully covered by automated unit tests
- [x] Documented in docs/ as appropriate
- [x] Added an entry to the CHANGES file


Co-authored-by: Michael Tiemann <[email protected]>
  • Loading branch information
bors[bot] and MichaelTiemannOSC authored Oct 28, 2023
2 parents 2e087e6 + 2808bb8 commit 34c4b03
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pint-pandas Changelog
0.6 (unreleased)
----------------

- Fix dequantify duplicate column failure #202
- Fix astype issue #196


Expand Down
33 changes: 22 additions & 11 deletions pint_pandas/pint_array.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import re
import warnings
from collections import OrderedDict
from importlib.metadata import version

import numpy as np
Expand Down Expand Up @@ -981,23 +980,35 @@ def formatter_func(dtype):

df_columns = df.columns.to_frame()
df_columns["units"] = [
formatter_func(df[col].dtype)
if isinstance(df[col].dtype, PintType)
formatter_func(df.dtypes.iloc[i])
if isinstance(df.dtypes.iloc[i], PintType)
else NO_UNIT
for col in df.columns
for i, col in enumerate(df.columns)
]

data_for_df = OrderedDict()
data_for_df = []
for i, col in enumerate(df.columns):
if isinstance(df[col].dtype, PintType):
data_for_df[tuple(df_columns.iloc[i])] = df[col].values.data
if isinstance(df.dtypes.iloc[i], PintType):
data_for_df.append(
pd.Series(
data=df.iloc[:, i].values.data,
name=tuple(df_columns.iloc[i]),
index=df.index,
copy=False,
)
)
else:
data_for_df[tuple(df_columns.iloc[i])] = df[col].values

df_new = DataFrame(data_for_df, columns=data_for_df.keys())
data_for_df.append(
pd.Series(
data=df.iloc[:, i].values,
name=tuple(df_columns.iloc[i]),
index=df.index,
copy=False,
)
)

df_new = pd.concat(data_for_df, axis=1, copy=False)
df_new.columns.names = df.columns.names + ["unit"]
df_new.index = df.index

return df_new

Expand Down
35 changes: 35 additions & 0 deletions pint_pandas/testsuite/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,38 @@ def test_issue_194(dtype):
s2 = s1.astype(dtype)

tm.assert_series_equal(s0, s2)


class TestIssue202(BaseExtensionTests):
    """Checks for issue #202: dequantifying frames with duplicated column labels."""

    @staticmethod
    def _tight_frame(column_keys, rows):
        """Return an ``Int64`` frame built from a "tight"-oriented dict.

        ``column_keys`` is the list of ``(name, unit)`` tuples for the
        columns and ``rows`` is the row-major cell data; the index is
        always ``[0, 1, 2]`` and the column levels are ``[None, "unit"]``.
        """
        spec = {
            "index": [0, 1, 2],
            "columns": column_keys,
            "data": rows,
            "index_names": [None],
            "column_names": [None, "unit"],
        }
        return pd.DataFrame.from_dict(spec, orient="tight", dtype="Int64")

    def test_dequantify(self):
        # Build a frame holding the same kilonewton column twice under the
        # identical label "test".
        frame = pd.DataFrame()
        frame["test"] = pd.Series([1, 2, 3], dtype="pint[kN]")
        frame.insert(0, "test", frame["test"], allow_duplicates=True)

        key = ("test", "kilonewton")

        # Positional slice that keeps only the second of the two columns.
        want_single = self._tight_frame([key], [[1], [2], [3]])
        got_single = frame.iloc[:, 1:].pint.dequantify()
        tm.assert_frame_equal(want_single, got_single)

        # The whole frame, with both identically-labelled columns present.
        want_both = self._tight_frame([key, key], [[1, 1], [2, 2], [3, 3]])
        got_both = frame.pint.dequantify()
        tm.assert_frame_equal(want_both, got_both)

0 comments on commit 34c4b03

Please sign in to comment.