Skip to content

Commit

Permalink
table_from_frames: ensure index is in metas if not range index
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Nov 8, 2021
1 parent fd9d708 commit bffbff9
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
13 changes: 8 additions & 5 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,13 @@ def table_from_frame(df, *, force_nominal=False):


def table_from_frames(xdf, ydf, mdf):
if not (xdf.index.equals(ydf.index) and xdf.index.equals(mdf.index)):
raise ValueError("Indexes not equal. Make sure that all three dataframes have equal index")

# Drop the index from x and y so that a non-range index is taken only from
# mdf and is therefore placed in metas.
xdf = xdf.reset_index(drop=True)
ydf = ydf.reset_index(drop=True)
dfs = xdf, ydf, mdf

if not all(df.shape[0] == xdf.shape[0] for df in dfs):
Expand All @@ -351,12 +358,8 @@ def table_from_frames(xdf, ydf, mdf):
XYM = (xXYM[0], yXYM[1], mXYM[2])
domain = Domain(xDomain.attributes, yDomain.class_vars, mDomain.metas)

indexes = [df.index for df in dfs]
ids = [
int(x[2:])
if str(x).startswith("_o") and x[2:].isdigit() and x == y == m
else Table.new_id()
for x, y, m in zip(*indexes)
int(idx[2:]) if str(idx).startswith("_o") and idx[2:].isdigit() else Table.new_id() for idx in mdf.index
]

attributes = {}
Expand Down
29 changes: 28 additions & 1 deletion Orange/data/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,33 @@ def test_table_from_frames_not_orange_dataframe(self):
self.assertEqual(y.columns[0], d.class_var.name)
self.assertListEqual(m.columns.tolist(), [a.name for a in d.metas])

def test_table_from_frames_same_index(self):
    """
    Check that a shared, non-range index is stored as a meta column and
    that frames with differing indexes are rejected with ValueError.
    """
    shared_index = np.array(["a", "b"])
    x_rows = [[1, 2, 3], [4, 5, 6]]
    y_rows = [[5], [6]]
    m_rows = [[1, 2], [4, 5]]
    x_cols = ["x1", "x2", "x3"]
    y_cols = ["y"]
    m_cols = ["m1", "m2"]

    x = pd.DataFrame(x_rows, columns=x_cols, index=shared_index)
    y = pd.DataFrame(y_rows, columns=y_cols, index=shared_index)
    m = pd.DataFrame(m_rows, columns=m_cols, index=shared_index)
    table = Table.from_pandas_dfs(x, y, m)

    # The index is expected as the first meta column, ahead of m's columns.
    expected_metas = np.hstack(
        (shared_index[:, None], m.values.astype("object"))
    )
    np.testing.assert_array_equal(x, table.X)
    np.testing.assert_array_equal(y.values.flatten(), table.Y)
    np.testing.assert_array_equal(expected_metas, table.metas)

    domain = table.domain
    self.assertListEqual(
        x.columns.tolist(), [var.name for var in domain.attributes]
    )
    self.assertEqual(y.columns[0], domain.class_var.name)
    self.assertListEqual(
        ["index"] + m.columns.tolist(), [var.name for var in domain.metas]
    )

    # Only y gets a different index; the conversion must refuse the input.
    other_index = np.array(["a", "c"])
    x = pd.DataFrame(x_rows, columns=x_cols, index=shared_index)
    y = pd.DataFrame(y_rows, columns=y_cols, index=other_index)
    m = pd.DataFrame(m_rows, columns=m_cols, index=shared_index)
    with self.assertRaises(ValueError):
        Table.from_pandas_dfs(x, y, m)


class TestTablePandas(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -593,7 +620,7 @@ def test_merge(self):
table3 = df3.to_orange_table()

self.assertEqual(len(table2), len(table3))
self.assertFalse(any(table3.W))
self.assertEqual(0, table3.W.size)
self.assertEqual(self.table.attributes, table3.attributes)

d1 = table2.domain
Expand Down

0 comments on commit bffbff9

Please sign in to comment.