From bffbff9fadafcff322b4269d355d926a73fa53b7 Mon Sep 17 00:00:00 2001
From: Primoz Godec
Date: Mon, 8 Nov 2021 13:12:16 +0100
Subject: [PATCH] table_from_frames: ensure index is in metas if not range
index
---
Orange/data/pandas_compat.py | 13 ++++++++-----
Orange/data/tests/test_pandas.py | 29 ++++++++++++++++++++++++++++-
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py
index 19f372e11f1..4e4a66562a1 100644
--- a/Orange/data/pandas_compat.py
+++ b/Orange/data/pandas_compat.py
@@ -338,6 +338,13 @@ def table_from_frame(df, *, force_nominal=False):
def table_from_frames(xdf, ydf, mdf):
+ if not (xdf.index.equals(ydf.index) and xdf.index.equals(mdf.index)):
+ raise ValueError("Indexes not equal. Make sure that all three dataframes have equal index")
+
+ # Drop the index from x and y; this makes sure that a non-range index is
+ # placed in metas.
+ xdf = xdf.reset_index(drop=True)
+ ydf = ydf.reset_index(drop=True)
dfs = xdf, ydf, mdf
if not all(df.shape[0] == xdf.shape[0] for df in dfs):
@@ -351,12 +358,8 @@ def table_from_frames(xdf, ydf, mdf):
XYM = (xXYM[0], yXYM[1], mXYM[2])
domain = Domain(xDomain.attributes, yDomain.class_vars, mDomain.metas)
- indexes = [df.index for df in dfs]
ids = [
- int(x[2:])
- if str(x).startswith("_o") and x[2:].isdigit() and x == y == m
- else Table.new_id()
- for x, y, m in zip(*indexes)
+ int(idx[2:]) if str(idx).startswith("_o") and idx[2:].isdigit() else Table.new_id() for idx in mdf.index
]
attributes = {}
diff --git a/Orange/data/tests/test_pandas.py b/Orange/data/tests/test_pandas.py
index cedda3cf698..4f49b60edec 100644
--- a/Orange/data/tests/test_pandas.py
+++ b/Orange/data/tests/test_pandas.py
@@ -457,6 +457,33 @@ def test_table_from_frames_not_orange_dataframe(self):
self.assertEqual(y.columns[0], d.class_var.name)
self.assertListEqual(m.columns.tolist(), [a.name for a in d.metas])
+ def test_table_from_frames_same_index(self):
+ """
+ Test that the index column is placed in metas. The function should fail
+ with ValueError when the indexes are different.
+ """
+ index = np.array(["a", "b"])
+ x = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["x1", "x2", "x3"], index=index)
+ y = pd.DataFrame([[5], [6]], columns=["y"], index=index)
+ m = pd.DataFrame([[1, 2], [4, 5]], columns=["m1", "m2"], index=index)
+ new_table = Table.from_pandas_dfs(x, y, m)
+
+ # index should be placed in metas
+ np.testing.assert_array_equal(x, new_table.X)
+ np.testing.assert_array_equal(y.values.flatten(), new_table.Y)
+ np.testing.assert_array_equal(np.hstack((index[:, None], m.values.astype("object"))), new_table.metas)
+ d = new_table.domain
+ self.assertListEqual(x.columns.tolist(), [a.name for a in d.attributes])
+ self.assertEqual(y.columns[0], d.class_var.name)
+ self.assertListEqual(["index"] + m.columns.tolist(), [a.name for a in d.metas])
+
+ index2 = np.array(["a", "c"])
+ x = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["x1", "x2", "x3"], index=index)
+ y = pd.DataFrame([[5], [6]], columns=["y"], index=index2)
+ m = pd.DataFrame([[1, 2], [4, 5]], columns=["m1", "m2"], index=index)
+ with self.assertRaises(ValueError):
+ Table.from_pandas_dfs(x, y, m)
+
class TestTablePandas(unittest.TestCase):
def setUp(self):
@@ -593,7 +620,7 @@ def test_merge(self):
table3 = df3.to_orange_table()
self.assertEqual(len(table2), len(table3))
- self.assertFalse(any(table3.W))
+ self.assertEqual(0, table3.W.size)
self.assertEqual(self.table.attributes, table3.attributes)
d1 = table2.domain