diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index 52bbed150fb..788c06394c0 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -348,6 +348,15 @@ def table_from_frame(df, *, force_nominal=False): def table_from_frames(xdf, ydf, mdf): + if not (xdf.index.equals(ydf.index) and xdf.index.equals(mdf.index)): + raise ValueError( + "Indexes not equal. Make sure that all three dataframes have equal index" + ) + + # drop index from x and y - it makes sure that index if not range will be + # placed in metas + xdf = xdf.reset_index(drop=True) + ydf = ydf.reset_index(drop=True) dfs = xdf, ydf, mdf if not all(df.shape[0] == xdf.shape[0] for df in dfs): @@ -361,12 +370,11 @@ def table_from_frames(xdf, ydf, mdf): XYM = (xXYM[0], yXYM[1], mXYM[2]) domain = Domain(xDomain.attributes, yDomain.class_vars, mDomain.metas) - indexes = [df.index for df in dfs] ids = [ - int(x[2:]) - if str(x).startswith("_o") and x[2:].isdigit() and x == y == m + int(idx[2:]) + if str(idx).startswith("_o") and idx[2:].isdigit() else Table.new_id() - for x, y, m in zip(*indexes) + for idx in mdf.index ] attributes = {} diff --git a/Orange/data/tests/test_pandas.py b/Orange/data/tests/test_pandas.py index 1f4bd4cc050..4eb7371fc65 100644 --- a/Orange/data/tests/test_pandas.py +++ b/Orange/data/tests/test_pandas.py @@ -476,6 +476,39 @@ def test_table_from_frames_not_orange_dataframe(self): self.assertEqual(y.columns[0], d.class_var.name) self.assertListEqual(m.columns.tolist(), [a.name for a in d.metas]) + def test_table_from_frames_same_index(self): + """ + Test that index column is placed in metas. Function should fail + with ValueError when indexes are different + """ + index = np.array(["a", "b"]) + x = pd.DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=["x1", "x2", "x3"], index=index + ) + y = pd.DataFrame([[5], [6]], columns=["y"], index=index) + m = pd.DataFrame([[1, 2], [4, 5]], columns=["m1", "m2"], index=index) + new_table = Table.from_pandas_dfs(x, y, m) + + # index should be placed in metas + np.testing.assert_array_equal(x, new_table.X) + np.testing.assert_array_equal(y.values.flatten(), new_table.Y) + np.testing.assert_array_equal( + np.hstack((index[:, None], m.values.astype("object"))), new_table.metas + ) + d = new_table.domain + self.assertListEqual(x.columns.tolist(), [a.name for a in d.attributes]) + self.assertEqual(y.columns[0], d.class_var.name) + self.assertListEqual(["index"] + m.columns.tolist(), [a.name for a in d.metas]) + + index2 = np.array(["a", "c"]) + x = pd.DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=["x1", "x2", "x3"], index=index + ) + y = pd.DataFrame([[5], [6]], columns=["y"], index=index2) + m = pd.DataFrame([[1, 2], [4, 5]], columns=["m1", "m2"], index=index) + with self.assertRaises(ValueError): + Table.from_pandas_dfs(x, y, m) + class TestTablePandas(unittest.TestCase): def setUp(self): @@ -612,7 +645,7 @@ def test_merge(self): table3 = df3.to_orange_table() self.assertEqual(len(table2), len(table3)) - self.assertFalse(any(table3.W)) + self.assertEqual(0, table3.W.size) self.assertEqual(self.table.attributes, table3.attributes) d1 = table2.domain