diff --git a/Orange/data/tests/test_util.py b/Orange/data/tests/test_util.py
index fc9cd0ef3fd..0006ea4773b 100644
--- a/Orange/data/tests/test_util.py
+++ b/Orange/data/tests/test_util.py
@@ -1,7 +1,8 @@
 import unittest
 
 from Orange.data import Domain, ContinuousVariable
-from Orange.data.util import get_unique_names, get_unique_names_duplicates
+from Orange.data.util import \
+    get_unique_names, get_unique_names_duplicates, get_unique_names_domain
 
 
 class TestGetUniqueNames(unittest.TestCase):
@@ -63,6 +64,56 @@ def test_get_unique_names_from_duplicates(self):
             get_unique_names_duplicates(["x", "", "", None, None, "x"]),
             ["x (1)", "", "", None, None, "x (2)"])
 
+    def test_get_unique_names_domain(self):
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain(["a", "t", "c", "t"], ["t", "d"], ["d", "e"])
+        self.assertEqual(attrs, ["a", "t (1)", "c", "t (2)"])
+        self.assertEqual(classes, ["t (3)", "d (1)"])
+        self.assertEqual(metas, ["d (2)", "e"])
+        self.assertEqual(renamed, ["t", "d"])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain(["a", "t", "c", "t"], ["t", "d"])
+        self.assertEqual(attrs, ["a", "t (1)", "c", "t (2)"])
+        self.assertEqual(classes, ["t (3)", "d"])
+        self.assertEqual(metas, [])
+        self.assertEqual(renamed, ["t"])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain(["a", "t", "c"])
+        self.assertEqual(attrs, ["a", "t", "c"])
+        self.assertEqual(classes, [])
+        self.assertEqual(metas, [])
+        self.assertEqual(renamed, [])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain(["a", "t", "d", "t"], [], ["d", "e"])
+        self.assertEqual(attrs, ["a", "t (1)", "d (1)", "t (2)"])
+        self.assertEqual(classes, [])
+        self.assertEqual(metas, ["d (2)", "e"])
+        self.assertEqual(renamed, ["t", "d"])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain([], ["t", "d"], ["d", "e"])
+        self.assertEqual(attrs, [])
+        self.assertEqual(classes, ["t", "d (1)"])
+        self.assertEqual(metas, ["d (2)", "e"])
+        self.assertEqual(renamed, ["d"])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain([], ["t", "t", "d"], [])
+        self.assertEqual(attrs, [])
+        self.assertEqual(classes, ["t (1)", "t (2)", "d"])
+        self.assertEqual(metas, [])
+        self.assertEqual(renamed, ["t"])
+
+        (attrs, classes, metas), renamed = \
+            get_unique_names_domain([], [], [])
+        self.assertEqual(attrs, [])
+        self.assertEqual(classes, [])
+        self.assertEqual(metas, [])
+        self.assertEqual(renamed, [])
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Orange/data/util.py b/Orange/data/util.py
index 033e342716e..273a6bb7602 100644
--- a/Orange/data/util.py
+++ b/Orange/data/util.py
@@ -221,3 +221,32 @@ def get_unique_names_duplicates(proposed: list) -> list:
             name = unique_name
         names.append(name)
     return names
+
+
+def get_unique_names_domain(attributes, class_vars=(), metas=()):
+    """
+    Return de-duplicated names for variables for attributes, class_vars
+    and metas. If a name appears more than once, the function appends
+    indices in parentheses.
+
+    Args:
+        attributes (list of str): proposed names for attributes
+        class_vars (list of str): proposed names for class_vars
+        metas (list of str): proposed names for metas
+
+    Returns:
+        (attributes, class_vars, metas): new names
+        renamed: list of original names that had to be renamed, in order of
+            first appearance in the original lists; each name appears only once
+    """
+    all_names = list(chain(attributes, class_vars, metas))
+    unique_names = get_unique_names_duplicates(all_names)
+    # don't be smart with negative indices: they won't work for empty lists
+    attributes = unique_names[:len(attributes)]
+    class_vars = unique_names[len(attributes):len(attributes) + len(class_vars)]
+    metas = unique_names[len(attributes) + len(class_vars):]
+    # use dict, not set, to keep the order
+    renamed = list(dict.fromkeys(old
+                                 for old, new in zip(all_names, unique_names)
+                                 if new != old))
+    return (attributes, class_vars, metas), renamed