diff --git a/Orange/data/tests/test_util.py b/Orange/data/tests/test_util.py index 48977ada259..4d9911e4778 100644 --- a/Orange/data/tests/test_util.py +++ b/Orange/data/tests/test_util.py @@ -48,6 +48,78 @@ def test_get_unique_names_with_domain(self): self.assertEqual(get_unique_names(domain, "foo"), "foo (1)") self.assertEqual(get_unique_names(domain, "baz"), "baz (4)") + def test_get_unique_names_not_equal(self): + names = ["foo", "bar", "baz", "baz (3)"] + self.assertEqual( + get_unique_names(names, ["qux"], equal_numbers=False), ["qux"] + ) + self.assertEqual( + get_unique_names(names, ["foo"], equal_numbers=False), ["foo (1)"] + ) + self.assertEqual( + get_unique_names(names, ["baz"], equal_numbers=False), ["baz (4)"] + ) + self.assertEqual( + get_unique_names(names, ["baz (3)"], equal_numbers=False), + ["baz (3) (1)"] + ) + self.assertEqual( + get_unique_names(names, ["qux", "quux"], equal_numbers=False), + ["qux", "quux"] + ) + self.assertEqual( + get_unique_names(names, ["bar", "baz"], equal_numbers=False), + ["bar (1)", "baz (4)"] + ) + self.assertEqual( + get_unique_names(names, ["qux", "baz"], equal_numbers=False), + ["qux", "baz (4)"] + ) + self.assertEqual( + get_unique_names(names, ["qux", "bar"], equal_numbers=False), + ["qux", "bar (1)"] + ) + self.assertEqual( + get_unique_names(names, ["foo", "bar", "baz"], equal_numbers=False), + ["foo (1)", "bar (1)", "baz (4)"] + ) + + a, b, c, d = map(ContinuousVariable, ["foo", "bar", "baz", "baz (3)"]) + domain = Domain([a, b], c, [d]) + self.assertEqual( + get_unique_names(names, ["qux"], equal_numbers=False), ["qux"] + ) + self.assertEqual( + get_unique_names(names, ["foo"], equal_numbers=False), ["foo (1)"] + ) + self.assertEqual( + get_unique_names(names, ["baz"], equal_numbers=False), ["baz (4)"] + ) + self.assertEqual( + get_unique_names(names, ["baz (3)"], equal_numbers=False), + ["baz (3) (1)"] + ) + self.assertEqual( + get_unique_names(domain, ["qux", "quux"], equal_numbers=False), + ["qux", "quux"] + ) + self.assertEqual( + get_unique_names(domain, ["bar", "baz"], equal_numbers=False), + ["bar (1)", "baz (4)"] + ) + self.assertEqual( + get_unique_names(domain, ["qux", "baz"], equal_numbers=False), + ["qux", "baz (4)"] + ) + self.assertEqual( + get_unique_names(domain, ["qux", "bar"], equal_numbers=False), + ["qux", "bar (1)"] + ) + self.assertEqual( + get_unique_names(domain, ["foo", "bar", "baz"], equal_numbers=False), + ["foo (1)", "bar (1)", "baz (4)"] + ) + def test_get_unique_names_from_duplicates(self): self.assertEqual( get_unique_names_duplicates(["foo", "bar", "baz"]), diff --git a/Orange/data/util.py b/Orange/data/util.py index 4976da8f0b1..f1b58874e8f 100644 --- a/Orange/data/util.py +++ b/Orange/data/util.py @@ -10,7 +10,7 @@ import bottleneck as bn from scipy import sparse as sp -RE_FIND_INDEX = r"(^{} \()(\d{{1,}})(\)$)" +RE_FIND_INDEX = r"(^{})( \((\d{{1,}})\))?$" def one_hot( @@ -167,11 +167,11 @@ def get_indices(names, name): :param name: str :return: list of indices """ - return [int(a.group(2)) for x in filter(None, names) + return [int(a.group(3) or 0) for x in filter(None, names) for a in re.finditer(RE_FIND_INDEX.format(re.escape(name)), x)] -def get_unique_names(names, proposed): +def get_unique_names(names, proposed, equal_numbers=True): """ Returns unique names for variables @@ -189,13 +189,15 @@ def get_unique_names(names, proposed): list. The method is used in widgets like MDS, which adds two variables (`x` and - `y`). It is desired that they have the same index. If `x`, `x (1)` and - `x (2)` and `y` (but no other `y`'s already exist in the domain, MDS - should append `x (3)` and `y (3)`, not `x (3)` and y (1)`. + `y`). It is desired that they have the same index. In case when + equal_numbers=True, if `x`, `x (1)` and `x (2)` and `y` (but no other + `y`'s already exist in the domain, MDS should append `x (3)` and `y (3)`, + not `x (3)` and y (1)`. Args: names (Domain or list of str): used names proposed (str or list of str): proposed name + equal_numbers (bool): Add same number to all proposed names Return: str or list of str @@ -206,13 +208,16 @@ def get_unique_names(names, proposed): names = [var.name for var in chain(names.variables, names.metas)] if isinstance(proposed, str): return get_unique_names(names, [proposed])[0] - indicess = [indices - for indices in (get_indices(names, name) for name in proposed) - if indices] - if not (set(proposed) & set(names) or indicess): + indices = {name: get_indices(names, name) for name in proposed} + indices = {name: max(ind) + 1 for name, ind in indices.items() if ind} + if not (set(proposed) & set(names) or indices): return proposed - max_index = max(map(max, indicess), default=0) + 1 - return [f"{name} ({max_index})" for name in proposed] + if equal_numbers: + max_index = max(indices.values()) + return [f"{name} ({max_index})" for name in proposed] + else: + return [f"{name} ({indices[name]})" if name in indices else name + for name in proposed] def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list: