Skip to content

Commit

Permalink
Merge pull request #4866 from PrimozGodec/ge-unique-nonequal-numbers
Browse files Browse the repository at this point in the history
[ENH] get_unique_names: Handle more independent names
  • Loading branch information
janezd authored Jun 26, 2020
2 parents 03a1609 + dc24036 commit 5031956
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 12 deletions.
72 changes: 72 additions & 0 deletions Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,78 @@ def test_get_unique_names_with_domain(self):
self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")

def test_get_unique_names_not_equal(self):
    """Check `get_unique_names` called with ``equal_numbers=False``.

    In this mode every proposed name is expected to receive its own
    index: names that do not clash with the used names stay unchanged,
    while clashing names get the first index above the ones already
    used for that particular name.

    The same cases are run against the used names given as a plain
    list of strings and as a `Domain` whose attributes, class variable
    and meta attribute carry those same names.
    """
    names = ["foo", "bar", "baz", "baz (3)"]
    a, b, c, d = map(ContinuousVariable, names)
    domain = Domain([a, b], c, [d])

    # (proposed names, expected unique names)
    cases = [
        (["qux"], ["qux"]),
        (["foo"], ["foo (1)"]),
        (["baz"], ["baz (4)"]),
        (["baz (3)"], ["baz (3) (1)"]),
        (["qux", "quux"], ["qux", "quux"]),
        (["bar", "baz"], ["bar (1)", "baz (4)"]),
        (["qux", "baz"], ["qux", "baz (4)"]),
        (["qux", "bar"], ["qux", "bar (1)"]),
        (["foo", "bar", "baz"], ["foo (1)", "bar (1)", "baz (4)"]),
    ]

    # Every case must hold for both kinds of `names` argument; the
    # single-name cases previously exercised only the plain list.
    for used in (names, domain):
        for proposed, expected in cases:
            with self.subTest(names=type(used).__name__, proposed=proposed):
                self.assertEqual(
                    get_unique_names(used, proposed, equal_numbers=False),
                    expected
                )

def test_get_unique_names_from_duplicates(self):
self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz"]),
Expand Down
29 changes: 17 additions & 12 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import bottleneck as bn
from scipy import sparse as sp

RE_FIND_INDEX = r"(^{} \()(\d{{1,}})(\)$)"
RE_FIND_INDEX = r"(^{})( \((\d{{1,}})\))?$"


def one_hot(
Expand Down Expand Up @@ -167,11 +167,11 @@ def get_indices(names, name):
:param name: str
:return: list of indices
"""
return [int(a.group(2)) for x in filter(None, names)
return [int(a.group(3) or 0) for x in filter(None, names)
for a in re.finditer(RE_FIND_INDEX.format(re.escape(name)), x)]


def get_unique_names(names, proposed):
def get_unique_names(names, proposed, equal_numbers=True):
"""
Returns unique names for variables
Expand All @@ -189,13 +189,15 @@ def get_unique_names(names, proposed):
list.
The method is used in widgets like MDS, which adds two variables (`x` and
`y`). It is desired that they have the same index. If `x`, `x (1)` and
`x (2)` and `y` (but no other `y`'s already exist in the domain, MDS
should append `x (3)` and `y (3)`, not `x (3)` and y (1)`.
`y`). It is desired that they have the same index. When
`equal_numbers=True`, if `x`, `x (1)` and `x (2)` and `y` (but no other
`y`'s) already exist in the domain, MDS should append `x (3)` and `y (3)`,
not `x (3)` and `y (1)`.
Args:
names (Domain or list of str): used names
proposed (str or list of str): proposed name
equal_numbers (bool): Add same number to all proposed names
Return:
str or list of str
Expand All @@ -206,13 +208,16 @@ def get_unique_names(names, proposed):
names = [var.name for var in chain(names.variables, names.metas)]
if isinstance(proposed, str):
return get_unique_names(names, [proposed])[0]
indicess = [indices
for indices in (get_indices(names, name) for name in proposed)
if indices]
if not (set(proposed) & set(names) or indicess):
indices = {name: get_indices(names, name) for name in proposed}
indices = {name: max(ind) + 1 for name, ind in indices.items() if ind}
if not (set(proposed) & set(names) or indices):
return proposed
max_index = max(map(max, indicess), default=0) + 1
return [f"{name} ({max_index})" for name in proposed]
if equal_numbers:
max_index = max(indices.values())
return [f"{name} ({max_index})" for name in proposed]
else:
return [f"{name} ({indices[name]})" if name in indices else name
for name in proposed]


def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
Expand Down

0 comments on commit 5031956

Please sign in to comment.