util.get_unique_names_duplicates: Fix duplication when indexed name already exists
janezd committed Feb 21, 2020
1 parent c690473 commit ed06e3f
Showing 2 changed files with 33 additions and 18 deletions.
21 changes: 20 additions & 1 deletion Orange/data/tests/test_util.py
@@ -59,10 +59,29 @@ def test_get_unique_names_from_duplicates(self):
             ["x (2)", "x (3)", "x (1)"])
         self.assertEqual(
             get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"]),
-            ["x (2) (1)", "x (1)", "x (4)", "x (2) (2)", "x (3)"])
+            ["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"])
         self.assertEqual(
             get_unique_names_duplicates(["iris", "iris", "iris (1)"]),
             ["iris (2)", "iris (3)", "iris (1)"])

+        self.assertEqual(
+            get_unique_names_duplicates(["foo", "bar", "baz"], return_duplicated=True),
+            (["foo", "bar", "baz"], []))
+        self.assertEqual(
+            get_unique_names_duplicates(["foo", "bar", "baz", "bar"], return_duplicated=True),
+            (["foo", "bar (1)", "baz", "bar (2)"], ["bar"]))
+        self.assertEqual(
+            get_unique_names_duplicates(["x", "x", "x (1)"], return_duplicated=True),
+            (["x (2)", "x (3)", "x (1)"], ["x"]))
+        self.assertEqual(
+            get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"], return_duplicated=True),
+            (["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"], ["x (2)", "x"]))
+        self.assertEqual(
+            get_unique_names_duplicates(["x", "", "", None, None, "x"]),
+            ["x (1)", "", "", None, None, "x (2)"])
+        self.assertEqual(
+            get_unique_names_duplicates(["iris", "iris", "iris (1)"], return_duplicated=True),
+            (["iris (2)", "iris (3)", "iris (1)"], ["iris"]))
+
     def test_get_unique_names_domain(self):
         (attrs, classes, metas), renamed = \
30 changes: 13 additions & 17 deletions Orange/data/util.py
@@ -3,7 +3,7 @@
 """
 import re
 from collections import Counter, defaultdict
-from itertools import chain
+from itertools import chain, count

 import numpy as np
 import bottleneck as bn
@@ -153,7 +153,7 @@ def get_indices(names, name):
     :param name: str
     :return: list of indices
     """
-    return [int(a.group(2)) for x in names
+    return [int(a.group(2)) for x in filter(None, names)
             for a in re.finditer(RE_FIND_INDEX.format(name), x)]
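
For reference, a minimal usage sketch of get_indices after this change (not part of the commit; it assumes an installed Orange so the helper can be imported from Orange.data.util). Because of filter(None, names), empty strings and None entries are now skipped rather than handed to re.finditer:

    from Orange.data.util import get_indices

    # Collects the numeric suffixes of entries of the form "name (i)";
    # falsy entries ("" and None) are ignored by filter(None, names).
    print(get_indices(["x", "x (2)", "x (4)", "", None], "x"))  # -> [2, 4]
    print(get_indices(["x", "x (2)", "x (4)", "", None], "y"))  # -> []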


@@ -201,26 +201,22 @@ def get_unique_names(names, proposed):
     return [f"{name} ({max_index})" for name in proposed]


-def get_unique_names_duplicates(proposed: list) -> list:
+def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
     """
     Returns list of unique names. If a name is duplicated, the
-    function appends the smallest available index in parentheses.
+    function appends the next available index in parentheses.
     For example, a proposed list of names `x`, `x` and `x (2)`
-    results in `x (1)`, `x (3)`, `x (2)`.
+    results in `x (3)`, `x (4)`, `x (2)`.
     """
-    counter = Counter(proposed)
-    index = defaultdict(int)
-    names = []
-    for name in proposed:
-        if name and counter[name] > 1:
-            unique_name = name
-            while unique_name in counter:
-                index[name] += 1
-                unique_name = f"{name} ({index[name]})"
-            name = unique_name
-        names.append(name)
-    return names
+    indices = {name: count(max(get_indices(proposed, name), default=0) + 1)
+               for name, cnt in Counter(proposed).items()
+               if name and cnt > 1}
+    new_names = [f"{name} ({next(indices[name])})" if name in indices else name
+                 for name in proposed]
+    if return_duplicated:
+        return new_names, list(indices)
+    return new_names
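
For reference, a minimal sketch of the reworked function in action (not part of the commit; it assumes an installed Orange so the function can be imported from Orange.data.util). The expected outputs mirror the assertions added to test_util.py above:

    from Orange.data.util import get_unique_names_duplicates

    # Duplicated names get the next index past any existing "name (i)" entry.
    print(get_unique_names_duplicates(["x", "x", "x (1)"]))
    # -> ['x (2)', 'x (3)', 'x (1)']

    # Indexed names that are themselves duplicated are re-indexed as well.
    print(get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"]))
    # -> ['x (2) (1)', 'x (4)', 'x (5)', 'x (2) (2)', 'x (3)']

    # With return_duplicated=True, the names that clashed are also returned.
    print(get_unique_names_duplicates(["foo", "bar", "baz", "bar"],
                                      return_duplicated=True))
    # -> (['foo', 'bar (1)', 'baz', 'bar (2)'], ['bar'])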


def get_unique_names_domain(attributes, class_vars=(), metas=()):
