Skip to content

Commit

Permalink
util.get_unique_names_duplicates: Fix duplication when indexed name already exists
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Feb 24, 2020
1 parent b334484 commit d9464f7
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 19 deletions.
21 changes: 20 additions & 1 deletion Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,29 @@ def test_get_unique_names_from_duplicates(self):
["x (2)", "x (3)", "x (1)"])
self.assertEqual(
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"]),
["x (2) (1)", "x (1)", "x (4)", "x (2) (2)", "x (3)"])
["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"])
self.assertEqual(
get_unique_names_duplicates(["iris", "iris", "iris (1)"]),
["iris (2)", "iris (3)", "iris (1)"])

self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz"], return_duplicated=True),
(["foo", "bar", "baz"], []))
self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz", "bar"], return_duplicated=True),
(["foo", "bar (1)", "baz", "bar (2)"], ["bar"]))
self.assertEqual(
get_unique_names_duplicates(["x", "x", "x (1)"], return_duplicated=True),
(["x (2)", "x (3)", "x (1)"], ["x"]))
self.assertEqual(
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"], return_duplicated=True),
(["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"], ["x (2)", "x"]))
self.assertEqual(
get_unique_names_duplicates(["x", "", "", None, None, "x"]),
["x (1)", "", "", None, None, "x (2)"])
self.assertEqual(
get_unique_names_duplicates(["iris", "iris", "iris (1)"], return_duplicated=True),
(["iris (2)", "iris (3)", "iris (1)"], ["iris"]))

def test_get_unique_names_domain(self):
(attrs, classes, metas), renamed = \
Expand Down
32 changes: 14 additions & 18 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Data-manipulation utilities.
"""
import re
from collections import Counter, defaultdict
from itertools import chain
from collections import Counter
from itertools import chain, count
from typing import Callable

import numpy as np
Expand Down Expand Up @@ -155,7 +155,7 @@ def get_indices(names, name):
:param name: str
:return: list of indices
"""
return [int(a.group(2)) for x in names
return [int(a.group(2)) for x in filter(None, names)
for a in re.finditer(RE_FIND_INDEX.format(name), x)]


Expand Down Expand Up @@ -203,26 +203,22 @@ def get_unique_names(names, proposed):
return [f"{name} ({max_index})" for name in proposed]


def get_unique_names_duplicates(proposed: list) -> list:
def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
"""
Returns list of unique names. If a name is duplicated, the
function appends the smallest available index in parentheses.
function appends the next available index in parentheses.
For example, a proposed list of names `x`, `x` and `x (2)`
results in `x (1)`, `x (3)`, `x (2)`.
results in `x (3)`, `x (4)`, `x (2)`.
"""
counter = Counter(proposed)
index = defaultdict(int)
names = []
for name in proposed:
if name and counter[name] > 1:
unique_name = name
while unique_name in counter:
index[name] += 1
unique_name = f"{name} ({index[name]})"
name = unique_name
names.append(name)
return names
indices = {name: count(max(get_indices(proposed, name), default=0) + 1)
for name, cnt in Counter(proposed).items()
if name and cnt > 1}
new_names = [f"{name} ({next(indices[name])})" if name in indices else name
for name in proposed]
if return_duplicated:
return new_names, list(indices)
return new_names


def get_unique_names_domain(attributes, class_vars=(), metas=()):
Expand Down

0 comments on commit d9464f7

Please sign in to comment.