Skip to content

Commit

Permalink
util.get_unique_names_duplicates: Fix duplication when indexed name already exists
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Feb 27, 2020
1 parent 9eac7d1 commit 87517bd
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 20 deletions.
31 changes: 30 additions & 1 deletion Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,39 @@ def test_get_unique_names_from_duplicates(self):
["x (2)", "x (3)", "x (1)"])
self.assertEqual(
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"]),
["x (2) (1)", "x (1)", "x (4)", "x (2) (2)", "x (3)"])
["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"])
self.assertEqual(
get_unique_names_duplicates(["iris", "iris", "iris (1)"]),
["iris (2)", "iris (3)", "iris (1)"])

self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz"], return_duplicated=True),
(["foo", "bar", "baz"], []))
self.assertEqual(
get_unique_names_duplicates(["foo", "bar", "baz", "bar"], return_duplicated=True),
(["foo", "bar (1)", "baz", "bar (2)"], ["bar"]))
self.assertEqual(
get_unique_names_duplicates(["x", "x", "x (1)"], return_duplicated=True),
(["x (2)", "x (3)", "x (1)"], ["x"]))
self.assertEqual(
get_unique_names_duplicates(["x (2)", "x", "x", "x (2)", "x (3)"], return_duplicated=True),
(["x (2) (1)", "x (4)", "x (5)", "x (2) (2)", "x (3)"], ["x (2)", "x"]))
self.assertEqual(
get_unique_names_duplicates(["x", "", "", None, None, "x"]),
["x (1)", "", "", None, None, "x (2)"])
self.assertEqual(
get_unique_names_duplicates(["iris", "iris", "iris (1)"], return_duplicated=True),
(["iris (2)", "iris (3)", "iris (1)"], ["iris"]))

self.assertEqual(
get_unique_names_duplicates(["iris (1) (1)", "iris (1)", "iris (1)"]),
["iris (1) (1)", "iris (1) (2)", "iris (1) (3)"]
)

self.assertEqual(
get_unique_names_duplicates(["iris (1) (1)", "iris (1)", "iris (1)", "iris", "iris"]),
["iris (1) (1)", "iris (1) (2)", "iris (1) (3)", "iris (2)", "iris (3)"]
)

def test_get_unique_names_domain(self):
(attrs, classes, metas), renamed = \
Expand Down
34 changes: 15 additions & 19 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Data-manipulation utilities.
"""
import re
from collections import Counter, defaultdict
from itertools import chain
from collections import Counter
from itertools import chain, count
from typing import Callable

import numpy as np
Expand Down Expand Up @@ -155,8 +155,8 @@ def get_indices(names, name):
:param name: str
:return: list of indices
"""
return [int(a.group(2)) for x in names
for a in re.finditer(RE_FIND_INDEX.format(name), x)]
return [int(a.group(2)) for x in filter(None, names)
for a in re.finditer(RE_FIND_INDEX.format(re.escape(name)), x)]


def get_unique_names(names, proposed):
Expand Down Expand Up @@ -203,26 +203,22 @@ def get_unique_names(names, proposed):
return [f"{name} ({max_index})" for name in proposed]


def get_unique_names_duplicates(proposed: list) -> list:
def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list:
"""
Returns list of unique names. If a name is duplicated, the
function appends the smallest available index in parentheses.
function appends the next available index in parentheses.
For example, a proposed list of names `x`, `x` and `x (2)`
results in `x (1)`, `x (3)`, `x (2)`.
results in `x (3)`, `x (4)`, `x (2)`.
"""
counter = Counter(proposed)
index = defaultdict(int)
names = []
for name in proposed:
if name and counter[name] > 1:
unique_name = name
while unique_name in counter:
index[name] += 1
unique_name = f"{name} ({index[name]})"
name = unique_name
names.append(name)
return names
indices = {name: count(max(get_indices(proposed, name), default=0) + 1)
for name, cnt in Counter(proposed).items()
if name and cnt > 1}
new_names = [f"{name} ({next(indices[name])})" if name in indices else name
for name in proposed]
if return_duplicated:
return new_names, list(indices)
return new_names


def get_unique_names_domain(attributes, class_vars=(), metas=()):
Expand Down

0 comments on commit 87517bd

Please sign in to comment.