Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function data.util.get_unique_names_domain #4444

Merged
merged 1 commit into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import unittest

from Orange.data import Domain, ContinuousVariable
from Orange.data.util import get_unique_names, get_unique_names_duplicates
from Orange.data.util import \
get_unique_names, get_unique_names_duplicates, get_unique_names_domain


class TestGetUniqueNames(unittest.TestCase):
Expand Down Expand Up @@ -63,6 +64,56 @@ def test_get_unique_names_from_duplicates(self):
get_unique_names_duplicates(["x", "", "", None, None, "x"]),
["x (1)", "", "", None, None, "x (2)"])

def test_get_unique_names_domain(self):
    """Check name de-duplication across attributes, class_vars and metas.

    Every case lists the positional arguments passed to
    ``get_unique_names_domain`` followed by the expected new attribute
    names, class variable names, meta names and the list of renamed names.
    Some cases omit trailing arguments on purpose to exercise the defaults.
    """
    cases = [
        # duplicates within and across all three groups
        ((["a", "t", "c", "t"], ["t", "d"], ["d", "e"]),
         ["a", "t (1)", "c", "t (2)"],
         ["t (3)", "d (1)"],
         ["d (2)", "e"],
         ["t", "d"]),
        # metas omitted
        ((["a", "t", "c", "t"], ["t", "d"]),
         ["a", "t (1)", "c", "t (2)"],
         ["t (3)", "d"],
         [],
         ["t"]),
        # only attributes given, nothing to rename
        ((["a", "t", "c"],),
         ["a", "t", "c"],
         [],
         [],
         []),
        # empty class_vars between attributes and metas
        ((["a", "t", "d", "t"], [], ["d", "e"]),
         ["a", "t (1)", "d (1)", "t (2)"],
         [],
         ["d (2)", "e"],
         ["t", "d"]),
        # empty attributes
        (([], ["t", "d"], ["d", "e"]),
         [],
         ["t", "d (1)"],
         ["d (2)", "e"],
         ["d"]),
        # duplicates within class_vars only
        (([], ["t", "t", "d"], []),
         [],
         ["t (1)", "t (2)", "d"],
         [],
         ["t"]),
        # everything empty
        (([], [], []),
         [],
         [],
         [],
         []),
    ]

    for args, exp_attrs, exp_classes, exp_metas, exp_renamed in cases:
        (attrs, classes, metas), renamed = get_unique_names_domain(*args)
        self.assertEqual(attrs, exp_attrs)
        self.assertEqual(classes, exp_classes)
        self.assertEqual(metas, exp_metas)
        self.assertEqual(renamed, exp_renamed)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
29 changes: 29 additions & 0 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,32 @@ def get_unique_names_duplicates(proposed: list) -> list:
name = unique_name
names.append(name)
return names


def get_unique_names_domain(attributes, class_vars=(), metas=()):
    """
    De-duplicate proposed variable names across a whole domain.

    The names of attributes, class variables and metas are treated as one
    sequence (in that order) and passed to `get_unique_names_duplicates`;
    the result is then split back into the three groups.

    Args:
        attributes (list of str): proposed names for attributes
        class_vars (list of str): proposed names for class_vars
        metas (list of str): proposed names for metas

    Returns:
        (attributes, class_vars, metas): lists with the new names
        renamed: list of original names that were changed; names are
            listed in order of their first appearance in the original
            lists, and every name is listed only once
    """
    proposed = [*attributes, *class_vars, *metas]
    deduplicated = get_unique_names_duplicates(proposed)

    # Split by explicit positive offsets; negative indices would give
    # wrong slices when one of the groups is empty.
    attrs_end = len(attributes)
    classes_end = attrs_end + len(class_vars)
    new_attributes = deduplicated[:attrs_end]
    new_class_vars = deduplicated[attrs_end:classes_end]
    new_metas = deduplicated[classes_end:]

    # A dict (rather than a set) keeps the order of first appearance.
    changed = {}
    for before, after in zip(proposed, deduplicated):
        if after != before:
            changed.setdefault(before, None)

    return (new_attributes, new_class_vars, new_metas), list(changed)