Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Unique domain checks #4760

Merged
merged 14 commits into from
May 21, 2020
Merged
43 changes: 29 additions & 14 deletions Orange/evaluation/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
# pylint: disable=arguments-differ
from warnings import warn
from collections import namedtuple
from itertools import chain
from time import time

import numpy as np

import sklearn.model_selection as skl

from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable
from Orange.data.util import get_unique_names

__all__ = ["Results", "CrossValidation", "LeaveOneOut", "TestOnTrainingData",
"ShuffleSplit", "TestOnTestData", "sample", "CrossValidationFeature"]
Expand Down Expand Up @@ -259,42 +261,44 @@ def get_augmented_data(self, model_names,
assert self.predicted.shape[0] == len(model_names)

data = self.data[self.row_indices]
class_var = data.domain.class_var
domain = data.domain
class_var = domain.class_var
classification = class_var and class_var.is_discrete

new_meta_attr = []
new_meta_vals = np.empty((len(data), 0))
names = [var.name for var in chain(domain.attributes,
domain.metas,
[class_var])]

if classification:
# predictions
if include_predictions:
new_meta_attr += (
DiscreteVariable(name=name, values=class_var.values)
for name in model_names)
uniq_new, names = self.create_unique_vars(names, model_names, class_var.values)
new_meta_attr += uniq_new
new_meta_vals = np.hstack((new_meta_vals, self.predicted.T))

# probabilities
if include_probabilities:
for name in model_names:
new_meta_attr += (
ContinuousVariable(name=f"{name} ({value})")
for value in class_var.values)
proposed = [f"{name} ({value})" for name in model_names for value in class_var.values]

uniq_new, names = self.create_unique_vars(names, proposed)
new_meta_attr += uniq_new

for i in self.probabilities:
new_meta_vals = np.hstack((new_meta_vals, i))

elif include_predictions:
# regression
new_meta_attr += (ContinuousVariable(name=name)
for name in model_names)
uniq_new, names = self.create_unique_vars(names, model_names)
new_meta_attr += uniq_new
new_meta_vals = np.hstack((new_meta_vals, self.predicted.T))

# add fold info
if self.folds is not None:
new_meta_attr.append(
DiscreteVariable(
name="Fold",
values=[str(i + 1) for i in range(len(self.folds))]))
values = [str(i + 1) for i in range(len(self.folds))]
uniq_new, names = self.create_unique_vars(names, ["Fold"], values)
new_meta_attr += uniq_new
fold = np.empty((len(data), 1))
for i, s in enumerate(self.folds):
fold[s, 0] = i
Expand All @@ -311,6 +315,17 @@ def get_augmented_data(self, model_names,
predictions.name = data.name
return predictions

def create_unique_vars(self, names, proposed_names, values=()):
    """
    Create one variable per proposed name, avoiding clashes with `names`.

    Each proposed name is passed through `get_unique_names` together with
    the names collected so far, and the name it returns is used for the
    new variable (presumably the proposal itself, or a suffixed variant
    when it is already taken).

    Parameters
    ----------
    names : list of str
        Names that are already in use. The list is extended in place with
        every name chosen here.
    proposed_names : iterable of str
        Desired names for the new variables.
    values : sequence, optional
        When non-empty, discrete variables with these values are created;
        otherwise continuous variables are created.

    Returns
    -------
    tuple
        `(variables, names)`: the list of created variables and the same
        `names` list that was passed in, now including the new names.
    """
    created = []
    for candidate in proposed_names:
        fresh_name = get_unique_names(names, candidate)
        variable = (DiscreteVariable(fresh_name, values) if values
                    else ContinuousVariable(fresh_name))
        created.append(variable)
        # Reserve the name so later proposals cannot collide with it.
        names.append(fresh_name)
    return created, names

def split_by_model(self):
"""
Split evaluation results by models.
Expand Down
7 changes: 6 additions & 1 deletion Orange/projection/manifold.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import warnings
from collections import Iterable
from itertools import chain

import numpy as np
import scipy.sparse as sp
Expand All @@ -10,6 +11,7 @@

import Orange
from Orange.data import Table, Domain, ContinuousVariable
from Orange.data.util import get_unique_names
from Orange.distance import Distance, DistanceModel, Euclidean
from Orange.projection import SklProjector, Projector, Projection
from Orange.projection.base import TransformDomain, ComputeValueProjector
Expand Down Expand Up @@ -510,7 +512,10 @@ def convert_embedding_to_model(self, data, embedding):
# need the full embedding attributes and is cast into a regular array
n = self.n_components
postfixes = ["x", "y"] if n == 2 else list(range(1, n + 1))
tsne_cols = [ContinuousVariable(f"t-SNE-{p}") for p in postfixes]
names = [var.name for var in chain(data.domain.class_vars, data.domain.metas) if var]
proposed = [(f"t-SNE-{p}") for p in postfixes]
uniq_names = get_unique_names(names, proposed)
tsne_cols = [ContinuousVariable(name) for name in uniq_names]
embedding_domain = Domain(tsne_cols, data.domain.class_vars, data.domain.metas)
embedding_table = Table(embedding_domain, embedding.view(np.ndarray), data.Y, data.metas)

Expand Down
39 changes: 28 additions & 11 deletions Orange/widgets/data/owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@
QSizePolicy, QAbstractItemView, QComboBox, QFormLayout, QLineEdit,
QHBoxLayout, QVBoxLayout, QStackedWidget, QStyledItemDelegate,
QPushButton, QMenu, QListView, QFrame, QLabel)
from AnyQt.QtGui import QKeySequence
from AnyQt.QtGui import QKeySequence, QColor
from AnyQt.QtCore import Qt, pyqtSignal as Signal, pyqtProperty as Property
from orangewidget.utils.combobox import ComboBoxSearch

import Orange
from Orange.data.util import get_unique_names
from Orange.widgets import gui
from Orange.widgets.settings import ContextSetting, DomainContextHandler
from Orange.widgets.utils import itemmodels, vartype
Expand All @@ -55,6 +56,7 @@

StringDescriptor = namedtuple("StringDescriptor", ["name", "expression"])

#warningIcon = gui.createAttributePixmap('!', QColor((202, 0, 32)))

def make_variable(descriptor, compute_value):
if isinstance(descriptor, ContinuousDescriptor):
Expand Down Expand Up @@ -390,6 +392,10 @@ class Error(OWWidget.Error):
more_values_needed = Msg("Categorical feature {} needs more values.")
invalid_expressions = Msg("Invalid expressions: {}.")

class Warning(OWWidget.Warning):
    # Shown when an edited feature's name had to be replaced by a unique one.
    renamed_var = Msg(
        "Recently added variable has been renamed, to avoid duplicates.\n")

def __init__(self):
super().__init__()
self.data = None
Expand Down Expand Up @@ -427,16 +433,8 @@ def unique_name(fmt, reserved):
candidates = (fmt.format(i) for i in count(1))
return next(c for c in candidates if c not in reserved)

def reserved_names():
varnames = []
if self.data is not None:
varnames = [var.name for var in
self.data.domain.variables + self.data.domain.metas]
varnames += [desc.name for desc in self.featuremodel]
return set(varnames)

def generate_newname(fmt):
return unique_name(fmt, reserved_names())
return unique_name(fmt, self.reserved_names())

menu = QMenu(self.addbutton)
cont = menu.addAction("Numeric")
Expand Down Expand Up @@ -531,8 +529,18 @@ def _on_selectedVariableChanged(self, selected, *_):

def _on_modified(self):
    """
    Store the currently edited descriptor in the model under a unique name.

    Does nothing when no feature is selected (`currentIndex` < 0).
    Otherwise the descriptor is read from the active editor once, its name
    is checked against all reserved names except the one at the current
    index, and, if `get_unique_names` returns a different name, the
    descriptor is rebuilt with that name and the `renamed_var` warning is
    shown. The model and `self.descriptors` are then updated.
    """
    if self.currentIndex < 0:
        return

    self.Warning.clear()
    # Read the editor state once; the model is written a single time below,
    # only after the name has been made unique.
    feature = self.editorstack.currentWidget().editorData()
    unique = get_unique_names(
        self.reserved_names(self.currentIndex), feature.name)

    if feature.name != unique:
        self.Warning.renamed_var()
        # Rebuild the same descriptor type with only the first field (the
        # name) replaced; the remaining fields are carried over unchanged.
        feature = feature.__class__(unique, *feature[1:])

    self.featuremodel[self.currentIndex] = feature
    self.descriptors = list(self.featuremodel)

def setDescriptors(self, descriptors):
Expand All @@ -542,6 +550,15 @@ def setDescriptors(self, descriptors):
self.descriptors = descriptors
self.featuremodel[:] = list(self.descriptors)

def reserved_names(self, idx_=None):
    """
    Return the set of names that a new or edited feature must not reuse.

    The set contains the names of all variables and metas of the input
    data (when data is present) and the names of all descriptors in
    `self.featuremodel`, except the descriptor at position `idx_`.

    Parameters
    ----------
    idx_ : int, optional
        Index of a descriptor to leave out, e.g. the one being edited.
        With the default `None`, no descriptor is left out.

    Returns
    -------
    set of str
    """
    taken = set()
    if self.data is not None:
        domain = self.data.domain
        taken.update(var.name for var in domain.variables + domain.metas)
    taken.update(desc.name
                 for position, desc in enumerate(self.featuremodel)
                 if position != idx_)
    return taken

@Inputs.data
@check_sql_input
def setData(self, data=None):
Expand Down
11 changes: 11 additions & 0 deletions Orange/widgets/data/tests/test_owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,17 @@ def test_error_invalid_expression(self):
self.widget.apply()
self.assertTrue(self.widget.Error.invalid_expressions.is_shown())

def test_renaming_duplicate_vars(self):
    """
    Output variable names stay unique when a feature named "iris" is added.
    """
    widget = self.widget
    widget.setData(Table("iris"))
    # NOTE(review): "iris" is presumably the name of the data set's class
    # variable, which is what makes the new feature a duplicate - confirm.
    widget.addFeature(ContinuousDescriptor("iris", "0", 3))
    widget.apply()

    variables = self.get_output(widget.Outputs.data).domain.variables
    distinct_names = set(var.name for var in variables)
    self.assertEqual(len(distinct_names), len(variables))

def test_discrete_no_values(self):
"""
Should not fail when there are no values set.
Expand Down
11 changes: 8 additions & 3 deletions Orange/widgets/evaluate/owconfusionmatrix.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Confusion matrix widget"""

from math import isnan, isinf
from itertools import chain
import unicodedata

from AnyQt.QtWidgets import QTableView, QHeaderView, QStyledItemDelegate, \
Expand All @@ -11,6 +12,7 @@
import sklearn.metrics as skl_metrics

import Orange
from Orange.data.util import get_unique_names
import Orange.evaluation
from Orange.widgets import widget, gui
from Orange.widgets.settings import \
Expand Down Expand Up @@ -371,13 +373,16 @@ def _prepare_data(self):
extra = []
class_var = self.data.domain.class_var
metas = self.data.domain.metas
attrs = self.data.domain.attributes
names = [var.name for var in chain(metas, [class_var], attrs)]

if self.append_predictions:
extra.append(predicted.reshape(-1, 1))
proposed = "{}({})".format(class_var.name, learner_name)
name = get_unique_names(names, proposed)
var = Orange.data.DiscreteVariable(
"{}({})".format(class_var.name, learner_name),
class_var.values
)
name,
class_var.values)
metas = metas + (var,)

if self.append_probabilities and \
Expand Down
14 changes: 13 additions & 1 deletion Orange/widgets/evaluate/owpredictions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import namedtuple
from functools import partial
from operator import itemgetter
from itertools import chain

import numpy
from AnyQt.QtWidgets import (
Expand All @@ -18,6 +19,7 @@
from Orange.base import Model
from Orange.data import ContinuousVariable, DiscreteVariable, Value, Domain
from Orange.data.table import DomainTransformationError
from Orange.data.util import get_unique_names
from Orange.widgets import gui, settings
from Orange.widgets.evaluate.utils import (
ScoreTable, usable_scorers, learner_name, scorer_caller)
Expand Down Expand Up @@ -542,7 +544,17 @@ def _commit_predictions(self):
self._add_regression_out_columns(slot, newmetas, newcolumns)

attrs = list(self.data.domain.attributes)
metas = list(self.data.domain.metas) + newmetas
metas = list(self.data.domain.metas)
names = [var.name for var in chain(attrs, self.data.domain.class_vars, metas) if var]
uniq_newmetas = []
for new_ in newmetas:
uniq = get_unique_names(names, new_.name)
if uniq != new_.name:
new_ = new_.copy(name=uniq)
uniq_newmetas.append(new_)
names.append(uniq)

metas += uniq_newmetas
domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
predictions = self.data.transform(domain)
if newcolumns:
Expand Down
10 changes: 10 additions & 0 deletions Orange/widgets/evaluate/tests/test_owconfusionmatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from Orange.widgets.evaluate.owconfusionmatrix import OWConfusionMatrix
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
from Orange.widgets.utils.state_summary import format_summary_details
from Orange.widgets.tests.utils import possible_duplicate_table


class TestOWConfusionMatrix(WidgetTest, WidgetOutputsTestMixin):
Expand Down Expand Up @@ -135,3 +136,12 @@ def test_summary(self):
self.send_signal(self.widget.Inputs.evaluation_results, None)
self.assertEqual(info._StateInfo__output_summary.brief, "")
self.assertEqual(info._StateInfo__output_summary.details, no_output)

def test_unique_output_domain(self):
    """
    The appended prediction column gets a " (1)" suffix on a name clash.
    """
    learner = NaiveBayesLearner()
    # NOTE(review): the helper presumably returns a table that already has
    # a variable with the given name - confirm against its definition.
    table = possible_duplicate_table('iris(Learner #1)')
    results = CrossValidation(table, [learner], k=3, store_data=True)
    self.send_signal(self.widget.Inputs.evaluation_results, results)

    annotated = self.get_output(self.widget.Outputs.annotated_data)
    first_meta = annotated.domain.metas[0]
    self.assertEqual(first_meta.name, 'iris(Learner #1) (1)')
12 changes: 11 additions & 1 deletion Orange/widgets/evaluate/tests/test_owpredictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from Orange.data import Table, Domain, DiscreteVariable
from Orange.modelling import ConstantLearner, TreeLearner
from Orange.evaluation import Results
from Orange.widgets.tests.utils import excepthook_catch
from Orange.widgets.tests.utils import excepthook_catch, \
possible_duplicate_table
from Orange.widgets.utils.colorpalette import ColorPaletteGenerator


Expand Down Expand Up @@ -414,6 +415,15 @@ def test_colors_continuous(self):

self.widget.send_report() # just a quick check that it doesn't crash

def test_unique_output_domain(self):
    """
    The prediction column is renamed to 'constant (1)' on a name clash.
    """
    widget = self.widget
    # NOTE(review): the helper presumably returns a table that already has
    # a variable named 'constant' - confirm against its definition.
    table = possible_duplicate_table('constant')
    model = ConstantLearner()(table)
    self.send_signal(widget.Inputs.data, table)
    self.send_signal(widget.Inputs.predictors, model)

    first_meta = self.get_output(widget.Outputs.predictions).domain.metas[0]
    self.assertEqual(first_meta.name, 'constant (1)')


if __name__ == "__main__":
import unittest
Expand Down
13 changes: 11 additions & 2 deletions Orange/widgets/evaluate/tests/test_owtestandscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from AnyQt.QtTest import QTest
import baycomp

from Orange.classification import MajorityLearner, LogisticRegressionLearner
from Orange.classification import MajorityLearner, LogisticRegressionLearner, \
RandomForestLearner
from Orange.classification.majority import ConstantModel
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange.evaluation import Results, TestOnTestData, scoring
Expand All @@ -24,7 +25,7 @@
from Orange.widgets.settings import (
ClassValuesContextHandler, PerfectDomainContextHandler)
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.utils import simulate
from Orange.widgets.tests.utils import simulate, possible_duplicate_table
from Orange.widgets.utils.state_summary import (format_summary_details,
format_multiple_summaries)
from Orange.tests import test_filename
Expand Down Expand Up @@ -678,6 +679,14 @@ def test_summary(self):
self.assertEqual(info._StateInfo__output_summary.brief, "")
self.assertEqual(info._StateInfo__output_summary.details, no_output)

def test_unique_output_domain(self):
    """
    The prediction column is renamed to 'random forest (1)' on a name clash.
    """
    widget = self.widget
    # NOTE(review): the helper presumably returns a table that already has
    # a variable named 'random forest' - confirm against its definition.
    table = possible_duplicate_table('random forest')
    self.send_signal(widget.Inputs.train_data, table)
    self.send_signal(widget.Inputs.learner, RandomForestLearner(), 0)

    first_meta = self.get_output(widget.Outputs.predictions).domain.metas[0]
    self.assertEqual(first_meta.name, 'random forest (1)')


class TestHelpers(unittest.TestCase):
def test_results_one_vs_rest(self):
data = Table(test_filename("datasets/lenses.tab"))
Expand Down
Loading