Merge pull request #1655 from VesnaT/flagged_data
[ENH] Scatterplot, HeatMap, TreeGraph, ConfusionMatrix and Unsupervised widgets: Output Flagged Data
astaric authored Oct 19, 2016
2 parents bdc564d + 5610750 commit caa0ff2
Showing 18 changed files with 554 additions and 127 deletions.
7 changes: 6 additions & 1 deletion Orange/tree.py
@@ -224,9 +224,14 @@ def _count(node):
         return _count(self.root)
 
     def get_instances(self, nodes):
+        indices = self.get_indices(nodes)
+        if indices is not None:
+            return self.instances[indices]
+
+    def get_indices(self, nodes):
         subsets = [node.subset for node in nodes]
         if subsets:
-            return self.instances[np.unique(np.hstack(subsets))]
+            return np.unique(np.hstack(subsets))
 
     @staticmethod
     def climb(node):
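The get_instances/get_indices split is what lets a tree-viewing widget flag rows: get_indices exposes raw row indices into tree.instances for building the annotated output, while get_instances still returns the subset table (or None when no nodes carry subsets). Roughly, a sketch of how a widget can use the pair (outputs_for_nodes is a hypothetical helper, not part of the commit):

from Orange.widgets.utils.annotated_data import create_annotated_table


def outputs_for_nodes(tree, nodes):
    # Sketch: build both output signals from the nodes selected in a tree view.
    selected = tree.get_instances(nodes)   # subset Table, or None
    indices = tree.get_indices(nodes)      # row indices into tree.instances
    annotated = create_annotated_table(
        tree.instances, indices if indices is not None else [])
    return selected, annotated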
82 changes: 45 additions & 37 deletions Orange/widgets/evaluate/owconfusionmatrix.py
@@ -14,6 +14,8 @@
 
 import Orange
 from Orange.widgets import widget, settings, gui
+from Orange.widgets.utils.annotated_data import (create_annotated_table,
+                                                 ANNOTATED_DATA_SIGNAL_NAME)
 
 
 def confusion_matrix(res, index):
@@ -78,7 +80,8 @@ class OWConfusionMatrix(widget.OWWidget):
     priority = 1001
 
     inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
-    outputs = [("Selected Data", Orange.data.Table)]
+    outputs = [("Selected Data", Orange.data.Table, widget.Default),
+               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]
 
     quantities = ["Number of instances",
                   "Proportion of predicted",
@@ -324,51 +327,56 @@ def commit(self):
             predicted = self.results.predicted[self.selected_learner[0]]
             selected = [i for i, t in enumerate(zip(actual, predicted))
                         if t in indices]
+
+            extra = []
+            class_var = self.data.domain.class_var
+            metas = self.data.domain.metas
+
+            if self.append_predictions:
+                extra.append(predicted.reshape(-1, 1))
+                var = Orange.data.DiscreteVariable(
+                    "{}({})".format(class_var.name, learner_name),
+                    class_var.values
+                )
+                metas = metas + (var,)
+
+            if self.append_probabilities and \
+                    self.results.probabilities is not None:
+                probs = self.results.probabilities[self.selected_learner[0]]
+                extra.append(numpy.array(probs, dtype=object))
+                pvars = [Orange.data.ContinuousVariable("p({})".format(value))
+                         for value in class_var.values]
+                metas = metas + tuple(pvars)
+
+            X = self.data.X
+            Y = self.data.Y
+            M = self.data.metas
+            row_ids = self.data.ids
+
+            M = numpy.hstack((M,) + tuple(extra))
+            domain = Orange.data.Domain(
+                self.data.domain.attributes,
+                self.data.domain.class_vars,
+                metas
+            )
+            data = Orange.data.Table.from_numpy(domain, X, Y, M)
+            data.ids = row_ids
+            data.name = learner_name
+
             if selected:
                 row_indices = self.results.row_indices[selected]
-                extra = []
-                class_var = self.data.domain.class_var
-                metas = self.data.domain.metas
-
-                if self.append_predictions:
-                    predicted = numpy.array(predicted[selected], dtype=object)
-                    extra.append(predicted.reshape(-1, 1))
-                    var = Orange.data.DiscreteVariable(
-                        "{}({})".format(class_var.name, learner_name),
-                        class_var.values
-                    )
-                    metas = metas + (var,)
-
-                if self.append_probabilities and \
-                        self.results.probabilities is not None:
-                    probs = self.results.probabilities[self.selected_learner[0],
-                                                       selected]
-                    extra.append(numpy.array(probs, dtype=object))
-                    pvars = [Orange.data.ContinuousVariable("p({})".format(value))
-                             for value in class_var.values]
-                    metas = metas + tuple(pvars)
-
-                X = self.data.X[row_indices]
-                Y = self.data.Y[row_indices]
-                M = self.data.metas[row_indices]
-                row_ids = self.data.ids[row_indices]
-
-                M = numpy.hstack((M,) + tuple(extra))
-                domain = Orange.data.Domain(
-                    self.data.domain.attributes,
-                    self.data.domain.class_vars,
-                    metas
-                )
-                data = Orange.data.Table.from_numpy(domain, X, Y, M)
-                data.ids = row_ids
-                data.name = learner_name
+                annotated_data = create_annotated_table(data, row_indices)
+                data = data[row_indices]
             else:
+                annotated_data = create_annotated_table(data, [])
                 data = None
 
         else:
             data = None
+            annotated_data = None
 
         self.send("Selected Data", data)
+        self.send(ANNOTATED_DATA_SIGNAL_NAME, annotated_data)
 
     def _invalidate(self):
         indices = self.tableview.selectedIndexes()
34 changes: 16 additions & 18 deletions Orange/widgets/evaluate/tests/test_owconfusionmatrix.py
@@ -4,23 +4,29 @@
 from Orange.classification import NaiveBayesLearner, TreeLearner
 from Orange.evaluation.testing import CrossValidation
 from Orange.widgets.evaluate.owconfusionmatrix import OWConfusionMatrix
-from Orange.widgets.tests.base import WidgetTest
+from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
 
 
-class TestOWConfusionMatrix(WidgetTest):
+class TestOWConfusionMatrix(WidgetTest, WidgetOutputsTestMixin):
     @classmethod
     def setUpClass(cls):
         super().setUpClass()
+        WidgetOutputsTestMixin.init(cls)
+
         bayes = NaiveBayesLearner()
         tree = TreeLearner()
-        iris = Table("iris")
+        iris = cls.data
         titanic = Table("titanic")
         common = dict(k=3, store_data=True)
         cls.results_1_iris = CrossValidation(iris, [bayes], **common)
         cls.results_2_iris = CrossValidation(iris, [bayes, tree], **common)
         cls.results_2_titanic = CrossValidation(titanic, [bayes, tree],
                                                 **common)
 
+        cls.signal_name = "Evaluation Results"
+        cls.signal_data = cls.results_1_iris
+        cls.same_input_output_domain = False
+
     def setUp(self):
         self.widget = self.create_widget(OWConfusionMatrix,
                                          stored_settings={"auto_apply": False})
@@ -40,19 +46,11 @@ def test_selected_learner(self):
         self.send_signal("Evaluation Results", self.results_1_iris)
         self.widget.selected_learner[:] = [0]
 
-    def test_outputs(self):
-        self.send_signal("Evaluation Results", self.results_1_iris)
-
-        # check selected data output
-        self.assertIsNone(self.get_output("Selected Data"))
-
-        # select data instances
+    def _select_data(self):
         self.widget.select_correct()
-
-        # check selected data output
-        selected = self.get_output("Selected Data")
-        self.assertGreater(len(selected), 0)
-
-        # check output when data is removed
-        self.send_signal("Evaluation Results", None)
-        self.assertIsNone(self.get_output("Selected Data"))
+        indices = self.widget.tableview.selectedIndexes()
+        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
+        selected = [i for i, t in enumerate(zip(
+            self.widget.results.actual, self.widget.results.predicted[0]))
+            if t in indices]
+        self.selected_indices = self.widget.results.row_indices[selected]
76 changes: 76 additions & 0 deletions Orange/widgets/tests/base.py
@@ -1,5 +1,7 @@
 import unittest
 
+import numpy as np
+
 from Orange.base import SklLearner, SklModel
 from PyQt4.QtGui import (QApplication, QComboBox, QSpinBox, QDoubleSpinBox,
                          QSlider)
@@ -13,6 +15,8 @@
 from Orange.regression.base_regression import LearnerRegression, ModelRegression
 from Orange.canvas.report.owreport import OWReport
 from Orange.widgets.utils.owlearnerwidget import OWBaseLearner
+from Orange.widgets.utils.annotated_data import (ANNOTATED_DATA_FEATURE_NAME,
+                                                 ANNOTATED_DATA_SIGNAL_NAME)
 
 app = None
 
@@ -476,3 +480,75 @@ def get_value(learner, name):
             self.assertFalse(self.widget.Error.active)
         else:
             self.assertTrue(self.widget.Error.active)
+
+
+class WidgetOutputsTestMixin:
+    """Mixin for testing a widget's outputs.
+    Provides an init method that sets up testing parameters and a test method
+    that checks the Selected Data and (Annotated) Data outputs.
+    Since widgets select data instances in different ways, subclasses must
+    implement the _select_data method, which should assign the selected row
+    indices to self.selected_indices.
+    If the output's domain is expected to differ from the input's, set
+    same_input_output_domain to False.
+    If the Selected Data and Data domains differ, override
+    _compare_selected_annotated_domains.
+    """
+
+    def init(self):
+        self.data = Table("iris")
+        self.same_input_output_domain = True
+        self.selected_indices = []
+
+    def test_outputs(self):
+        self.send_signal(self.signal_name, self.signal_data)
+
+        # only needed in TestOWMDS
+        if type(self).__name__ == "TestOWMDS":
+            from PyQt4.QtCore import QEvent
+            self.widget.customEvent(QEvent(QEvent.User))
+            self.widget.commit()
+
+        # check selected data output
+        self.assertIsNone(self.get_output("Selected Data"))
+
+        # check annotated data output
+        feature_name = ANNOTATED_DATA_FEATURE_NAME
+        annotated = self.get_output(ANNOTATED_DATA_SIGNAL_NAME)
+        self.assertEqual(0, np.sum([i[feature_name] for i in annotated]))
+
+        # select data instances
+        self._select_data()
+
+        # check selected data output
+        selected = self.get_output("Selected Data")
+        n_sel, n_attr = len(selected), len(self.data.domain.attributes)
+        self.assertGreater(n_sel, 0)
+        self.assertEqual(selected.domain == self.data.domain,
+                         self.same_input_output_domain)
+        np.testing.assert_array_equal(selected.X[:, :n_attr],
+                                      self.data.X[self.selected_indices])
+
+        # check annotated data output
+        annotated = self.get_output(ANNOTATED_DATA_SIGNAL_NAME)
+        self.assertEqual(n_sel, np.sum([i[feature_name] for i in annotated]))
+
+        # compare selected and annotated data domains
+        self._compare_selected_annotated_domains(selected, annotated)
+
+        # check output when data is removed
+        self.send_signal(self.signal_name, None)
+        self.assertIsNone(self.get_output("Selected Data"))
+        self.assertIsNone(self.get_output(ANNOTATED_DATA_SIGNAL_NAME))
+
+    def _select_data(self):
+        raise NotImplementedError("Subclasses should implement _select_data")
+
+    def _compare_selected_annotated_domains(self, selected, annotated):
+        selected_vars = selected.domain.variables + selected.domain.metas
+        annotated_vars = annotated.domain.variables + annotated.domain.metas
+        self.assertTrue(all(var in annotated_vars for var in selected_vars))
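For a new widget, the mixin combines with WidgetTest roughly as follows (a minimal sketch; OWSomeWidget and its select_rows call are placeholders rather than a real API — the TestOWConfusionMatrix changes above show a real adaptation):

from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin


class TestOWSomeWidget(WidgetTest, WidgetOutputsTestMixin):
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)   # sets cls.data and defaults
        cls.signal_name = "Data"           # input signal test_outputs() sends
        cls.signal_data = cls.data

    def setUp(self):
        self.widget = self.create_widget(OWSomeWidget)

    def _select_data(self):
        # Trigger a selection in the widget, then record the selected row
        # indices so test_outputs() can check the Selected Data output.
        self.widget.select_rows([0, 1, 2])   # hypothetical selection API
        self.selected_indices = [0, 1, 2]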
102 changes: 102 additions & 0 deletions Orange/widgets/tests/test_annotated_data.py
@@ -0,0 +1,102 @@
+import random
+import unittest
+
+import numpy as np
+
+from Orange.data import Table, Variable
+from Orange.widgets.utils.annotated_data import (create_annotated_table,
+                                                 ANNOTATED_DATA_FEATURE_NAME)
+
+
+class TestAnnotatedData(unittest.TestCase):
+    def setUp(self):
+        Variable._clear_all_caches()
+        random.seed(42)
+        self.zoo = Table("zoo")
+
+    def test_create_annotated_table(self):
+        annotated = create_annotated_table(self.zoo, list(range(10)))
+
+        # check annotated table domain
+        self.assertEqual(annotated.domain.variables, self.zoo.domain.variables)
+        self.assertEqual(2, len(annotated.domain.metas))
+        self.assertIn(self.zoo.domain.metas[0], annotated.domain.metas)
+        self.assertIn(ANNOTATED_DATA_FEATURE_NAME,
+                      [m.name for m in annotated.domain.metas])
+
+        # check annotated table data
+        np.testing.assert_array_equal(annotated.X, self.zoo.X)
+        np.testing.assert_array_equal(annotated.Y, self.zoo.Y)
+        np.testing.assert_array_equal(annotated.metas[:, 0].ravel(),
+                                      self.zoo.metas.ravel())
+        self.assertEqual(
+            10, np.sum([i[ANNOTATED_DATA_FEATURE_NAME] for i in annotated]))
+
+    def test_create_annotated_table_selected(self):
+        # check annotated column for no selected indices
+        annotated = create_annotated_table(self.zoo, [])
+        self.assertEqual(len(annotated), len(self.zoo))
+        self.assertEqual(
+            0, np.sum([i[ANNOTATED_DATA_FEATURE_NAME] for i in annotated]))
+
+        # check annotated column for all selected indices
+        annotated = create_annotated_table(self.zoo, list(range(len(self.zoo))))
+        self.assertEqual(len(annotated), len(self.zoo))
+        self.assertEqual(
+            len(self.zoo),
+            np.sum([i[ANNOTATED_DATA_FEATURE_NAME] for i in annotated]))
+
+    def test_create_annotated_table_none_data(self):
+        self.assertIsNone(create_annotated_table(None, None))
+
+    def test_create_annotated_table_none_indices(self):
+        annotated = create_annotated_table(self.zoo, None)
+        self.assertEqual(len(annotated), len(self.zoo))
+        self.assertEqual(
+            0, np.sum([i[ANNOTATED_DATA_FEATURE_NAME] for i in annotated]))
+
+    def test_cascade_annotated_tables(self):
+        # check cascade of annotated tables
+        data = self.zoo
+        data.domain.metas[0].name = ANNOTATED_DATA_FEATURE_NAME
+        for i in range(5):
+            data = create_annotated_table(
+                data, random.sample(range(0, len(self.zoo)), 20))
+            self.assertEqual(2 + i, len(data.domain.metas))
+            self.assertIn(self.zoo.domain.metas[0], data.domain.metas)
+            self.assertIn(ANNOTATED_DATA_FEATURE_NAME,
+                          [m.name for m in data.domain.metas])
+            for j in range(2, i + 3):
+                self.assertIn("{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, j),
+                              [m.name for m in data.domain.metas])
+
+    def test_cascade_annotated_tables_with_missing_middle_feature(self):
+        # check table for domain [..., "Feature", "Selected", "Selected (3)"] ->
+        # [..., "Feature", "Selected", "Selected (3)", "Selected (4)"]
+        data = self.zoo
+        data.domain.attributes[0].name = ANNOTATED_DATA_FEATURE_NAME
+        data.domain.metas[0].name = "{} ({})".format(
+            ANNOTATED_DATA_FEATURE_NAME, 3)
+        data = create_annotated_table(
+            data, random.sample(range(0, len(self.zoo)), 20))
+        self.assertEqual(2, len(data.domain.metas))
+        self.assertEqual(data.domain.attributes[0].name,
+                         ANNOTATED_DATA_FEATURE_NAME)
+        self.assertEqual(data.domain.metas[0].name,
+                         "{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, 3))
+        self.assertEqual(data.domain.metas[1].name,
+                         "{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, 4))
+
+    def test_cascade_annotated_tables_with_missing_annotated_feature(self):
+        # check table for domain [..., "Feature", "Selected (3)"] ->
+        # [..., "Feature", "Selected (3)", "Selected (4)"]
+        data = self.zoo
+        data.domain.metas[0].name = "{} ({})".format(
+            ANNOTATED_DATA_FEATURE_NAME, 3)
+        data = create_annotated_table(
+            data, random.sample(range(0, len(self.zoo)), 20))
+        self.assertEqual(2, len(data.domain.metas))
+        self.assertEqual(data.domain.metas[0].name,
+                         "{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, 3))
+        self.assertEqual(data.domain.metas[1].name,
+                         "{} ({})".format(ANNOTATED_DATA_FEATURE_NAME, 4))
(Diff truncated: the remaining 13 changed files are not shown.)
