Skip to content

Commit

Permalink
Merge pull request #2067 from pavlin-policar/deterministic-trees
Browse files Browse the repository at this point in the history
[FIX] OWTreeViewer: Fix trees being displayed differently for same tree object
  • Loading branch information
astaric authored Mar 10, 2017
2 parents a951185 + 82a6959 commit 1f72515
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 17 deletions.
7 changes: 7 additions & 0 deletions Orange/widgets/tests/datasets/same_entropy.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
target d1 d2
d d d
class
A 2 2
A 2 2
B 1 1
B 1 1
27 changes: 17 additions & 10 deletions Orange/widgets/visualize/owtreeviewer2d.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from collections import OrderedDict

from AnyQt.QtGui import (
QBrush, QPen, QColor, QPainter, QPainterPath, QTransform
)
from AnyQt.QtWidgets import (
QGraphicsItem, QGraphicsEllipseItem, QGraphicsRectItem, QGraphicsTextItem,
QGraphicsItem, QGraphicsEllipseItem, QGraphicsTextItem,
QGraphicsLineItem, QGraphicsScene, QGraphicsView, QStyle, QSizePolicy,
QFormLayout
)
Expand All @@ -20,16 +22,20 @@

class GraphNode:
def __init__(self, *_, **kwargs):
self.edges = kwargs.get("edges", set())
# Implement edges as an ordered dict to get the nice speed benefits as
# well as adding ordering, which we need to make trees deterministic
self.__edges = kwargs.get("edges", OrderedDict())

def graph_edges(self):
return self.edges
"""Get a list of the edges that stem from the node."""
return self.__edges.keys()

def graph_add_edge(self, edge):
self.edges.add(edge)
"""Add an edge stemming from the node."""
self.__edges[edge] = 0

def __iter__(self):
for edge in self.edges:
for edge in self.__edges.keys():
yield edge.node2

def graph_nodes(self, atype=1):
Expand Down Expand Up @@ -185,7 +191,7 @@ def itemChange(self, change, value):

# noinspection PyCallByClass,PyTypeChecker
def update_edge(self):
for edge in self.edges:
for edge in self.graph_edges():
if edge.node1 is self:
QTimer.singleShot(0, edge.update_ends)
elif edge.node2 is self:
Expand Down Expand Up @@ -274,16 +280,17 @@ def fix_pos(self, node=None, x=10, y=10):
self.update()

def _fix_pos(self, node, x, y):
"""Fix the position of the tree stemming from the given node."""
def brect(node):
"""Get the bounding box of the parent rect and all its children."""
return node.boundingRect() | node.childrenBoundingRect()

if node.branches and node.isOpen:
for n in node.branches:
x, ry = self._fix_pos(n, x,
y + self._VSPACING + brect(node).height())
x, _ = self._fix_pos(n, x, y + self._VSPACING + brect(node).height())
x = (node.branches[0].pos().x() + node.branches[-1].pos().x()) / 2
node.setPos(x, y)
for e in node.edges:
for e in node.graph_edges():
e.update_ends()
else:
node.setPos(self.gx, y)
Expand Down Expand Up @@ -428,7 +435,7 @@ def send_report(self):

if self.model:
self.reportSection("Tree")
urlfn, filefn = self.getUniqueImageName(ext=".svg")
_, filefn = self.getUniqueImageName(ext=".svg")
svg = QSvgGenerator()
svg.setFileName(filefn)
ssize = self.scene.sceneRect().size()
Expand Down
55 changes: 50 additions & 5 deletions Orange/widgets/visualize/tests/test_owpythagorastree.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import math
import unittest

from Orange.classification import TreeLearner as TreeClassificationLearner
from os import path

from Orange.data import Table
from Orange.regression import TreeLearner as TreeRegressionLearner
from Orange.modelling import TreeLearner
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
from Orange.widgets.visualize.owpythagorastree import OWPythagorasTree
from Orange.widgets.visualize.pythagorastreeviewer import (
Expand Down Expand Up @@ -93,7 +94,7 @@ def setUpClass(cls):
WidgetOutputsTestMixin.init(cls)

# Set up for output tests
tree = TreeClassificationLearner()
tree = TreeLearner()
cls.model = tree(cls.data)
cls.model.instances = cls.data

Expand All @@ -102,11 +103,11 @@ def setUpClass(cls):

# Set up for widget tests
titanic_data = Table('titanic')[::50]
cls.titanic = TreeClassificationLearner(max_depth=1)(titanic_data)
cls.titanic = TreeLearner(max_depth=1)(titanic_data)
cls.titanic.instances = titanic_data

housing_data = Table('housing')[:10]
cls.housing = TreeRegressionLearner(max_depth=1)(housing_data)
cls.housing = TreeLearner(max_depth=1)(housing_data)
cls.housing.instances = housing_data

def setUp(self):
Expand Down Expand Up @@ -283,3 +284,47 @@ def test_label_on_tree_connect_and_disconnect(self):
self.assertNotRegex(
self.widget.info.text(), regex,
'Initial info should not contain node or depth info')

def test_tree_determinism(self):
"""Check that the tree is drawn identically upon receiving the same
dataset with no parameter changes."""
n_tries = 10

def _check_all_same(data):
"""Check that all the elements within an iterable are identical."""
iterator = iter(data)
try:
first = next(iterator)
except StopIteration:
return True
return all(first == rest for rest in iterator)

# Make sure the tree are deterministic for iris
scene_nodes = []
for _ in range(n_tries):
self.send_signal(self.signal_name, self.signal_data)
scene_nodes.append([n.pos() for n in self.get_visible_squares()])
for node_row in zip(*scene_nodes):
self.assertTrue(
_check_all_same(node_row),
"The tree was not drawn identically in the %d times it was "
"sent to widget after receiving the iris dataset." % n_tries
)

# Make sure trees are deterministic with data where some variables have
# the same entropy
data_same_entropy = Table(path.join(
path.dirname(path.dirname(path.dirname(__file__))), "tests",
"datasets", "same_entropy.tab"))
data_same_entropy = TreeLearner()(data_same_entropy)
scene_nodes = []
for _ in range(n_tries):
self.send_signal(self.signal_name, data_same_entropy)
scene_nodes.append([n.pos() for n in self.get_visible_squares()])
for node_row in zip(*scene_nodes):
self.assertTrue(
_check_all_same(node_row),
"The tree was not drawn identically in the %d times it was "
"sent to widget after receiving a dataset with variables with "
"same entropy." % n_tries
)
53 changes: 51 additions & 2 deletions Orange/widgets/visualize/tests/test_owtreegraph.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
from os import path

from Orange.classification import TreeLearner
from Orange.data import Table
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
from Orange.widgets.visualize.owtreeviewer import \
OWTreeGraph
from Orange.widgets.visualize.owtreeviewer import OWTreeGraph


class TestOWTreeGraph(WidgetTest, WidgetOutputsTestMixin):
Expand All @@ -19,6 +21,13 @@ def setUpClass(cls):
cls.signal_name = "Tree"
cls.signal_data = cls.model

# Load a dataset that contains two variables with the same entropy
data_same_entropy = Table(path.join(
path.dirname(path.dirname(path.dirname(__file__))), "tests",
"datasets", "same_entropy.tab"))
cls.data_same_entropy = tree(data_same_entropy)
cls.data_same_entropy.instances = data_same_entropy

def setUp(self):
self.widget = self.create_widget(OWTreeGraph)

Expand All @@ -35,3 +44,43 @@ def test_target_class_changed(self):
self.widget.color_combo.activated.emit(1)
self.widget.color_combo.setCurrentIndex(1)
self.assertNotEqual(nodes[0].toPlainText(), text)

def test_tree_determinism(self):
"""Check that the tree is drawn identically upon receiving the same
dataset with no parameter changes."""
n_tries = 10

def _check_all_same(data):
"""Check that all the elements within an iterable are identical."""
iterator = iter(data)
try:
first = next(iterator)
except StopIteration:
return True
return all(first == rest for rest in iterator)

# Make sure the tree are deterministic for iris
scene_nodes = []
for _ in range(n_tries):
self.send_signal(self.signal_name, self.signal_data)
scene_nodes.append([n.pos() for n in self.widget.scene.nodes()])
for node_row in zip(*scene_nodes):
self.assertTrue(
_check_all_same(node_row),
"The tree was not drawn identically in the %d times it was "
"sent to widget after receiving the iris dataset." % n_tries
)

# Make sure trees are deterministic with data where some variables have
# the same entropy
scene_nodes = []
for _ in range(n_tries):
self.send_signal(self.signal_name, self.data_same_entropy)
scene_nodes.append([n.pos() for n in self.widget.scene.nodes()])
for node_row in zip(*scene_nodes):
self.assertTrue(
_check_all_same(node_row),
"The tree was not drawn identically in the %d times it was "
"sent to widget after receiving a dataset with variables with "
"same entropy." % n_tries
)

0 comments on commit 1f72515

Please sign in to comment.