Skip to content

Commit

Permalink
Merge pull request #1406 from janezd/boxplot-vizrank
Browse files Browse the repository at this point in the history
VizRank-like function for Box Plot
  • Loading branch information
VesnaT authored Oct 28, 2016
2 parents deccbe9 + 08b6ae3 commit 1387a1a
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 24 deletions.
20 changes: 19 additions & 1 deletion Orange/widgets/tests/test_itemmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,25 @@

from Orange.data import Domain, ContinuousVariable
from Orange.widgets.utils.itemmodels import \
PyTableModel, PyListModel, DomainModel
PyTableModel, PyListModel, DomainModel, _argsort


class TestArgsort(TestCase):
    """Tests for `_argsort`, which returns the indices that sort a sequence."""

    def test_argsort(self):
        """Check plain, `key`-based and `cmp`-based ordering, both ways."""
        def abs_cmp(x, y):
            # Three-way comparison of absolute values: -1, 0 or 1
            return (abs(x) > abs(y)) - (abs(x) < abs(y))

        letters = "dacb"
        numbers = [3, -1, 0, 2]
        # (sequence, keyword arguments, expected indices)
        cases = [
            (letters, {}, [1, 3, 2, 0]),
            (letters, {"reverse": True}, [0, 2, 3, 1]),
            (numbers, {"key": abs}, [2, 1, 3, 0]),
            (numbers, {"key": abs, "reverse": True}, [0, 3, 1, 2]),
            (numbers, {"cmp": abs_cmp}, [2, 1, 3, 0]),
            (numbers, {"cmp": abs_cmp, "reverse": True}, [0, 3, 1, 2]),
        ]
        for seq, kwargs, expected in cases:
            self.assertEqual(_argsort(seq, **kwargs), expected)


class TestPyTableModel(TestCase):
Expand Down
10 changes: 6 additions & 4 deletions Orange/widgets/utils/itemmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ class _store(dict):


def _argsort(seq, cmp=None, key=None, reverse=False):
    """
    Return the list of indices that would sort `seq`.

    The result is a permutation of `range(len(seq))`, not the sorted
    elements: `[seq[i] for i in _argsort(seq)]` is `seq` in sorted order.

    Args:
        seq: an indexable sequence with a length
        cmp: an optional old-style comparison function taking two elements
            and returning a negative number, zero or a positive number
        key: an optional function computing the sort key of an element;
            if both `key` and `cmp` are given, `key` is used
        reverse: if `True`, the order is descending

    Returns:
        a list of integer indices into `seq`
    """
    if key is not None:
        def sort_key(i):
            return key(seq[i])
    elif cmp is not None:
        from functools import cmp_to_key
        # Compare indices by comparing the elements they refer to
        sort_key = cmp_to_key(lambda i, j: cmp(seq[i], seq[j]))
    else:
        def sort_key(i):
            return seq[i]
    # Sort the indices (not (index, value) pairs), so that the result
    # can be used directly for indexing
    return sorted(range(len(seq)), key=sort_key, reverse=reverse)


@contextmanager
Expand Down Expand Up @@ -517,7 +519,7 @@ def sort(self, *args, **kwargs):
indices = _argsort(self._list, *args, **kwargs)
lst = [self._list[i] for i in indices]
other = [self._other_data[i] for i in indices]
for i, new_l, new_o in enumerate(zip(lst, other)):
for i, (new_l, new_o) in enumerate(zip(lst, other)):
self._list[i] = new_l
self._other_data[i] = new_o
self.dataChanged.emit(self.index(0), self.index(len(self) - 1))
Expand Down
112 changes: 94 additions & 18 deletions Orange/widgets/visualize/owboxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from AnyQt.QtCore import Qt, QEvent, QRectF, QSize

import scipy.special
from scipy.stats import f_oneway, chisquare

import Orange.data
from Orange.statistics import contingency, distribution
Expand Down Expand Up @@ -72,12 +73,14 @@ class OWBoxPlot(widget.OWWidget):
Here's how the widget's functions call each other:
- `set_data` is a signal handler that fills the list boxes and calls
`attr_changed`.
`grouping_changed`.
- `attr_changed` handles changes of attribute or grouping (callbacks for
list boxes). It recomputes box data by calling `compute_box_data`, shows
the appropriate display box (discrete/continuous) and then calls
`layout_changed`
- `grouping_changed` handles changes of grouping attribute: it enables or
disables the box for ordering, orders attributes and calls `attr_changed`.
- `attr_changed` handles changes of attribute. It recomputes box data by
calling `compute_box_data`, shows the appropriate display box
(discrete/continuous) and then calls `layout_changed`.
- `layout_changed` constructs all the elements for the scene (as lists of
QGraphicsItemGroup) and calls `display_changed`. It is called when the
Expand All @@ -104,9 +107,10 @@ class OWBoxPlot(widget.OWWidget):
settingsHandler = DomainContextHandler()

attribute = ContextSetting(None)
order_by_importance = Setting(False)
group_var = ContextSetting(None)
show_annotations = Setting(True)
compare = Setting(CompareMedians)
compare = Setting(CompareMeans)
stattest = Setting(0)
sig_threshold = Setting(0.05)
stretched = Setting(True)
Expand Down Expand Up @@ -152,19 +156,34 @@ def __init__(self):
self.scale_x = self.scene_min_x = self.scene_width = 0
self.label_width = 0

common_options = dict(
callback=self.attr_changed, sizeHint=(200, 100))
self.attrs = VariableListModel()
gui.listView(
view = gui.listView(
self.controlArea, self, "attribute", box="Variable",
model=self.attrs, **common_options)
model=self.attrs, callback=self.attr_changed)
view.setMinimumSize(QSize(30, 30))
# Any other policy than Ignored will let the QListBox's scrollbar
# set the minimal height (see the penultimate paragraph of
# http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
gui.separator(view.box, 6, 6)
self.cb_order = gui.checkBox(
view.box, self, "order_by_importance",
"Order by relevance",
tooltip="Order by 𝜒² or ANOVA over the subgroups",
callback=self.apply_sorting)
self.group_vars = VariableListModel()
gui.listView(
self.controlArea, self, "group_var", box="Grouping",
model=self.group_vars, **common_options)
view = gui.listView(
self.controlArea, self, "group_var", box="Subgroups",
model=self.group_vars, callback=self.grouping_changed)
view.setMinimumSize(QSize(30, 30))
# See the comment above
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

# TODO: move Compare median/mean to grouping box
self.display_box = gui.vBox(self.controlArea, "Display")
# The vertical size policy is needed to let only the list views expand
self.display_box = gui.vBox(
self.controlArea, "Display",
sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum))

gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
callback=self.display_changed)
Expand All @@ -173,9 +192,11 @@ def __init__(self):
btnLabels=["No comparison", "Compare medians", "Compare means"],
callback=self.display_changed)

# The vertical size policy is needed to let only the list views expand
self.stretching_box = gui.checkBox(
self.controlArea, self, 'stretched', "Stretch bars", box='Display',
callback=self.display_changed).box
callback=self.display_changed,
sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)).box

gui.vBox(self.mainArea, addSpace=True)
self.box_scene = QGraphicsScene()
Expand All @@ -189,13 +210,16 @@ def __init__(self):

e = gui.hBox(self.mainArea, addSpace=False)
self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
self.mainArea.setMinimumWidth(650)
self.mainArea.setMinimumWidth(600)

self.stats = self.dist = self.conts = []
self.is_continuous = False

self.update_display_box()

def sizeHint(self):
    """Return the preferred size of the widget as a `QSize`."""
    # Vertical size is regulated by mainArea
    width, height = 100, 500
    return QSize(width, height)

def eventFilter(self, obj, event):
if obj is self.box_view.viewport() and \
event.type() == QEvent.Resize:
Expand Down Expand Up @@ -228,10 +252,57 @@ def set_data(self, dataset):
else:
self.group_var = None # Reset to trigger selection via callback
self.openContext(self.dataset)
self.attr_changed()
self.grouping_changed()
else:
self.reset_all_data()

def apply_sorting(self):
    """
    Order the variables in `self.attrs` by relevance or by domain order.

    If `order_by_importance` is set and a grouping variable is selected,
    the variables are sorted by ascending p-value of a test over the
    subgroups: one-way ANOVA for continuous variables and chi-square for
    discrete ones. Variables that cannot be scored get the score 2 and the
    grouping variable itself gets 3, so they follow all scored variables.

    Otherwise the list is reset to the domain's variables followed by the
    primitive meta attributes.

    Does nothing if there is no data. The value of `self.attribute` is
    re-assigned after the list is changed.
    """
    def compute_score(attr):
        # Sort key: a p-value (smaller means more relevant) or a sentinel
        # greater than 1 for variables without a valid score
        if attr is group_var:
            return 3
        if attr.is_continuous:
            # One-way ANOVA
            col = data.get_column_view(attr)[0]
            groups = (col[group_col == i] for i in range(n_groups))
            groups = (group[~np.isnan(group)] for group in groups)
            groups = [group for group in groups if len(group)]
            p = f_oneway(*groups)[1] if len(groups) > 1 else 2
        else:
            # Chi-square with the given distribution into groups
            # (see degrees of freedom in computation of the p-value)
            observed = np.array(
                contingency.get_contingency(data, group_var, attr))
            # Drop empty rows and columns; they would give zero
            # expected frequencies
            observed = observed[observed.sum(axis=1) != 0, :]
            observed = observed[:, observed.sum(axis=0) != 0]
            if min(observed.shape) < 2:
                return 2
            expected = \
                np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                np.sum(observed)
            # NOTE(review): ddof is based on the number of declared group
            # values, not on the number of non-empty rows -- confirm that
            # this is intended
            p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                          ddof=n_groups - 1)[1]
        if math.isnan(p):
            return 2
        return p

    data = self.dataset
    if data is None:
        return
    domain = data.domain
    attribute = self.attribute
    group_var = self.group_var
    if self.order_by_importance and group_var is not None:
        n_groups = len(group_var.values)
        # Always fetch the grouping column: the list may contain continuous
        # meta attributes even if the domain has no continuous attributes,
        # and these need the column for ANOVA, too
        group_col = data.get_column_view(group_var)[0]
        self.attrs.sort(key=compute_score)
    else:
        self.attrs[:] = chain(
            domain.variables,
            (a for a in domain.metas if a.is_primitive()))
    # Presumably changing the model affects the selection; put it back
    self.attribute = attribute

def reset_all_data(self):
self.clear_scene()
self.infot1.setText("")
Expand All @@ -240,6 +311,11 @@ def reset_all_data(self):
self.is_continuous = False
self.update_display_box()

def grouping_changed(self):
    """
    Handle a change of the grouping variable.

    Enable the check box for ordering only if a grouping variable is
    selected, then reorder the variables and update the plot through
    `attr_changed`.
    """
    has_grouping = self.group_var is not None
    self.cb_order.setEnabled(has_grouping)
    # Reorder the variables before the plot is updated
    self.apply_sorting()
    self.attr_changed()

def attr_changed(self):
self.compute_box_data()
self.update_display_box()
Expand Down Expand Up @@ -796,7 +872,7 @@ def main(argv=None):
if len(argv) > 1:
filename = argv[1]
else:
filename = "brown-selected"
filename = "heart_disease"

data = Orange.data.Table(filename)
w = OWBoxPlot()
Expand Down
44 changes: 43 additions & 1 deletion Orange/widgets/visualize/tests/test_owboxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
from Orange.widgets.tests.base import WidgetTest


class TestOWBoxPlot(WidgetTest):
class OWBoxPlotTests(WidgetTest):
@classmethod
def setUpClass(cls):
    """Load the data sets used by the tests and store them on the class."""
    super().setUpClass()
    # (class attribute, name of the data set)
    datasets = (
        ("iris", "iris"),
        ("zoo", "zoo"),
        ("housing", "housing"),
        ("titanic", "titanic"),
        ("heart", "heart_disease"),
    )
    for attr_name, table_name in datasets:
        setattr(cls, attr_name, Table(table_name))

def setUp(self):
self.widget = self.create_widget(OWBoxPlot)
Expand Down Expand Up @@ -58,3 +60,43 @@ def test_input_data_missings_disc_no_group_var(self):
data.domain.class_var = ContinuousVariable("cls")
data.X[:, 0] = np.nan
self.send_signal("Data", data)

def test_apply_sorting(self):
    """
    Check the state of the ordering check box and the order of variables
    for different selections of the grouping variable.
    """
    widget_controls = self.widget.controlledAttributes
    group_view = widget_controls["group_var"][0].control
    order_box = widget_controls["order_by_importance"][0].control
    attrs_model = self.widget.attrs

    def select_group(row):
        # Select the given row in the list view with grouping variables
        selection = group_view.selectionModel()
        selection.setCurrentIndex(
            group_view.model().index(row), selection.ClearAndSelect)

    def attr_names():
        # Names of variables in the order in which they are listed
        return [var.name for var in attrs_model]

    titanic = self.titanic
    self.send_signal("Data", titanic)

    # The check box is disabled for the first row and enabled for the next
    select_group(0)
    self.assertFalse(order_box.isEnabled())
    select_group(1)
    self.assertTrue(order_box.isEnabled())

    # Without ordering, variables are listed as in the domain
    order_box.setChecked(False)
    self.assertEqual(tuple(attrs_model), titanic.domain.variables)
    order_box.setChecked(True)
    self.assertEqual(attr_names(), ['sex', 'survived', 'age', 'status'])
    # The order must change with the grouping variable
    select_group(4)
    self.assertEqual(attr_names(), ['sex', 'status', 'age', 'survived'])

    heart = self.heart
    self.send_signal("Data", heart)
    select_group(len(group_view.model()) - 1)
    order_box.setChecked(True)
    self.assertEqual(attr_names(),
                     ['thal', 'major vessels colored', 'chest pain',
                      'ST by exercise', 'max HR', 'exerc ind ang',
                      'slope peak exc ST', 'gender', 'age', 'rest SBP',
                      'rest ECG', 'cholesterol',
                      'fasting blood sugar > 120', 'diameter narrowing'])

0 comments on commit 1387a1a

Please sign in to comment.