From 5729747409860950bcfa48081ed1f243fb9503ef Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 7 Jan 2019 22:49:54 +0100 Subject: [PATCH 1/3] Scatterplot: Draw separate regression lines for colors --- Orange/widgets/visualize/owscatterplot.py | 59 ++++++++++++++--------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/Orange/widgets/visualize/owscatterplot.py b/Orange/widgets/visualize/owscatterplot.py index 8c195827814..9796bf8e4b9 100644 --- a/Orange/widgets/visualize/owscatterplot.py +++ b/Orange/widgets/visualize/owscatterplot.py @@ -100,50 +100,65 @@ class OWScatterPlotGraph(OWScatterPlotBase): def __init__(self, scatter_widget, parent): super().__init__(scatter_widget, parent) - self.reg_line_item = None + self.reg_line_items = [] def clear(self): super().clear() - self.reg_line_item = None + self.reg_line_items.clear() def update_coordinates(self): super().update_coordinates() self.update_axes() self.update_regression_line() + def update_colors(self): + super().update_colors() + self.update_regression_line() + def update_axes(self): for axis, title in self.master.get_axes().items(): self.plot_widget.setLabel(axis=axis, text=title or "") if title is None: self.plot_widget.hideAxis(axis) - def update_regression_line(self): - if self.reg_line_item is not None: - self.plot_widget.removeItem(self.reg_line_item) - self.reg_line_item = None - if not self.show_reg_line: - return - x, y = self.master.get_coordinates_data() - if x is None: - return + def _add_line(self, x, y, color, width): min_x, max_x = np.min(x), np.max(x) slope, intercept, rvalue, _, _ = linregress(x, y) start_y = min_x * slope + intercept - end_y = max_x * slope + intercept - angle = np.degrees(np.arctan((end_y - start_y) / (max_x - min_x))) - rotate = ((angle + 45) % 180) - 45 > 90 - color = QColor("#505050") - l_opts = dict(color=color, position=abs(int(rotate) - 0.85), + angle = np.degrees(np.arctan(slope)) + rotate = 135 < angle % 360 < 315 + l_opts = dict(color=color, position=abs(rotate - 0.85), rotateAxis=(1, 0), movable=True) - self.reg_line_item = pg.InfiniteLine( + reg_line_item = pg.InfiniteLine( pos=QPointF(min_x, start_y), angle=angle, - pen=pg.mkPen(color=color, width=1), - label="r = {:.2f}".format(rvalue), labelOpts=l_opts + pen=pg.mkPen(color=color, width=width), + label=f"r = {rvalue:.2f}", labelOpts=l_opts ) if rotate: - self.reg_line_item.label.angle = 180 - self.reg_line_item.label.updateTransform() - self.plot_widget.addItem(self.reg_line_item) + reg_line_item.label.angle = 180 + reg_line_item.label.updateTransform() + self.plot_widget.addItem(reg_line_item) + self.reg_line_items.append(reg_line_item) + + def update_regression_line(self): + for line in self.reg_line_items: + self.plot_widget.removeItem(line) + self.reg_line_items.clear() + if not self.show_reg_line: + return + x, y = self.master.get_coordinates_data() + if x is None: + return + self._add_line(x, y, QColor("#505050"), width=1) + if self.master.is_continuous_color() or self.palette is None: + return + c_data = self.master.get_color_data().astype(int) + if c_data is None: + return + for val in range(c_data.max() + 1): + mask = c_data == val + if mask.sum() > 1: + self._add_line(x[mask], y[mask], self.palette[val], width=3) class OWScatterPlot(OWDataProjectionWidget): From 8d4e67856b18559cb4fb9367698057457523011e Mon Sep 17 00:00:00 2001 From: janezd Date: Sat, 12 Jan 2019 14:30:32 +0100 Subject: [PATCH 2/3] Scatterplot: Add orthonormal regression --- Orange/widgets/visualize/owscatterplot.py | 66 +++++++++++++++++++---- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/Orange/widgets/visualize/owscatterplot.py b/Orange/widgets/visualize/owscatterplot.py index 9796bf8e4b9..c1670bb418c 100644 --- a/Orange/widgets/visualize/owscatterplot.py +++ b/Orange/widgets/visualize/owscatterplot.py @@ -97,6 +97,7 @@ def score_heuristic(self): class OWScatterPlotGraph(OWScatterPlotBase): show_reg_line = Setting(False) + orthonormal_regression = Setting(False) def __init__(self, scatter_widget, parent): super().__init__(scatter_widget, parent) @@ -109,7 +110,8 @@ def clear(self): def update_coordinates(self): super().update_coordinates() self.update_axes() - self.update_regression_line() + # Don't update_regression line here: update_coordinates is always + # followed by update_point_props, which calls update_colors def update_colors(self): super().update_colors() @@ -121,24 +123,59 @@ def update_axes(self): if title is None: self.plot_widget.hideAxis(axis) - def _add_line(self, x, y, color, width): + @staticmethod + def _orthonormal_line(x, y, color, width): + # https://en.wikipedia.org/wiki/Deming_regression, with δ=0. + pen = pg.mkPen(color=color, width=width) + xm = np.mean(x) + ym = np.mean(y) + sxx, sxy, _, syy = np.cov(x, y, ddof=1).flatten() + + if sxy != 0: # also covers sxx != 0 and syy != 0 + slope = (syy - sxx + np.sqrt((syy - sxx) ** 2 + 4 * sxy ** 2)) \ + / (2 * sxy) + intercept = ym - slope * xm + xmin = x.min() + return pg.InfiniteLine( + QPointF(xmin, xmin * slope + intercept), + np.degrees(np.arctan(slope)), + pen) + elif (sxx == 0) == (syy == 0): # both zero or non-zero -> can't draw + return None + elif sxx != 0: + return pg.InfiniteLine(QPointF(x.min(), ym), 0, pen) + else: + return pg.InfiniteLine(QPointF(xm, y.min()), 90, pen) + + @staticmethod + def _regression_line(x, y, color, width): min_x, max_x = np.min(x), np.max(x) + if min_x == max_x: + return None slope, intercept, rvalue, _, _ = linregress(x, y) - start_y = min_x * slope + intercept angle = np.degrees(np.arctan(slope)) + start_y = min_x * slope + intercept rotate = 135 < angle % 360 < 315 l_opts = dict(color=color, position=abs(rotate - 0.85), rotateAxis=(1, 0), movable=True) reg_line_item = pg.InfiniteLine( pos=QPointF(min_x, start_y), angle=angle, pen=pg.mkPen(color=color, width=width), - label=f"r = {rvalue:.2f}", labelOpts=l_opts - ) + label=f"r = {rvalue:.2f}", labelOpts=l_opts) if rotate: reg_line_item.label.angle = 180 reg_line_item.label.updateTransform() - self.plot_widget.addItem(reg_line_item) - self.reg_line_items.append(reg_line_item) + return reg_line_item + + def _add_line(self, x, y, color, width): + if self.orthonormal_regression: + line = self._orthonormal_line(x, y, color, width) + else: + line = self._regression_line(x, y, color, width) + if line is None: + return + self.plot_widget.addItem(line) + self.reg_line_items.append(line) def update_regression_line(self): for line in self.reg_line_items: @@ -149,16 +186,17 @@ def update_regression_line(self): x, y = self.master.get_coordinates_data() if x is None: return - self._add_line(x, y, QColor("#505050"), width=1) + self._add_line(x, y, QColor("#505050"), width=2) if self.master.is_continuous_color() or self.palette is None: return - c_data = self.master.get_color_data().astype(int) + c_data = self.master.get_color_data() if c_data is None: return + c_data = c_data.astype(int) for val in range(c_data.max() + 1): mask = c_data == val if mask.sum() > 1: - self._add_line(x[mask], y[mask], self.palette[val], width=3) + self._add_line(x[mask], y[mask], self.palette[val], width=2) class OWScatterPlot(OWDataProjectionWidget): @@ -223,6 +261,14 @@ def _add_controls(self): self.gui.ToolTipShowsAll, self.gui.RegressionLine], self._plot_box) + gui.checkBox( + gui.indentedBox(self._plot_box), self, + value="graph.orthonormal_regression", + label="Treat variables as independent", + callback=self.graph.update_regression_line, + tooltip= + "If checked, fit line to group (minimize distance from points);\n" + "otherwise fit y as a function of x (minimize vertical distances)") def _add_controls_axis(self): common_options = dict( From 6081008697a3f7e48009606b8a4d4ce70637beaf Mon Sep 17 00:00:00 2001 From: janezd Date: Sun, 13 Jan 2019 13:42:46 +0100 Subject: [PATCH 3/3] Scatterplot: Add tests for regression lines --- .../visualize/tests/test_owscatterplot.py | 270 +++++++++++++++++- 1 file changed, 267 insertions(+), 3 deletions(-) diff --git a/Orange/widgets/visualize/tests/test_owscatterplot.py b/Orange/widgets/visualize/tests/test_owscatterplot.py index 64fe07073a0..9fc85977a01 100644 --- a/Orange/widgets/visualize/tests/test_owscatterplot.py +++ b/Orange/widgets/visualize/tests/test_owscatterplot.py @@ -1,19 +1,21 @@ # Test methods with long descriptive names can omit docstrings # pylint: disable=missing-docstring,too-many-public-methods,protected-access +# pylint: disable=too-many-lines from unittest.mock import MagicMock, patch, Mock import numpy as np from AnyQt.QtCore import QRectF, Qt from AnyQt.QtWidgets import QToolTip +from AnyQt.QtGui import QColor from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable from Orange.widgets.tests.base import ( WidgetTest, WidgetOutputsTestMixin, datasets, ProjectionWidgetTestMixin ) from Orange.widgets.tests.utils import simulate +from Orange.widgets.utils.colorpalette import DefaultRGBColors from Orange.widgets.visualize.owscatterplot import ( - OWScatterPlot, ScatterPlotVizRank -) + OWScatterPlot, ScatterPlotVizRank, OWScatterPlotGraph) from Orange.widgets.visualize.utils.widget import MAX_CATEGORIES from Orange.widgets.widget import AttributeList @@ -735,12 +737,274 @@ def test_on_manual_change(self): selection = vizrank.rank_table.selectedIndexes() self.assertEqual(len(selection), 0) - def test_regression_line(self): + def test_regression_lines_appear(self): self.widget.graph.controls.show_reg_line.setChecked(True) + self.assertEqual(len(self.widget.graph.reg_line_items), 0) self.send_signal(self.widget.Inputs.data, self.data) + self.assertEqual(len(self.widget.graph.reg_line_items), 4) + simulate.combobox_activate_index(self.widget.controls.attr_color, 0) + self.assertEqual(len(self.widget.graph.reg_line_items), 1) data = self.data.copy() data[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) + self.assertEqual(len(self.widget.graph.reg_line_items), 0) + + def test_regression_line_coeffs(self): + widget = self.widget + graph = widget.graph + xy = np.array([[0, 0], [1, 0], [1, 2], [2, 2], + [0, 1], [1, 3], [2, 5]], dtype=np.float) + colors = np.array([0, 0, 0, 0, 1, 1, 1], dtype=np.float) + widget.get_coordinates_data = lambda: xy.T + widget.get_color_data = lambda: colors + widget.is_continuous_color = lambda: False + graph.palette = DefaultRGBColors + graph.controls.show_reg_line.setChecked(True) + + graph.update_regression_line() + + line1 = graph.reg_line_items[1] + self.assertEqual(line1.pos().x(), 0) + self.assertEqual(line1.pos().y(), 0) + self.assertEqual(line1.angle, 45) + self.assertEqual(line1.pen.color().getRgb()[:3], graph.palette[0]) + + line2 = graph.reg_line_items[2] + self.assertEqual(line2.pos().x(), 0) + self.assertEqual(line2.pos().y(), 1) + self.assertAlmostEqual(line2.angle, np.degrees(np.arctan2(2, 1))) + self.assertEqual(line2.pen.color().getRgb()[:3], graph.palette[1]) + + graph.orthonormal_regression = True + graph.update_regression_line() + + line1 = graph.reg_line_items[1] + self.assertEqual(line1.pos().x(), 0) + self.assertAlmostEqual(line1.pos().y(), -0.6180339887498949) + self.assertEqual(line1.angle, 58.28252558853899) + self.assertEqual(line1.pen.color().getRgb()[:3], graph.palette[0]) + + line2 = graph.reg_line_items[2] + self.assertEqual(line2.pos().x(), 0) + self.assertEqual(line2.pos().y(), 1) + self.assertAlmostEqual(line2.angle, np.degrees(np.arctan2(2, 1))) + self.assertEqual(line2.pen.color().getRgb()[:3], graph.palette[1]) + + def test_orthonormal_line(self): + color = QColor(1, 2, 3) + width = 42 + # Normal line + line = OWScatterPlotGraph._orthonormal_line( + np.array([0, 1, 1, 2]), np.array([0, 0, 2, 2]), color, width) + self.assertEqual(line.pos().x(), 0) + self.assertAlmostEqual(line.pos().y(), -0.6180339887498949) + self.assertEqual(line.angle, 58.28252558853899) + self.assertEqual(line.pen.color(), color) + self.assertEqual(line.pen.width(), width) + + # Normal line, negative slope + line = OWScatterPlotGraph._orthonormal_line( + np.array([1, 2, 3]), np.array([3, 2, 1]), color, width) + self.assertEqual(line.pos().x(), 1) + self.assertEqual(line.pos().y(), 3) + self.assertEqual(line.angle % 360, 315) + + # Horizontal line + line = OWScatterPlotGraph._orthonormal_line( + np.array([10, 11, 12]), np.array([42, 42, 42]), color, width) + self.assertEqual(line.pos().x(), 10) + self.assertEqual(line.pos().y(), 42) + self.assertEqual(line.angle, 0) + + # Vertical line + line = OWScatterPlotGraph._orthonormal_line( + np.array([42, 42, 42]), np.array([10, 11, 12]), color, width) + self.assertEqual(line.pos().x(), 42) + self.assertEqual(line.pos().y(), 10) + self.assertEqual(line.angle, 90) + + # No line because all points coincide + line = OWScatterPlotGraph._orthonormal_line( + np.array([1, 1, 1]), np.array([42, 42, 42]), color, width) + self.assertIsNone(line) + + # No line because the group is symmetric + line = OWScatterPlotGraph._orthonormal_line( + np.array([1, 1, 2, 2]), np.array([42, 5, 5, 42]), color, width) + self.assertIsNone(line) + + def test_regression_line(self): + color = QColor(1, 2, 3) + width = 42 + # Normal line + line = OWScatterPlotGraph._regression_line( + np.array([0, 1, 1, 2]), np.array([0, 0, 2, 2]), color, width) + self.assertEqual(line.pos().x(), 0) + self.assertAlmostEqual(line.pos().y(), 0) + self.assertEqual(line.angle, 45) + self.assertEqual(line.pen.color(), color) + self.assertEqual(line.pen.width(), width) + + # Normal line, negative slope + line = OWScatterPlotGraph._regression_line( + np.array([1, 2, 3]), np.array([3, 2, 1]), color, width) + self.assertEqual(line.pos().x(), 1) + self.assertEqual(line.pos().y(), 3) + self.assertEqual(line.angle % 360, 315) + + # Horizontal line + line = OWScatterPlotGraph._regression_line( + np.array([10, 11, 12]), np.array([42, 42, 42]), color, width) + self.assertEqual(line.pos().x(), 10) + self.assertEqual(line.pos().y(), 42) + self.assertEqual(line.angle, 0) + + # Vertical line + line = OWScatterPlotGraph._regression_line( + np.array([42, 42, 42]), np.array([10, 11, 12]), color, width) + self.assertIsNone(line) + + # No line because all points coincide + line = OWScatterPlotGraph._regression_line( + np.array([1, 1, 1]), np.array([42, 42, 42]), color, width) + self.assertIsNone(line) + + def test_add_line_calls_proper_regressor(self): + graph = self.widget.graph + graph._orthonormal_line = Mock(return_value=None) + graph._regression_line = Mock(return_value=None) + x, y, c, w = Mock(), Mock(), Mock(), Mock() + + graph.orthonormal_regression = True + graph._add_line(x, y, c, w) + graph._orthonormal_line.assert_called_once_with(x, y, c, w) + graph._regression_line.assert_not_called() + graph._orthonormal_line.reset_mock() + + graph.orthonormal_regression = False + graph._add_line(x, y, c, w) + graph._regression_line.assert_called_with(x, y, c, w) + graph._orthonormal_line.assert_not_called() + + def test_no_regression_line(self): + graph = self.widget.graph + graph._orthonormal_line = lambda *_: None + graph.orthonormal_regression = True + + graph.plot_widget.addItem = Mock() + + x, y, c, w = Mock(), Mock(), Mock(), Mock() + graph._add_line(x, y, c, w) + graph.plot_widget.addItem.assert_not_called() + self.assertEqual(graph.reg_line_items, []) + + def test_update_regression_line_calls_add_line(self): + widget = self.widget + graph = widget.graph + x, y = np.array([[0, 0], [1, 0], [1, 2], [2, 2], + [0, 1], [1, 3], [2, 5]], dtype=np.float).T + colors = np.array([0, 0, 0, 0, 1, 1, 1], dtype=np.float) + widget.get_coordinates_data = lambda: (x, y) + widget.get_color_data = lambda: colors + widget.is_continuous_color = lambda: False + graph.palette = DefaultRGBColors + graph.controls.show_reg_line.setChecked(True) + + graph._add_line = Mock() + + graph.update_regression_line() + (args1, _), (args2, _), (args3, _) = graph._add_line.call_args_list + np.testing.assert_equal(args1[0], x) + np.testing.assert_equal(args1[1], y) + self.assertEqual(args1[2], QColor("#505050")) + + np.testing.assert_equal(args2[0], x[:4]) + np.testing.assert_equal(args2[1], y[:4]) + self.assertEqual(args2[2], graph.palette[0]) + + np.testing.assert_equal(args3[0], x[4:]) + np.testing.assert_equal(args3[1], y[4:]) + self.assertEqual(args3[2], graph.palette[1]) + graph._add_line.reset_mock() + + # Continuous color - just a single line + widget.is_continuous_color = lambda: True + graph.update_regression_line() + graph._add_line.assert_called_once() + args1, _ = graph._add_line.call_args_list[0] + np.testing.assert_equal(args1[0], x) + np.testing.assert_equal(args1[1], y) + self.assertEqual(args1[2], QColor("#505050")) + graph._add_line.reset_mock() + widget.is_continuous_color = lambda: False + + # No palette - just a single line + graph.palette = None + graph.update_regression_line() + graph._add_line.assert_called_once() + graph._add_line.reset_mock() + graph.palette = DefaultRGBColors + + # Regression line is disabled + graph.show_reg_line = False + graph.update_regression_line() + graph._add_line.assert_not_called() + graph.show_reg_line = True + + # No colors - just one line + widget.get_color_data = lambda: None + graph.update_regression_line() + graph._add_line.assert_called_once() + graph._add_line.reset_mock() + + # No data + widget.get_coordinates_data = lambda: (None, None) + graph.update_regression_line() + graph._add_line.assert_not_called() + graph.show_reg_line = True + widget.get_coordinates_data = lambda: (x, y) + + # One color group contains just one point - skip that line + widget.get_color_data = lambda: np.array([0] + [1] * (len(x) - 1)) + + graph.update_regression_line() + (args1, _), (args2, _) = graph._add_line.call_args_list + np.testing.assert_equal(args1[0], x) + np.testing.assert_equal(args1[1], y) + self.assertEqual(args1[2], QColor("#505050")) + + np.testing.assert_equal(args2[0], x[1:]) + np.testing.assert_equal(args2[1], y[1:]) + self.assertEqual(args2[2], graph.palette[1]) + + def test_update_regression_line_is_called(self): + widget = self.widget + graph = widget.graph + urline = graph.update_regression_line = Mock() + + self.send_signal(widget.Inputs.data, self.data) + urline.assert_called_once() + urline.reset_mock() + + self.send_signal(widget.Inputs.data, None) + urline.assert_called_once() + urline.reset_mock() + + self.send_signal(widget.Inputs.data, self.data) + urline.assert_called_once() + urline.reset_mock() + + simulate.combobox_activate_index(self.widget.controls.attr_color, 0) + urline.assert_called_once() + urline.reset_mock() + + simulate.combobox_activate_index(self.widget.controls.attr_color, 2) + urline.assert_called_once() + urline.reset_mock() + + simulate.combobox_activate_index(self.widget.controls.attr_x, 3) + urline.assert_called_once() + urline.reset_mock() if __name__ == "__main__":