From 98e283978fd2d68ee28dc900613af7969d404d44 Mon Sep 17 00:00:00 2001 From: jernej-local Date: Thu, 2 Mar 2017 13:47:32 +0100 Subject: [PATCH] [FIX] owcorrespondence: Handle variables with one value Errors thrown: 1. IndexError: index 1 is out of bounds for axis 1 with size 1 2. SVD did not converge 3. Must specify at least one of rect, xRange, or yRange. (gave rect=) Problems caused by: 1) A discrete variable has fewer values in the table than in its domain 2) There is at least one NaN value in a column. --- .../widgets/unsupervised/owcorrespondence.py | 43 +++++++++++++------ .../tests/test_owcorrespondence.py | 24 ++++++++++- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/Orange/widgets/unsupervised/owcorrespondence.py b/Orange/widgets/unsupervised/owcorrespondence.py index df4849cac5f..07239d9f264 100644 --- a/Orange/widgets/unsupervised/owcorrespondence.py +++ b/Orange/widgets/unsupervised/owcorrespondence.py @@ -4,7 +4,7 @@ from AnyQt.QtWidgets import QListView, QApplication from AnyQt.QtGui import QBrush, QColor, QPainter -from AnyQt.QtCore import Qt, QEvent, QItemSelectionModel, QItemSelection +from AnyQt.QtCore import QEvent, QItemSelectionModel, QItemSelection import pyqtgraph as pg import Orange.data @@ -14,7 +14,6 @@ from Orange.widgets.utils import itemmodels, colorpalette from Orange.widgets.visualize.owscatterplotgraph import ScatterPlotItem -from Orange.widgets.io import FileFormat class ScatterPlotItem(pg.ScatterPlotItem): @@ -140,6 +139,12 @@ def _p_axes(self): def _var_changed(self): self.selected_var_indices = sorted( ind.row() for ind in self.varview.selectionModel().selectedRows()) + rfs = self.update_XY() + if rfs is not None: + if self.component_x >= rfs: + self.component_x = rfs-1 + if self.component_y >= rfs: + self.component_y = rfs-1 self._invalidate() def _component_changed(self): @@ -160,6 +165,15 @@ def customEvent(self, event): return super().customEvent(event) def _update_CA(self): + self.update_XY() + 
self.component_x, self.component_y = self.component_x, self.component_y + + self._setup_plot() + self._update_info() + + def update_XY(self): + self.axis_x_cb.clear() + self.axis_y_cb.clear() ca_vars = self.selected_vars() if len(ca_vars) == 0: return @@ -171,26 +185,24 @@ def _update_CA(self): ctable = contingency.get_contingency(self.data, *ca_vars[::-1]) self.ca = correspondence(ctable, ) + rfs = self.ca.row_factors.shape[1] axes = ["{}".format(i + 1) - for i in range(self.ca.row_factors.shape[1])] - self.axis_x_cb.clear() + for i in range(rfs)] self.axis_x_cb.addItems(axes) - self.axis_y_cb.clear() self.axis_y_cb.addItems(axes) - self.component_x, self.component_y = self.component_x, self.component_y - - self._setup_plot() - self._update_info() + return rfs def _setup_plot(self): self.plot.clear() - points = self.ca variables = self.selected_vars() colors = colorpalette.ColorPaletteGenerator(len(variables)) p_axes = self._p_axes() + if points == None: + return + if len(variables) == 2: row_points = self.ca.row_factors[:, p_axes] col_points = self.ca.col_factors[:, p_axes] @@ -220,7 +232,10 @@ def _setup_plot(self): item.setPos(point[0], point[1]) inertia = self.ca.inertia_of_axis() - inertia = 100 * inertia / numpy.sum(inertia) + if numpy.sum(inertia) == 0: + inertia = 100 * inertia + else: + inertia = 100 * inertia / numpy.sum(inertia) ax = self.plot.getAxis("bottom") ax.setLabel("Component {} ({:.1f}%)" @@ -236,7 +251,10 @@ def _update_info(self): fmt = ("Axis 1: {:.2f}\n" "Axis 2: {:.2f}") inertia = self.ca.inertia_of_axis() - inertia = 100 * inertia / numpy.sum(inertia) + if numpy.sum(inertia) == 0: + inertia = 100 * inertia + else: + inertia = 100 * inertia / numpy.sum(inertia) ax1, ax2 = self._p_axes() self.infotext.setText(fmt.format(inertia[ax1], inertia[ax2])) @@ -314,6 +332,7 @@ def correspondence(A): E = row_sum * col_sum D_r, D_c = row_sum.ravel() ** -1, col_sum.ravel() ** -1 + D_r, D_c = numpy.nan_to_num(D_r), numpy.nan_to_num(D_c) def gsvd(M, Wu, 
Wv): assert len(M.shape) == 2 diff --git a/Orange/widgets/unsupervised/tests/test_owcorrespondence.py b/Orange/widgets/unsupervised/tests/test_owcorrespondence.py index d8f81421cdb..84251c5dff6 100644 --- a/Orange/widgets/unsupervised/tests/test_owcorrespondence.py +++ b/Orange/widgets/unsupervised/tests/test_owcorrespondence.py @@ -1,6 +1,6 @@ # Test methods with long descriptive names can omit docstrings # pylint: disable=missing-docstring -from Orange.data import Table +from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable from Orange.widgets.tests.base import WidgetTest from Orange.widgets.unsupervised.owcorrespondence \ import OWCorrespondenceAnalysis @@ -15,3 +15,25 @@ def test_no_data(self): self.send_signal("Data", Table(Table("iris").domain)) self.assertTrue(self.widget.Error.empty_data.is_shown()) self.assertIsNone(self.widget.data) + + def test_data_values_in_column(self): + """ + Check that the widget does not crash when: + 1) A discrete variable has fewer values in the table than in its domain + 2) There is at least one NaN value in a column. + GH-2066 + """ + table = Table( + Domain( + [ContinuousVariable("a"), + DiscreteVariable("b", values=["t", "f"]), + DiscreteVariable("c", values=["y", "n"]), + DiscreteVariable("d", values=["k", "l", "z"])] + ), + list(zip( + [42.48, 16.84, 15.23, 23.8], + ["t", "t", "", "f"], + "yyyy", + "klkk" + ))) + self.send_signal("Data", table)