From fe28eafd0af6dc0aace878f59988835bfcaa6338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 15 Feb 2019 10:21:04 +0100 Subject: [PATCH] OwLouvain: Properly compare new data with old without warnings --- Orange/statistics/util.py | 17 +++++++++++++++ Orange/tests/test_statistics.py | 21 ++++++++++++++++++- .../unsupervised/owlouvainclustering.py | 4 ++-- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index f4b44b1e4ea..94207b2ec62 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -13,6 +13,23 @@ from sklearn.utils.sparsefuncs import mean_variance_axis +def sparse_array_equal(x1, x2): + """Check if two sparse arrays are equal.""" + if not sp.issparse(x1): + raise TypeError("`x1` must be sparse.") + if not sp.issparse(x2): + raise TypeError("`x2` must be sparse.") + + return x1.shape == x2.shape and (x1 != x2).nnz == 0 + + +def array_equal(x1, x2): + """Equivalent of np.array_equal that properly handles sparse matrices.""" + if sp.issparse(x1) and sp.issparse(x2): + return sparse_array_equal(x1, x2) + return np.array_equal(x1, x2) + + def _count_nans_per_row_sparse(X, weights, dtype=None): """ Count the number of nans (undefined) values per row. """ if weights is not None: diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index 99be289556e..722025774d3 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -9,7 +9,7 @@ from Orange.statistics.util import bincount, countnans, contingency, digitize, \ mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \ - unique, var, nanstd, nanvar, nanmode + unique, var, nanstd, nanvar, nanmode, array_equal from sklearn.utils import check_random_state @@ -590,6 +590,25 @@ def test_nanunique_ignores_nans_in_counts(self, array): np.testing.assert_equal(nanunique(x, return_counts=True)[1], expected) +class TestArrayEqual(unittest.TestCase): + @dense_sparse + def test_same_matrices(self, array): + x = array([0, 1, 0, 0, 2]) + self.assertTrue(array_equal(x, x)) + + @dense_sparse + def test_with_different_shapes(self, array): + x = array(np.eye(4)) + y = array(np.eye(5)) + self.assertFalse(array_equal(x, y)) + + @dense_sparse + def test_with_different_values(self, array): + x = array([0, 1, 0, 0, 2]) + y = array([0, 3, 0, 0, 2]) + self.assertFalse(array_equal(x, y)) + + class TestNanModeAppVeyor(unittest.TestCase): def test_appveyour_still_not_onscipy_1_2_0(self): import scipy diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index 3e6f1cecf9c..f1509c55829 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -19,6 +19,7 @@ from Orange.data.util import get_unique_names from Orange import preprocess from Orange.projection import PCA +from Orange.statistics import util as ut from Orange.widgets import widget, gui, report from Orange.widgets.settings import DomainContextHandler, ContextSetting, \ Setting @@ -407,8 +408,7 @@ def set_data(self, data): # Make sure to properly enable/disable slider based on `apply_pca` setting self.controls.pca_components.setEnabled(self.apply_pca) - # If X hasn't changed, there's no reason to recompute clusters - if prev_data and self.data and np.array_equal(self.data.X, prev_data.X): + if prev_data and self.data and ut.array_equal(prev_data.X, self.data.X): if self.auto_commit: self._send_data() return