From 50663730586e5ab6219c4b369a40d1775e979bf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Mon, 4 Feb 2019 16:20:09 +0100 Subject: [PATCH 1/7] Normalize: Add option to skip zero-centering --- Orange/preprocess/normalize.py | 19 ++++++++++++++++--- Orange/preprocess/preprocess.py | 13 +++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Orange/preprocess/normalize.py b/Orange/preprocess/normalize.py index a7058695a3d..a3f27d5f89b 100644 --- a/Orange/preprocess/normalize.py +++ b/Orange/preprocess/normalize.py @@ -11,21 +11,24 @@ class Normalizer(Reprable): def __init__(self, zero_based=True, norm_type=Normalize.NormalizeBySD, - transform_class=False): + transform_class=False, + center=True): self.zero_based = zero_based self.norm_type = norm_type self.transform_class = transform_class + self.center = center def __call__(self, data): - dists = distribution.get_distributions(data) new_attrs = [self.normalize(dists[i], var) for (i, var) in enumerate(data.domain.attributes)] + new_class_vars = data.domain.class_vars if self.transform_class: attr_len = len(data.domain.attributes) new_class_vars = [self.normalize(dists[i + attr_len], var) for (i, var) in enumerate(data.domain.class_vars)] + domain = Domain(new_attrs, new_class_vars, data.domain.metas) return data.transform(domain) @@ -41,7 +44,17 @@ def normalize_by_sd(self, dist, var): avg, sd = (dist.mean(), dist.standard_deviation()) if dist.size else (0, 1) if sd == 0: sd = 1 - return ContinuousVariable(var.name, compute_value=Norm(var, avg, 1 / sd), sparse=var.sparse) + + if self.center: + compute_val = Norm(var, avg, 1 / sd) + else: + compute_val = Norm(var, 0, 1 / sd) + + return ContinuousVariable( + var.name, + compute_value=compute_val, + sparse=var.sparse, + ) def normalize_by_span(self, dist, var): dma, dmi = dist.max(), dist.min() diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index f4cde0f364f..b8bfc0280da 100644 --- 
a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -273,6 +273,8 @@ class Normalize(Preprocess): It determines the interval for normalized continuous variables (either [-1, 1] or [0, 1]). + This has no effect when `norm_type` is set to `NormalizeBySD`. + norm_type : NormTypes (default: Normalize.NormalizeBySD) Normalization type. If Normalize.NormalizeBySD, the values are replaced with standardized values by subtracting the average @@ -286,6 +288,9 @@ class Normalize(Preprocess): transform_class : bool (default=False) If True the class is normalized as well. + center : bool (default=True) + Whether or not to center the data so it has mean zero. + Examples -------- >>> from Orange.data import Table @@ -301,10 +306,12 @@ class Normalize(Preprocess): def __init__(self, zero_based=True, norm_type=NormalizeBySD, - transform_class=False): + transform_class=False, + center=True): self.zero_based = zero_based self.norm_type = norm_type self.transform_class = transform_class + self.center = center def __call__(self, data): """ @@ -334,7 +341,9 @@ def __call__(self, data): normalizer = normalize.Normalizer( zero_based=self.zero_based, norm_type=self.norm_type, - transform_class=self.transform_class) + transform_class=self.transform_class, + center=self.center, + ) return normalizer(data) From 5001e4db7da376418c96ffc002985403b32947c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Mon, 4 Feb 2019 17:18:05 +0100 Subject: [PATCH 2/7] Move table_dense_sparse test utility to Orange.widgets.tests.utils --- .../data/tests/test_owfeaturestatistics.py | 18 +++------------ Orange/widgets/tests/utils.py | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/Orange/widgets/data/tests/test_owfeaturestatistics.py b/Orange/widgets/data/tests/test_owfeaturestatistics.py index 5d8faa288e4..3c8c22bddfd 100644 --- a/Orange/widgets/data/tests/test_owfeaturestatistics.py +++ 
b/Orange/widgets/data/tests/test_owfeaturestatistics.py @@ -1,9 +1,9 @@ import datetime import warnings from collections import namedtuple -from functools import wraps, partial +from functools import partial from itertools import chain -from typing import Callable, List +from typing import List import numpy as np from AnyQt.QtCore import QItemSelection, QItemSelectionRange, \ @@ -12,7 +12,7 @@ from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \ DiscreteVariable, TimeVariable from Orange.widgets.tests.base import WidgetTest, datasets -from Orange.widgets.tests.utils import simulate +from Orange.widgets.tests.utils import simulate, table_dense_sparse from Orange.widgets.data.owfeaturestatistics import \ OWFeatureStatistics @@ -175,18 +175,6 @@ def make_table(attributes, target=None, metas=None): ) -def table_dense_sparse(test_case): - # type: (Callable) -> Callable - """Run a single test case on both dense and sparse Orange tables.""" - - @wraps(test_case) - def _wrapper(self): - test_case(self, lambda table: table.to_dense()) - test_case(self, lambda table: table.to_sparse()) - - return _wrapper - - class TestVariousDataSets(WidgetTest): def setUp(self): self.widget = self.create_widget( diff --git a/Orange/widgets/tests/utils.py b/Orange/widgets/tests/utils.py index cd502ce1991..b21097002e4 100644 --- a/Orange/widgets/tests/utils.py +++ b/Orange/widgets/tests/utils.py @@ -1,4 +1,6 @@ import sys +from functools import wraps + import warnings import contextlib @@ -317,3 +319,24 @@ def mouseMove(widget, pos=QPoint(), delay=-1): # pragma: no-cover QTest.qWait(delay) QApplication.sendEvent(widget, me) + + +def table_dense_sparse(test_case): + # type: (Callable) -> Callable + """Run a single test case on both dense and sparse Orange tables. + + Examples + -------- + >>> @table_dense_sparse + ... def test_something(self, prepare_table): + ... data: Table # The table you want to test on + ... 
data = prepare_table(data) # This converts the table to dense/sparse + + """ + + @wraps(test_case) + def _wrapper(self): + test_case(self, lambda table: table.to_dense()) + test_case(self, lambda table: table.to_sparse()) + + return _wrapper From a624258866327e73b29f7687f7d825a9eeb870e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Mon, 4 Feb 2019 17:21:10 +0100 Subject: [PATCH 3/7] OwLouvain: Enable normalization for sparse data --- .../unsupervised/owlouvainclustering.py | 55 ++++++++++++++---- .../unsupervised/tests/test_owlouvain.py | 56 +++++++++++++++++++ 2 files changed, 99 insertions(+), 12 deletions(-) diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index 9cbff27dc07..3ad293fa8c5 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -17,6 +17,7 @@ from Orange.clustering.louvain import table_to_knn_graph, Louvain from Orange.data import Table, DiscreteVariable from Orange.data.util import get_unique_names +from Orange import preprocess from Orange.projection import PCA from Orange.widgets import widget, gui, report from Orange.widgets.settings import DomainContextHandler, ContextSetting, \ @@ -66,6 +67,7 @@ class Outputs: apply_pca = ContextSetting(True) pca_components = ContextSetting(_DEFAULT_PCA_COMPONENTS) + normalize = ContextSetting(True) metric_idx = ContextSetting(0) k_neighbors = ContextSetting(_DEFAULT_K_NEIGHBORS) resolution = ContextSetting(1.) 
@@ -101,13 +103,17 @@ def __init__(self): info_box = gui.vBox(self.controlArea, "Info") self.info_label = gui.widgetLabel(info_box, "No data on input.") # type: QLabel - pca_box = gui.vBox(self.controlArea, "PCA Preprocessing") + preprocessing_box = gui.vBox(self.controlArea, "Preprocessing") + self.normalize_cbx = gui.checkBox( + preprocessing_box, self, "normalize", label="Normalize data", + callback=self._invalidate_preprocessed_data, + ) # type: QCheckBox self.apply_pca_cbx = gui.checkBox( - pca_box, self, "apply_pca", label="Apply PCA preprocessing", + preprocessing_box, self, "apply_pca", label="Apply PCA preprocessing", callback=self._invalidate_graph, ) # type: QCheckBox self.pca_components_slider = gui.hSlider( - pca_box, self, "pca_components", label="Components: ", minValue=2, + preprocessing_box, self, "pca_components", label="PCA Components: ", minValue=2, maxValue=_MAX_PCA_COMPONENTS, callback=self._invalidate_pca_projection, tracking=False ) # type: QSlider @@ -139,6 +145,14 @@ def __init__(self): callback=lambda: self._on_auto_commit_changed(), ) # type: QWidget + def _invalidate_preprocessed_data(self): + self.preprocessed_data = None + self._invalidate_pca_projection() + # If we don't apply PCA, this still invalidates the graph, otherwise + # this change won't be propagated further + if not self.apply_pca: + self._invalidate_graph() + def _invalidate_pca_projection(self): self.pca_projection = None if not self.apply_pca: @@ -190,6 +204,7 @@ def cancel(self): self.__set_state_ready() def commit(self): + # pylint: disable=too-many-branches self.__commit_timer.stop() self.__invalidated = False self._set_modified(False) @@ -215,8 +230,11 @@ def commit(self): # Preprocess the dataset if self.preprocessed_data is None: - louvain = Louvain(random_state=0) - self.preprocessed_data = louvain.preprocess(self.data) + if self.normalize: + normalizer = preprocess.Normalize(center=False) + self.preprocessed_data = normalizer(self.data) + else: + 
self.preprocessed_data = self.data state = TaskState(self) @@ -243,8 +261,8 @@ def commit(self): if graph is None: task = partial( run_on_data, data, pca_components=pca_components, - k_neighbors=k_neighbors, metric=metric, - resolution=self.resolution, state=state + normalize=self.normalize, k_neighbors=k_neighbors, + metric=metric, resolution=self.resolution, state=state, ) else: task = partial( @@ -381,6 +399,7 @@ def _send_data(self): @Inputs.data def set_data(self, data): + # pylint: disable=too-many-branches self.closeContext() self.Error.clear() @@ -439,6 +458,7 @@ def send_report(self): pca += report.plural(", {number} component{s}", self.pca_components) self.report_items(( + ("Normalize data", report.bool_str(self.normalize)), ("PCA preprocessing", pca), ("Metric", METRICS[self.metric_idx][0]), ("k neighbors", self.k_neighbors), @@ -520,6 +540,7 @@ class InteruptRequested(BaseException): class Results(namespace): pca_projection = None # type: Optional[Table] pca_components = None # type: Optional[int] + normalize = None # type: Optional[bool] k_neighbors = None # type: Optional[int] metric = None # type: Optional[str] graph = None # type: Optional[nx.Graph] @@ -527,8 +548,8 @@ class Results(namespace): partition = None # type: Optional[np.ndarray] -def run_on_data(data, pca_components, k_neighbors, metric, resolution, state): - # type: (Table, Optional[int], int, str, float, TaskState) -> Results +def run_on_data(data, normalize, pca_components, k_neighbors, metric, resolution, state): + # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results """ Run the louvain clustering on `data`. @@ -539,6 +560,8 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state): ---------- data : Table Data table + normalize : bool + If `True`, the data is first normalized before computing PCA. pca_components : Optional[int] If not `None` then the data is first projected onto first `pca_components` principal components.
@@ -556,16 +579,18 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state): """ state = state # type: TaskState res = Results( - pca_components=pca_components, k_neighbors=k_neighbors, metric=metric, - resolution=resolution, + normalize=normalize, pca_components=pca_components, + k_neighbors=k_neighbors, metric=metric, resolution=resolution, ) step = 0 if state.is_interuption_requested(): return res + if pca_components is not None: steps = 3 state.set_status("Computing PCA...") pca = PCA(n_components=pca_components, random_state=0) + data = res.pca_projection = pca(data)(data) assert isinstance(data, Table) state.set_partial_results(("pca_projection", res.pca_projection)) @@ -579,6 +604,13 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state): state.set_progress_value(100. * step / steps) state.set_status("Building graph...") + # Apply Louvain preprocessing before converting the table into a graph + louvain = Louvain(resolution=resolution, random_state=0) + data = louvain.preprocess(data) + + if state.is_interuption_requested(): + return res + def pcallback(val): state.set_progress_value((100. 
* step + 100 * val) / steps) if state.is_interuption_requested(): @@ -600,7 +632,6 @@ def pcallback(val): if state.is_interuption_requested(): return res - louvain = Louvain(resolution=resolution, random_state=0) res.partition = louvain.fit_predict(graph) state.set_partial_results(("partition", res.partition)) return res diff --git a/Orange/widgets/unsupervised/tests/test_owlouvain.py b/Orange/widgets/unsupervised/tests/test_owlouvain.py index b686ca37e1e..ee3b7bc1c67 100644 --- a/Orange/widgets/unsupervised/tests/test_owlouvain.py +++ b/Orange/widgets/unsupervised/tests/test_owlouvain.py @@ -3,8 +3,11 @@ import numpy as np from Orange.data import Table, Domain, ContinuousVariable +from Orange.preprocess import Normalize from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.tests.utils import table_dense_sparse from Orange.widgets.unsupervised.owlouvainclustering import OWLouvainClustering +from sklearn.utils import check_random_state # Deterministic tests np.random.seed(42) @@ -178,3 +181,56 @@ def test_deterministic_clustering(self): # Ensure that clustering was the same in both instances np.testing.assert_equal(result1.metas, result2.metas) + + @table_dense_sparse + def test_normalize_data(self, prepare_table): + """Check that normalization is called at the proper times.""" + data = prepare_table(self.iris) + + # Enable checkbox + self.widget.controls.normalize.setChecked(True) + self.assertTrue(self.widget.controls.normalize.isChecked()) + with patch("Orange.preprocess.Normalize", wraps=Normalize) as normalize: + self.send_signal(self.widget.Inputs.data, data) + self.wait_until_stop_blocking() + self.assertTrue(self.widget.controls.normalize.isEnabled()) + normalize.assert_called_once() + + # Disable checkbox + self.widget.controls.normalize.setChecked(False) + self.assertFalse(self.widget.controls.normalize.isChecked()) + with patch("Orange.preprocess.Normalize", wraps=Normalize) as normalize: + self.send_signal(self.widget.Inputs.data, data) + 
self.wait_until_stop_blocking() + self.assertTrue(self.widget.controls.normalize.isEnabled()) + normalize.assert_not_called() + + def test_dense_and_sparse_return_same_result(self): + """Check that Louvain clustering returns identical results for both + dense and sparse data.""" + random_state = check_random_state(42) + + # Randomly set some values to zero + dense_data = self.iris + mask = random_state.beta(1, 2, size=self.iris.X.shape) > 0.5 + dense_data.X[mask] = 0 + sparse_data = dense_data.to_sparse() + + def _compute_clustering(data): + self.send_signal(self.widget.Inputs.data, data) + self.wait_until_stop_blocking() + result = self.get_output(self.widget.Outputs.annotated_data) + self.send_signal(self.widget.Inputs.data, None) + return result + + # Disable normalization + self.widget.controls.normalize.setChecked(False) + dense_result = _compute_clustering(dense_data) + sparse_result = _compute_clustering(sparse_data) + np.testing.assert_equal(dense_result.metas, sparse_result.metas) + + # Enable normalization + self.widget.controls.normalize.setChecked(True) + dense_result = _compute_clustering(dense_data) + sparse_result = _compute_clustering(sparse_data) + np.testing.assert_equal(dense_result.metas, sparse_result.metas) From 9e3a237ade4539d74773cc3c6873e31d250f8f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Mon, 4 Feb 2019 18:22:59 +0100 Subject: [PATCH 4/7] OwLouvain: Disable PCA slider if Apply PCA is unchecked --- Orange/widgets/unsupervised/owlouvainclustering.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index 3ad293fa8c5..18ae49bf08a 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -110,7 +110,7 @@ def __init__(self): ) # type: QCheckBox self.apply_pca_cbx = gui.checkBox( preprocessing_box, self, "apply_pca", 
label="Apply PCA preprocessing", - callback=self._invalidate_graph, + callback=self._apply_pca_changed, ) # type: QCheckBox self.pca_components_slider = gui.hSlider( preprocessing_box, self, "pca_components", label="PCA Components: ", minValue=2, @@ -145,6 +145,10 @@ def __init__(self): callback=lambda: self._on_auto_commit_changed(), ) # type: QWidget + def _apply_pca_changed(self): + self.controls.pca_components.setEnabled(self.apply_pca) + self._invalidate_graph() + def _invalidate_preprocessed_data(self): self.preprocessed_data = None self._invalidate_pca_projection() @@ -399,12 +403,13 @@ def _send_data(self): @Inputs.data def set_data(self, data): - # pylint: disable=too-many-branches self.closeContext() self.Error.clear() prev_data, self.data = self.data, data self.openContext(self.data) + # Make sure to properly enable/disable slider based on `apply_pca` setting + self.controls.pca_components.setEnabled(self.apply_pca) # If X hasn't changed, there's no reason to recompute clusters if prev_data and self.data and np.array_equal(self.data.X, prev_data.X): From c1de7d902e72f3f21ec29aa06fb3c8f23b8d2f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Sun, 10 Feb 2019 14:23:52 +0100 Subject: [PATCH 5/7] Preprocess: Fixup docstrings for Normalize --- Orange/preprocess/preprocess.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index b8bfc0280da..f4a52bb0531 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -269,12 +269,12 @@ class Normalize(Preprocess): Parameters ---------- zero_based : bool (default=True) + Only used when `norm_type=NormalizeBySpan`. + Determines the value used as the “low” value of the variable. It determines the interval for normalized continuous variables (either [-1, 1] or [0, 1]). - This has no effect when `norm_type` is set to `NormalizeBySD`. 
- norm_type : NormTypes (default: Normalize.NormalizeBySD) Normalization type. If Normalize.NormalizeBySD, the values are replaced with standardized values by subtracting the average @@ -289,6 +289,8 @@ class Normalize(Preprocess): If True the class is normalized as well. center : bool (default=True) + Only used when `norm_type=NormalizeBySD`. + Whether or not to center the data so it has mean zero. Examples From cd31ed53916101c8f3e9bae6571614bfbe3dd1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Sun, 10 Feb 2019 14:24:36 +0100 Subject: [PATCH 6/7] OwLouvain: Move data preprocessing to separate function --- .../unsupervised/owlouvainclustering.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index 18ae49bf08a..3e6f1cecf9c 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -145,6 +145,14 @@ def __init__(self): callback=lambda: self._on_auto_commit_changed(), ) # type: QWidget + def _preprocess_data(self): + if self.preprocessed_data is None: + if self.normalize: + normalizer = preprocess.Normalize(center=False) + self.preprocessed_data = normalizer(self.data) + else: + self.preprocessed_data = self.data + def _apply_pca_changed(self): self.controls.pca_components.setEnabled(self.apply_pca) self._invalidate_graph() @@ -208,11 +216,9 @@ def cancel(self): self.__set_state_ready() def commit(self): - # pylint: disable=too-many-branches self.__commit_timer.stop() self.__invalidated = False self._set_modified(False) - self.Error.clear() # Cancel current running task self.__cancel_task(wait=False) @@ -221,24 +227,14 @@ def commit(self): self.__set_state_ready() return - # Make sure the dataset is ok - if len(self.data.domain.attributes) < 1: - self.Error.empty_dataset() - self.__set_state_ready() - return + self.Error.clear() if 
self.partition is not None: self.__set_state_ready() self._send_data() return - # Preprocess the dataset - if self.preprocessed_data is None: - if self.normalize: - normalizer = preprocess.Normalize(center=False) - self.preprocessed_data = normalizer(self.data) - else: - self.preprocessed_data = self.data + self._preprocess_data() state = TaskState(self) @@ -425,6 +421,12 @@ def set_data(self, data): # Clear internal state self.clear() self._invalidate_pca_projection() + + # Make sure the dataset is ok + if self.data is not None and len(self.data.domain.attributes) < 1: + self.Error.empty_dataset() + self.data = None + if self.data is None: return From fe28eafd0af6dc0aace878f59988835bfcaa6338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 15 Feb 2019 10:21:04 +0100 Subject: [PATCH 7/7] OwLouvain: Properly compare new data with old without warnings --- Orange/statistics/util.py | 17 +++++++++++++++ Orange/tests/test_statistics.py | 21 ++++++++++++++++++- .../unsupervised/owlouvainclustering.py | 4 ++-- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index f4b44b1e4ea..94207b2ec62 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -13,6 +13,23 @@ from sklearn.utils.sparsefuncs import mean_variance_axis +def sparse_array_equal(x1, x2): + """Check if two sparse arrays are equal.""" + if not sp.issparse(x1): + raise TypeError("`x1` must be sparse.") + if not sp.issparse(x2): + raise TypeError("`x2` must be sparse.") + + return x1.shape == x2.shape and (x1 != x2).nnz == 0 + + +def array_equal(x1, x2): + """Equivalent of np.array_equal that properly handles sparse matrices.""" + if sp.issparse(x1) and sp.issparse(x2): + return sparse_array_equal(x1, x2) + return np.array_equal(x1, x2) + + def _count_nans_per_row_sparse(X, weights, dtype=None): """ Count the number of nans (undefined) values per row. 
""" if weights is not None: diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index 99be289556e..722025774d3 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -9,7 +9,7 @@ from Orange.statistics.util import bincount, countnans, contingency, digitize, \ mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \ - unique, var, nanstd, nanvar, nanmode + unique, var, nanstd, nanvar, nanmode, array_equal from sklearn.utils import check_random_state @@ -590,6 +590,25 @@ def test_nanunique_ignores_nans_in_counts(self, array): np.testing.assert_equal(nanunique(x, return_counts=True)[1], expected) +class TestArrayEqual(unittest.TestCase): + @dense_sparse + def test_same_matrices(self, array): + x = array([0, 1, 0, 0, 2]) + self.assertTrue(array_equal(x, x)) + + @dense_sparse + def test_with_different_shapes(self, array): + x = array(np.eye(4)) + y = array(np.eye(5)) + self.assertFalse(array_equal(x, y)) + + @dense_sparse + def test_with_different_values(self, array): + x = array([0, 1, 0, 0, 2]) + y = array([0, 3, 0, 0, 2]) + self.assertFalse(array_equal(x, y)) + + class TestNanModeAppVeyor(unittest.TestCase): def test_appveyour_still_not_onscipy_1_2_0(self): import scipy diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index 3e6f1cecf9c..f1509c55829 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -19,6 +19,7 @@ from Orange.data.util import get_unique_names from Orange import preprocess from Orange.projection import PCA +from Orange.statistics import util as ut from Orange.widgets import widget, gui, report from Orange.widgets.settings import DomainContextHandler, ContextSetting, \ Setting @@ -407,8 +408,7 @@ def set_data(self, data): # Make sure to properly enable/disable slider based on `apply_pca` setting 
self.controls.pca_components.setEnabled(self.apply_pca) - # If X hasn't changed, there's no reason to recompute clusters - if prev_data and self.data and np.array_equal(self.data.X, prev_data.X): + if prev_data and self.data and ut.array_equal(prev_data.X, self.data.X): if self.auto_commit: self._send_data() return