From 50663730586e5ab6219c4b369a40d1775e979bf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Mon, 4 Feb 2019 16:20:09 +0100
Subject: [PATCH 1/7] Normalize: Add option to skip zero-centering

---
 Orange/preprocess/normalize.py  | 19 ++++++++++++++++---
 Orange/preprocess/preprocess.py | 13 +++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/Orange/preprocess/normalize.py b/Orange/preprocess/normalize.py
index a7058695a3d..a3f27d5f89b 100644
--- a/Orange/preprocess/normalize.py
+++ b/Orange/preprocess/normalize.py
@@ -11,21 +11,24 @@ class Normalizer(Reprable):
     def __init__(self,
                  zero_based=True,
                  norm_type=Normalize.NormalizeBySD,
-                 transform_class=False):
+                 transform_class=False,
+                 center=True):
         self.zero_based = zero_based
         self.norm_type = norm_type
         self.transform_class = transform_class
+        self.center = center
 
     def __call__(self, data):
-
         dists = distribution.get_distributions(data)
         new_attrs = [self.normalize(dists[i], var) for
                      (i, var) in enumerate(data.domain.attributes)]
+
         new_class_vars = data.domain.class_vars
         if self.transform_class:
             attr_len = len(data.domain.attributes)
             new_class_vars = [self.normalize(dists[i + attr_len], var) for
                               (i, var) in enumerate(data.domain.class_vars)]
+
         domain = Domain(new_attrs, new_class_vars, data.domain.metas)
         return data.transform(domain)
 
@@ -41,7 +44,17 @@ def normalize_by_sd(self, dist, var):
         avg, sd = (dist.mean(), dist.standard_deviation()) if dist.size else (0, 1)
         if sd == 0:
             sd = 1
-        return ContinuousVariable(var.name, compute_value=Norm(var, avg, 1 / sd), sparse=var.sparse)
+
+        if self.center:
+            compute_val = Norm(var, avg, 1 / sd)
+        else:
+            compute_val = Norm(var, 0, 1 / sd)
+
+        return ContinuousVariable(
+            var.name,
+            compute_value=compute_val,
+            sparse=var.sparse,
+        )
 
     def normalize_by_span(self, dist, var):
         dma, dmi = dist.max(), dist.min()
diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py
index f4cde0f364f..b8bfc0280da 100644
--- a/Orange/preprocess/preprocess.py
+++ b/Orange/preprocess/preprocess.py
@@ -273,6 +273,8 @@ class Normalize(Preprocess):
         It determines the interval for normalized continuous variables
         (either [-1, 1] or [0, 1]).
 
+        This has no effect when `norm_type` is set to `NormalizeBySD`.
+
     norm_type : NormTypes (default: Normalize.NormalizeBySD)
         Normalization type. If Normalize.NormalizeBySD, the values are
         replaced with standardized values by subtracting the average
@@ -286,6 +288,9 @@ class Normalize(Preprocess):
     transform_class : bool (default=False)
         If True the class is normalized as well.
 
+    center : bool (default=True)
+        Whether or not to center the data so it has mean zero.
+
     Examples
     --------
     >>> from Orange.data import Table
@@ -301,10 +306,12 @@ class Normalize(Preprocess):
     def __init__(self,
                  zero_based=True,
                  norm_type=NormalizeBySD,
-                 transform_class=False):
+                 transform_class=False,
+                 center=True):
         self.zero_based = zero_based
         self.norm_type = norm_type
         self.transform_class = transform_class
+        self.center = center
 
     def __call__(self, data):
         """
@@ -334,7 +341,9 @@ def __call__(self, data):
         normalizer = normalize.Normalizer(
             zero_based=self.zero_based,
             norm_type=self.norm_type,
-            transform_class=self.transform_class)
+            transform_class=self.transform_class,
+            center=self.center,
+        )
         return normalizer(data)
 
 

From 5001e4db7da376418c96ffc002985403b32947c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Mon, 4 Feb 2019 17:18:05 +0100
Subject: [PATCH 2/7] Move table_dense_sparse test utility to
 Orange.widgets.tests.utils

---
 .../data/tests/test_owfeaturestatistics.py    | 18 +++------------
 Orange/widgets/tests/utils.py                 | 23 +++++++++++++++++++
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/Orange/widgets/data/tests/test_owfeaturestatistics.py b/Orange/widgets/data/tests/test_owfeaturestatistics.py
index 5d8faa288e4..3c8c22bddfd 100644
--- a/Orange/widgets/data/tests/test_owfeaturestatistics.py
+++ b/Orange/widgets/data/tests/test_owfeaturestatistics.py
@@ -1,9 +1,9 @@
 import datetime
 import warnings
 from collections import namedtuple
-from functools import wraps, partial
+from functools import partial
 from itertools import chain
-from typing import Callable, List
+from typing import List
 
 import numpy as np
 from AnyQt.QtCore import QItemSelection, QItemSelectionRange, \
@@ -12,7 +12,7 @@
 from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \
     DiscreteVariable, TimeVariable
 from Orange.widgets.tests.base import WidgetTest, datasets
-from Orange.widgets.tests.utils import simulate
+from Orange.widgets.tests.utils import simulate, table_dense_sparse
 from Orange.widgets.data.owfeaturestatistics import \
     OWFeatureStatistics
 
@@ -175,18 +175,6 @@ def make_table(attributes, target=None, metas=None):
     )
 
 
-def table_dense_sparse(test_case):
-    # type: (Callable) -> Callable
-    """Run a single test case on both dense and sparse Orange tables."""
-
-    @wraps(test_case)
-    def _wrapper(self):
-        test_case(self, lambda table: table.to_dense())
-        test_case(self, lambda table: table.to_sparse())
-
-    return _wrapper
-
-
 class TestVariousDataSets(WidgetTest):
     def setUp(self):
         self.widget = self.create_widget(
diff --git a/Orange/widgets/tests/utils.py b/Orange/widgets/tests/utils.py
index cd502ce1991..b21097002e4 100644
--- a/Orange/widgets/tests/utils.py
+++ b/Orange/widgets/tests/utils.py
@@ -1,4 +1,6 @@
 import sys
+from functools import wraps
+
 import warnings
 import contextlib
 
@@ -317,3 +319,24 @@ def mouseMove(widget, pos=QPoint(), delay=-1):  # pragma: no-cover
         QTest.qWait(delay)
 
     QApplication.sendEvent(widget, me)
+
+
+def table_dense_sparse(test_case):
+    # type: (Callable) -> Callable
+    """Run a single test case on both dense and sparse Orange tables.
+
+    Examples
+    --------
+    >>> @table_dense_sparse
+    ... def test_something(self, prepare_table):
+    ...     data: Table  # The table you want to test on
+    ...     data = prepare_table(data)  # This converts the table to dense/sparse
+
+    """
+
+    @wraps(test_case)
+    def _wrapper(self):
+        test_case(self, lambda table: table.to_dense())
+        test_case(self, lambda table: table.to_sparse())
+
+    return _wrapper

From a624258866327e73b29f7687f7d825a9eeb870e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Mon, 4 Feb 2019 17:21:10 +0100
Subject: [PATCH 3/7] OwLouvain: Enable normalization for sparse data

---
 .../unsupervised/owlouvainclustering.py       | 55 ++++++++++++++----
 .../unsupervised/tests/test_owlouvain.py      | 56 +++++++++++++++++++
 2 files changed, 99 insertions(+), 12 deletions(-)

diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py
index 9cbff27dc07..3ad293fa8c5 100644
--- a/Orange/widgets/unsupervised/owlouvainclustering.py
+++ b/Orange/widgets/unsupervised/owlouvainclustering.py
@@ -17,6 +17,7 @@
 from Orange.clustering.louvain import table_to_knn_graph, Louvain
 from Orange.data import Table, DiscreteVariable
 from Orange.data.util import get_unique_names
+from Orange import preprocess
 from Orange.projection import PCA
 from Orange.widgets import widget, gui, report
 from Orange.widgets.settings import DomainContextHandler, ContextSetting, \
@@ -66,6 +67,7 @@ class Outputs:
 
     apply_pca = ContextSetting(True)
     pca_components = ContextSetting(_DEFAULT_PCA_COMPONENTS)
+    normalize = ContextSetting(True)
     metric_idx = ContextSetting(0)
     k_neighbors = ContextSetting(_DEFAULT_K_NEIGHBORS)
     resolution = ContextSetting(1.)
@@ -101,13 +103,17 @@ def __init__(self):
         info_box = gui.vBox(self.controlArea, "Info")
         self.info_label = gui.widgetLabel(info_box, "No data on input.")  # type: QLabel
 
-        pca_box = gui.vBox(self.controlArea, "PCA Preprocessing")
+        preprocessing_box = gui.vBox(self.controlArea, "Preprocessing")
+        self.normalize_cbx = gui.checkBox(
+            preprocessing_box, self, "normalize", label="Normalize data",
+            callback=self._invalidate_preprocessed_data,
+        )  # type: QCheckBox
         self.apply_pca_cbx = gui.checkBox(
-            pca_box, self, "apply_pca", label="Apply PCA preprocessing",
+            preprocessing_box, self, "apply_pca", label="Apply PCA preprocessing",
             callback=self._invalidate_graph,
         )  # type: QCheckBox
         self.pca_components_slider = gui.hSlider(
-            pca_box, self, "pca_components", label="Components: ", minValue=2,
+            preprocessing_box, self, "pca_components", label="PCA Components: ", minValue=2,
             maxValue=_MAX_PCA_COMPONENTS,
             callback=self._invalidate_pca_projection, tracking=False
         )  # type: QSlider
@@ -139,6 +145,14 @@ def __init__(self):
             callback=lambda: self._on_auto_commit_changed(),
         )  # type: QWidget
 
+    def _invalidate_preprocessed_data(self):
+        self.preprocessed_data = None
+        self._invalidate_pca_projection()
+        # When PCA is disabled, `_invalidate_pca_projection` does not invalidate
+        # the graph, so do it here; otherwise the change would not propagate
+        if not self.apply_pca:
+            self._invalidate_graph()
+
     def _invalidate_pca_projection(self):
         self.pca_projection = None
         if not self.apply_pca:
@@ -190,6 +204,7 @@ def cancel(self):
         self.__set_state_ready()
 
     def commit(self):
+        # pylint: disable=too-many-branches
         self.__commit_timer.stop()
         self.__invalidated = False
         self._set_modified(False)
@@ -215,8 +230,11 @@ def commit(self):
 
         # Preprocess the dataset
         if self.preprocessed_data is None:
-            louvain = Louvain(random_state=0)
-            self.preprocessed_data = louvain.preprocess(self.data)
+            if self.normalize:
+                normalizer = preprocess.Normalize(center=False)
+                self.preprocessed_data = normalizer(self.data)
+            else:
+                self.preprocessed_data = self.data
 
         state = TaskState(self)
 
@@ -243,8 +261,8 @@ def commit(self):
         if graph is None:
             task = partial(
                 run_on_data, data, pca_components=pca_components,
-                k_neighbors=k_neighbors, metric=metric,
-                resolution=self.resolution, state=state
+                normalize=self.normalize, k_neighbors=k_neighbors,
+                metric=metric, resolution=self.resolution, state=state,
             )
         else:
             task = partial(
@@ -381,6 +399,7 @@ def _send_data(self):
 
     @Inputs.data
     def set_data(self, data):
+        # pylint: disable=too-many-branches
         self.closeContext()
         self.Error.clear()
 
@@ -439,6 +458,7 @@ def send_report(self):
             pca += report.plural(", {number} component{s}", self.pca_components)
 
         self.report_items((
+            ("Normalize data", report.bool_str(self.normalize)),
             ("PCA preprocessing", pca),
             ("Metric", METRICS[self.metric_idx][0]),
             ("k neighbors", self.k_neighbors),
@@ -520,6 +540,7 @@ class InteruptRequested(BaseException):
 class Results(namespace):
     pca_projection = None    # type: Optional[Table]
     pca_components = None    # type: Optional[int]
+    normalize = None         # type: Optional[bool]
     k_neighbors = None       # type: Optional[int]
     metric = None            # type: Optional[str]
     graph = None             # type: Optional[nx.Graph]
@@ -527,8 +548,8 @@ class Results(namespace):
     partition = None         # type: Optional[np.ndarray]
 
 
-def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
-    # type: (Table, Optional[int], int, str, float, TaskState) -> Results
+def run_on_data(data, normalize, pca_components, k_neighbors, metric, resolution, state):
+    # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results
     """
     Run the louvain clustering on `data`.
 
@@ -539,6 +560,8 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
     ----------
     data : Table
         Data table
+    normalize : bool
+        If `True`, the data is first normalized before computing PCA.
     pca_components : Optional[int]
         If not `None` then the data is first projected onto first
         `pca_components` principal components.
@@ -556,16 +579,18 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
     """
     state = state  # type: TaskState
     res = Results(
-        pca_components=pca_components, k_neighbors=k_neighbors, metric=metric,
-        resolution=resolution,
+        normalize=normalize, pca_components=pca_components,
+        k_neighbors=k_neighbors, metric=metric, resolution=resolution,
     )
     step = 0
     if state.is_interuption_requested():
         return res
+
     if pca_components is not None:
         steps = 3
         state.set_status("Computing PCA...")
         pca = PCA(n_components=pca_components, random_state=0)
+
         data = res.pca_projection = pca(data)(data)
         assert isinstance(data, Table)
         state.set_partial_results(("pca_projection", res.pca_projection))
@@ -579,6 +604,13 @@ def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
     state.set_progress_value(100. * step / steps)
     state.set_status("Building graph...")
 
+    # Apply Louvain preprocessing before converting the table into a graph
+    louvain = Louvain(resolution=resolution, random_state=0)
+    data = louvain.preprocess(data)
+
+    if state.is_interuption_requested():
+        return res
+
     def pcallback(val):
         state.set_progress_value((100. * step + 100 * val) / steps)
         if state.is_interuption_requested():
@@ -600,7 +632,6 @@ def pcallback(val):
     if state.is_interuption_requested():
         return res
 
-    louvain = Louvain(resolution=resolution, random_state=0)
     res.partition = louvain.fit_predict(graph)
     state.set_partial_results(("partition", res.partition))
     return res
diff --git a/Orange/widgets/unsupervised/tests/test_owlouvain.py b/Orange/widgets/unsupervised/tests/test_owlouvain.py
index b686ca37e1e..ee3b7bc1c67 100644
--- a/Orange/widgets/unsupervised/tests/test_owlouvain.py
+++ b/Orange/widgets/unsupervised/tests/test_owlouvain.py
@@ -3,8 +3,11 @@
 import numpy as np
 
 from Orange.data import Table, Domain, ContinuousVariable
+from Orange.preprocess import Normalize
 from Orange.widgets.tests.base import WidgetTest
+from Orange.widgets.tests.utils import table_dense_sparse
 from Orange.widgets.unsupervised.owlouvainclustering import OWLouvainClustering
+from sklearn.utils import check_random_state
 
 # Deterministic tests
 np.random.seed(42)
@@ -178,3 +181,56 @@ def test_deterministic_clustering(self):
 
         # Ensure that clustering was the same in both instances
         np.testing.assert_equal(result1.metas, result2.metas)
+
+    @table_dense_sparse
+    def test_normalize_data(self, prepare_table):
+        """Check that normalization is called at the proper times."""
+        data = prepare_table(self.iris)
+
+        # Enable checkbox
+        self.widget.controls.normalize.setChecked(True)
+        self.assertTrue(self.widget.controls.normalize.isChecked())
+        with patch("Orange.preprocess.Normalize", wraps=Normalize) as normalize:
+            self.send_signal(self.widget.Inputs.data, data)
+            self.wait_until_stop_blocking()
+            self.assertTrue(self.widget.controls.normalize.isEnabled())
+            normalize.assert_called_once()
+
+        # Disable checkbox
+        self.widget.controls.normalize.setChecked(False)
+        self.assertFalse(self.widget.controls.normalize.isChecked())
+        with patch("Orange.preprocess.Normalize", wraps=Normalize) as normalize:
+            self.send_signal(self.widget.Inputs.data, data)
+            self.wait_until_stop_blocking()
+            self.assertTrue(self.widget.controls.normalize.isEnabled())
+            normalize.assert_not_called()
+
+    def test_dense_and_sparse_return_same_result(self):
+        """Check that Louvain clustering returns identical results for both
+        dense and sparse data."""
+        random_state = check_random_state(42)
+
+        # Randomly set some values to zero
+        dense_data = self.iris
+        mask = random_state.beta(1, 2, size=self.iris.X.shape) > 0.5
+        dense_data.X[mask] = 0
+        sparse_data = dense_data.to_sparse()
+
+        def _compute_clustering(data):
+            self.send_signal(self.widget.Inputs.data, data)
+            self.wait_until_stop_blocking()
+            result = self.get_output(self.widget.Outputs.annotated_data)
+            self.send_signal(self.widget.Inputs.data, None)
+            return result
+
+        # Disable normalization
+        self.widget.controls.normalize.setChecked(False)
+        dense_result = _compute_clustering(dense_data)
+        sparse_result = _compute_clustering(sparse_data)
+        np.testing.assert_equal(dense_result.metas, sparse_result.metas)
+
+        # Enable normalization
+        self.widget.controls.normalize.setChecked(True)
+        dense_result = _compute_clustering(dense_data)
+        sparse_result = _compute_clustering(sparse_data)
+        np.testing.assert_equal(dense_result.metas, sparse_result.metas)

From 9e3a237ade4539d74773cc3c6873e31d250f8f1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Mon, 4 Feb 2019 18:22:59 +0100
Subject: [PATCH 4/7] OwLouvain: Disable PCA slider if Apply PCA is unchecked

---
 Orange/widgets/unsupervised/owlouvainclustering.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py
index 3ad293fa8c5..18ae49bf08a 100644
--- a/Orange/widgets/unsupervised/owlouvainclustering.py
+++ b/Orange/widgets/unsupervised/owlouvainclustering.py
@@ -110,7 +110,7 @@ def __init__(self):
         )  # type: QCheckBox
         self.apply_pca_cbx = gui.checkBox(
             preprocessing_box, self, "apply_pca", label="Apply PCA preprocessing",
-            callback=self._invalidate_graph,
+            callback=self._apply_pca_changed,
         )  # type: QCheckBox
         self.pca_components_slider = gui.hSlider(
             preprocessing_box, self, "pca_components", label="PCA Components: ", minValue=2,
@@ -145,6 +145,10 @@ def __init__(self):
             callback=lambda: self._on_auto_commit_changed(),
         )  # type: QWidget
 
+    def _apply_pca_changed(self):
+        self.controls.pca_components.setEnabled(self.apply_pca)
+        self._invalidate_graph()
+
     def _invalidate_preprocessed_data(self):
         self.preprocessed_data = None
         self._invalidate_pca_projection()
@@ -399,12 +403,13 @@ def _send_data(self):
 
     @Inputs.data
     def set_data(self, data):
-        # pylint: disable=too-many-branches
         self.closeContext()
         self.Error.clear()
 
         prev_data, self.data = self.data, data
         self.openContext(self.data)
+        # Make sure to properly enable/disable slider based on `apply_pca` setting
+        self.controls.pca_components.setEnabled(self.apply_pca)
 
         # If X hasn't changed, there's no reason to recompute clusters
         if prev_data and self.data and np.array_equal(self.data.X, prev_data.X):

From c1de7d902e72f3f21ec29aa06fb3c8f23b8d2f2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Sun, 10 Feb 2019 14:23:52 +0100
Subject: [PATCH 5/7] Preprocess: Fixup docstrings for Normalize

---
 Orange/preprocess/preprocess.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py
index b8bfc0280da..f4a52bb0531 100644
--- a/Orange/preprocess/preprocess.py
+++ b/Orange/preprocess/preprocess.py
@@ -269,12 +269,12 @@ class Normalize(Preprocess):
     Parameters
     ----------
     zero_based : bool (default=True)
+        Only used when `norm_type=NormalizeBySpan`.
+
         Determines the value used as the “low” value of the variable.
         It determines the interval for normalized continuous variables
         (either [-1, 1] or [0, 1]).
 
-        This has no effect when `norm_type` is set to `NormalizeBySD`.
-
     norm_type : NormTypes (default: Normalize.NormalizeBySD)
         Normalization type. If Normalize.NormalizeBySD, the values are
         replaced with standardized values by subtracting the average
@@ -289,6 +289,8 @@ class Normalize(Preprocess):
         If True the class is normalized as well.
 
     center : bool (default=True)
+        Only used when `norm_type=NormalizeBySD`.
+
         Whether or not to center the data so it has mean zero.
 
     Examples

From cd31ed53916101c8f3e9bae6571614bfbe3dd1dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Sun, 10 Feb 2019 14:24:36 +0100
Subject: [PATCH 6/7] OwLouvain: Move data preprocessing to separate function

---
 .../unsupervised/owlouvainclustering.py       | 30 ++++++++++---------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py
index 18ae49bf08a..3e6f1cecf9c 100644
--- a/Orange/widgets/unsupervised/owlouvainclustering.py
+++ b/Orange/widgets/unsupervised/owlouvainclustering.py
@@ -145,6 +145,14 @@ def __init__(self):
             callback=lambda: self._on_auto_commit_changed(),
         )  # type: QWidget
 
+    def _preprocess_data(self):
+        if self.preprocessed_data is None:
+            if self.normalize:
+                normalizer = preprocess.Normalize(center=False)
+                self.preprocessed_data = normalizer(self.data)
+            else:
+                self.preprocessed_data = self.data
+
     def _apply_pca_changed(self):
         self.controls.pca_components.setEnabled(self.apply_pca)
         self._invalidate_graph()
@@ -208,11 +216,9 @@ def cancel(self):
         self.__set_state_ready()
 
     def commit(self):
-        # pylint: disable=too-many-branches
         self.__commit_timer.stop()
         self.__invalidated = False
         self._set_modified(False)
-        self.Error.clear()
 
         # Cancel current running task
         self.__cancel_task(wait=False)
@@ -221,24 +227,14 @@ def commit(self):
             self.__set_state_ready()
             return
 
-        # Make sure the dataset is ok
-        if len(self.data.domain.attributes) < 1:
-            self.Error.empty_dataset()
-            self.__set_state_ready()
-            return
+        self.Error.clear()
 
         if self.partition is not None:
             self.__set_state_ready()
             self._send_data()
             return
 
-        # Preprocess the dataset
-        if self.preprocessed_data is None:
-            if self.normalize:
-                normalizer = preprocess.Normalize(center=False)
-                self.preprocessed_data = normalizer(self.data)
-            else:
-                self.preprocessed_data = self.data
+        self._preprocess_data()
 
         state = TaskState(self)
 
@@ -425,6 +421,12 @@ def set_data(self, data):
         # Clear internal state
         self.clear()
         self._invalidate_pca_projection()
+
+        # Make sure the dataset is ok
+        if self.data is not None and len(self.data.domain.attributes) < 1:
+            self.Error.empty_dataset()
+            self.data = None
+
         if self.data is None:
             return
 

From fe28eafd0af6dc0aace878f59988835bfcaa6338 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Fri, 15 Feb 2019 10:21:04 +0100
Subject: [PATCH 7/7] OwLouvain: Properly compare new data with old without
 warnings

---
 Orange/statistics/util.py                     | 17 +++++++++++++++
 Orange/tests/test_statistics.py               | 21 ++++++++++++++++++-
 .../unsupervised/owlouvainclustering.py       |  4 ++--
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py
index f4b44b1e4ea..94207b2ec62 100644
--- a/Orange/statistics/util.py
+++ b/Orange/statistics/util.py
@@ -13,6 +13,23 @@
 from sklearn.utils.sparsefuncs import mean_variance_axis
 
 
+def sparse_array_equal(x1, x2):
+    """Check if two sparse arrays are equal."""
+    if not sp.issparse(x1):
+        raise TypeError("`x1` must be sparse.")
+    if not sp.issparse(x2):
+        raise TypeError("`x2` must be sparse.")
+
+    return x1.shape == x2.shape and (x1 != x2).nnz == 0
+
+
+def array_equal(x1, x2):
+    """Equivalent of np.array_equal that properly handles sparse matrices."""
+    if sp.issparse(x1) and sp.issparse(x2):
+        return sparse_array_equal(x1, x2)
+    return np.array_equal(x1, x2)
+
+
 def _count_nans_per_row_sparse(X, weights, dtype=None):
     """ Count the number of nans (undefined) values per row. """
     if weights is not None:
diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py
index 99be289556e..722025774d3 100644
--- a/Orange/tests/test_statistics.py
+++ b/Orange/tests/test_statistics.py
@@ -9,7 +9,7 @@
 
 from Orange.statistics.util import bincount, countnans, contingency, digitize, \
     mean, nanmax, nanmean, nanmedian, nanmin, nansum, nanunique, stats, std, \
-    unique, var, nanstd, nanvar, nanmode
+    unique, var, nanstd, nanvar, nanmode, array_equal
 from sklearn.utils import check_random_state
 
 
@@ -590,6 +590,25 @@ def test_nanunique_ignores_nans_in_counts(self, array):
         np.testing.assert_equal(nanunique(x, return_counts=True)[1], expected)
 
 
+class TestArrayEqual(unittest.TestCase):
+    @dense_sparse
+    def test_same_matrices(self, array):
+        x = array([0, 1, 0, 0, 2])
+        self.assertTrue(array_equal(x, x))
+
+    @dense_sparse
+    def test_with_different_shapes(self, array):
+        x = array(np.eye(4))
+        y = array(np.eye(5))
+        self.assertFalse(array_equal(x, y))
+
+    @dense_sparse
+    def test_with_different_values(self, array):
+        x = array([0, 1, 0, 0, 2])
+        y = array([0, 3, 0, 0, 2])
+        self.assertFalse(array_equal(x, y))
+
+
 class TestNanModeAppVeyor(unittest.TestCase):
     def test_appveyour_still_not_onscipy_1_2_0(self):
         import scipy
diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py
index 3e6f1cecf9c..f1509c55829 100644
--- a/Orange/widgets/unsupervised/owlouvainclustering.py
+++ b/Orange/widgets/unsupervised/owlouvainclustering.py
@@ -19,6 +19,7 @@
 from Orange.data.util import get_unique_names
 from Orange import preprocess
 from Orange.projection import PCA
+from Orange.statistics import util as ut
 from Orange.widgets import widget, gui, report
 from Orange.widgets.settings import DomainContextHandler, ContextSetting, \
     Setting
@@ -407,8 +408,7 @@ def set_data(self, data):
         # Make sure to properly enable/disable slider based on `apply_pca` setting
         self.controls.pca_components.setEnabled(self.apply_pca)
 
-        # If X hasn't changed, there's no reason to recompute clusters
-        if prev_data and self.data and np.array_equal(self.data.X, prev_data.X):
+        if prev_data and self.data and ut.array_equal(prev_data.X, self.data.X):
             if self.auto_commit:
                 self._send_data()
             return