From 3d2fa0eaaa321b7808d6e562c2104fbf403eb4ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Wed, 22 May 2019 11:04:25 +0200
Subject: [PATCH 1/5] Clustering simplified

---
 Orange/clustering/__init__.py   |   1 +
 Orange/clustering/clustering.py | 100 ++++++++++++++++++++++
 Orange/clustering/dbscan.py     |  60 ++++---------
 Orange/clustering/kmeans.py     |  85 ++++++-------------
 Orange/clustering/louvain.py    | 146 +++++++++++++++-----------------
 5 files changed, 206 insertions(+), 186 deletions(-)
 create mode 100644 Orange/clustering/clustering.py

diff --git a/Orange/clustering/__init__.py b/Orange/clustering/__init__.py
index 03b53fa7f05..818a33c076d 100644
--- a/Orange/clustering/__init__.py
+++ b/Orange/clustering/__init__.py
@@ -4,3 +4,4 @@
 from .dbscan import *
 from .hierarchical import *
 from .kmeans import *
+from .louvain import *
diff --git a/Orange/clustering/clustering.py b/Orange/clustering/clustering.py
new file mode 100644
index 00000000000..0dfaca0608f
--- /dev/null
+++ b/Orange/clustering/clustering.py
@@ -0,0 +1,100 @@
+import numpy as np
+import scipy.sparse
+
+from Orange.data import Table, Instance
+from Orange.data.table import DomainTransformationError
+from Orange.misc.wrapper_meta import WrapperMeta
+from Orange.preprocess import Continuize, SklImpute
+
+
+class ClusteringModel:
+    """Fitted projector wrapper; calling it delegates to ``predict``."""
+
+    def __init__(self, projector):
+        self.projector = projector
+        self.domain = None
+        self.original_domain = None
+        self.labels = projector.labels_
+
+    def __call__(self, data):
+        """Predict for an ndarray, csr/csc matrix, Table, Instance or list."""
+        one_d = False  # set when a single row is given; result is unwrapped
+        if isinstance(data, np.ndarray):
+            one_d = data.ndim == 1
+            prediction = self.predict(np.atleast_2d(data))
+        elif isinstance(data, (scipy.sparse.csr_matrix,
+                               scipy.sparse.csc_matrix)):
+            prediction = self.predict(data)
+        elif isinstance(data, (Table, Instance)):
+            if isinstance(data, Instance):
+                data = Table(data.domain, [data])
+                one_d = True
+            if data.domain != self.domain:
+                if self.original_domain.attributes != data.domain.attributes \
+                        and data.X.size \
+                        and not np.isnan(data.X).all():
+                    data = data.transform(self.original_domain)
+                    if np.isnan(data.X).all():
+                        raise DomainTransformationError(
+                            "domain transformation produced no defined values")
+                data = data.transform(self.domain)
+            prediction = self.predict(data.X)
+        elif isinstance(data, (list, tuple)):
+            if not isinstance(data[0], (list, tuple)):
+                data = [data]
+                one_d = True
+            data = Table.from_list(self.original_domain, data)
+            data = data.transform(self.domain)
+            prediction = self.predict(data.X)
+        else:
+            raise TypeError("Unrecognized argument (instance of '{}')"
+                            .format(type(data).__name__))
+
+        return prediction[0] if one_d else prediction
+
+    def predict(self, X):
+        """Subclasses that support prediction on new data override this."""
+        raise NotImplementedError(
+            "This clustering algorithm does not support predicting.")
+
+
+class Clustering(metaclass=WrapperMeta):
+    """
+    ${skldoc}
+    Additional Orange parameters
+
+    preprocessors : list, optional (default = [Continuize(), SklImpute()])
+        An ordered list of preprocessors applied to data before
+        training or testing.
+    """
+    __wraps__ = None
+    __returns__ = ClusteringModel
+    preprocessors = [Continuize(), SklImpute()]
+
+    def __init__(self, preprocessors, parameters):
+        pps = self.preprocessors if preprocessors is None else preprocessors
+        self.preprocessors = tuple(pps)  # only None selects the defaults
+        self.params = {k: v for k, v in parameters.items()
+                       if k not in ["self", "preprocessors", "__class__"]}
+
+    def __call__(self, data):
+        return self.get_model(data).labels
+
+    def get_model(self, data):
+        orig_domain = data.domain
+        data = self.preprocess(data)
+        model = self.fit_storage(data)
+        model.domain = data.domain
+        model.original_domain = orig_domain
+        return model
+
+    def fit_storage(self, data):
+        return self.fit(data.X)  # data is a Table; only its X is used
+
+    def fit(self, X: np.ndarray, y: np.ndarray = None):
+        return self.__returns__(self.__wraps__(**self.params).fit(X))
+
+    def preprocess(self, data):
+        for pp in self.preprocessors:
+            data = pp(data)
+        return data
diff --git a/Orange/clustering/dbscan.py b/Orange/clustering/dbscan.py
index 7481f1c34ec..80e7cdd9948 100644
--- a/Orange/clustering/dbscan.py
+++ b/Orange/clustering/dbscan.py
@@ -1,52 +1,22 @@
-import sklearn.cluster as skl_cluster
-from numpy import ndarray, unique
+import sklearn.cluster
 
-from Orange.data import Table, DiscreteVariable, Domain, Instance
-from Orange.projection import SklProjector, Projection
+from Orange.clustering.clustering import Clustering
+from Orange.data import Table
 
 
 __all__ = ["DBSCAN"]
 
-class DBSCAN(SklProjector):
-    __wraps__ = skl_cluster.DBSCAN
+
+class DBSCAN(Clustering):
+
+    __wraps__ = sklearn.cluster.DBSCAN
 
     def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
-                 algorithm='auto', leaf_size=30, p=None,
-                 preprocessors=None):
-        super().__init__(preprocessors=preprocessors)
-        self.params = vars()
-
-    def fit(self, X, Y=None):
-        proj = skl_cluster.DBSCAN(**self.params)
-        self.X = X
-        if isinstance(X, Table):
-            proj = proj.fit(X.X,)
-        else:
-            proj = proj.fit(X, )
-        return DBSCANModel(proj)
-
-
-class DBSCANModel(Projection):
-    def __init__(self, proj):
-        super().__init__(proj=proj)
-
-    def __call__(self, data):
-        if isinstance(data, ndarray):
-            return self.proj.fit_predict(data).reshape((len(data), 1))
-
-        if isinstance(data, Table):
-            if data.domain is not self.pre_domain:
-                data = data.transform(self.pre_domain)
-            y = self.proj.fit_predict(data.X)
-            vals, indices = unique(y, return_inverse=True)
-            c = DiscreteVariable(name='Core sample index',
-                                 values=[str(v) for v in vals])
-            domain = Domain([c])
-            return Table(domain, indices.reshape(len(y), 1))
-
-        elif isinstance(data, Instance):
-            if data.domain is not self.pre_domain:
-                data = Instance(self.pre_domain, data)
-            # Instances-by-Instance classification is not defined;
-            raise Exception("Core sample assignment is not supported "
-                            "for single instances.")
+                 algorithm='auto', leaf_size=30, p=None, preprocessors=None):
+        super().__init__(preprocessors, vars())
+
+
+if __name__ == "__main__":
+    d = Table("iris")
+    km = DBSCAN(preprocessors=None)
+    clusters = km(d)
diff --git a/Orange/clustering/kmeans.py b/Orange/clustering/kmeans.py
index 364a4261405..4230858c8a3 100644
--- a/Orange/clustering/kmeans.py
+++ b/Orange/clustering/kmeans.py
@@ -1,72 +1,35 @@
-import numpy as np
-import sklearn.cluster as skl_cluster
-from sklearn.metrics import silhouette_samples, silhouette_score
+import sklearn.cluster
 
-from Orange.data import Table, DiscreteVariable, Domain, Instance
-from Orange.projection import SklProjector, Projection
-from Orange.distance import Euclidean
+from Orange.clustering.clustering import Clustering, ClusteringModel
+from Orange.data import Table
 
 
 __all__ = ["KMeans"]
 
-SILHOUETTE_MAX_SAMPLES = 5000
 
-class KMeans(SklProjector):
-    __wraps__ = skl_cluster.KMeans
+class KMeansModel(ClusteringModel):
+
+    def __init__(self, projector):
+        super().__init__(projector)
+        self.centroids = projector.cluster_centers_
+        self.k = projector.get_params()["n_clusters"]
+
+    def predict(self, X):
+        return self.projector.predict(X)
 
-    def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
-                 tol=0.0001, random_state=None, preprocessors=None,
-                 compute_silhouette_score=False):
-        super().__init__(preprocessors=preprocessors)
-        self.params = vars()
-        self._compute_silhouette = compute_silhouette_score
 
-    def fit(self, X, Y=None):
-        proj = skl_cluster.KMeans(**self.params)
-        proj = proj.fit(X, Y)
-        proj.silhouette = np.nan
-        try:
-            if self._compute_silhouette and 2 <= proj.n_clusters < X.shape[0]:
-                if X.shape[0] <= SILHOUETTE_MAX_SAMPLES:
-                    proj.silhouette_samples = \
-                        silhouette_samples(X, proj.labels_)
-                    proj.silhouette = np.mean(proj.silhouette_samples)
-                else:
-                    proj.silhouette_samples = None
-                    proj.silhouette = \
-                        silhouette_score(X, proj.labels_, sample_size=SILHOUETTE_MAX_SAMPLES)
-        except MemoryError:  # Pairwise dist in silhouette fails for large data
-            pass
-        proj.inertia = proj.inertia_ / X.shape[0]
-        cluster_dist = Euclidean(proj.cluster_centers_)
-        proj.inter_cluster = np.mean(cluster_dist[np.triu_indices_from(cluster_dist, 1)])
-        return KMeansModel(proj, self.preprocessors)
+class KMeans(Clustering):
 
+    __wraps__ = sklearn.cluster.KMeans
+    __returns__ = KMeansModel
+
+    def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
+                 tol=0.0001, random_state=None, preprocessors=None):
+        super().__init__(preprocessors, vars())
 
-class KMeansModel(Projection):
-    def __init__(self, proj, preprocessors=None):
-        super().__init__(proj=proj)
-        self.k = self.proj.get_params()["n_clusters"]
-        self.centroids = self.proj.cluster_centers_
 
-    def __call__(self, data):
-        if isinstance(data, Table):
-            if data.domain is not self.pre_domain:
-                data = data.transform(self.pre_domain)
-            c = DiscreteVariable(name='Cluster id',
-                                 values=[str(i) for i in range(self.k)])
-            domain = Domain([c])
-            return Table(
-                domain,
-                self.proj.predict(data.X).astype(int).reshape((len(data), 1)))
-        elif isinstance(data, Instance):
-            if data.domain is not self.pre_domain:
-                data = Instance(self.pre_domain, data)
-            c = DiscreteVariable(name='Cluster id',
-                                 values=[str(i) for i in range(self.k)])
-            domain = Domain([c])
-            return Table(
-                domain,
-                np.atleast_2d(self.proj.predict(data._x.reshape(1, -1))).astype(int))
-        else:
-            return self.proj.predict(data).reshape((data.shape[0], 1))
+if __name__ == "__main__":
+    d = Table("iris")
+    km = KMeans(preprocessors=None, n_clusters=3)
+    clusters = km(d)
+    model = km.fit_storage(d)
diff --git a/Orange/clustering/louvain.py b/Orange/clustering/louvain.py
index a4072341b25..ed2ff7ddc3e 100644
--- a/Orange/clustering/louvain.py
+++ b/Orange/clustering/louvain.py
@@ -3,7 +3,6 @@
 
 Original C++ implementation available at
 https://sites.google.com/site/findcommunities/
-
 """
 
 import numpy as np
@@ -11,25 +10,29 @@
 # NOTE: The ``community`` package might be renamed in the near future, see
 # GH issue https://github.com/taynaud/python-louvain/issues/23
 from community import best_partition
+from sklearn.base import BaseEstimator
 from sklearn.neighbors import NearestNeighbors
 
-import Orange
+from Orange.clustering.clustering import Clustering
 from Orange.data import Table
 
 
+__all__ = ["Louvain", "matrix_to_knn_graph"]
+
+
 def jaccard(x, y):
     # type: (set, set) -> float
     """Compute the Jaccard similarity between two sets."""
     return len(x & y) / len(x | y)
 
 
-def table_to_knn_graph(data, k_neighbors, metric, progress_callback=None):
-    """Convert tabular data to a graph using a nearest neighbors approach with
+def matrix_to_knn_graph(data, k_neighbors, metric, progress_callback=None):
+    """Convert data matrix to a graph using a nearest neighbors approach with
     the Jaccard similarity as the edge weights.
 
     Parameters
     ----------
-    data : Table
+    data : np.ndarray
     k_neighbors : int
     metric : str
         A distance metric supported by sklearn.
@@ -59,99 +62,82 @@ def table_to_knn_graph(data, k_neighbors, metric, progress_callback=None):
             graph.add_edge(
                 node,
                 neighbor,
-                weight=jaccard(nearest_neighbors[node], nearest_neighbors[neighbor]),
+                weight=jaccard(
+                    nearest_neighbors[node], nearest_neighbors[neighbor]),
             )
 
     return graph
 
 
-class Louvain:
-    preprocessors = [Orange.preprocess.Continuize(), Orange.preprocess.SklImpute()]
-
-    def __init__(
-            self,
-            k_neighbors=30,
-            metric="l2",
-            resolution=1.0,
-            random_state=None,
-            preprocessors=None,
-    ):
-        """Louvain clustering for community detection in graphs.
-
-        Louvain clustering is a community detection algorithm for detecting
-        clusters of "communities" in graphs. As such, tabular data must first
-        be converted into graph form. This is typically done by computing the
-        KNN graph on the input data.
-
-        Parameters
-        ----------
-        k_neighbors : Optional[int]
-            The number of nearest neighbors to use for the KNN graph if
-            tabular data is passed.
-
-        metric : Optional[str]
-            The metric to use to compute the nearest neighbors.
-
-        resolution : Optional[float]
-            The resolution is a parameter of the Louvain method that affects
-            the size of the recovered clusters.
-
-        random_state: Union[int, RandomState]
-            The random state parameter follows the convention used in scikit-learn.
-            If the value is an int, random_state is the seed used by the random
-            number generator. If the value is a RandomState instance, then it will
-            be used as the random number generator. If the value is None, the random
-            number generator is the RandomState instance used by `np.random`.
-
-        """
-        if preprocessors is None:
-            preprocessors = type(self).preprocessors
-        self.preprocessors = tuple(preprocessors)
+class LouvainMethod(BaseEstimator):
 
+    def __init__(self, k_neighbors=30, metric="l2", resolution=1.0,
+                 random_state=None):
         self.k_neighbors = k_neighbors
         self.metric = metric
         self.resolution = resolution
         self.random_state = random_state
+        self.labels_ = None
 
-        self.labels = None
+    def fit(self, X: np.ndarray, y: np.ndarray = None):
+        # The data matrix has to be converted to a k-NN graph first
+        graph = matrix_to_knn_graph(
+            X, metric=self.metric, k_neighbors=self.k_neighbors)
+        return self.fit_graph(graph)
 
-    def __call__(self, data):
-        data = self.preprocess(data)
-        return self.fit_predict(data.X, data.Y)
+    def fit_graph(self, graph):
+        partition = best_partition(
+            graph, resolution=self.resolution, random_state=self.random_state)
+        self.labels_ = np.fromiter(  # labels by sorted node; empty if no nodes
+            (label for _, label in sorted(partition.items())), dtype=int)
+        return self
 
-    def preprocess(self, data):
-        for pp in self.preprocessors:
-            data = pp(data)
-        return data
 
-    def fit(self, X, y=None):
-        # If we are given a table, we have to convert it to a graph first
-        if isinstance(X, Table):
-            graph = table_to_knn_graph(
-                X.X, metric=self.metric, k_neighbors=self.k_neighbors
-            )
-        # Same goes for a matrix
-        elif isinstance(X, np.ndarray):
-            graph = table_to_knn_graph(
-                X, metric=self.metric, k_neighbors=self.k_neighbors
-            )
-        elif isinstance(X, nx.Graph):
-            graph = X
+class Louvain(Clustering):
+    """Louvain clustering for community detection in graphs.
 
-        partition = best_partition(
-            graph, resolution=self.resolution, random_state=self.random_state
-        )
-        partition = np.fromiter(list(zip(*sorted(partition.items())))[1], dtype=int)
+    Louvain clustering is a community detection algorithm for detecting
+    clusters of "communities" in graphs. As such, tabular data must first
+    be converted into graph form. This is typically done by computing the
+    KNN graph on the input data.
+
+    Parameters
+    ----------
+    k_neighbors : Optional[int]
+        The number of nearest neighbors to use for the KNN graph if
+        tabular data is passed.
+
+    metric : Optional[str]
+        The metric to use to compute the nearest neighbors.
+
+    resolution : Optional[float]
+        The resolution is a parameter of the Louvain method that affects
+        the size of the recovered clusters.
+
+    random_state: Union[int, RandomState]
+        The random state parameter follows the convention used in scikit-learn.
+        If the value is an int, random_state is the seed used by the random
+        number generator. If the value is a RandomState instance, then it will
+        be used as the random number generator. If the value is None, the random
+        number generator is the RandomState instance used by `np.random`.
+    """
+
+    __wraps__ = LouvainMethod
 
-        self.labels = partition
+    def __init__(self, k_neighbors=30, metric="l2", resolution=1.0,
+                 random_state=None, preprocessors=None):
+        super().__init__(preprocessors, vars())
 
-    def fit_predict(self, X, y=None):
-        self.fit(X, y)
-        return self.labels
+    def get_model(self, data):
+        if isinstance(data, nx.Graph):
+            return self.__returns__(
+                self.__wraps__(**self.params).fit_graph(data))
+        else:
+            return super().get_model(data)
 
 
 if __name__ == "__main__":
     # clustering run on iris data - orange table
-    data = Table("iris")
-    louvain = Louvain(2)
-    louvain.fit(data)
+    d = Table("iris")
+    louvain = Louvain(5)
+    clusters = louvain(d)

From fa2af37f6929a4b78156e6054bd3295467ab8623 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Thu, 30 May 2019 12:32:42 +0200
Subject: [PATCH 2/5] Clustering: modified dependent widgets

---
 Orange/clustering/kmeans.py                   | 10 ++++-
 Orange/evaluation/clustering.py               |  5 +--
 Orange/widgets/unsupervised/owkmeans.py       | 41 +++++++++++-------
 .../unsupervised/owlouvainclustering.py       | 13 +++---
 .../unsupervised/tests/test_owkmeans.py       | 42 +++++++++----------
 Orange/widgets/visualize/owheatmap.py         |  6 +--
 6 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/Orange/clustering/kmeans.py b/Orange/clustering/kmeans.py
index 4230858c8a3..26957535050 100644
--- a/Orange/clustering/kmeans.py
+++ b/Orange/clustering/kmeans.py
@@ -1,3 +1,5 @@
+import warnings
+
 import sklearn.cluster
 
 from Orange.clustering.clustering import Clustering, ClusteringModel
@@ -24,7 +26,13 @@ class KMeans(Clustering):
     __returns__ = KMeansModel
 
     def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
-                 tol=0.0001, random_state=None, preprocessors=None):
+                 tol=0.0001, random_state=None, preprocessors=None,
+                 compute_silhouette_score=None):
+        if compute_silhouette_score is not None:
+            warnings.warn("compute_silhouette_score is deprecated. Please use "
+                          "sklearn.metrics.silhouette_score to compute "
+                          "silhouettes.", DeprecationWarning, stacklevel=2)
+        del compute_silhouette_score  # sklearn rejects it; drop from vars()
         super().__init__(preprocessors, vars())
 
 
diff --git a/Orange/evaluation/clustering.py b/Orange/evaluation/clustering.py
index 53bb7858cde..77682dd310e 100644
--- a/Orange/evaluation/clustering.py
+++ b/Orange/evaluation/clustering.py
@@ -94,11 +94,10 @@ def __call__(self, data, learners, preprocessor=None, *, callback=None):
 
         for k in range(self.k):
             for i, learner in enumerate(learners):
-                model = learner(data)
+                model = learner.get_model(data)
                 if self.store_models:
                     res.models[k, i] = model
-                labels = model(data)
-                res.predicted[i, k, :] = labels.X.flatten()
+                res.predicted[i, k, :] = model.labels
 
         return res
 
diff --git a/Orange/widgets/unsupervised/owkmeans.py b/Orange/widgets/unsupervised/owkmeans.py
index 185b184eb48..0a44de35280 100644
--- a/Orange/widgets/unsupervised/owkmeans.py
+++ b/Orange/widgets/unsupervised/owkmeans.py
@@ -6,9 +6,10 @@
     pyqtSlot as Slot
 from AnyQt.QtGui import QIntValidator
 from AnyQt.QtWidgets import QGridLayout, QTableView
+from sklearn.metrics import silhouette_samples, silhouette_score
 
 from Orange.clustering import KMeans
-from Orange.clustering.kmeans import KMeansModel, SILHOUETTE_MAX_SAMPLES
+from Orange.clustering.kmeans import KMeansModel
 from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
 from Orange.data.util import get_unique_names, array_equal
 from Orange.preprocess.impute import ReplaceUnknowns
@@ -23,6 +24,7 @@
 
 
 RANDOM_STATE = 0
+SILHOUETTE_MAX_SAMPLES = 5000
 
 
 class ClusterTableModel(QAbstractTableModel):
@@ -268,15 +270,15 @@ def has_attributes(self):
         return len(self.data.domain.attributes)
 
     @staticmethod
-    def _compute_clustering(data, k, init, n_init, max_iter, silhouette, random_state):
+    def _compute_clustering(data, k, init, n_init, max_iter, random_state):
         # type: (Table, int, str, int, int, bool) -> KMeansModel
         if k > len(data):
             raise NotEnoughData()
 
         return KMeans(
             n_clusters=k, init=init, n_init=n_init, max_iter=max_iter,
-            compute_silhouette_score=silhouette, random_state=random_state,
-        )(data)
+            random_state=random_state
+        ).get_model(data)
 
     @Slot(int, int)
     def __progress_changed(self, n, d):
@@ -336,7 +338,6 @@ def __launch_tasks(self, ks):
             init=self.INIT_METHODS[self.smart_init][1],
             n_init=self.n_init,
             max_iter=self.max_iterations,
-            silhouette=True,
             random_state=RANDOM_STATE,
         ) for k in ks]
         watcher = FutureSetWatcher(futures)
@@ -432,10 +433,9 @@ def invalidate(self):
         self.commit()
 
     def update_results(self):
-        scores = [
-            mk if isinstance(mk, str) else mk.silhouette for mk in (
-                self.clusterings[k] for k in range(self.k_from, self.k_to + 1))
-        ]
+        scores = [mk if isinstance(mk, str) else silhouette_score(
+            self.data.X, mk.labels) for mk in (
+                self.clusterings[k] for k in range(self.k_from, self.k_to + 1))]
         best_row = max(
             range(len(scores)), default=0,
             key=lambda x: 0 if isinstance(scores[x], str) else scores[x]
@@ -454,6 +454,16 @@ def selected_row(self):
     def select_row(self):
         self.send_data()
 
+    def preproces(self, data):
+        for preprocessor in KMeans.preprocessors:  # the same ones KMeans uses
+            data = preprocessor(data)
+        return data
+
+    def samples_scores(self, clust_ids):
+        d = self.preproces(self.data)
+        return np.arctan(
+            silhouette_samples(d.X, clust_ids)) / np.pi + 0.5
+
     def send_data(self):
         if self.optimize_k:
             row = self.selected_row()
@@ -472,16 +482,15 @@ def send_data(self):
             get_unique_names(domain, "Cluster"),
             values=["C%d" % (x + 1) for x in range(km.k)]
         )
-        clust_ids = km(self.data)
-        clust_col = clust_ids.X.ravel()
+        clust_ids = km.labels
         silhouette_var = ContinuousVariable(
             get_unique_names(domain, "Silhouette"))
-        if km.silhouette_samples is not None:
+        if len(self.data) <= SILHOUETTE_MAX_SAMPLES:
             self.Warning.no_silhouettes.clear()
-            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
+            scores = self.samples_scores(clust_ids)
             clust_scores = []
             for i in range(km.k):
-                in_clust = clust_col == i
+                in_clust = clust_ids == i
                 if in_clust.any():
                     clust_scores.append(np.mean(scores[in_clust]))
                 else:
@@ -494,7 +503,7 @@ def send_data(self):
 
         new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
         new_table = self.data.transform(new_domain)
-        new_table.get_column_view(cluster_var)[0][:] = clust_col
+        new_table.get_column_view(cluster_var)[0][:] = clust_ids
         new_table.get_column_view(silhouette_var)[0][:] = scores
 
         centroid_attributes = [
@@ -502,7 +511,7 @@ def send_data(self):
             if isinstance(attr.compute_value, ReplaceUnknowns)
             and attr.compute_value.variable in domain.attributes
             else attr
-            for attr in km.pre_domain.attributes]
+            for attr in km.domain.attributes]
         centroid_domain = add_columns(
             Domain(centroid_attributes, [], domain.metas),
             metas=[cluster_var, silhouette_var])
diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py
index a4d27ddc4b1..56432544b5a 100644
--- a/Orange/widgets/unsupervised/owlouvainclustering.py
+++ b/Orange/widgets/unsupervised/owlouvainclustering.py
@@ -14,7 +14,7 @@
 )
 from AnyQt.QtWidgets import QSlider, QCheckBox, QWidget, QLabel
 
-from Orange.clustering.louvain import table_to_knn_graph, Louvain
+from Orange.clustering.louvain import matrix_to_knn_graph, Louvain
 from Orange.data import Table, DiscreteVariable
 from Orange.data.util import get_unique_names, array_equal
 from Orange import preprocess
@@ -623,10 +623,9 @@ def pcallback(val):
             raise InteruptRequested()
 
     try:
-        res.graph = graph = table_to_knn_graph(
-            data, k_neighbors=k_neighbors, metric=metric,
-            progress_callback=pcallback
-        )
+        res.graph = graph = matrix_to_knn_graph(
+            data.X, k_neighbors=k_neighbors, metric=metric,
+            progress_callback=pcallback)
     except InteruptRequested:
         return res
 
@@ -638,7 +637,7 @@ def pcallback(val):
     if state.is_interuption_requested():
         return res
 
-    res.partition = louvain.fit_predict(graph)
+    res.partition = louvain(graph)
     state.set_partial_results(("partition", res.partition))
     return res
 
@@ -654,7 +653,7 @@ def run_on_graph(graph, resolution, state):
     state.set_status("Detecting communities...")
     if state.is_interuption_requested():
         return res
-    partition = louvain.fit_predict(graph)
+    partition = louvain(graph)
     res.partition = partition
     state.set_partial_results(("partition", res.partition))
     return res
diff --git a/Orange/widgets/unsupervised/tests/test_owkmeans.py b/Orange/widgets/unsupervised/tests/test_owkmeans.py
index 988daf5c9a8..f81fed3fc04 100644
--- a/Orange/widgets/unsupervised/tests/test_owkmeans.py
+++ b/Orange/widgets/unsupervised/tests/test_owkmeans.py
@@ -5,6 +5,7 @@
 import numpy as np
 from AnyQt.QtCore import Qt
 from AnyQt.QtWidgets import QRadioButton
+from sklearn.metrics import silhouette_score
 
 import Orange.clustering
 from Orange.data import Table, Domain
@@ -197,26 +198,22 @@ def test_data_on_output(self):
         # removing data should have cleared the output
         self.assertEqual(self.widget.data, None)
 
-    @patch("Orange.clustering.kmeans.KMeansModel.__call__")
-    def test_centroids_on_output(self, km_call):
-        ret = km_call.return_value = Mock()
-        ret.X = np.array([0] * 50 + [1] * 100)
-        ret.silhouette_samples = np.arange(150) / 150
-
+    def test_centroids_on_output(self):
         widget = self.widget
         widget.optimize_k = False
         widget.k = 4
         self.send_signal(widget.Inputs.data, self.iris)
         self.commit_and_wait()
+        widget.clusterings[widget.k].labels = np.array([0] * 50 + [1] * 100).flatten()
 
-        widget.clusterings[4].silhouette_samples = np.arange(150) / 150
+        widget.samples_scores = lambda x: np.arctan(
+            np.arange(150) / 150) / np.pi + 0.5
         widget.send_data()
         out = self.get_output(widget.Outputs.centroids)
-        np.testing.assert_almost_equal(
-            out.metas,
-            [[0, np.mean(np.arctan(np.arange(50) / 150)) / np.pi + 0.5],
-             [1, np.mean(np.arctan(np.arange(50, 150) / 150)) / np.pi + 0.5],
-             [2, 0], [3, 0]])
+        np.testing.assert_array_almost_equal(
+            np.array([[0, np.mean(np.arctan(np.arange(50) / 150)) / np.pi + 0.5],
+                      [1, np.mean(np.arctan(np.arange(50, 150) / 150)) / np.pi + 0.5],
+                      [2, 0], [3, 0]]), out.metas.astype(float))
         self.assertEqual(out.name, "iris centroids")
 
     def test_centroids_domain_on_output(self):
@@ -262,12 +259,14 @@ def test_optimization_fails(self):
         self.KMeansFail.fail_on = {3, 5, 7}
         model = widget.table_view.model()
 
-        with patch.object(model, "set_scores", wraps=model.set_scores) as set_scores:
+        with patch.object(
+                model, "set_scores", wraps=model.set_scores) as set_scores:
             self.send_signal(self.widget.Inputs.data, self.iris, wait=5000)
             scores, start_k = set_scores.call_args[0]
             self.assertEqual(
                 scores,
-                [km if isinstance(km, str) else km.silhouette
+                [km if isinstance(km, str) else silhouette_score(
+                    self.iris.X, km(self.iris))
                  for km in (widget.clusterings[k] for k in range(3, 9))]
             )
             self.assertEqual(start_k, 3)
@@ -312,15 +311,14 @@ def test_run_fails(self):
         self.assertIsNotNone(self.get_output(self.widget.Outputs.annotated_data))
 
     def test_select_best_row(self):
-        class Cluster:
-            def __init__(self, n):
-                self.silhouette = n
-
         widget = self.widget
         widget.k_from, widget.k_to = 2, 6
-        widget.clusterings = {k: Cluster(5 - (k - 4) ** 2) for k in range(2, 7)}
+        widget.optimize_k = True
+        self.send_signal(self.widget.Inputs.data, Table("housing"), wait=5000)
+        self.commit_and_wait()
         widget.update_results()
-        self.assertEqual(widget.selected_row(), 2)
+        # for housing dataset best selection is 3 clusters, so row no. 1
+        self.assertEqual(widget.selected_row(), 1)
 
         widget.clusterings = {k: "error" for k in range(2, 7)}
         widget.update_results()
@@ -394,7 +392,9 @@ def test_silhouette_column(self):
         # Avoid randomness in the test
         random = np.random.RandomState(0)  # pylint: disable=no-member
         table = Table(random.rand(110, 2))
-        with patch("Orange.clustering.kmeans.SILHOUETTE_MAX_SAMPLES", 100):
+        with patch(
+                "Orange.widgets.unsupervised.owkmeans.SILHOUETTE_MAX_SAMPLES",
+                100):
             self.send_signal(self.widget.Inputs.data, table)
             outtable = self.get_output(widget.Outputs.annotated_data)
             outtable = outtable.get_column_view("Silhouette")[0]
diff --git a/Orange/widgets/visualize/owheatmap.py b/Orange/widgets/visualize/owheatmap.py
index 6c36e25b803..40f1a5dbef3 100644
--- a/Orange/widgets/visualize/owheatmap.py
+++ b/Orange/widgets/visualize/owheatmap.py
@@ -114,7 +114,7 @@ def barycenter(a, axis=0):
 
 def kmeans_compress(X, k=50):
     km = kmeans.KMeans(n_clusters=k, n_init=5, random_state=42)
-    return km(X)
+    return km.get_model(X)
 
 
 def candidate_split_labels(data):
@@ -866,8 +866,8 @@ def construct_heatmaps(self, data, split_label=None):
                         self.input_data.domain.metas))
                 nclust = min(self.merge_kmeans_k, len(effective_data) - 1)
                 self.kmeans_model = kmeans_compress(effective_data, k=nclust)
-                effective_data.domain = self.kmeans_model.pre_domain
-                merge_indices = [np.flatnonzero(self.kmeans_model.labels_ == ind)
+                effective_data.domain = self.kmeans_model.domain
+                merge_indices = [np.flatnonzero(self.kmeans_model.labels == ind)
                                  for ind in range(nclust)]
                 not_empty_indices = [i for i, x in enumerate(merge_indices)
                                      if len(x) > 0]

From 857a29ab27e545873fe6bc94c8306df6948a6a24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Thu, 30 May 2019 12:33:07 +0200
Subject: [PATCH 3/5] Clustering: Fixed tests

---
 Orange/tests/test_clustering_dbscan.py | 41 ++++++---------------
 Orange/tests/test_clustering_kmeans.py | 49 +++++++-------------------
 Orange/tests/test_louvain.py           | 20 ++++-------
 3 files changed, 30 insertions(+), 80 deletions(-)

diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index 5065fe2482c..14e107d4ba5 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -3,45 +3,26 @@
 
 import unittest
 
-import Orange
+import numpy as np
+
+from Orange.data import Table
 from Orange.clustering.dbscan import DBSCAN
 
 
 class TestDBSCAN(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.iris = Orange.data.Table('iris')
+    def setUp(self):
+        self.iris = Table('iris')
+        self.dbscan = DBSCAN()
 
     def test_dbscan_parameters(self):
         dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
                         algorithm='auto', leaf_size=12, p=None)
-        c = dbscan(self.iris)
+        dbscan(self.iris)
 
     def test_predict_table(self):
-        dbscan = DBSCAN()
-        c = dbscan(self.iris)
-        table = self.iris[:20]
-        p = c(table)
+        pred = self.dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(pred))
 
     def test_predict_numpy(self):
-        dbscan = DBSCAN()
-        c = dbscan(self.iris)
-        X = self.iris.X[::20]
-        p = c(X)
-
-    def test_values(self):
-        dbscan = DBSCAN(eps=1)  # it clusters data in two classes
-        c = dbscan(self.iris)
-        table = self.iris
-        p = c(table)
-
-        self.assertEqual(2, len(p.domain[0].values))
-        self.assertSetEqual({"0", "1"}, set(p.domain[0].values))
-
-        table.X[0] = [100, 100, 100, 100]  # we add a big outlier
-
-        p = c(table)
-
-        self.assertEqual(3, len(p.domain[0].values))
-        self.assertSetEqual({"-1", "0", "1"}, set(p.domain[0].values))
+        model = self.dbscan.fit(self.iris.X)
+        self.assertEqual(np.ndarray, type(model.labels))
diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py
index ae2dc82eb1e..e406b8df204 100644
--- a/Orange/tests/test_clustering_kmeans.py
+++ b/Orange/tests/test_clustering_kmeans.py
@@ -11,55 +11,32 @@
 
 
 class TestKMeans(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.iris = Orange.data.Table('iris')
+    def setUp(self):
+        self.kmeans = KMeans(n_clusters=2)
+        self.iris = Orange.data.Table('iris')
 
     def test_kmeans(self):
-        kmeans = KMeans(n_clusters=2)
-        c = kmeans(self.iris)
-        X = self.iris.X[:20]
-        p = c(X)
+        c = self.kmeans(self.iris)
         # First 20 iris belong to one cluster
-        assert len(set(p.ravel())) == 1
+        self.assertEqual(1, len(set(c[:20].ravel())))
 
     def test_kmeans_parameters(self):
-        kmeans = KMeans(n_clusters=10,
-                        max_iter=10,
-                        random_state=42,
-                        tol=0.001,
-                        init='random',
-                        compute_silhouette_score=True)
-        c = kmeans(self.iris)
-
-    def test_predict_single_instance(self):
-        kmeans = KMeans()
-        c = kmeans(self.iris)
-        inst = self.iris[0]
-        p = c(inst)
+        kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
+                        init='random')
+        kmeans(self.iris)
 
     def test_predict_table(self):
         kmeans = KMeans()
         c = kmeans(self.iris)
-        table = self.iris[:20]
-        p = c(table)
+        self.assertEqual(np.ndarray, type(c))
 
     def test_predict_numpy(self):
         kmeans = KMeans()
-        c = kmeans(self.iris)
-        X = self.iris.X[::20]
-        p = c(X)
+        c = kmeans.fit(self.iris.X)
+        self.assertEqual(np.ndarray, type(c.labels))
 
     def test_predict_sparse(self):
         kmeans = KMeans()
+        self.iris.X = csc_matrix(self.iris.X[::20])
         c = kmeans(self.iris)
-        X = csc_matrix(self.iris.X[::20])
-        p = c(X)
-
-    def test_silhouette_sparse(self):
-        """Test if silhouette gets calculated for sparse data"""
-        kmeans = KMeans(compute_silhouette_score=True)
-        sparse_iris = self.iris.copy()
-        sparse_iris.X = csc_matrix(sparse_iris.X)
-        c = kmeans(sparse_iris)
-        self.assertFalse(np.isnan(c.silhouette))
+        self.assertEqual(np.ndarray, type(c))
diff --git a/Orange/tests/test_louvain.py b/Orange/tests/test_louvain.py
index 2a012889482..e1c192f1eac 100644
--- a/Orange/tests/test_louvain.py
+++ b/Orange/tests/test_louvain.py
@@ -8,19 +8,11 @@
 from Orange.clustering.louvain import Louvain
 
 
-class TestSVMLearner(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.data = Table('iris')
-        cls.louvain = Louvain()
+class TestLouvain(unittest.TestCase):
+    def setUp(self):
+        self.data = Table('iris')
+        self.louvain = Louvain()
 
     def test_orange_table(self):
-        self.assertIsNone(self.louvain.fit(self.data))
-        clusters = self.louvain.fit_predict(self.data)
-        self.assertIn(type(clusters), [list, np.ndarray])
-
-    def test_np_array(self):
-        data_np = self.data.X
-        self.assertIsNone(self.louvain.fit(data_np))
-        clusters = self.louvain.fit_predict(data_np)
-        self.assertIn(type(clusters), [list, np.ndarray])
+        labels = self.louvain(self.data)
+        self.assertEqual(np.ndarray, type(labels))

From 27634c5f3ce815c31f400fabd4fca86251502418 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Tue, 11 Jun 2019 16:28:46 +0200
Subject: [PATCH 4/5] Clustering: Deprecate silhouette in kmeans

---
 Orange/clustering/kmeans.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Orange/clustering/kmeans.py b/Orange/clustering/kmeans.py
index 26957535050..97ba7d0e8a1 100644
--- a/Orange/clustering/kmeans.py
+++ b/Orange/clustering/kmeans.py
@@ -33,7 +33,9 @@ def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
                 "compute_silhouette_score is deprecated. Please use "
                 "sklearn.metrics.silhouette_score to compute silhouettes.",
                 DeprecationWarning)
-        super().__init__(preprocessors, vars())
+        super().__init__(
+            preprocessors, {k: v for k, v in vars().items()
+                            if k != "compute_silhouette_score"})
 
 
 if __name__ == "__main__":

From 2d6b629e8413becb3490cfe8912782b0603f1fe1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Mon, 17 Jun 2019 13:46:00 +0200
Subject: [PATCH 5/5] Clustering: Additional tests for clustering methods

---
 Orange/tests/test_clustering_dbscan.py  |  70 ++++++++++++-
 Orange/tests/test_clustering_kmeans.py  | 128 ++++++++++++++++++++++--
 Orange/tests/test_clustering_louvain.py | 120 ++++++++++++++++++++++
 Orange/tests/test_louvain.py            |  18 ----
 4 files changed, 307 insertions(+), 29 deletions(-)
 create mode 100644 Orange/tests/test_clustering_louvain.py
 delete mode 100644 Orange/tests/test_louvain.py

diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index 14e107d4ba5..3286f5a714d 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -4,7 +4,9 @@
 import unittest
 
 import numpy as np
+from scipy.sparse import csc_matrix, csr_matrix
 
+from Orange.clustering.clustering import ClusteringModel
 from Orange.data import Table
 from Orange.clustering.dbscan import DBSCAN
 
@@ -14,15 +16,81 @@ def setUp(self):
         self.iris = Table('iris')
         self.dbscan = DBSCAN()
 
+    def test_dbscan(self):
+        c = self.dbscan(self.iris)
+        # First 20 iris belong to one cluster
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
     def test_dbscan_parameters(self):
         dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
                         algorithm='auto', leaf_size=12, p=None)
-        dbscan(self.iris)
+        c = dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_table(self):
         pred = self.dbscan(self.iris)
         self.assertEqual(np.ndarray, type(pred))
+        self.assertEqual(len(self.iris), len(pred))
 
     def test_predict_numpy(self):
         model = self.dbscan.fit(self.iris.X)
+        self.assertEqual(ClusteringModel, type(model))
         self.assertEqual(np.ndarray, type(model.labels))
+        self.assertEqual(len(self.iris), len(model.labels))
+
+    def test_predict_sparse_csc(self):
+        self.iris.X = csc_matrix(self.iris.X[::20])
+        c = self.dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_spares_csr(self):
+        self.iris.X = csr_matrix(self.iris.X[::20])
+        c = self.dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        c = self.dbscan.get_model(self.iris)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+        self.assertRaises(NotImplementedError, c, self.iris)
+
+    def test_model_np(self):
+        """
+        Test with numpy array as an input in model.
+        """
+        c = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X)
+
+    def test_model_sparse(self):
+        """
+        Test with sparse array as an input in model.
+        """
+        c = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
+
+    def test_model_instance(self):
+        """
+        Test with instance as an input in model.
+        """
+        c = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris[0])
+
+    def test_model_list(self):
+        """
+        Test with list as an input in model.
+        """
+        c = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
+
+    def test_model_bad_datatype(self):
+        """
+        Check model with data-type that is not supported.
+        """
+        c = self.dbscan.get_model(self.iris)
+        self.assertRaises(TypeError, c, 10)
diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py
index e406b8df204..7ff40d94992 100644
--- a/Orange/tests/test_clustering_kmeans.py
+++ b/Orange/tests/test_clustering_kmeans.py
@@ -2,12 +2,15 @@
 # pylint: disable=missing-docstring
 
 import unittest
+import warnings
 
 import numpy as np
-from scipy.sparse import csc_matrix
+from scipy.sparse import csc_matrix, csr_matrix
 
 import Orange
-from Orange.clustering.kmeans import KMeans
+from Orange.clustering.kmeans import KMeans, KMeansModel
+from Orange.data import Table, Domain, ContinuousVariable
+from Orange.data.table import DomainTransformationError
 
 
 class TestKMeans(unittest.TestCase):
@@ -18,25 +21,130 @@ def setUp(self):
     def test_kmeans(self):
         c = self.kmeans(self.iris)
         # First 20 iris belong to one cluster
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
         self.assertEqual(1, len(set(c[:20].ravel())))
 
     def test_kmeans_parameters(self):
         kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
                         init='random')
-        kmeans(self.iris)
+        c = kmeans(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_table(self):
-        kmeans = KMeans()
-        c = kmeans(self.iris)
+        c = self.kmeans(self.iris)
         self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_numpy(self):
-        kmeans = KMeans()
-        c = kmeans.fit(self.iris.X)
+        c = self.kmeans.fit(self.iris.X)
+        self.assertEqual(KMeansModel, type(c))
         self.assertEqual(np.ndarray, type(c.labels))
+        self.assertEqual(len(self.iris), len(c.labels))
 
-    def test_predict_sparse(self):
-        kmeans = KMeans()
+    def test_predict_sparse_csc(self):
         self.iris.X = csc_matrix(self.iris.X[::20])
-        c = kmeans(self.iris)
+        c = self.kmeans(self.iris)
         self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_spares_csr(self):
+        self.iris.X = csr_matrix(self.iris.X[::20])
+        c = self.kmeans(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        c = self.kmeans.get_model(self.iris)
+        self.assertEqual(KMeansModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+        c1 = c(self.iris)
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels, c1)
+
+    def test_model_np(self):
+        """
+        Test with numpy array as an input in model.
+        """
+        c = self.kmeans.get_model(self.iris)
+        c1 = c(self.iris.X)
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels, c1)
+
+    def test_model_sparse_csc(self):
+        """
+        Test with CSC sparse matrix as an input in model.
+        """
+        c = self.kmeans.get_model(self.iris)
+        c1 = c(csc_matrix(self.iris.X))
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels, c1)
+
+    def test_model_sparse_csr(self):
+        """
+        Test with CSR sparse matrix as an input in model.
+        """
+        c = self.kmeans.get_model(self.iris)
+        c1 = c(csr_matrix(self.iris.X))
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels, c1)
+
+    def test_model_instance(self):
+        """
+        Test with instance as an input in model.
+        """
+        c = self.kmeans.get_model(self.iris)
+        c1 = c(self.iris[0])
+        # prediction of the model must be same since data are same
+        self.assertEqual(c1, c.labels[0])
+
+    def test_model_list(self):
+        """
+        Test with list as an input in model.
+        """
+        c = self.kmeans.get_model(self.iris)
+        c1 = c(self.iris.X.tolist())
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels, c1)
+
+        # example with a list of only one data item
+        c1 = c(self.iris.X.tolist()[0])
+        # prediction of the model must be same since data are same
+        np.testing.assert_array_almost_equal(c.labels[0], c1)
+
+    def test_model_bad_datatype(self):
+        """
+        Check model with data-type that is not supported.
+        """
+        c = self.kmeans.get_model(self.iris)
+        self.assertRaises(TypeError, c, 10)
+
+    def test_model_data_table_domain(self):
+        """
+        Check model with tables whose domain differs from the training one.
+        """
+        # ok domain
+        data = Table(Domain(
+            list(self.iris.domain.attributes) + [ContinuousVariable("a")]),
+                     np.concatenate((self.iris.X, np.ones((len(self.iris), 1))), axis=1))
+        c = self.kmeans.get_model(self.iris)
+        res = c(data)
+        np.testing.assert_array_almost_equal(c.labels, res)
+
+        # totally different domain - should fail
+        self.assertRaises(DomainTransformationError, c, Table("housing"))
+
+    def test_deprecated_silhouette(self):
+        """Passing compute_silhouette_score warns for True and False."""
+        for value in (True, False):
+            with self.subTest(compute_silhouette_score=value), \
+                    warnings.catch_warnings(record=True) as w:
+                # "always" keeps the check independent of the runner's
+                # warning filters, which may hide DeprecationWarning
+                warnings.simplefilter("always")
+                KMeans(compute_silhouette_score=value)
+                self.assertEqual(1, len(w))
+                self.assertTrue(
+                    issubclass(w[-1].category, DeprecationWarning))
diff --git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py
new file mode 100644
index 00000000000..a65ba4a8edf
--- /dev/null
+++ b/Orange/tests/test_clustering_louvain.py
@@ -0,0 +1,120 @@
+# Test methods with long descriptive names can omit docstrings
+# pylint: disable=missing-docstring
+
+import unittest
+
+import numpy as np
+import networkx
+from scipy.sparse import csc_matrix, csr_matrix
+
+from Orange.clustering.clustering import ClusteringModel
+from Orange.clustering.louvain import matrix_to_knn_graph
+from Orange.data import Table
+from Orange.clustering.louvain import Louvain
+
+
+class TestLouvain(unittest.TestCase):
+    def setUp(self):
+        self.iris = Table('iris')
+        self.louvain = Louvain()
+
+    def test_louvain(self):
+        c = self.louvain(self.iris)
+        # First 20 iris belong to one cluster
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+    def test_louvain_parameters(self):
+        louvain = Louvain(
+            k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
+        c = louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_table(self):
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_numpy(self):
+        c = self.louvain.fit(self.iris.X)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(np.ndarray, type(c.labels))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_predict_sparse_csc(self):
+        self.iris.X = csc_matrix(self.iris.X[::5])
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_spares_csr(self):
+        self.iris.X = csr_matrix(self.iris.X[::5])
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        c = self.louvain.get_model(self.iris)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+        self.assertRaises(NotImplementedError, c, self.iris)
+
+    def test_model_np(self):
+        """
+        Test with numpy array as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X)
+
+    def test_model_sparse(self):
+        """
+        Test with sparse array as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
+
+    def test_model_instance(self):
+        """
+        Test with instance as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris[0])
+
+    def test_model_list(self):
+        """
+        Test with list as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
+
+    def test_graph(self):
+        """
+        Louvain accepts graphs too, both when called directly and
+        through get_model.
+        """
+        graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
+        self.assertIsNotNone(graph)
+        self.assertEqual(networkx.Graph, type(graph))
+
+        # basic clustering - get clusters
+        c = self.louvain(graph)
+        # First 20 iris belong to one cluster
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+        # clustering - get model
+        c = self.louvain.get_model(graph)
+        # the model keeps one label per input row
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_model_bad_datatype(self):
+        """
+        Check model with data-type that is not supported.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(TypeError, c, 10)
diff --git a/Orange/tests/test_louvain.py b/Orange/tests/test_louvain.py
deleted file mode 100644
index e1c192f1eac..00000000000
--- a/Orange/tests/test_louvain.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Test methods with long descriptive names can omit docstrings
-# pylint: disable=missing-docstring
-
-import unittest
-import numpy as np
-
-from Orange.data import Table
-from Orange.clustering.louvain import Louvain
-
-
-class TestLouvain(unittest.TestCase):
-    def setUp(self):
-        self.data = Table('iris')
-        self.louvain = Louvain()
-
-    def test_orange_table(self):
-        labels = self.louvain(self.data)
-        self.assertEqual(np.ndarray, type(labels))