Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] OWLouvain: Ensure deterministic clustering #3492

Merged
merged 1 commit into from
Dec 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions Orange/clustering/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import numpy as np
import networkx as nx
# NOTE: The ``community`` package might be renamed in the near future, see
# GH issue https://github.com/taynaud/python-louvain/issues/23
from community import best_partition
from sklearn.neighbors import NearestNeighbors

Expand Down Expand Up @@ -53,18 +55,27 @@ def table_to_knn_graph(data, k_neighbors, metric, progress_callback=None):
if progress_callback:
progress_callback(idx / num_nodes)

for neighbour in nearest_neighbors[node]:
graph.add_edge(node, neighbour, weight=jaccard(
nearest_neighbors[node], nearest_neighbors[neighbour]))
for neighbor in nearest_neighbors[node]:
graph.add_edge(
node,
neighbor,
weight=jaccard(nearest_neighbors[node], nearest_neighbors[neighbor]),
)

return graph


class Louvain:
preprocessors = [Orange.preprocess.Continuize(),
Orange.preprocess.SklImpute()]

def __init__(self, k_neighbors=30, metric='l2', resolution=1., preprocessors=None):
preprocessors = [Orange.preprocess.Continuize(), Orange.preprocess.SklImpute()]

def __init__(
self,
k_neighbors=30,
metric="l2",
resolution=1.0,
random_state=None,
preprocessors=None,
):
"""Louvain clustering for community detection in graphs.

Louvain clustering is a community detection algorithm for detecting
Expand All @@ -77,12 +88,21 @@ def __init__(self, k_neighbors=30, metric='l2', resolution=1., preprocessors=Non
k_neighbors : Optional[int]
The number of nearest neighbors to use for the KNN graph if
tabular data is passed.

metric : Optional[str]
The metric to use to compute the nearest neighbors.

resolution : Optional[float]
The resolution is a parameter of the Louvain method that affects
the size of the recovered clusters.

random_state : Optional[Union[int, RandomState]]
The random state parameter follows the convention used in scikit-learn.
If the value is an int, random_state is the seed used by the random
number generator. If the value is a RandomState instance, then it will
be used as the random number generator. If the value is None, the random
number generator is the RandomState instance used by `np.random`.

"""
if preprocessors is None:
preprocessors = type(self).preprocessors
Expand All @@ -91,6 +111,7 @@ def __init__(self, k_neighbors=30, metric='l2', resolution=1., preprocessors=Non
self.k_neighbors = k_neighbors
self.metric = metric
self.resolution = resolution
self.random_state = random_state

self.labels = None

Expand All @@ -106,14 +127,20 @@ def preprocess(self, data):
def fit(self, X, y=None):
# If we are given a table, we have to convert it to a graph first
if isinstance(X, Table):
graph = table_to_knn_graph(X.X, metric=self.metric, k_neighbors=self.k_neighbors)
graph = table_to_knn_graph(
X.X, metric=self.metric, k_neighbors=self.k_neighbors
)
# Same goes for a matrix
elif isinstance(X, np.ndarray):
graph = table_to_knn_graph(X, metric=self.metric, k_neighbors=self.k_neighbors)
graph = table_to_knn_graph(
X, metric=self.metric, k_neighbors=self.k_neighbors
)
elif isinstance(X, nx.Graph):
graph = X

partition = best_partition(graph, resolution=self.resolution)
partition = best_partition(
graph, resolution=self.resolution, random_state=self.random_state
)
partition = np.fromiter(list(zip(*sorted(partition.items())))[1], dtype=int)

self.labels = partition
Expand Down
6 changes: 3 additions & 3 deletions Orange/widgets/unsupervised/owlouvainclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def commit(self):

# Preprocess the dataset
if self.preprocessed_data is None:
louvain = Louvain()
louvain = Louvain(random_state=0)
self.preprocessed_data = louvain.preprocess(self.data)

state = TaskState(self)
Expand Down Expand Up @@ -590,7 +590,7 @@ def pcallback(val):
if state.is_interuption_requested():
return res

louvain = Louvain(resolution=resolution)
louvain = Louvain(resolution=resolution, random_state=0)
res.partition = louvain.fit_predict(graph)
state.set_partial_results(("partition", res.partition))
return res
Expand All @@ -603,7 +603,7 @@ def run_on_graph(graph, resolution, state):
"""
state = state # type: TaskState
res = Results(resolution=resolution)
louvain = Louvain(resolution=resolution)
louvain = Louvain(resolution=resolution, random_state=0)
state.set_status("Detecting communities...")
if state.is_interuption_requested():
return res
Expand Down
17 changes: 17 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owlouvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,20 @@ def test_invalidate(self):
self.widget.commit()
self.get_output(self.widget.Outputs.annotated_data)
self.assertFalse(self.widget.Information.modified.is_shown())

def test_deterministic_clustering(self):
    """Clustering the same data twice must give identical outputs.

    The iris data is sent to the widget, committed, and the annotated
    output is collected. After clearing the input, the same procedure is
    repeated and the meta columns of both outputs are compared.
    """

    def cluster_iris():
        # Feed iris to the widget, wait for the computation to finish and
        # fetch the annotated output table.
        self.send_signal(self.widget.Inputs.data, self.iris)
        self.commit_and_wait()
        return self.get_output(self.widget.Outputs.annotated_data)

    first_run = cluster_iris()

    # Clear the input so the second run starts from a reset widget state
    self.send_signal(self.widget.Inputs.data, None)

    second_run = cluster_iris()

    # Both runs must have produced exactly the same meta values
    np.testing.assert_equal(first_run.metas, second_run.metas)