Add louvain_communities to cugraph-nx (rapidsai#3803)

See: rapidsai#3773 Possible follow-up tasks: - Update to use threshold parameter exposed from C++ (rapidsai#3792) - Add `max_level` argument to networkx implementation - ~Or, add `max_level` as extra `cugraph_nx`-specific argument~ (**done**) - Update PLC to handle empty graphs gracefully (rapidsai#3804) - Update PLC to handle directed graphs - Add `louvain_partitions` (needs to be added to PLC) - https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.louvain.louvain_partitions.html This is passing many networkx tests. I don't have this as draft, b/c it's usable and (I would argue) mergeable as is. Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: rapidsai#3803
rlratzel · Sep 8, 2023 · f270817 · f270817
1 parent b496254
commit f270817
Show file tree

Hide file tree

Showing 11 changed files with 224 additions and 24 deletions.
diff --git a/python/cugraph-nx/cugraph_nx/algorithms/__init__.py b/python/cugraph-nx/cugraph_nx/algorithms/__init__.py
@@ -10,5 +10,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import centrality
+from . import centrality, community
 from .centrality import *
diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py b/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .louvain import *
diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py b/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+import pylibcugraph as plc
+
+from cugraph_nx.convert import _to_undirected_graph
+from cugraph_nx.utils import _groupby, networkx_algorithm, not_implemented_for
+
+__all__ = ["louvain_communities"]
+
+
+@not_implemented_for("directed")
+@networkx_algorithm(extra_params="max_level")
+def louvain_communities(
+    G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
+):
+    """`threshold` and `seed` parameters are currently ignored.
+
+    Extra parameter: `max_level` controls the maximum number of levels of the algorithm.
+
+    Returns a list of sets of node keys, one set per community. When the
+    converted graph has an empty `row_indices` array, every node is returned
+    as its own singleton community and PLC is not called.
+    """
+    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
+    G = _to_undirected_graph(G, weight)
+    if G.row_indices.size == 0:
+        # TODO: PLC doesn't handle empty graphs gracefully!
+        singletons = []
+        for key in G._nodeiter_to_iter(range(len(G))):
+            singletons.append({key})
+        return singletons
+    # `None` means the caller set no limit, so `sys.maxsize` is passed instead.
+    level_limit = sys.maxsize if max_level is None else max_level
+    node_ids, cluster_ids, _modularity = plc.louvain(
+        resource_handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(),
+        max_level=level_limit,  # TODO: add this parameter to NetworkX
+        resolution=resolution,
+        # threshold=threshold,  # TODO: add this parameter to PLC
+        do_expensive_check=False,
+    )
+    # Bucket the vertex IDs by cluster, then translate each bucket to node keys.
+    communities = []
+    for members in _groupby(cluster_ids, node_ids).values():
+        communities.append(set(G._nodearray_to_list(members)))
+    return communities
+
+
+@louvain_communities._can_run
+def _(
+    G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
+):
+    """Return True only when `G` is undirected; the other arguments are unused."""
+    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
+    if G.is_directed():
+        return False
+    return True
diff --git a/python/cugraph-nx/cugraph_nx/classes/graph.py b/python/cugraph-nx/cugraph_nx/classes/graph.py
@@ -24,7 +24,7 @@
 import cugraph_nx as cnx
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import Iterable, Iterator
 
     from cugraph_nx.typing import (
         AttrKey,
@@ -532,6 +532,17 @@ def _get_plc_graph(
             do_expensive_check=False,
         )
 
+    def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]:
+        """Convert an iterable of node IDs to an iterable of node keys.
+
+        If `self.id_to_key` is None the IDs are returned unchanged (the very
+        same object); otherwise a lazy `map` over `id_to_key` lookups is returned.
+        """
+        id_to_key = self.id_to_key
+        if id_to_key is None:
+            return node_ids
+        return map(id_to_key.__getitem__, node_ids)
+
+    def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
+        """Convert an array of node IDs to a list of node keys.
+
+        If `self.key_to_id` is None the IDs are returned as a plain list;
+        otherwise each ID is translated through `_nodeiter_to_iter`.
+        """
+        if self.key_to_id is not None:
+            return [*self._nodeiter_to_iter(node_ids.tolist())]
+        return node_ids.tolist()
+
     def _nodearrays_to_dict(
         self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue]
     ) -> dict[NodeKey, NodeValue]:

diff --git a/python/cugraph-nx/cugraph_nx/interface.py b/python/cugraph-nx/cugraph_nx/interface.py
@@ -59,8 +59,12 @@ def key(testpath):
                 return (testname, frozenset({classname, filename}))
             return (testname, frozenset({filename}))
 
+        # Reasons for xfailing
         no_weights = "weighted implementation not currently supported"
         no_multigraph = "multigraphs not currently supported"
+        louvain_different = (
+            "Louvain may be different due to RNG or unsupported threshold parameter"
+        )
 
         xfail = {}
 
@@ -69,6 +73,10 @@ def key(testpath):
         nxver = parse(nx.__version__)
         if nxver.major == 3 and nxver.minor in {0, 1}:
             # MAINT: networkx 3.0, 3.1
+            # NetworkX 3.2 added the ability to "fallback to nx" if backend algorithms
+            # raise NotImplementedError or `can_run` returns False. The tests below
+            # exercise behavior we have not implemented yet, so we mark them as xfail
+            # for previous versions of NetworkX.
             xfail.update(
                 {
                     key(
@@ -160,6 +168,18 @@ def key(testpath):
                     ): no_multigraph,
                 }
             )
+        else:
+            xfail.update(
+                {
+                    key(
+                        "test_louvain.py:test_karate_club_partition"
+                    ): louvain_different,
+                    key("test_louvain.py:test_none_weight_param"): louvain_different,
+                    key("test_louvain.py:test_multigraph"): louvain_different,
+                    key("test_louvain.py:test_threshold"): louvain_different,
+                }
+            )
+
         for item in items:
             kset = set(item.keywords)
             for (test_name, keywords), reason in xfail.items():

diff --git a/python/cugraph-nx/cugraph_nx/tests/test_match_api.py b/python/cugraph-nx/cugraph_nx/tests/test_match_api.py
@@ -10,6 +10,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import importlib
 import inspect
 
 import networkx as nx
@@ -25,37 +26,81 @@ def test_match_signature_and_names():
             continue
 
         # nx version >=3.2 uses utils.backends, version >=3.0,<3.2 uses classes.backends
-        nx_backends = getattr(
-            nx.utils, "backends", getattr(nx.classes, "backends", None)
-        )
-        if nx_backends is None:
-            raise AttributeError(
-                f"imported networkx version {nx.__version__} is not "
-                "supported, must be >= 3.0"
-            )
+        is_nx_30_or_31 = hasattr(nx.classes, "backends")
+        nx_backends = nx.classes.backends if is_nx_30_or_31 else nx.utils.backends
 
+        if is_nx_30_or_31 and name in {"louvain_communities"}:
+            continue
         dispatchable_func = nx_backends._registered_algorithms[name]
         # nx version >=3.2 uses orig_func, version >=3.0,<3.2 uses _orig_func
-        orig_func = getattr(
-            dispatchable_func, "orig_func", getattr(dispatchable_func, "_orig_func")
-        )
+        if is_nx_30_or_31:
+            orig_func = dispatchable_func._orig_func
+        else:
+            orig_func = dispatchable_func.orig_func
 
         # Matching signatures?
-        sig = inspect.signature(orig_func)
-        assert sig == inspect.signature(func)
+        orig_sig = inspect.signature(orig_func)
+        func_sig = inspect.signature(func)
+        if not func.extra_params:
+            assert orig_sig == func_sig
+        else:
+            # Ignore extra parameters added to cugraph-nx algorithm
+            assert orig_sig == func_sig.replace(
+                parameters=[
+                    p
+                    for name, p in func_sig.parameters.items()
+                    if name not in func.extra_params
+                ]
+            )
+        if func.can_run is not cnx.utils.decorators._default_can_run:
+            assert func_sig == inspect.signature(func.can_run)
 
         # Matching function names?
         assert func.__name__ == dispatchable_func.__name__ == orig_func.__name__
 
         # Matching dispatch names?
         # nx version >=3.2 uses name, version >=3.0,<3.2 uses dispatchname
-        assert func.name == getattr(
-            dispatchable_func, "name", getattr(dispatchable_func, "dispatchname")
-        )
+        if is_nx_30_or_31:
+            dispatchname = dispatchable_func.dispatchname
+        else:
+            dispatchname = dispatchable_func.name
+        assert func.name == dispatchname
 
         # Matching modules (i.e., where function defined)?
         assert (
             "networkx." + func.__module__.split(".", 1)[1]
             == dispatchable_func.__module__
             == orig_func.__module__
         )
+
+        # Matching package layout (i.e., which modules have the function)?
+        cnx_path = func.__module__
+        name = func.__name__
+        while "." in cnx_path:
+            # This only walks up the module tree and does not check sibling modules
+            cnx_path, mod_name = cnx_path.rsplit(".", 1)
+            nx_path = cnx_path.replace("cugraph_nx", "networkx")
+            cnx_mod = importlib.import_module(cnx_path)
+            nx_mod = importlib.import_module(nx_path)
+            # Is the function present in the current module?
+            present_in_cnx = hasattr(cnx_mod, name)
+            present_in_nx = hasattr(nx_mod, name)
+            if present_in_cnx is not present_in_nx:  # pragma: no cover (debug)
+                if present_in_cnx:
+                    raise AssertionError(
+                        f"{name} exists in {cnx_path}, but not in {nx_path}"
+                    )
+                raise AssertionError(
+                    f"{name} exists in {nx_path}, but not in {cnx_path}"
+                )
+            # Is the nested module present in the current module?
+            present_in_cnx = hasattr(cnx_mod, mod_name)
+            present_in_nx = hasattr(nx_mod, mod_name)
+            if present_in_cnx is not present_in_nx:  # pragma: no cover (debug)
+                if present_in_cnx:
+                    raise AssertionError(
+                        f"{mod_name} exists in {cnx_path}, but not in {nx_path}"
+                    )
+                raise AssertionError(
+                    f"{mod_name} exists in {nx_path}, but not in {cnx_path}"
+                )
diff --git a/python/cugraph-nx/cugraph_nx/utils/__init__.py b/python/cugraph-nx/cugraph_nx/utils/__init__.py
@@ -11,3 +11,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .decorators import *
+from .misc import *
diff --git a/python/cugraph-nx/cugraph_nx/utils/decorators.py b/python/cugraph-nx/cugraph_nx/utils/decorators.py
@@ -28,17 +28,26 @@ def inner(func):
 
 
 class networkx_algorithm:
-    def __new__(cls, func=None, *, name=None):
+    def __new__(cls, func=None, *, name=None, extra_params=None):
         if func is None:
-            return partial(networkx_algorithm, name=name)
+            return partial(networkx_algorithm, name=name, extra_params=extra_params)
         instance = object.__new__(cls)
         # update_wrapper sets __wrapped__, which will be used for the signature
         update_wrapper(instance, func)
         instance.__defaults__ = func.__defaults__
         instance.__kwdefaults__ = func.__kwdefaults__
         instance.name = func.__name__ if name is None else name
+        # TODO: should extra_params be a dict[str, str] that describes the parameters?
+        if extra_params is None:
+            instance.extra_params = None
+        elif isinstance(extra_params, str):
+            instance.extra_params = {extra_params}
+        else:
+            instance.extra_params = set(extra_params)
         instance.can_run = _default_can_run
         setattr(BackendInterface, instance.name, instance)
+        # Set methods so they are in __dict__
+        instance._can_run = instance._can_run
         return instance
 
     def _can_run(self, func):

diff --git a/python/cugraph-nx/cugraph_nx/utils/misc.py b/python/cugraph-nx/cugraph_nx/utils/misc.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cupy as cp
+
+__all__ = ["_groupby"]
+
+
+def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
+    """Perform a groupby operation given an array of group IDs and array of values.
+
+    Parameters
+    ----------
+    groups : cp.ndarray
+        Array that holds the group IDs.
+        Group IDs are assumed to be consecutive integers from 0.
+    values : cp.ndarray
+        Array of values to be grouped according to groups.
+        Must be the same size as groups array.
+
+    Returns
+    -------
+    dict with group IDs as keys and cp.ndarray as values.
+    The dict is empty when ``groups`` is empty.
+    """
+    if groups.size == 0:
+        # Without this guard the loop below would still run once (the final
+        # boundary `groups.size` is always appended) and emit a phantom, empty
+        # group 0.
+        return {}
+    # It would actually be easy to support groups that aren't consecutive integers,
+    # but let's wait until we need it to implement it.
+    sort_indices = cp.argsort(groups)
+    sorted_groups = groups[sort_indices]
+    sorted_values = values[sort_indices]
+    # A nonzero diff marks where the sorted group ID changes; +1 turns that into
+    # the exclusive end index of each group. The last group ends at the array end.
+    ends = (cp.nonzero(cp.diff(sorted_groups))[0] + 1).tolist()
+    ends.append(groups.size)
+    rv = {}
+    start = 0
+    for i, end in enumerate(ends):
+        rv[i] = sorted_values[start:end]
+        start = end
+    return rv
diff --git a/python/cugraph-nx/lint.yaml b/python/cugraph-nx/lint.yaml
@@ -26,7 +26,7 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.13
+    rev: v0.14
     hooks:
       - id: validate-pyproject
         name: Validate pyproject.toml
@@ -50,7 +50,7 @@ repos:
       - id: black
       # - id: black-jupyter
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.284
+    rev: v0.0.286
     hooks:
       - id: ruff
         args: [--fix-only, --show-fixes]
@@ -76,7 +76,7 @@ repos:
         additional_dependencies: [tomli]
         files: ^(cugraph_nx|docs)/
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.284
+    rev: v0.0.286
     hooks:
       - id: ruff
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/python/cugraph-nx/pyproject.toml b/python/cugraph-nx/pyproject.toml
@@ -116,7 +116,7 @@ omit = []
 ignore_errors = false
 precision = 1
 fail_under = 0
-skip_covered = true
+skip_covered = false  # Nice to see fully covered files when running `run_nx_tests.sh`
 skip_empty = true
 exclude_lines = [
     "pragma: no cover",