From f270817062c5ca19411286d5005c4c1106520e33 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 30 Aug 2023 22:35:35 -0500 Subject: [PATCH] Add `louvain_communities` to cugraph-nx (#3803) See: #3773 Possible follow-up tasks: - Update to use threshold parameter exposed from C++ (#3792) - Add `max_level` argument to networkx implementation - ~Or, add `max_level` as extra `cugraph_nx`-specific argument~ (**done**) - Update PLC to handle empty graphs gracefully (#3804) - Update PLC to handle directed graphs - Add `louvain_partitions` (needs to be added to PLC) - https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.louvain.louvain_partitions.html This is passing many networkx tests. I am not marking this as a draft because it is usable and (I would argue) mergeable as is. Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3803 --- .../cugraph_nx/algorithms/__init__.py | 2 +- .../algorithms/community/__init__.py | 13 ++++ .../algorithms/community/louvain.py | 56 ++++++++++++++ python/cugraph-nx/cugraph_nx/classes/graph.py | 13 +++- python/cugraph-nx/cugraph_nx/interface.py | 20 +++++ .../cugraph_nx/tests/test_match_api.py | 77 +++++++++++++++---- .../cugraph-nx/cugraph_nx/utils/__init__.py | 1 + .../cugraph-nx/cugraph_nx/utils/decorators.py | 13 +++- python/cugraph-nx/cugraph_nx/utils/misc.py | 45 +++++++++++ python/cugraph-nx/lint.yaml | 6 +- python/cugraph-nx/pyproject.toml | 2 +- 11 files changed, 224 insertions(+), 24 deletions(-) create mode 100644 python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py create mode 100644 python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py create mode 100644 python/cugraph-nx/cugraph_nx/utils/misc.py diff --git a/python/cugraph-nx/cugraph_nx/algorithms/__init__.py b/python/cugraph-nx/cugraph_nx/algorithms/__init__.py index d014f7f401f..3a585452d6d 100644 --- 
a/python/cugraph-nx/cugraph_nx/algorithms/__init__.py +++ b/python/cugraph-nx/cugraph_nx/algorithms/__init__.py @@ -10,5 +10,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from . import centrality +from . import centrality, community from .centrality import * diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py b/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py new file mode 100644 index 00000000000..51a4f5c195f --- /dev/null +++ b/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .louvain import * diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py b/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py new file mode 100644 index 00000000000..476f7428aab --- /dev/null +++ b/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py @@ -0,0 +1,56 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys + +import pylibcugraph as plc + +from cugraph_nx.convert import _to_undirected_graph +from cugraph_nx.utils import _groupby, networkx_algorithm, not_implemented_for + +__all__ = ["louvain_communities"] + + +@not_implemented_for("directed") +@networkx_algorithm(extra_params="max_level") +def louvain_communities( + G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None +): + """`threshold` and `seed` parameters are currently ignored. + + Extra parameter: `max_level` controls the maximum number of levels of the algorithm. + """ + # NetworkX allows both directed and undirected, but cugraph only allows undirected. + G = _to_undirected_graph(G, weight) + if G.row_indices.size == 0: + # TODO: PLC doesn't handle empty graphs gracefully! + return [{key} for key in G._nodeiter_to_iter(range(len(G)))] + if max_level is None: + max_level = sys.maxsize + vertices, clusters, modularity = plc.louvain( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + max_level=max_level, # TODO: add this parameter to NetworkX + resolution=resolution, + # threshold=threshold, # TODO: add this parameter to PLC + do_expensive_check=False, + ) + groups = _groupby(clusters, vertices) + return [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] + + +@louvain_communities._can_run +def _( + G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None +): + # NetworkX allows both directed and undirected, but cugraph only allows undirected. 
+ return not G.is_directed() diff --git a/python/cugraph-nx/cugraph_nx/classes/graph.py b/python/cugraph-nx/cugraph_nx/classes/graph.py index 3d561815de6..5604f2457f8 100644 --- a/python/cugraph-nx/cugraph_nx/classes/graph.py +++ b/python/cugraph-nx/cugraph_nx/classes/graph.py @@ -24,7 +24,7 @@ import cugraph_nx as cnx if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterable, Iterator from cugraph_nx.typing import ( AttrKey, @@ -532,6 +532,17 @@ def _get_plc_graph( do_expensive_check=False, ) + def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]: + """Convert an iterable of node IDs to an iterable of node keys.""" + if (id_to_key := self.id_to_key) is not None: + return map(id_to_key.__getitem__, node_ids) + return node_ids + + def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]: + if self.key_to_id is None: + return node_ids.tolist() + return list(self._nodeiter_to_iter(node_ids.tolist())) + def _nodearrays_to_dict( self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue] ) -> dict[NodeKey, NodeValue]: diff --git a/python/cugraph-nx/cugraph_nx/interface.py b/python/cugraph-nx/cugraph_nx/interface.py index ccd8d418d30..198fdd09cfc 100644 --- a/python/cugraph-nx/cugraph_nx/interface.py +++ b/python/cugraph-nx/cugraph_nx/interface.py @@ -59,8 +59,12 @@ def key(testpath): return (testname, frozenset({classname, filename})) return (testname, frozenset({filename})) + # Reasons for xfailing no_weights = "weighted implementation not currently supported" no_multigraph = "multigraphs not currently supported" + louvain_different = ( + "Louvain may be different due to RNG or unsupported threshold parameter" + ) xfail = {} @@ -69,6 +73,10 @@ def key(testpath): nxver = parse(nx.__version__) if nxver.major == 3 and nxver.minor in {0, 1}: # MAINT: networkx 3.0, 3.1 + # NetworkX 3.2 added the ability to "fallback to nx" if backend algorithms + # raise NotImplementedError or 
`can_run` returns False. The tests below + # exercise behavior we have not implemented yet, so we mark them as xfail + # for previous versions of NetworkX. xfail.update( { key( @@ -160,6 +168,18 @@ def key(testpath): ): no_multigraph, } ) + else: + xfail.update( + { + key( + "test_louvain.py:test_karate_club_partition" + ): louvain_different, + key("test_louvain.py:test_none_weight_param"): louvain_different, + key("test_louvain.py:test_multigraph"): louvain_different, + key("test_louvain.py:test_threshold"): louvain_different, + } + ) + for item in items: kset = set(item.keywords) for (test_name, keywords), reason in xfail.items(): diff --git a/python/cugraph-nx/cugraph_nx/tests/test_match_api.py b/python/cugraph-nx/cugraph_nx/tests/test_match_api.py index 2a2e33ec2f4..918c18b4ce3 100644 --- a/python/cugraph-nx/cugraph_nx/tests/test_match_api.py +++ b/python/cugraph-nx/cugraph_nx/tests/test_match_api.py @@ -10,6 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import importlib import inspect import networkx as nx @@ -25,33 +26,45 @@ def test_match_signature_and_names(): continue # nx version >=3.2 uses utils.backends, version >=3.0,<3.2 uses classes.backends - nx_backends = getattr( - nx.utils, "backends", getattr(nx.classes, "backends", None) - ) - if nx_backends is None: - raise AttributeError( - f"imported networkx version {nx.__version__} is not " - "supported, must be >= 3.0" - ) + is_nx_30_or_31 = hasattr(nx.classes, "backends") + nx_backends = nx.classes.backends if is_nx_30_or_31 else nx.utils.backends + if is_nx_30_or_31 and name in {"louvain_communities"}: + continue dispatchable_func = nx_backends._registered_algorithms[name] # nx version >=3.2 uses orig_func, version >=3.0,<3.2 uses _orig_func - orig_func = getattr( - dispatchable_func, "orig_func", getattr(dispatchable_func, "_orig_func") - ) + if is_nx_30_or_31: + orig_func = dispatchable_func._orig_func + else: + orig_func = dispatchable_func.orig_func # Matching signatures? - sig = inspect.signature(orig_func) - assert sig == inspect.signature(func) + orig_sig = inspect.signature(orig_func) + func_sig = inspect.signature(func) + if not func.extra_params: + assert orig_sig == func_sig + else: + # Ignore extra parameters added to cugraph-nx algorithm + assert orig_sig == func_sig.replace( + parameters=[ + p + for name, p in func_sig.parameters.items() + if name not in func.extra_params + ] + ) + if func.can_run is not cnx.utils.decorators._default_can_run: + assert func_sig == inspect.signature(func.can_run) # Matching function names? assert func.__name__ == dispatchable_func.__name__ == orig_func.__name__ # Matching dispatch names? 
# nx version >=3.2 uses name, version >=3.0,<3.2 uses dispatchname - assert func.name == getattr( - dispatchable_func, "name", getattr(dispatchable_func, "dispatchname") - ) + if is_nx_30_or_31: + dispatchname = dispatchable_func.dispatchname + else: + dispatchname = dispatchable_func.name + assert func.name == dispatchname # Matching modules (i.e., where function defined)? assert ( @@ -59,3 +72,35 @@ def test_match_signature_and_names(): == dispatchable_func.__module__ == orig_func.__module__ ) + + # Matching package layout (i.e., which modules have the function)? + cnx_path = func.__module__ + name = func.__name__ + while "." in cnx_path: + # This only walks up the module tree and does not check sibling modules + cnx_path, mod_name = cnx_path.rsplit(".", 1) + nx_path = cnx_path.replace("cugraph_nx", "networkx") + cnx_mod = importlib.import_module(cnx_path) + nx_mod = importlib.import_module(nx_path) + # Is the function present in the current module? + present_in_cnx = hasattr(cnx_mod, name) + present_in_nx = hasattr(nx_mod, name) + if present_in_cnx is not present_in_nx: # pragma: no cover (debug) + if present_in_cnx: + raise AssertionError( + f"{name} exists in {cnx_path}, but not in {nx_path}" + ) + raise AssertionError( + f"{name} exists in {nx_path}, but not in {cnx_path}" + ) + # Is the nested module present in the current module? 
+ present_in_cnx = hasattr(cnx_mod, mod_name) + present_in_nx = hasattr(nx_mod, mod_name) + if present_in_cnx is not present_in_nx: # pragma: no cover (debug) + if present_in_cnx: + raise AssertionError( + f"{mod_name} exists in {cnx_path}, but not in {nx_path}" + ) + raise AssertionError( + f"{mod_name} exists in {nx_path}, but not in {cnx_path}" + ) diff --git a/python/cugraph-nx/cugraph_nx/utils/__init__.py b/python/cugraph-nx/cugraph_nx/utils/__init__.py index f7ef42c8677..6df5fb60978 100644 --- a/python/cugraph-nx/cugraph_nx/utils/__init__.py +++ b/python/cugraph-nx/cugraph_nx/utils/__init__.py @@ -11,3 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. from .decorators import * +from .misc import * diff --git a/python/cugraph-nx/cugraph_nx/utils/decorators.py b/python/cugraph-nx/cugraph_nx/utils/decorators.py index 7bda3e58b6b..619c9610c5d 100644 --- a/python/cugraph-nx/cugraph_nx/utils/decorators.py +++ b/python/cugraph-nx/cugraph_nx/utils/decorators.py @@ -28,17 +28,26 @@ def inner(func): class networkx_algorithm: - def __new__(cls, func=None, *, name=None): + def __new__(cls, func=None, *, name=None, extra_params=None): if func is None: - return partial(networkx_algorithm, name=name) + return partial(networkx_algorithm, name=name, extra_params=extra_params) instance = object.__new__(cls) # update_wrapper sets __wrapped__, which will be used for the signature update_wrapper(instance, func) instance.__defaults__ = func.__defaults__ instance.__kwdefaults__ = func.__kwdefaults__ instance.name = func.__name__ if name is None else name + # TODO: should extra_params be a dict[str, str] that describes the parameters? 
+ if extra_params is None: + instance.extra_params = None + elif isinstance(extra_params, str): + instance.extra_params = {extra_params} + else: + instance.extra_params = set(extra_params) instance.can_run = _default_can_run setattr(BackendInterface, instance.name, instance) + # Set methods so they are in __dict__ + instance._can_run = instance._can_run return instance def _can_run(self, func): diff --git a/python/cugraph-nx/cugraph_nx/utils/misc.py b/python/cugraph-nx/cugraph_nx/utils/misc.py new file mode 100644 index 00000000000..18487a05996 --- /dev/null +++ b/python/cugraph-nx/cugraph_nx/utils/misc.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp + +__all__ = ["_groupby"] + + +def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]: + """Perform a groupby operation given an array of group IDs and array of values. + + Parameters + ---------- + groups : cp.ndarray + Array that holds the group IDs. + Group IDs are assumed to be consecutive integers from 0. + values : cp.ndarray + Array of values to be grouped according to groups. + Must be the same size as groups array. + + Returns + ------- + dict with group IDs as keys and cp.ndarray as values. + """ + # It would actually be easy to support groups that aren't consecutive integers, + # but let's wait until we need it to implement it. 
+ sorted_groups = cp.argsort(groups) + sorted_values = values[sorted_groups] + rv = {} + start = 0 + for i, end in enumerate( + [*(cp.nonzero(cp.diff(groups[sorted_groups]))[0] + 1).tolist(), groups.size] + ): + rv[i] = sorted_values[start:end] + start = end + return rv diff --git a/python/cugraph-nx/lint.yaml b/python/cugraph-nx/lint.yaml index 04747a2b49b..42c1b9657c7 100644 --- a/python/cugraph-nx/lint.yaml +++ b/python/cugraph-nx/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.13 + rev: v0.14 hooks: - id: validate-pyproject name: Validate pyproject.toml @@ -50,7 +50,7 @@ repos: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.284 + rev: v0.0.286 hooks: - id: ruff args: [--fix-only, --show-fixes] @@ -76,7 +76,7 @@ repos: additional_dependencies: [tomli] files: ^(cugraph_nx|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.284 + rev: v0.0.286 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/cugraph-nx/pyproject.toml b/python/cugraph-nx/pyproject.toml index e8c4f670444..7384fc75007 100644 --- a/python/cugraph-nx/pyproject.toml +++ b/python/cugraph-nx/pyproject.toml @@ -116,7 +116,7 @@ omit = [] ignore_errors = false precision = 1 fail_under = 0 -skip_covered = true +skip_covered = false # Nice to see fully covered files when running `run_nx_tests.sh` skip_empty = true exclude_lines = [ "pragma: no cover",