Skip to content

Commit

Permalink
Add louvain_communities to cugraph-nx (rapidsai#3803)
Browse files Browse the repository at this point in the history
See: rapidsai#3773

Possible follow-up tasks:
- Update to use threshold parameter exposed from C++ (rapidsai#3792)
- Add `max_level` argument to networkx implementation
  - ~Or, add `max_level` as an extra `cugraph_nx`-specific argument~ (**done**)
- Update PLC to handle empty graphs gracefully (rapidsai#3804)
- Update PLC to handle directed graphs
- Add `louvain_partitions` (needs to be added to PLC)
  - https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.louvain.louvain_partitions.html

This is passing many networkx tests. I haven't marked this as a draft because it's usable and (I would argue) mergeable as is.

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#3803
  • Loading branch information
eriknw authored and rlratzel committed Sep 8, 2023
1 parent b496254 commit f270817
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 24 deletions.
2 changes: 1 addition & 1 deletion python/cugraph-nx/cugraph_nx/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import centrality
from . import centrality, community
from .centrality import *
13 changes: 13 additions & 0 deletions python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .louvain import *
56 changes: 56 additions & 0 deletions python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import pylibcugraph as plc

from cugraph_nx.convert import _to_undirected_graph
from cugraph_nx.utils import _groupby, networkx_algorithm, not_implemented_for

__all__ = ["louvain_communities"]


@not_implemented_for("directed")
@networkx_algorithm(extra_params="max_level")
def louvain_communities(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
):
    """`threshold` and `seed` parameters are currently ignored.

    Extra parameter: `max_level` controls the maximum number of levels of the algorithm.
    """
    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
    graph = _to_undirected_graph(G, weight)
    if graph.row_indices.size == 0:
        # TODO: PLC doesn't handle empty graphs gracefully!
        # `row_indices` is empty, so skip PLC and put each node in its own community.
        singleton_keys = graph._nodeiter_to_iter(range(len(graph)))
        return [{node} for node in singleton_keys]
    # When the caller gives no limit, fall back to the largest level count we can pass.
    level_cap = sys.maxsize if max_level is None else max_level
    node_ids, cluster_ids, _modularity = plc.louvain(
        resource_handle=plc.ResourceHandle(),
        graph=graph._get_plc_graph(),
        max_level=level_cap,  # TODO: add this parameter to NetworkX
        resolution=resolution,
        # threshold=threshold,  # TODO: add this parameter to PLC
        do_expensive_check=False,
    )
    # Bucket node IDs by cluster ID, then convert each bucket into a set of node keys.
    communities = []
    for members in _groupby(cluster_ids, node_ids).values():
        communities.append(set(graph._nodearray_to_list(members)))
    return communities


@louvain_communities._can_run
def _(
    G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
):
    """Return True only when `G` is undirected; the signature mirrors the algorithm's."""
    # NetworkX allows both directed and undirected, but cugraph only allows undirected.
    if G.is_directed():
        return False
    return True
13 changes: 12 additions & 1 deletion python/cugraph-nx/cugraph_nx/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import cugraph_nx as cnx

if TYPE_CHECKING:
from collections.abc import Iterator
from collections.abc import Iterable, Iterator

from cugraph_nx.typing import (
AttrKey,
Expand Down Expand Up @@ -532,6 +532,17 @@ def _get_plc_graph(
do_expensive_check=False,
)

def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]:
    """Translate an iterable of node IDs into an iterable of node keys.

    When the graph has no ``id_to_key`` mapping, ``node_ids`` is returned
    unchanged; otherwise a lazy ``map`` over the mapping lookup is returned.
    """
    id_to_key = self.id_to_key
    if id_to_key is None:
        # No mapping is set, so the IDs are handed back as-is.
        return node_ids
    return map(id_to_key.__getitem__, node_ids)

def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
    """Convert an array of node IDs into a Python list of node keys."""
    if self.key_to_id is not None:
        # A key mapping exists: convert to a list first, then look up each key.
        return list(self._nodeiter_to_iter(node_ids.tolist()))
    # No key mapping is set, so the ID list is returned directly.
    return node_ids.tolist()

def _nodearrays_to_dict(
self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue]
) -> dict[NodeKey, NodeValue]:
Expand Down
20 changes: 20 additions & 0 deletions python/cugraph-nx/cugraph_nx/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,12 @@ def key(testpath):
return (testname, frozenset({classname, filename}))
return (testname, frozenset({filename}))

# Reasons for xfailing
no_weights = "weighted implementation not currently supported"
no_multigraph = "multigraphs not currently supported"
louvain_different = (
"Louvain may be different due to RNG or unsupported threshold parameter"
)

xfail = {}

Expand All @@ -69,6 +73,10 @@ def key(testpath):
nxver = parse(nx.__version__)
if nxver.major == 3 and nxver.minor in {0, 1}:
# MAINT: networkx 3.0, 3.1
# NetworkX 3.2 added the ability to "fallback to nx" if backend algorithms
# raise NotImplementedError or `can_run` returns False. The tests below
# exercise behavior we have not implemented yet, so we mark them as xfail
# for previous versions of NetworkX.
xfail.update(
{
key(
Expand Down Expand Up @@ -160,6 +168,18 @@ def key(testpath):
): no_multigraph,
}
)
else:
xfail.update(
{
key(
"test_louvain.py:test_karate_club_partition"
): louvain_different,
key("test_louvain.py:test_none_weight_param"): louvain_different,
key("test_louvain.py:test_multigraph"): louvain_different,
key("test_louvain.py:test_threshold"): louvain_different,
}
)

for item in items:
kset = set(item.keywords)
for (test_name, keywords), reason in xfail.items():
Expand Down
77 changes: 61 additions & 16 deletions python/cugraph-nx/cugraph_nx/tests/test_match_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect

import networkx as nx
Expand All @@ -25,37 +26,81 @@ def test_match_signature_and_names():
continue

# nx version >=3.2 uses utils.backends, version >=3.0,<3.2 uses classes.backends
nx_backends = getattr(
nx.utils, "backends", getattr(nx.classes, "backends", None)
)
if nx_backends is None:
raise AttributeError(
f"imported networkx version {nx.__version__} is not "
"supported, must be >= 3.0"
)
is_nx_30_or_31 = hasattr(nx.classes, "backends")
nx_backends = nx.classes.backends if is_nx_30_or_31 else nx.utils.backends

if is_nx_30_or_31 and name in {"louvain_communities"}:
continue
dispatchable_func = nx_backends._registered_algorithms[name]
# nx version >=3.2 uses orig_func, version >=3.0,<3.2 uses _orig_func
orig_func = getattr(
dispatchable_func, "orig_func", getattr(dispatchable_func, "_orig_func")
)
if is_nx_30_or_31:
orig_func = dispatchable_func._orig_func
else:
orig_func = dispatchable_func.orig_func

# Matching signatures?
sig = inspect.signature(orig_func)
assert sig == inspect.signature(func)
orig_sig = inspect.signature(orig_func)
func_sig = inspect.signature(func)
if not func.extra_params:
assert orig_sig == func_sig
else:
# Ignore extra parameters added to cugraph-nx algorithm
assert orig_sig == func_sig.replace(
parameters=[
p
for name, p in func_sig.parameters.items()
if name not in func.extra_params
]
)
if func.can_run is not cnx.utils.decorators._default_can_run:
assert func_sig == inspect.signature(func.can_run)

# Matching function names?
assert func.__name__ == dispatchable_func.__name__ == orig_func.__name__

# Matching dispatch names?
# nx version >=3.2 uses name, version >=3.0,<3.2 uses dispatchname
assert func.name == getattr(
dispatchable_func, "name", getattr(dispatchable_func, "dispatchname")
)
if is_nx_30_or_31:
dispatchname = dispatchable_func.dispatchname
else:
dispatchname = dispatchable_func.name
assert func.name == dispatchname

# Matching modules (i.e., where function defined)?
assert (
"networkx." + func.__module__.split(".", 1)[1]
== dispatchable_func.__module__
== orig_func.__module__
)

# Matching package layout (i.e., which modules have the function)?
cnx_path = func.__module__
name = func.__name__
while "." in cnx_path:
# This only walks up the module tree and does not check sibling modules
cnx_path, mod_name = cnx_path.rsplit(".", 1)
nx_path = cnx_path.replace("cugraph_nx", "networkx")
cnx_mod = importlib.import_module(cnx_path)
nx_mod = importlib.import_module(nx_path)
# Is the function present in the current module?
present_in_cnx = hasattr(cnx_mod, name)
present_in_nx = hasattr(nx_mod, name)
if present_in_cnx is not present_in_nx: # pragma: no cover (debug)
if present_in_cnx:
raise AssertionError(
f"{name} exists in {cnx_path}, but not in {nx_path}"
)
raise AssertionError(
f"{name} exists in {nx_path}, but not in {cnx_path}"
)
# Is the nested module present in the current module?
present_in_cnx = hasattr(cnx_mod, mod_name)
present_in_nx = hasattr(nx_mod, mod_name)
if present_in_cnx is not present_in_nx: # pragma: no cover (debug)
if present_in_cnx:
raise AssertionError(
f"{mod_name} exists in {cnx_path}, but not in {nx_path}"
)
raise AssertionError(
f"{mod_name} exists in {nx_path}, but not in {cnx_path}"
)
1 change: 1 addition & 0 deletions python/cugraph-nx/cugraph_nx/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .decorators import *
from .misc import *
13 changes: 11 additions & 2 deletions python/cugraph-nx/cugraph_nx/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,26 @@ def inner(func):


class networkx_algorithm:
def __new__(cls, func=None, *, name=None):
def __new__(cls, func=None, *, name=None, extra_params=None):
if func is None:
return partial(networkx_algorithm, name=name)
return partial(networkx_algorithm, name=name, extra_params=extra_params)
instance = object.__new__(cls)
# update_wrapper sets __wrapped__, which will be used for the signature
update_wrapper(instance, func)
instance.__defaults__ = func.__defaults__
instance.__kwdefaults__ = func.__kwdefaults__
instance.name = func.__name__ if name is None else name
# TODO: should extra_params be a dict[str, str] that describes the parameters?
if extra_params is None:
instance.extra_params = None
elif isinstance(extra_params, str):
instance.extra_params = {extra_params}
else:
instance.extra_params = set(extra_params)
instance.can_run = _default_can_run
setattr(BackendInterface, instance.name, instance)
# Set methods so they are in __dict__
instance._can_run = instance._can_run
return instance

def _can_run(self, func):
Expand Down
45 changes: 45 additions & 0 deletions python/cugraph-nx/cugraph_nx/utils/misc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp

__all__ = ["_groupby"]


def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
    """Perform a groupby operation given an array of group IDs and array of values.

    Parameters
    ----------
    groups : cp.ndarray
        Array that holds the group IDs.
        Group IDs are assumed to be consecutive integers from 0.
    values : cp.ndarray
        Array of values to be grouped according to groups.
        Must be the same size as groups array.

    Returns
    -------
    dict with group IDs as keys and cp.ndarray as values.
        The dict is empty when ``groups`` is empty.
    """
    if groups.size == 0:
        # No group IDs means no groups. Without this guard the loop below would
        # still run once and emit a spurious ``{0: <empty array>}`` entry.
        return {}
    # It would actually be easy to support groups that aren't consecutive integers,
    # but let's wait until we need it to implement it.
    order = cp.argsort(groups)
    sorted_groups = groups[order]
    sorted_values = values[order]
    # Offsets (in sorted order) at which the group ID changes, i.e. where each
    # group after the first one begins.
    boundaries = (cp.nonzero(cp.diff(sorted_groups))[0] + 1).tolist()
    rv = {}
    start = 0
    # The final boundary is the array size, which closes the last group.
    for i, end in enumerate([*boundaries, groups.size]):
        rv[i] = sorted_values[start:end]
        start = end
    return rv
6 changes: 3 additions & 3 deletions python/cugraph-nx/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ repos:
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.13
rev: v0.14
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
Expand All @@ -50,7 +50,7 @@ repos:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.284
rev: v0.0.286
hooks:
- id: ruff
args: [--fix-only, --show-fixes]
Expand All @@ -76,7 +76,7 @@ repos:
additional_dependencies: [tomli]
files: ^(cugraph_nx|docs)/
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.284
rev: v0.0.286
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph-nx/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ omit = []
ignore_errors = false
precision = 1
fail_under = 0
skip_covered = true
skip_covered = false # Nice to see fully covered files when running `run_nx_tests.sh`
skip_empty = true
exclude_lines = [
"pragma: no cover",
Expand Down

0 comments on commit f270817

Please sign in to comment.