diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bab39557c99..188ea1a266a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: pass_filenames: false additional_dependencies: [gitpython] - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.5.1 + rev: v1.8.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index 971c3ff1032..a8ed18a20fc 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,29 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random + import networkx as nx import pandas as pd import pytest from cugraph import datasets - -# FIXME: promote these to cugraph.datasets so the following steps aren't -# necessary -# -# These datasets can be downloaded using the script in the 'datasets' dir: -# -# cd /datasets -# ./get_test_data.sh --benchmark -# -# Then set the following env var so the dataset utils can find their location: -# -# export RAPIDS_DATASET_ROOT_DIR=/datasets -# -from cugraph_benchmarking.params import ( - hollywood, - europe_osm, - cit_patents, - soc_livejournal, -) +import nx_cugraph as nxcg # Attempt to import the NetworkX dispatching module, which is only needed when # testing with NX <3.2 in order to dynamically switch backends. 
NX >=3.2 allows @@ -45,22 +29,76 @@ ################################################################################ -# Fixtures and helpers -backend_params = ["cugraph", None] +# Fixtures and params + +# See https://pytest-benchmark.readthedocs.io/en/latest/glossary.html for how +# these variables are used. +rounds = 1 +iterations = 1 +warmup_rounds = 1 -dataset_params = [ +dataset_param_values = [ pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), - pytest.param(cit_patents, marks=[pytest.mark.medium, pytest.mark.directed]), - pytest.param(hollywood, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(europe_osm, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(soc_livejournal, marks=[pytest.mark.large, pytest.mark.directed]), + pytest.param( + datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] + ), + pytest.param( + datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] + ), ] +backend_param_values = ["cugraph", "cugraph-preconverted", None] + + +def setup_module(module): + """ + Trivial conversion call to force various one-time CUDA initialization + operations to happen outside of benchmarks. + """ + G = nx.karate_club_graph() + nxcg.from_networkx(G) + + +# Test IDs are generated using the lambda assigned to the ids arg to provide an +# easier-to-read name. 
This is especially helpful for Dataset objs (see +# https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture) +@pytest.fixture( + scope="module", params=dataset_param_values, ids=lambda ds: f"ds={str(ds)}" +) +def graph_obj(request): + """ + Returns a NX Graph or DiGraph obj from the dataset instance parameter. + """ + dataset = request.param + return nx_graph_from_dataset(dataset) + + +@pytest.fixture( + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", +) +def backend(request): + """ + Returns the backend name to use. This is done as a fixture for consistency + and simplicity when creating benchmarks (no need to mark the benchmark as + parametrized). + """ + return request.param + +################################################################################ +# Helpers def nx_graph_from_dataset(dataset_obj): """ Read the dataset specified by the dataset_obj and create and return a @@ -87,126 +125,334 @@ def nx_graph_from_dataset(dataset_obj): return G -# Test IDs are generated using the lambda assigned to the ids arg to provide an -# easier-to-read name from the Dataset obj string repr. -# See: https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture -@pytest.fixture(scope="module", params=dataset_params, ids=lambda ds: f"ds={str(ds)}") -def graph_obj(request): - """ - Returns a NX Graph or DiGraph obj from the dataset instance parameter. - """ - dataset = request.param - return nx_graph_from_dataset(dataset) - - -def get_legacy_backend_selector(backend_name): +def get_legacy_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function with either the default - dispatch decorator, or the "testing" decorator which unconditionally - dispatches. + dispatcher (which dispatches based on input graph type), or the "testing" + dispatcher (which autoconverts and unconditionally dispatches). 
This is only supported for NetworkX <3.2 """ backends.plugin_name = "cugraph" orig_dispatch = backends._dispatch testing_dispatch = backends.test_override_dispatch - # Testing with the networkx <3.2 dispatch mechanism is based on decorating - # networkx APIs. The decorator is either one that only uses a backend if - # the input graph type is for that backend (the default decorator), or the - # "testing" decorator, which unconditionally converts a graph type to the - # type needed by the backend then calls the backend. If the cugraph backend - # is specified, create a callable that decorates the benchmarked function - # with the testing decorator. - # - # Because both the default and testing decorators assume they are only - # applied once and do bookkeeping to ensure algos are not registered - # multiple times, the callable also clears bookkeeping so the decorators - # can be reapplied multiple times. This is obviously a hack and networkx - # >=3.2 makes this use case properly supported. if backend_name == "cugraph": - - def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return testing_dispatch(*args, **kwargs) - + dispatch = testing_dispatch else: + dispatch = orig_dispatch + + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): + # Networkx <3.2 registers functions when the dispatch decorator is + # applied (called) and errors if re-registered, so clear bookkeeping to + # allow it to be called repeatedly. 
+ backends._registered_algorithms = {} + actual_func = dispatch(func) # returns the func the dispatcher picks def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return orig_dispatch(*args, **kwargs) + retval = actual_func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval - return wrapper + return wrapper + + return wrap_callable_for_dispatch -def get_backend_selector(backend_name): +def get_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function in order to set the "backend" kwarg on it. This is only supported for NetworkX >= 3.2 """ - def get_callable_for_func(func): + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): def wrapper(*args, **kwargs): kwargs["backend"] = backend_name - return func(*args, **kwargs) + retval = func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval return wrapper - return get_callable_for_func + return wrap_callable_for_dispatch @pytest.fixture( - scope="module", params=backend_params, ids=lambda backend: f"backend={backend}" + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", ) -def backend_selector(request): +def backend_wrapper(request): """ Returns a callable that takes a function algo and wraps it in another function that calls the algo using the appropriate backend. + + For example: if the backend to test is "cugraph", this will return a + function that calls nx.pagerank(..., backend='cugraph') """ backend_name = request.param + actual_backend_name = backend_name + + # Special case: cugraph-preconverted may be specified as a backend but this + # name is reserved to indicate a cugraph backend is to be used with a + # preconverted graph obj (rather than having the backend do the + # conversion). 
+ if backend_name == "cugraph-preconverted": + actual_backend_name = "cugraph" + + # NX <3.2 does not support the backends= kwarg, so the backend must be + # enabled differently if backends is not None: - return get_legacy_backend_selector(backend_name) + wrapper = get_legacy_backend_wrapper(actual_backend_name) else: - return get_backend_selector(backend_name) + wrapper = get_backend_wrapper(actual_backend_name) + + wrapper.backend_name = backend_name + return wrapper + + +def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): + """ + Given a Graph object and a backend name, return a converted Graph or the + original Graph object based on the backend to use. + + This is needed because some backend names are actually used as descriptions + for combinations of backends and converted/non-converted graphs. For + example, a benchmark may specify the "cugraph-preconverted" backend, which + is not an installed backend but instead refers to the "cugraph" backend + passed a NX Graph that has been converted to a nx-cugraph Graph object. 
+ """ + G = graph_obj + if backend_wrapper.backend_name == "cugraph-preconverted": + G = nxcg.from_networkx(G) + return G ################################################################################ # Benchmarks -normalized_params = [True, False] -k_params = [10, 100] - - -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") -@pytest.mark.parametrize("k", k_params, ids=lambda k: f"{k=}") -def bench_betweenness_centrality(benchmark, graph_obj, backend_selector, normalized, k): - result = benchmark( - backend_selector(nx.betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, - k=k, +# normalized_param_values = [True, False] +# k_param_values = [10, 100] +normalized_param_values = [True] +k_param_values = [10] + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") +def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") def bench_edge_betweenness_centrality( - benchmark, graph_obj, backend_selector, normalized + benchmark, graph_obj, backend_wrapper, normalized, k ): - result = benchmark( - backend_selector(nx.edge_betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + 
target=backend_wrapper(nx.edge_betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -def bench_louvain_communities(benchmark, graph_obj, backend_selector): +def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) # The cugraph backend for louvain_communities only supports undirected graphs - if isinstance(graph_obj, nx.DiGraph): - G = graph_obj.to_undirected() - else: - G = graph_obj - result = benchmark(backend_selector(nx.community.louvain_communities), G) + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.community.louvain_communities), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_eigenvector_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.eigenvector_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_hits(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.hits), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + 
rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is tuple + assert len(result) == 2 + assert type(result[0]) is dict + assert type(result[1]) is dict + + +def bench_in_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.in_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_katz_centrality(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.katz_centrality), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_k_truss(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.k_truss), + args=(G,), + kwargs=dict( + k=2, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_out_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.out_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_pagerank(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = 
benchmark.pedantic( + target=backend_wrapper(nx.pagerank), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper(nx.single_source_shortest_path_length), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper( + nx.single_target_shortest_path_length, exhaust_returned_iterator=True + ), + args=(G,), + kwargs=dict( + target=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # exhaust_returned_iterator=True forces the result to a list, but is not + # needed for this algo in NX 3.3+ which returns a dict instead of an + # iterator. Forcing to a list does not change the benchmark timing. assert type(result) is list diff --git a/benchmarks/shared/python/cugraph_benchmarking/params.py b/benchmarks/shared/python/cugraph_benchmarking/params.py index d82cfd26117..034e22ffc37 100644 --- a/benchmarks/shared/python/cugraph_benchmarking/params.py +++ b/benchmarks/shared/python/cugraph_benchmarking/params.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,42 +14,16 @@ import pytest from pylibcugraph.testing.utils import gen_fixture_params -from cugraph.testing import RAPIDS_DATASET_ROOT_DIR_PATH from cugraph.datasets import ( - Dataset, karate, netscience, email_Eu_core, + hollywood, + europe_osm, + cit_patents, + soc_livejournal, ) -# Create Dataset objects from .csv files. -# Once the cugraph.dataset package is updated to include the metadata files for -# these (like karate), these will no longer need to be explicitly instantiated. -hollywood = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/hollywood.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -hollywood.metadata["is_directed"] = False -europe_osm = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/europe_osm.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -europe_osm.metadata["is_directed"] = False -cit_patents = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/cit-Patents.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -cit_patents.metadata["is_directed"] = True -soc_livejournal = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/soc-LiveJournal1.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -soc_livejournal.metadata["is_directed"] = True - # Assume all "file_data" (.csv file on disk) datasets are too small to be useful for MG. undirected_datasets = [ pytest.param( diff --git a/ci/test_python.sh b/ci/test_python.sh index 500bc2f3467..d8288758f3c 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
set -euo pipefail @@ -230,7 +230,6 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then # rmat is not tested because of multi-GPU testing pytest \ --cache-clear \ - --ignore=tests/int \ --ignore=tests/mg \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \ --cov-config=../../.coveragerc \ diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 9a211c81886..acd42224387 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -33,11 +33,12 @@ if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then -f https://data.pyg.org/whl/torch-2.1.0+cu118.html rapids-logger "pytest cugraph-pyg (single GPU)" + pushd python/cugraph-pyg/cugraph_pyg python -m pytest \ --cache-clear \ - --ignore=tests/int \ --ignore=tests/mg \ - python/cugraph-pyg/cugraph_pyg/tests + tests + popd else rapids-logger "skipping cugraph-pyg wheel test on CUDA!=11.8" fi diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6a75a420bf8..6684d31d8fd 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1005,9 +1005,14 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_types); /** - * @brief Remove all but one edge when a multi-edge exists. Note that this function does not use - * stable methods. When a multi-edge exists, one of the edges will remain, there is no - * guarantee on which one will remain. + * @brief Remove all but one edge when a multi-edge exists. + * + * When a multi-edge exists, one of the edges will remain. If @p keep_min_value_edge is false, an + * arbitrary edge will be selected among the edges in the multi-edge. 
If @p keep_min_value_edge is + * true, the edge with the minimum value will be selected. The edge weights will be first compared + * (if @p edgelist_weights.has_value() is true); edge IDs will be compared next (if @p + * edgelist_edge_ids.has_value() is true); and edge types (if @p edgelist_edge_types.has_value() is + * true) will be compared last. * * In an MG context it is assumed that edges have been shuffled to the proper GPU, * in which case any multi-edges will be on the same GPU. @@ -1024,6 +1029,11 @@ remove_self_loops(raft::handle_t const& handle, * @param edgelist_weights Optional list of edge weights * @param edgelist_edge_ids Optional list of edge ids * @param edgelist_edge_types Optional list of edge types + * @param keep_min_value_edge Flag indicating whether to keep an arbitrary edge (false) or the + * minimum value edge (true) among the edges in a multi-edge. Relevant only if @p + * edgelist_weights.has_value() | @p edgelist_edge_ids.has_value() | @p + * edgelist_edge_types.has_value() is true. Setting this to true incurs performance overhead as this + * requires more comparisons. * @return Tuple of vectors storing edge sources, destinations, optional weights, * optional edge ids, optional edge types. */ @@ -1038,6 +1048,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge = false); } // namespace cugraph diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 326022a3fa9..57a589caf02 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -217,7 +217,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 7793458b53a..6745be01f95 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu index ba07d068c0e..54403f0b034 100644 --- a/cpp/src/structure/remove_multi_edges.cu +++ b/cpp/src/structure/remove_multi_edges.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -39,7 +40,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -51,7 +53,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -63,7 +66,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -75,7 +79,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -87,6 +92,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); } // namespace cugraph diff --git 
a/cpp/src/structure/remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh index fdd3059f874..651876ac8b1 100644 --- a/cpp/src/structure/remove_multi_edges_impl.cuh +++ b/cpp/src/structure/remove_multi_edges_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,10 +104,12 @@ group_multi_edges( rmm::device_uvector&& edgelist_srcs, rmm::device_uvector&& edgelist_dsts, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, - size_t mem_frugal_threshold) + size_t mem_frugal_threshold, + bool keep_min_value_edge) { auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto value_first = get_dataframe_buffer_begin(edgelist_values); + auto edge_first = thrust::make_zip_iterator(pair_first, value_first); if (edgelist_srcs.size() > mem_frugal_threshold) { // FIXME: Tuning parameter to address high frequency multi-edges @@ -128,19 +130,28 @@ group_multi_edges( raft::update_host( h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + h_group_counts[0], - get_dataframe_buffer_begin(edgelist_values)); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first + h_group_counts[0], - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + h_group_counts[0]); + thrust::sort(handle.get_thrust_policy(), + edge_first + h_group_counts[0], + edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key( + handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0], value_first); + 
thrust::sort_by_key(handle.get_thrust_policy(), + pair_first + h_group_counts[0], + pair_first + edgelist_srcs.size(), + value_first + h_group_counts[0]); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values)); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values)); + } } return std::make_tuple( @@ -160,7 +171,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types) + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge) { auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; @@ -187,7 +199,8 @@ remove_multi_edges(raft::handle_t const& handle, std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = @@ -196,7 +209,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -207,7 +221,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { 
std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = detail::group_multi_edges>( @@ -215,7 +230,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } } else { @@ -228,7 +244,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = detail::group_multi_edges>( @@ -236,7 +253,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -246,7 +264,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp index ca644b76c5a..99e752c0b02 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,9 +27,9 @@ struct Similarity_Usecase { bool use_weights{false}; - bool check_correctness{true}; size_t max_seeds{std::numeric_limits::max()}; size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; + bool check_correctness{true}; }; template @@ -293,7 +293,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, //: 100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -305,7 +305,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, //: Similarity_Usecase{false,true,20,100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -319,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, true}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -332,7 +333,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // disable correctness checks for large graphs //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), 
std::numeric_limits::max(), false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 8cc87b26f1d..5a9dc9c90d4 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -633,12 +633,14 @@ construct_graph(raft::handle_t const& handle, if (drop_multi_edges) { std::tie(d_src_v, d_dst_v, d_weights_v, std::ignore, std::ignore) = - cugraph::remove_multi_edges(handle, - std::move(d_src_v), - std::move(d_dst_v), - std::move(d_weights_v), - std::nullopt, - std::nullopt); + cugraph::remove_multi_edges( + handle, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + std::nullopt, + std::nullopt, + is_symmetric ? 
true /* keep minimum weight edges to maintain symmetry */ : false); } graph_t graph(handle); diff --git a/dependencies.yaml b/dependencies.yaml index 3eed525bfe4..18ddb6c51dd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -9,7 +9,8 @@ files: - checks - common_build - cpp_build - - cudatoolkit + - cuda + - cuda_version - docs - python_build_wheel - python_build_cythonize @@ -37,19 +38,19 @@ files: docs: output: none includes: - - cudatoolkit + - cuda_version - docs - py_version - depends_on_pylibcugraphops test_cpp: output: none includes: - - cudatoolkit + - cuda_version - test_cpp test_notebooks: output: none includes: - - cudatoolkit + - cuda_version - py_version - test_notebook - test_python_common @@ -57,7 +58,7 @@ files: test_python: output: none includes: - - cudatoolkit + - cuda_version - depends_on_cudf - py_version - test_python_common @@ -273,33 +274,40 @@ dependencies: - output_types: [conda, requirements] packages: - pre-commit - cudatoolkit: + cuda_version: specific: - - output_types: [conda] + - output_types: conda matrices: - matrix: - cuda: "12.0" + cuda: "11.2" packages: - - cuda-version=12.0 + - cuda-version=11.2 - matrix: - cuda: "11.8" + cuda: "11.4" packages: - - cuda-version=11.8 - - cudatoolkit + - cuda-version=11.4 - matrix: cuda: "11.5" packages: - cuda-version=11.5 - - cudatoolkit - matrix: - cuda: "11.4" + cuda: "11.8" packages: - - cuda-version=11.4 - - cudatoolkit + - cuda-version=11.8 - matrix: - cuda: "11.2" + cuda: "12.0" + packages: + - cuda-version=12.0 + cuda: + specific: + - output_types: [conda] + matrices: + - matrix: + cuda: "12.*" + packages: + - matrix: + cuda: "11.*" packages: - - cuda-version=11.2 - cudatoolkit common_build: common: @@ -344,9 +352,8 @@ dependencies: packages: - nvcc_linux-aarch64=11.8 - matrix: - cuda: "12.0" + cuda: "12.*" packages: - - cuda-version=12.0 - cuda-nvcc docs: common: diff --git a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py 
b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py index be8f8245807..7047c62250b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -385,7 +385,7 @@ def test_get_input_nodes(karate_gnn, dask_client): def test_mg_frame_handle(graph, dask_client): F, G, N = graph cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - assert isinstance(cugraph_store._EXPERIMENTAL__CuGraphStore__graph._plc_graph, dict) + assert isinstance(cugraph_store._CuGraphStore__graph._plc_graph, dict) @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index c4bd00bb86c..b8666c0d806 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -11,9 +11,9 @@ requires = [ testpaths = ["cugraph_pyg/tests"] [project] -name = "cugraph_pyg" +name = "cugraph-pyg" dynamic = ["version"] -description = "cugraph_pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics." +description = "cugraph-pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics." authors = [ { name = "NVIDIA Corporation" }, ]