From 836c27a2c64f5581ab5361376da7220f39546620 Mon Sep 17 00:00:00 2001 From: ralph <137829296+nv-rliu@users.noreply.github.com> Date: Fri, 28 Jul 2023 00:43:06 -0400 Subject: [PATCH 01/10] Promote `Datasets` to stable and clean-up unit tests (#3712) Addresses #3675 and #3706 This PR moves the `datasets` API from experimental to stable. Users can now do: ```python from cugraph.datasets import karate G = karate.get_graph() ``` The existing `experimental.datasets` package has been updated with the `promoted_experimental_warning_wrapper()`. All notebooks, docstrings, and unit test dependencies on `datasets` have been updated to use the stable API. This PR also removes the networkx warning suppression from the unit tests. Authors: - ralph (https://github.com/nv-rliu) - Naim (https://github.com/naimnv) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3712 --- .github/workflows/build.yaml | 1 + .github/workflows/pr.yaml | 2 +- .../algorithms/centrality/Betweenness.ipynb | 25 +- .../algorithms/centrality/Centrality.ipynb | 23 +- notebooks/algorithms/centrality/Degree.ipynb | 54 +-- .../algorithms/centrality/Eigenvector.ipynb | 48 ++- notebooks/algorithms/centrality/Katz.ipynb | 22 +- notebooks/algorithms/community/ECG.ipynb | 15 +- notebooks/algorithms/community/Louvain.ipynb | 15 +- .../community/Spectral-Clustering.ipynb | 28 +- .../community/Subgraph-Extraction.ipynb | 22 +- .../community/Triangle-Counting.ipynb | 28 +- notebooks/algorithms/community/ktruss.ipynb | 19 +- .../components/ConnectedComponents.ipynb | 24 +- notebooks/algorithms/cores/core-number.ipynb | 16 +- notebooks/algorithms/cores/kcore.ipynb | 58 ++-- .../algorithms/layout/Force-Atlas2.ipynb | 36 +- notebooks/algorithms/link_analysis/HITS.ipynb | 21 +- .../algorithms/link_analysis/Pagerank.ipynb | 24 +- .../link_prediction/Jaccard-Similarity.ipynb | 39 ++- .../link_prediction/Overlap-Similarity.ipynb | 28 
+- .../Sorensen_coefficient.ipynb | 6 +- .../algorithms/sampling/RandomWalk.ipynb | 13 +- .../algorithms/structure/Renumber-2.ipynb | 18 +- notebooks/algorithms/structure/Renumber.ipynb | 17 +- .../algorithms/structure/Symmetrize.ipynb | 12 +- notebooks/algorithms/traversal/BFS.ipynb | 16 +- notebooks/algorithms/traversal/SSSP.ipynb | 13 +- python/cugraph/cugraph/__init__.py | 2 + .../centrality/betweenness_centrality.py | 8 +- .../cugraph/centrality/degree_centrality.py | 4 +- .../centrality/eigenvector_centrality.py | 4 +- .../cugraph/centrality/katz_centrality.py | 4 +- python/cugraph/cugraph/community/ecg.py | 4 +- python/cugraph/cugraph/community/egonet.py | 8 +- .../cugraph/community/induced_subgraph.py | 4 +- .../cugraph/community/ktruss_subgraph.py | 10 +- python/cugraph/cugraph/community/leiden.py | 4 +- python/cugraph/cugraph/community/louvain.py | 6 +- .../cugraph/community/spectral_clustering.py | 20 +- .../cugraph/community/subgraph_extraction.py | 4 +- .../cugraph/components/connectivity.py | 12 +- python/cugraph/cugraph/cores/core_number.py | 6 +- python/cugraph/cugraph/cores/k_core.py | 6 +- python/cugraph/cugraph/datasets/__init__.py | 40 +++ python/cugraph/cugraph/datasets/dataset.py | 312 ++++++++++++++++++ .../cugraph/datasets/datasets_config.yaml | 5 + .../cugraph/datasets/metadata/__init__.py | 0 .../cugraph/datasets/metadata/cyber.yaml | 26 ++ .../cugraph/datasets/metadata/dolphins.yaml | 31 ++ .../datasets/metadata/email_Eu_core.yaml | 29 ++ .../cugraph/datasets/metadata/karate.yaml | 26 ++ .../datasets/metadata/karate_asymmetric.yaml | 26 ++ .../datasets/metadata/karate_disjoint.yaml | 26 ++ .../cugraph/datasets/metadata/netscience.yaml | 24 ++ .../cugraph/datasets/metadata/polbooks.yaml | 24 ++ .../cugraph/datasets/metadata/small_line.yaml | 24 ++ .../cugraph/datasets/metadata/small_tree.yaml | 24 ++ .../cugraph/datasets/metadata/toy_graph.yaml | 24 ++ .../metadata/toy_graph_undirected.yaml | 24 ++ 
.../cugraph/cugraph/experimental/__init__.py | 2 - .../cugraph/experimental/datasets/__init__.py | 11 + .../experimental/link_prediction/jaccard.py | 12 +- .../experimental/link_prediction/overlap.py | 8 +- .../experimental/link_prediction/sorensen.py | 8 +- .../cugraph/gnn/data_loading/bulk_sampler.py | 4 +- python/cugraph/cugraph/layout/force_atlas2.py | 6 +- python/cugraph/cugraph/link_analysis/hits.py | 6 +- .../cugraph/cugraph/link_analysis/pagerank.py | 4 +- .../cugraph/link_prediction/jaccard.py | 12 +- .../cugraph/link_prediction/overlap.py | 4 +- .../cugraph/link_prediction/sorensen.py | 8 +- .../cugraph/link_prediction/wjaccard.py | 4 +- .../cugraph/link_prediction/woverlap.py | 4 +- .../cugraph/link_prediction/wsorensen.py | 4 +- python/cugraph/cugraph/sampling/node2vec.py | 4 +- .../cugraph/cugraph/sampling/random_walks.py | 4 +- python/cugraph/cugraph/testing/__init__.py | 44 ++- .../test_batch_betweenness_centrality_mg.py | 2 +- ...st_batch_edge_betweenness_centrality_mg.py | 3 +- .../centrality/test_betweenness_centrality.py | 25 +- .../test_betweenness_centrality_mg.py | 9 +- .../centrality/test_degree_centrality.py | 10 +- .../centrality/test_degree_centrality_mg.py | 5 +- .../test_edge_betweenness_centrality.py | 31 +- .../centrality/test_eigenvector_centrality.py | 19 +- .../test_eigenvector_centrality_mg.py | 11 +- .../tests/centrality/test_katz_centrality.py | 30 +- .../centrality/test_katz_centrality_mg.py | 11 +- .../cugraph/tests/comms/test_comms_mg.py | 10 +- .../tests/community/test_balanced_cut.py | 11 +- .../cugraph/tests/community/test_ecg.py | 7 +- .../community/test_induced_subgraph_mg.py | 8 +- .../tests/community/test_k_truss_subgraph.py | 42 +-- .../cugraph/tests/community/test_leiden.py | 10 +- .../cugraph/tests/community/test_leiden_mg.py | 5 +- .../cugraph/tests/community/test_louvain.py | 21 +- .../tests/community/test_louvain_mg.py | 1 - .../tests/community/test_modularity.py | 14 +- .../community/test_subgraph_extraction.py | 
12 +- .../tests/community/test_triangle_count.py | 25 +- .../tests/community/test_triangle_count_mg.py | 8 +- .../tests/components/test_connectivity.py | 28 +- .../tests/components/test_connectivity_mg.py | 13 +- .../cugraph/tests/core/test_core_number.py | 10 +- .../cugraph/tests/core/test_core_number_mg.py | 6 +- .../cugraph/cugraph/tests/core/test_k_core.py | 23 +- .../cugraph/tests/core/test_k_core_mg.py | 8 +- .../tests/data_store/test_gnn_feat_storage.py | 6 +- .../tests/data_store/test_property_graph.py | 9 +- .../data_store/test_property_graph_mg.py | 17 +- .../cugraph/tests/generators/test_rmat.py | 3 +- .../cugraph/tests/generators/test_rmat_mg.py | 5 +- .../tests/gnn/test_dgl_uniform_sampler.py | 6 +- .../tests/gnn/test_dgl_uniform_sampler_mg.py | 9 +- .../cugraph/tests/internals/test_renumber.py | 11 +- .../tests/internals/test_renumber_mg.py | 6 +- .../tests/internals/test_symmetrize.py | 8 +- .../tests/internals/test_symmetrize_mg.py | 4 +- .../cugraph/tests/layout/test_force_atlas2.py | 12 +- .../cugraph/tests/link_analysis/test_hits.py | 10 +- .../tests/link_analysis/test_hits_mg.py | 6 +- .../tests/link_analysis/test_pagerank.py | 25 +- .../tests/link_analysis/test_pagerank_mg.py | 14 +- .../tests/link_prediction/test_jaccard.py | 28 +- .../tests/link_prediction/test_jaccard_mg.py | 5 +- .../tests/link_prediction/test_overlap.py | 18 +- .../tests/link_prediction/test_overlap_mg.py | 4 +- .../tests/link_prediction/test_sorensen.py | 26 +- .../tests/link_prediction/test_sorensen_mg.py | 8 +- .../tests/link_prediction/test_wjaccard.py | 24 +- .../tests/link_prediction/test_woverlap.py | 13 +- .../tests/link_prediction/test_wsorensen.py | 24 +- .../cugraph/tests/nx/test_compat_algo.py | 2 +- .../cugraph/tests/nx/test_compat_pr.py | 2 +- .../cugraph/tests/nx/test_nx_convert.py | 30 +- .../tests/sampling/test_bulk_sampler.py | 3 +- .../tests/sampling/test_bulk_sampler_io.py | 5 +- .../tests/sampling/test_bulk_sampler_io_mg.py | 7 +- 
.../tests/sampling/test_bulk_sampler_mg.py | 13 +- .../cugraph/tests/sampling/test_egonet.py | 21 +- .../cugraph/tests/sampling/test_egonet_mg.py | 8 +- .../cugraph/tests/sampling/test_node2vec.py | 15 +- .../tests/sampling/test_random_walks.py | 21 +- .../tests/sampling/test_random_walks_mg.py | 11 +- .../sampling/test_uniform_neighbor_sample.py | 9 +- .../test_uniform_neighbor_sample_mg.py | 15 +- .../tests/structure/test_convert_matrix.py | 16 +- .../cugraph/tests/structure/test_graph.py | 32 +- .../cugraph/tests/structure/test_graph_mg.py | 19 +- .../tests/structure/test_multigraph.py | 11 +- .../cugraph/tests/traversal/test_bfs.py | 39 +-- .../cugraph/tests/traversal/test_bfs_mg.py | 10 +- .../traversal/test_filter_unreachable.py | 18 +- .../cugraph/tests/traversal/test_paths.py | 8 +- .../cugraph/tests/traversal/test_sssp.py | 34 +- .../cugraph/tests/traversal/test_sssp_mg.py | 11 +- .../tests/tree/test_maximum_spanning_tree.py | 25 +- .../tests/tree/test_minimum_spanning_tree.py | 24 +- .../cugraph/cugraph/tests/utils/mg_context.py | 5 +- .../cugraph/tests/utils/test_dataset.py | 156 +++++++-- .../tests/utils/test_replication_mg.py | 9 +- .../cugraph/cugraph/tests/utils/test_utils.py | 6 +- .../cugraph/tests/utils/test_utils_mg.py | 21 +- python/cugraph/cugraph/traversal/bfs.py | 8 +- python/cugraph/cugraph/traversal/sssp.py | 4 +- .../cugraph/tree/minimum_spanning_tree.py | 10 +- 167 files changed, 1898 insertions(+), 1012 deletions(-) create mode 100644 python/cugraph/cugraph/datasets/__init__.py create mode 100644 python/cugraph/cugraph/datasets/dataset.py create mode 100644 python/cugraph/cugraph/datasets/datasets_config.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/__init__.py create mode 100644 python/cugraph/cugraph/datasets/metadata/cyber.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/dolphins.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/email_Eu_core.yaml create mode 100644 
python/cugraph/cugraph/datasets/metadata/karate.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate_asymmetric.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/karate_disjoint.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/netscience.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/polbooks.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/small_line.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/small_tree.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/toy_graph.yaml create mode 100644 python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4354ca40327..74838271093 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,6 +27,7 @@ concurrency: jobs: cpp-build: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08 with: + node_type: cpu32 diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 17d7ac48907..ea4d7c4c625 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -36,7 +36,7 @@ jobs: uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08 with: build_type: pull-request - node_type: cpu16 + node_type: cpu32 conda-cpp-tests: needs: conda-cpp-build secrets: inherit diff --git a/notebooks/algorithms/centrality/Betweenness.ipynb b/notebooks/algorithms/centrality/Betweenness.ipynb index 82b7b4bc29e..29ad37ec254 100644 --- a/notebooks/algorithms/centrality/Betweenness.ipynb +++ b/notebooks/algorithms/centrality/Betweenness.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -13,10 +14,12 @@ "| Brad Rees | 04/24/2019 | created | 0.15 | GV100, CUDA 11.0\n", "| Brad Rees | 08/16/2020 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4\n", "| 
Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", - "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5" + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -28,6 +31,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -37,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -71,6 +76,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -82,6 +88,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -98,6 +105,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -115,7 +123,7 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { @@ -129,6 +137,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -142,10 +151,11 @@ "outputs": [], "source": [ "# Create a graph using the imported Dataset object\n", - "G = karate.get_graph(fetch=True)" + "G = karate.get_graph(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -173,6 +183,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -203,6 +214,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -210,6 +222,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -267,6 +280,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -274,11 +288,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA 
CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -303,7 +318,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/centrality/Centrality.ipynb b/notebooks/algorithms/centrality/Centrality.ipynb index 68d7776bbbb..d19dd646b15 100644 --- a/notebooks/algorithms/centrality/Centrality.ipynb +++ b/notebooks/algorithms/centrality/Centrality.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -8,6 +9,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -18,10 +20,12 @@ "| Brad Rees | 04/16/2021 | created | 0.19 | GV100, CUDA 11.0\n", "| Brad Rees | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4\n", "| Don Acosta | 07/29/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", " " ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -86,6 +90,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -93,6 +98,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -116,7 +122,7 @@ "# Import the cugraph modules\n", "import cugraph\n", "import cudf\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { @@ -132,6 +138,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -194,6 +201,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -203,6 +211,7 @@ ] }, { + 
"attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -216,10 +225,11 @@ "outputs": [], "source": [ "# Create a graph using the imported Dataset object\n", - "G = karate.get_graph(fetch=True)" + "G = karate.get_graph(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -236,6 +246,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -254,6 +265,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -312,6 +324,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -320,6 +333,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -369,6 +383,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -385,6 +400,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -392,6 +408,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -420,7 +437,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/centrality/Degree.ipynb b/notebooks/algorithms/centrality/Degree.ipynb index 9b16cb71336..e7535420b65 100644 --- a/notebooks/algorithms/centrality/Degree.ipynb +++ b/notebooks/algorithms/centrality/Degree.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -10,10 +11,12 @@ "\n", "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|------------------|-----------------|----------------|\n", - "| Don Acosta | 07/05/2022 | created | 22.08 nightly | DGX Tesla V100 CUDA 11.5" + "| Don Acosta | 07/05/2022 | created | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated w/ `datasets` api | 23.08 nightly 
| DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -28,6 +31,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -37,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -57,6 +62,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -67,6 +73,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -83,6 +90,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -90,6 +98,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [] @@ -116,10 +125,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### Some Prep" + "### Get Dataset" ] }, { @@ -128,30 +138,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the path to the test data \n", - "datafile='../../data/karate-data.csv'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", - "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. 
However a third, `weight`, column is also possible" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -164,12 +155,11 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph()" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -187,6 +177,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -233,6 +224,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -240,6 +232,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -252,6 +245,9 @@ "metadata": {}, "outputs": [], "source": [ + "# Define the path to the test data \n", + "datafile='../../data/karate-data.csv'\n", + "\n", "# Read the data, this also created a NetworkX Graph \n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" @@ -277,13 +273,17 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "As mentioned, the scores are different but the ranking is the same." 
+ "As mentioned, the scores are different but the ranking is the same.\n", + "\n", + "*note: the 0th node from cuGraph is equivalent to the 1st node in NetworkX*" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/algorithms/centrality/Eigenvector.ipynb b/notebooks/algorithms/centrality/Eigenvector.ipynb index 7e4a080a00b..2d06aa39708 100644 --- a/notebooks/algorithms/centrality/Eigenvector.ipynb +++ b/notebooks/algorithms/centrality/Eigenvector.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -10,10 +11,12 @@ "\n", "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|------------------|-----------------|----------------|\n", - "| Don Acosta | 07/05/2022 | created | 22.08 nightly | DGX Tesla V100 CUDA 11.5" + "| Don Acosta | 07/05/2022 | created | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 07/11/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -27,6 +30,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -42,6 +46,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,6 +83,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -88,6 +94,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -104,6 +111,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -133,6 +141,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -145,11 +154,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Define the path to the test data \n", - "datafile='../../data/karate-data.csv'" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, 
"source": [ @@ -169,13 +178,14 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", + "### Create a Graph\n", "\n", - "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. However a third, `weight`, column is also possible" + "cuGraph's dataset objects depend on cuDF for data loading and the intial edge-list creation.\n", + "The original data file contains an edge-list, which represents the connection of a vertex to another. These `source` to `destination` pairs are in what is known as Coordinate Format (COO)." ] }, { @@ -184,14 +194,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = cudf.read_csv(datafile, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'] )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a Graph " + "karate.get_edgelist(download=True)" ] }, { @@ -200,12 +203,11 @@ "metadata": {}, "outputs": [], "source": [ - "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph(directed=True)\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = karate.get_graph()" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -223,6 +225,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -230,6 +233,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -237,6 +241,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -249,12 +254,16 @@ "metadata": {}, "outputs": [], "source": [ + "# Define the path to the test data \n", + "datafile='../../data/karate-data.csv'\n", + "\n", "# 
Read the data, this also created a NetworkX Graph \n", "file = open(datafile, 'rb')\n", "Gnx = nx.read_edgelist(file)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -273,6 +282,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -291,6 +301,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -298,6 +309,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -327,7 +339,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/centrality/Katz.ipynb b/notebooks/algorithms/centrality/Katz.ipynb index ef5a9c21381..c94a14bb14a 100755 --- a/notebooks/algorithms/centrality/Katz.ipynb +++ b/notebooks/algorithms/centrality/Katz.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -13,10 +14,12 @@ "| Brad Rees | 10/15/2019 | created | 0.14 | GV100, CUDA 10.2\n", "| Brad Rees | 08/16/2020 | tested / updated | 0.15.1 nightly | RTX 3090 CUDA 11.4\n", "| Don Acosta | 07/05/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", - "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5" + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -67,6 +70,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -77,6 +81,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -93,6 +98,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -110,7 +116,7 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from 
cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { @@ -124,6 +130,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -145,6 +152,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -158,7 +166,7 @@ "outputs": [], "source": [ "# Create a graph using the imported Dataset object\n", - "G = karate.get_graph(fetch=True)" + "G = karate.get_graph(download=True)" ] }, { @@ -182,6 +190,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -208,6 +217,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,6 +239,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -236,6 +247,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -282,6 +294,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -308,6 +321,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -344,7 +358,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/community/ECG.ipynb b/notebooks/algorithms/community/ECG.ipynb index 829be21035c..c6c5a7b1b03 100644 --- a/notebooks/algorithms/community/ECG.ipynb +++ b/notebooks/algorithms/community/ECG.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,6 +15,7 @@ "| | 08/05/2021 | tested/updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/20/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "\n", @@ -62,6 +64,7 @@ ] 
}, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -73,6 +76,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -88,6 +92,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -105,10 +110,11 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -122,7 +128,7 @@ "outputs": [], "source": [ "# You can also just get the edgelist\n", - "gdf = karate.get_edgelist(fetch=True)\n", + "gdf = karate.get_edgelist(download=True)\n", "\n", "# The algorithm also requires that there are vertex weights. Just use 1.0 \n", "gdf[\"data\"] = 1.0" @@ -204,11 +210,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -233,7 +240,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/community/Louvain.ipynb b/notebooks/algorithms/community/Louvain.ipynb index a8529483534..a6a8cc363df 100755 --- a/notebooks/algorithms/community/Louvain.ipynb +++ b/notebooks/algorithms/community/Louvain.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -16,6 +17,7 @@ "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "\n", "\n", @@ -101,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -112,6 +115,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -127,6 +131,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -144,10 +149,11 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -161,7 +167,7 @@ "outputs": [], "source": [ "# You can also just get the edgelist\n", - "gdf = karate.get_edgelist(fetch=True)\n", + "gdf = karate.get_edgelist(download=True)\n", "\n", "# The algorithm also requires that there are vertex weights. 
Just use 1.0 \n", "gdf[\"data\"] = 1.0" @@ -295,11 +301,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -324,7 +331,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/community/Spectral-Clustering.ipynb b/notebooks/algorithms/community/Spectral-Clustering.ipynb index 2ac1b9e8c16..fa6f0e954c0 100755 --- a/notebooks/algorithms/community/Spectral-Clustering.ipynb +++ b/notebooks/algorithms/community/Spectral-Clustering.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,10 +15,12 @@ "| Brad Rees and James Wyles | 08/01/2019 | created | 0.14 | GV100 32G, CUDA 10.2 |\n", "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", - "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -36,6 +39,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -76,6 +80,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -111,6 +116,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -122,6 +128,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -141,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -151,10 +158,11 @@ "import numpy as np\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -163,11 +171,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "gdf = karate.get_edgelist(fetch=True)\n", + "gdf = karate.get_edgelist(download=True)\n", "\n", "# The algorithm requires that there are edge weights. In this case all the weights are being set to 1\n", "gdf[\"data\"] = cudf.Series(np.ones(len(gdf), dtype=np.float32))" @@ -194,6 +202,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -212,6 +221,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -236,6 +246,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -296,6 +307,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -355,6 +367,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -362,11 +375,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -391,7 +405,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/community/Subgraph-Extraction.ipynb b/notebooks/algorithms/community/Subgraph-Extraction.ipynb index fb7708d1462..8d94eddf878 100755 --- a/notebooks/algorithms/community/Subgraph-Extraction.ipynb +++ b/notebooks/algorithms/community/Subgraph-Extraction.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,10 +15,12 @@ "| Brad Rees | 10/16/2019 | created | 0.13 | GV100 32G, CUDA 10.2 |\n", "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", - "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -43,6 +46,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -54,6 +58,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -66,6 +71,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -83,10 +89,11 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -94,6 +101,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -108,7 +116,7 @@ "outputs": [], "source": [ "# You can also just get the edgelist\n", - "gdf = 
karate.get_edgelist(fetch=True)\n", + "gdf = karate.get_edgelist(download=True)\n", "\n", "# The louvain algorithm requires that there are vertex weights. Just use 1.0 \n", "gdf[\"data\"] = 1.0\n", @@ -162,6 +170,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -239,11 +248,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -268,7 +278,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { @@ -278,4 +288,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/algorithms/community/Triangle-Counting.ipynb b/notebooks/algorithms/community/Triangle-Counting.ipynb index b55c835cf4f..07203373617 100755 --- a/notebooks/algorithms/community/Triangle-Counting.ipynb +++ b/notebooks/algorithms/community/Triangle-Counting.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -15,6 +16,7 @@ "| | 08/16/2020 | updated | 0.15 | GV100 32G, CUDA 10.2 |\n", "| Don Acosta | 07/11/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "| Ralph Liu | 07/27/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "Triangle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. 
\n", @@ -52,6 +54,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -63,6 +66,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -76,6 +80,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -94,7 +99,7 @@ "from collections import OrderedDict\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { @@ -110,6 +115,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -183,6 +189,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -192,6 +199,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -199,6 +207,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -216,6 +225,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -233,13 +243,7 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -258,6 +262,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -267,11 +272,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -296,7 +302,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13-final" + "version": "3.10.12" }, "vscode": { "interpreter": { @@ -306,4 +312,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/algorithms/community/ktruss.ipynb b/notebooks/algorithms/community/ktruss.ipynb index 3c96f7ff5a7..cb838e304c3 100644 --- a/notebooks/algorithms/community/ktruss.ipynb +++ b/notebooks/algorithms/community/ktruss.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -15,6 +16,7 @@ "| | 08/05/2021 | tested/updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Don Acosta | 07/08/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", "| Ralph Liu | 07/26/2022 | updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "## Introduction\n", "\n", "Compute the k-truss of the graph G. 
A K-Truss is a relaxed cliques where every vertex is supported by at least k-2 triangle.\n", @@ -37,6 +39,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -59,6 +62,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -70,6 +74,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -83,6 +88,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -100,10 +106,11 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -117,7 +124,7 @@ "outputs": [], "source": [ "# Create a graph using the imported Dataset object\n", - "G = karate.get_graph(fetch=True)\n", + "G = karate.get_graph(download=True)\n", "G = G.to_undirected()" ] }, @@ -133,6 +140,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -161,6 +169,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -200,6 +209,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,11 +239,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -258,7 +269,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/components/ConnectedComponents.ipynb b/notebooks/algorithms/components/ConnectedComponents.ipynb index 5f18352647f..c41a004e704 100755 --- a/notebooks/algorithms/components/ConnectedComponents.ipynb +++ b/notebooks/algorithms/components/ConnectedComponents.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -22,6 +23,7 @@ "| Brad Rees | 10/18/2021 | updated | 21.12 nightly | GV100, CUDA 11.4 |\n", "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5 |\n", "| Don Acosta | 07/22/2021 | updated | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "\n", "\n", @@ -90,6 +92,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -100,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -129,6 +133,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -141,10 +146,11 @@ "metadata": {}, "outputs": [], "source": [ - "from cugraph.experimental.datasets import netscience" + "from cugraph.datasets import netscience" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,10 +163,11 @@ "metadata": {}, "outputs": [], "source": [ - "G = netscience.get_graph(fetch=True)" + "G = netscience.get_graph(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -179,6 +186,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -199,6 +207,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": 
{}, "source": [ @@ -220,6 +229,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -243,6 +253,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -261,6 +272,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -280,6 +292,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -300,6 +313,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -323,6 +337,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -333,11 +348,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -362,7 +378,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/cores/core-number.ipynb b/notebooks/algorithms/cores/core-number.ipynb index 06fe570d390..cbb3e64311a 100755 --- a/notebooks/algorithms/cores/core-number.ipynb +++ b/notebooks/algorithms/cores/core-number.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -17,6 +18,7 @@ "| Brad Rees | 10/28/2019 | created | 0.13 | GV100, CUDA 10.2 |\n", "| Don Acosta | 07/21/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", "| Ralph Liu | 07/26/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "\n", @@ -41,6 +43,7 @@ ] }, 
{ + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -51,6 +54,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -64,6 +68,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -81,10 +86,11 @@ "import cudf\n", "\n", "# import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -97,11 +103,12 @@ "metadata": {}, "outputs": [], "source": [ - "G = karate.get_graph(fetch=True)\n", + "G = karate.get_graph(download=True)\n", "G = G.to_undirected()" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -128,11 +135,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -157,7 +165,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/cores/kcore.ipynb b/notebooks/algorithms/cores/kcore.ipynb index 065f02ffd98..ebb9cbe9c50 100755 --- a/notebooks/algorithms/cores/kcore.ipynb +++ b/notebooks/algorithms/cores/kcore.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -18,6 +19,7 @@ "| Brad Rees | 08/16/2020 | created | 0.15 | GV100, CUDA 10.2 |\n", "| Don Acosta | 07/21/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", "| Ralph Liu | 07/26/2022 | updated/tested | 22.08 nightly | DGX Tesla V100, CUDA 11.5 |\n", + "| | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "\n", @@ -41,6 +43,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -51,6 +54,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -64,6 +68,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -72,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -81,10 +86,11 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -93,29 +99,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "G = karate.get_graph(fetch=True)\n", + "G = karate.get_graph(download=True)\n", "G = G.to_undirected()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": 
{}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Main Graph\n", - "\tNumber of Vertices: 34\n", - "\tNumber of Edges: 156\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Main Graph\")\n", "print(\"\\tNumber of Vertices: \" + str(G.number_of_vertices()))\n", @@ -123,6 +119,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -131,25 +128,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "RuntimeError", - "evalue": "non-success value returned from cugraph_core_number: CUGRAPH_UNKNOWN_ERROR", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/nfs/ralphl/datasets-api/notebooks/algorithms/cores/kcore.ipynb Cell 10\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m# Call k-cores on the graph\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m kcg \u001b[39m=\u001b[39m cugraph\u001b[39m.\u001b[39;49mk_core(G)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_dev/lib/python3.9/site-packages/cugraph-22.2.0a0+366.gabd2f0ef-py3.9-linux-x86_64.egg/cugraph/cores/k_core.py:103\u001b[0m, in \u001b[0;36mk_core\u001b[0;34m(G, k, core_number)\u001b[0m\n\u001b[1;32m 99\u001b[0m core_number \u001b[39m=\u001b[39m G\u001b[39m.\u001b[39madd_internal_vertex_id(core_number, \u001b[39m'\u001b[39m\u001b[39mvertex\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[1;32m 100\u001b[0m cols)\n\u001b[1;32m 102\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 103\u001b[0m core_number \u001b[39m=\u001b[39m _call_plc_core_number(G)\n\u001b[1;32m 104\u001b[0m core_number \u001b[39m=\u001b[39m core_number\u001b[39m.\u001b[39mrename(\n\u001b[1;32m 105\u001b[0m 
columns\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mcore_number\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mvalues\u001b[39m\u001b[39m\"\u001b[39m}, copy\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 106\u001b[0m )\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m k \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_dev/lib/python3.9/site-packages/cugraph-22.2.0a0+366.gabd2f0ef-py3.9-linux-x86_64.egg/cugraph/cores/k_core.py:27\u001b[0m, in \u001b[0;36m_call_plc_core_number\u001b[0;34m(G)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_call_plc_core_number\u001b[39m(G):\n\u001b[1;32m 26\u001b[0m vertex, core_number \u001b[39m=\u001b[39m \\\n\u001b[0;32m---> 27\u001b[0m pylibcugraph_core_number(\n\u001b[1;32m 28\u001b[0m resource_handle\u001b[39m=\u001b[39;49mResourceHandle(),\n\u001b[1;32m 29\u001b[0m graph\u001b[39m=\u001b[39;49mG\u001b[39m.\u001b[39;49m_plc_graph,\n\u001b[1;32m 30\u001b[0m degree_type\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 31\u001b[0m do_expensive_check\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 34\u001b[0m df \u001b[39m=\u001b[39m cudf\u001b[39m.\u001b[39mDataFrame()\n\u001b[1;32m 35\u001b[0m df[\u001b[39m\"\u001b[39m\u001b[39mvertex\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m vertex\n", - "File \u001b[0;32mcore_number.pyx:124\u001b[0m, in \u001b[0;36mpylibcugraph.core_number.core_number\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mutils.pyx:51\u001b[0m, in \u001b[0;36mpylibcugraph.utils.assert_success\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mRuntimeError\u001b[0m: non-success value returned from cugraph_core_number: CUGRAPH_UNKNOWN_ERROR" - ] - } - ], + "outputs": [], "source": [ "# Call k-cores on the graph\n", "kcg = cugraph.k_core(G) " @@ -167,6 +148,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": 
{}, "source": [ @@ -186,6 +168,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -194,6 +177,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -221,6 +205,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -261,11 +246,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -290,7 +276,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/layout/Force-Atlas2.ipynb b/notebooks/algorithms/layout/Force-Atlas2.ipynb index eaab502f180..f457d3f38fc 100644 --- a/notebooks/algorithms/layout/Force-Atlas2.ipynb +++ b/notebooks/algorithms/layout/Force-Atlas2.ipynb @@ -21,10 +21,18 @@ "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", "| -----------------|------------|------------------|-----------------|----------------|\n", "| Hugo Linsenmaier | 11/16/2020 | created | 0.17 | GV100, CUDA 11.0\n", - "| Brad Rees | 01/11/2022 | tested / updated | 22.02 nightly | RTX A6000 48GB CUDA 11.5\n", - "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 nightly | V100, CUDA 11.5\n", + "| Brad Rees | 01/11/2022 | tested / updated | 22.02 nightly | RTX A6000 CUDA 11.5\n", + "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5\n", "| Don Acosta | 08/01/2022 | tested / updated | 22.08 nightly | DGX Tesla A100 CUDA 11.5 \n", - "| Don Acosta | 07/17/2023 | tested / updated | 23.08 nightly |RTX A6000 48GB CUDA 11.7 " + "| Ralph Liu | 
06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### This notebook will not currently run because there is a conflict between the version of CuPy required by cugraph (11.0) and the version supported in cuxfilter (7.8 to 10.0). Notebook will be updated when cuxfilter supports CuPy 11." ] }, { @@ -74,7 +82,23 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import netscience" + "from cugraph.datasets import netscience" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup Viz\n", + "client = Client()\n", + "hv.notebook_extension('bokeh','matplotlib')\n", + "decimate.max_samples=20000\n", + "dynspread.threshold=0.01\n", + "datashade.cmap=fire[40:]\n", + "sz = dict(width=150,height=150)\n", + "%opts RGB [xaxis=None yaxis=None show_grid=False bgcolor=\"black\"]" ] }, { @@ -91,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "G = netscience.get_graph(fetch=True)\n", + "G = netscience.get_graph(download=True)\n", "G.number_of_nodes(), G.number_of_edges()" ] }, @@ -146,6 +170,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -209,6 +234,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/algorithms/link_analysis/HITS.ipynb b/notebooks/algorithms/link_analysis/HITS.ipynb index 9578377b747..7e4673ae2a9 100755 --- a/notebooks/algorithms/link_analysis/HITS.ipynb +++ b/notebooks/algorithms/link_analysis/HITS.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -15,6 +16,7 @@ "| | 08/16/2020 | tested / updated | 0.15.1 nightly | RTX 3090 CUDA 11.4\n", "| Ralph Liu | 06/22/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", "| Don Acosta | 07/27/2022 | tested / updated | 
22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "HITS, also known as hubs and authorities, computes the relative importance of vertices. \n", @@ -36,6 +38,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -71,6 +74,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -84,6 +88,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -124,6 +129,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -137,10 +143,11 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -179,6 +186,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -189,6 +197,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -196,6 +205,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -208,10 +218,11 @@ "metadata": {}, "outputs": [], "source": [ - "G = karate.get_graph(fetch=True)" + "G = karate.get_graph(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,6 +240,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -288,6 +300,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -315,6 +328,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -340,11 +354,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 
2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", diff --git a/notebooks/algorithms/link_analysis/Pagerank.ipynb b/notebooks/algorithms/link_analysis/Pagerank.ipynb index 6b78f5866d9..0e7d5b134a7 100755 --- a/notebooks/algorithms/link_analysis/Pagerank.ipynb +++ b/notebooks/algorithms/link_analysis/Pagerank.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "tags": [] @@ -16,6 +17,7 @@ "| | 04/06/2022 | tested / updated | 22.04 nightly | GV100 32G, CUDA 11.5\n", "| Ralph Liu | 06/22/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", "| Don Acosta | 07/27/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "Pagerank is measure of the relative importance, also called centrality, of a vertex based on the relative importance of it's neighbors. PageRank was developed by Google and is (was) used to rank it's search results. PageRank uses the connectivity information of a graph to rank the importance of each vertex. 
\n", @@ -43,6 +45,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -56,6 +59,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -99,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -124,10 +129,11 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -166,6 +172,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -176,6 +183,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -183,6 +191,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -195,10 +204,11 @@ "metadata": {}, "outputs": [], "source": [ - "G = karate.get_graph(fetch=True)" + "G = karate.get_graph(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -216,6 +226,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -248,6 +259,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -288,6 +300,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -317,6 +330,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -343,6 +357,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -350,6 +365,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -358,6 +374,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -406,11 +423,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA 
CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", diff --git a/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb b/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb index 64255868cbc..1e6cd032650 100755 --- a/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -18,10 +19,12 @@ "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|------------------|-----------------|-----------------------|\n", "| Brad Rees | 10/14/2019 | created | 0.14 | GV100 32 GB, CUDA 10.2 |\n", - "| Don Acosta | 07/20/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Don Acosta | 07/20/2022 | tested/updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -34,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -41,6 +45,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -69,6 +74,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -77,6 +83,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -111,6 +118,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -119,6 +127,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -129,6 +138,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -143,6 +153,7 @@ 
] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,6 +176,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -212,6 +224,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -226,8 +239,8 @@ "outputs": [], "source": [ "# Test file \n", - "from cugraph.experimental.datasets import karate\n", - "gdf = karate.get_edgelist(fetch=True)" + "from cugraph.datasets import karate\n", + "gdf = karate.get_edgelist(download=True)" ] }, { @@ -251,6 +264,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -269,6 +283,17 @@ ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How many vertices are in the graph? Remember that Graph is zero based\n", + "G.number_of_vertices()" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -280,6 +305,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -311,6 +337,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -346,6 +373,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -384,6 +412,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -391,6 +420,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -398,6 +428,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -425,6 +456,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -452,6 +484,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb b/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb index f99e34f2224..97f3874681b 100755 --- 
a/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -18,10 +19,12 @@ "| | 08/16/2020 | upadated | 0.12 | GV100, CUDA 10.0 |\n", "| | 08/05/2021 | tested / updated | 21.10 nightly | RTX 3090 CUDA 11.4 |\n", "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", - "| Don Acosta | 08/03/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |" + "| Don Acosta | 08/03/2022 | tested / updated | 22.08 nightly | DGX Tesla V100 CUDA 11.5 |\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -34,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -42,6 +46,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -67,6 +72,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -94,6 +100,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -103,6 +110,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -114,6 +122,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -129,6 +138,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -150,6 +160,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -236,6 +247,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -248,8 +260,8 @@ "metadata": {}, "outputs": [], "source": [ - "from cugraph.experimental.datasets import karate\n", - "gdf = karate.get_edgelist(fetch=True)" + "from cugraph.datasets import karate\n", + "gdf = karate.get_edgelist(download=True)" ] }, { 
@@ -273,6 +285,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -301,6 +314,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -330,6 +344,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -365,6 +380,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -373,6 +389,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -415,6 +432,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -422,6 +440,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -471,6 +490,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -478,6 +498,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -564,6 +585,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb b/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb index 75cb54dced6..5281f69d5dc 100755 --- a/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb +++ b/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb @@ -158,7 +158,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Use the cuGraph Datasets api to get the dataframe containing edge data\n" + "### Use the cuGraph Datasets API to get the dataframe containing edge data\n" ] }, { @@ -168,8 +168,8 @@ "outputs": [], "source": [ "# Test file \n", - "from cugraph.experimental.datasets import karate\n", - "gdf = karate.get_edgelist(fetch=True)" + "from cugraph.datasets import karate\n", + "gdf = karate.get_edgelist(download=True)" ] }, { diff --git a/notebooks/algorithms/sampling/RandomWalk.ipynb b/notebooks/algorithms/sampling/RandomWalk.ipynb index 687e55c0b16..011346a93af 100644 --- 
a/notebooks/algorithms/sampling/RandomWalk.ipynb +++ b/notebooks/algorithms/sampling/RandomWalk.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,11 +15,13 @@ "| Brad Rees | 04/20/2021 | created | 0.19 | GV100, CUDA 11.0\n", "| Ralph Liu | 06/22/2022 | updated/tested | 22.08 | TV100, CUDA 11.5\n", "| Don Acosta | 08/28/2022 | updated/tested | 22.10 | TV100, CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "Currently NetworkX does not have a random walk function. There is code on StackOverflow that generates a random walk by getting a vertex and then randomly selecting a neighbor and then repeating the process. " ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -45,7 +48,7 @@ "import cudf\n", "\n", "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate" + "from cugraph.datasets import karate" ] }, { @@ -54,7 +57,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = karate.get_edgelist(fetch=True)" + "gdf = karate.get_edgelist(download=True)" ] }, { @@ -117,6 +120,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -153,11 +157,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "-----\n", - "Copyright (c) 2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2022-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -191,4 +196,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/algorithms/structure/Renumber-2.ipynb b/notebooks/algorithms/structure/Renumber-2.ipynb index 6a52632b38a..479df53c6be 100755 --- a/notebooks/algorithms/structure/Renumber-2.ipynb +++ b/notebooks/algorithms/structure/Renumber-2.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -20,6 +21,7 @@ "| Brad Rees | 07/08/2020 | updated | 0.15 | GV100, CUDA 11.0\n", "| Ralph Liu | 06/22/2022 | docs & code change | 22.08 | TV100, CUDA 11.5\n", "| Don Acosta | 08/28/2022 | updated/tested | 22.10 | TV100, CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "\n", "## Introduction\n", @@ -42,6 +44,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -50,6 +53,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -77,11 +81,12 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import cyber\n", - "gdf = cyber.get_edgelist(fetch=True)" + "from cugraph.datasets import cyber\n", + "gdf = cyber.get_edgelist(download=True)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -128,6 +133,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -137,6 +143,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -170,6 +177,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -192,6 +200,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -214,11 +223,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA 
CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -253,4 +263,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/algorithms/structure/Renumber.ipynb b/notebooks/algorithms/structure/Renumber.ipynb index 13b1eeba074..b6cca6591d7 100755 --- a/notebooks/algorithms/structure/Renumber.ipynb +++ b/notebooks/algorithms/structure/Renumber.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -36,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -59,6 +61,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -88,6 +91,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -113,6 +117,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -139,6 +144,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -159,6 +165,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -177,6 +184,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -206,6 +214,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -235,6 +244,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -257,6 +267,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -288,6 +299,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -319,11 +331,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA 
CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -358,4 +371,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/algorithms/structure/Symmetrize.ipynb b/notebooks/algorithms/structure/Symmetrize.ipynb index 0357a2737b1..5d5b270708a 100755 --- a/notebooks/algorithms/structure/Symmetrize.ipynb +++ b/notebooks/algorithms/structure/Symmetrize.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,6 +15,7 @@ "| Brad Rees and James Wyles | 08/13/2019 | created | 0.10 | GV100, CUDA 11.0\n", "| Brad Rees | 06/22/2020 | updated | 0.15 | GV100, CUDA 11.0\n", "| Don Acosta | 08/28/2022 | updated/tested | 22.10 | TV100, CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "\n", "## Introduction\n", @@ -35,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -77,10 +80,10 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate\n", + "from cugraph.datasets import karate\n", "\n", "# This is the symmetrized dataset\n", - "test_gdf = karate.get_edgelist(fetch=True)" + "test_gdf = karate.get_edgelist(download=True)" ] }, { @@ -96,6 +99,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -126,6 +130,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -156,11 +161,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "---\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, 
Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", diff --git a/notebooks/algorithms/traversal/BFS.ipynb b/notebooks/algorithms/traversal/BFS.ipynb index 7c4b87f30c3..4339ce26a70 100755 --- a/notebooks/algorithms/traversal/BFS.ipynb +++ b/notebooks/algorithms/traversal/BFS.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,6 +15,7 @@ "| Brad Rees and James Wyles | 08/13/2019 | created | 0.10 | GV100, CUDA 11.0\n", "| Ralph Liu | 06/22/2020 | updated | 22.08 | GV100, CUDA 11.0\n", "| Don Acosta | 08/28/2022 | updated/tested | 22.10 | TV100, CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "\n", @@ -44,6 +46,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -86,6 +89,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -99,9 +103,9 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate\n", + "from cugraph.datasets import karate\n", "\n", - "gdf = karate.get_edgelist(fetch=True)" + "gdf = karate.get_edgelist(download=True)" ] }, { @@ -115,6 +119,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -172,6 +177,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -200,6 +206,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -248,11 +255,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance 
with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -277,7 +285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/notebooks/algorithms/traversal/SSSP.ipynb b/notebooks/algorithms/traversal/SSSP.ipynb index 0b5ff3d0932..4889ab399e8 100755 --- a/notebooks/algorithms/traversal/SSSP.ipynb +++ b/notebooks/algorithms/traversal/SSSP.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,6 +15,7 @@ "| Brad Rees and James Wyles | 08/13/2019 | created | 0.10 | GV100, CUDA 11.0\n", "| Ralph Liu | 06/22/2022 | updated | 22.08 | GV100, CUDA 11.0\n", "| Don Acosta | 08/28/2022 | updated/tested | 22.10 | TV100, CUDA 11.5\n", + "| Ralph Liu | 06/29/2023 | updated | 23.08 nightly | DGX Tesla V100 CUDA 12.0\n", "\n", "## Introduction\n", "\n", @@ -41,6 +43,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -68,6 +71,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -81,9 +85,9 @@ "outputs": [], "source": [ "# Import a built-in dataset\n", - "from cugraph.experimental.datasets import karate\n", + "from cugraph.datasets import karate\n", "\n", - "gdf = karate.get_edgelist(fetch=True)" + "gdf = karate.get_edgelist(download=True)" ] }, { @@ -106,6 +110,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -147,6 +152,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -154,11 +160,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance 
with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py index 3b9c4e007e2..4be27991823 100644 --- a/python/cugraph/cugraph/__init__.py +++ b/python/cugraph/cugraph/__init__.py @@ -120,4 +120,6 @@ from cugraph import exceptions +from cugraph import datasets + __version__ = "23.08.00" diff --git a/python/cugraph/cugraph/centrality/betweenness_centrality.py b/python/cugraph/cugraph/centrality/betweenness_centrality.py index 63af410e06c..80ad2e630bd 100644 --- a/python/cugraph/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/cugraph/centrality/betweenness_centrality.py @@ -120,8 +120,8 @@ def betweenness_centrality( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> bc = cugraph.betweenness_centrality(G) """ @@ -283,8 +283,8 @@ def edge_betweenness_centrality( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> bc = cugraph.edge_betweenness_centrality(G) """ diff --git a/python/cugraph/cugraph/centrality/degree_centrality.py b/python/cugraph/cugraph/centrality/degree_centrality.py index 66946afded2..12d39f4127e 100644 --- a/python/cugraph/cugraph/centrality/degree_centrality.py +++ b/python/cugraph/cugraph/centrality/degree_centrality.py @@ -45,8 +45,8 @@ def degree_centrality(G, normalized=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> dc = cugraph.degree_centrality(G) """ diff --git a/python/cugraph/cugraph/centrality/eigenvector_centrality.py 
b/python/cugraph/cugraph/centrality/eigenvector_centrality.py index 07cbfefaaf1..6be797096fc 100644 --- a/python/cugraph/cugraph/centrality/eigenvector_centrality.py +++ b/python/cugraph/cugraph/centrality/eigenvector_centrality.py @@ -68,8 +68,8 @@ def eigenvector_centrality(G, max_iter=100, tol=1.0e-6): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> ec = cugraph.eigenvector_centrality(G) """ diff --git a/python/cugraph/cugraph/centrality/katz_centrality.py b/python/cugraph/cugraph/centrality/katz_centrality.py index ffede18b5d2..d902f9b06c9 100644 --- a/python/cugraph/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/cugraph/centrality/katz_centrality.py @@ -105,8 +105,8 @@ def katz_centrality( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> kc = cugraph.katz_centrality(G) """ diff --git a/python/cugraph/cugraph/community/ecg.py b/python/cugraph/cugraph/community/ecg.py index e59f3dcb1b7..fbf8df43867 100644 --- a/python/cugraph/cugraph/community/ecg.py +++ b/python/cugraph/cugraph/community/ecg.py @@ -65,8 +65,8 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> parts = cugraph.ecg(G) """ diff --git a/python/cugraph/cugraph/community/egonet.py b/python/cugraph/cugraph/community/egonet.py index 684ae92febd..01bbc41d8cd 100644 --- a/python/cugraph/cugraph/community/egonet.py +++ b/python/cugraph/cugraph/community/egonet.py @@ -86,8 +86,8 @@ def ego_graph(G, n, radius=1, center=True, undirected=None, distance=None): Examples -------- - >>> 
from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> ego_graph = cugraph.ego_graph(G, 1, radius=2) """ @@ -190,8 +190,8 @@ def batched_ego_graphs(G, seeds, radius=1, center=True, undirected=None, distanc Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> b_ego_graph, offsets = cugraph.batched_ego_graphs(G, seeds=[1,5], ... radius=2) diff --git a/python/cugraph/cugraph/community/induced_subgraph.py b/python/cugraph/cugraph/community/induced_subgraph.py index d82e9d466a8..29fe2f29c1e 100644 --- a/python/cugraph/cugraph/community/induced_subgraph.py +++ b/python/cugraph/cugraph/community/induced_subgraph.py @@ -91,8 +91,8 @@ def induced_subgraph( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> verts = np.zeros(3, dtype=np.int32) >>> verts[0] = 0 >>> verts[1] = 1 diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py index 134df98f496..0ebbe633317 100644 --- a/python/cugraph/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/cugraph/community/ktruss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -69,8 +69,8 @@ def k_truss(G, k): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> k_subgraph = cugraph.k_truss(G, 3) """ @@ -150,8 +150,8 @@ def ktruss_subgraph(G, k, use_weights=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) """ diff --git a/python/cugraph/cugraph/community/leiden.py b/python/cugraph/cugraph/community/leiden.py index 1caa5476623..d2a1a413d7b 100644 --- a/python/cugraph/cugraph/community/leiden.py +++ b/python/cugraph/cugraph/community/leiden.py @@ -94,8 +94,8 @@ def leiden( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> parts, modularity_score = cugraph.leiden(G) """ diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py index a313aa44048..35ca864824f 100644 --- a/python/cugraph/cugraph/community/louvain.py +++ b/python/cugraph/cugraph/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -69,8 +69,8 @@ def louvain(G, max_iter=100, resolution=1.0): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> parts = cugraph.louvain(G) """ diff --git a/python/cugraph/cugraph/community/spectral_clustering.py b/python/cugraph/cugraph/community/spectral_clustering.py index 5116d4c8fdf..864c1005d20 100644 --- a/python/cugraph/cugraph/community/spectral_clustering.py +++ b/python/cugraph/cugraph/community/spectral_clustering.py @@ -81,8 +81,8 @@ def spectralBalancedCutClustering( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.spectralBalancedCutClustering(G, 5) """ @@ -178,8 +178,8 @@ def spectralModularityMaximizationClustering( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.spectralModularityMaximizationClustering(G, 5) """ @@ -254,8 +254,8 @@ def analyzeClustering_modularity( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_modularity(G, 5, df) @@ -336,8 +336,8 @@ def analyzeClustering_edge_cut( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df) @@ -416,8 +416,8 
@@ def analyzeClustering_ratio_cut( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df, 'vertex', ... 'cluster') diff --git a/python/cugraph/cugraph/community/subgraph_extraction.py b/python/cugraph/cugraph/community/subgraph_extraction.py index 601b6365e5d..77b28d4daff 100644 --- a/python/cugraph/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/cugraph/community/subgraph_extraction.py @@ -57,8 +57,8 @@ def subgraph( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> verts = np.zeros(3, dtype=np.int32) >>> verts[0] = 0 >>> verts[1] = 1 diff --git a/python/cugraph/cugraph/components/connectivity.py b/python/cugraph/cugraph/components/connectivity.py index e235c6c92d4..45dba37d2ce 100644 --- a/python/cugraph/cugraph/components/connectivity.py +++ b/python/cugraph/cugraph/components/connectivity.py @@ -169,8 +169,8 @@ def weakly_connected_components(G, directed=None, connection=None, return_labels Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.weakly_connected_components(G) """ @@ -278,8 +278,8 @@ def strongly_connected_components( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.strongly_connected_components(G) """ @@ -387,8 +387,8 @@ def connected_components(G, directed=None, connection="weak", return_labels=None Examples -------- - 
>>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.connected_components(G, connection="weak") """ diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index 84153632f58..3e6cbe0d96f 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,8 +55,8 @@ def core_number(G, degree_type="bidirectional"): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.core_number(G) """ diff --git a/python/cugraph/cugraph/cores/k_core.py b/python/cugraph/cugraph/cores/k_core.py index b1cc796a7dd..3dbc1cfa377 100644 --- a/python/cugraph/cugraph/cores/k_core.py +++ b/python/cugraph/cugraph/cores/k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -82,8 +82,8 @@ def k_core(G, k=None, core_number=None, degree_type="bidirectional"): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> KCoreGraph = cugraph.k_core(G) """ diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py new file mode 100644 index 00000000000..7ba274c5960 --- /dev/null +++ b/python/cugraph/cugraph/datasets/__init__.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pathlib import Path + +# datasets module +from cugraph.datasets.dataset import ( + Dataset, + download_all, + set_download_dir, + get_download_dir, + default_download_dir, +) +from cugraph.datasets import metadata + +# metadata path for .yaml files +meta_path = Path(__file__).parent / "metadata" + +cyber = Dataset(meta_path / "cyber.yaml") +dolphins = Dataset(meta_path / "dolphins.yaml") +email_Eu_core = Dataset(meta_path / "email_Eu_core.yaml") +karate = Dataset(meta_path / "karate.yaml") +karate_asymmetric = Dataset(meta_path / "karate_asymmetric.yaml") +karate_disjoint = Dataset(meta_path / "karate_disjoint.yaml") +netscience = Dataset(meta_path / "netscience.yaml") +polbooks = Dataset(meta_path / "polbooks.yaml") +small_line = Dataset(meta_path / "small_line.yaml") +small_tree = Dataset(meta_path / "small_tree.yaml") +toy_graph = Dataset(meta_path / "toy_graph.yaml") +toy_graph_undirected = Dataset(meta_path / "toy_graph_undirected.yaml") diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py new file mode 100644 index 00000000000..229d0fda632 --- /dev/null +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -0,0 +1,312 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+        If `path` is not set, set it to the environment variable
+        RAPIDS_DATASET_ROOT_DIR. If the variable is not set, default to
+        ".cugraph/datasets" under the user's home directory.
+    metadata_yaml_file : yaml file
+                f"The directory {self._dl_path.path.absolute()} " "does not exist"
Try setting download=True" + " to download the datafile" + ) + header = None + if isinstance(self.metadata["header"], int): + header = self.metadata["header"] + self._edgelist = cudf.read_csv( + full_path, + delimiter=self.metadata["delim"], + names=self.metadata["col_names"], + dtype=self.metadata["col_types"], + header=header, + ) + + return self._edgelist + + def get_graph( + self, + download=False, + create_using=Graph, + ignore_weights=False, + store_transposed=False, + ): + """ + Return a Graph object. + + Parameters + ---------- + download : Boolean (default=False) + Downloads the dataset from the web. + + create_using: cugraph.Graph (instance or class), optional + (default=Graph) + Specify the type of Graph to create. Can pass in an instance to + create a Graph instance with specified 'directed' attribute. + + ignore_weights : Boolean (default=False) + Ignores weights in the dataset if True, resulting in an + unweighted Graph. If False (the default), weights from the + dataset -if present- will be applied to the Graph. If the + dataset does not contain weights, the Graph returned will + be unweighted regardless of ignore_weights. 
+    Looks in `metadata` directory and downloads all datafiles from the URLs
+    Set the download location for datasets
University + of New South Wales, Canberra, Australia, 2017. +delim: "," +header: 0 +col_names: + - idx + - srcip + - dstip +col_types: + - int32 + - str + - str +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 2546575 +number_of_nodes: 706529 +url: https://data.rapids.ai/cugraph/datasets/cyber.csv diff --git a/python/cugraph/cugraph/datasets/metadata/dolphins.yaml b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml new file mode 100644 index 00000000000..bc7cb6cd486 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml @@ -0,0 +1,31 @@ +name: dolphins +file_type: .csv +description: An undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). +author: + - D. Lusseau + - K. Schneider + - O. J. Boisseau + - P. Haase + - E. Slooten + - S. M. Dawson +refs: + D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and S. M. Dawson, + The bottlenose dolphin community of Doubtful Sound features a large proportion of + long-lasting associations, Behavioral Ecology and Sociobiology 54, 396-405 (2003). +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 159 +number_of_nodes: 62 +url: https://data.rapids.ai/cugraph/datasets/dolphins.csv diff --git a/python/cugraph/cugraph/datasets/metadata/email_Eu_core.yaml b/python/cugraph/cugraph/datasets/metadata/email_Eu_core.yaml new file mode 100644 index 00000000000..444a823788b --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/email_Eu_core.yaml @@ -0,0 +1,29 @@ +name: email-Eu-core +file_type: .csv +description: + The network was generated using anonymized email data from a large European + research institution. 
There is an edge (u, v) in the network if person u sent + person v at least one email. The e-mails only represent communication between + institution members (the core), and the dataset does not contain incoming messages + from or outgoing messages to the rest of the world. +author: Jure Leskovec +refs: + - Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. 'Local Higher-order Graph Clustering.' In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017. + - J. Leskovec, J. Kleinberg and C. Faloutsos. Graph Evolution. Densification and Shrinking Diameters. ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007. +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 25571 +number_of_nodes: 1005 +url: https://data.rapids.ai/cugraph/datasets/email-Eu-core.csv diff --git a/python/cugraph/cugraph/datasets/metadata/karate.yaml b/python/cugraph/cugraph/datasets/metadata/karate.yaml new file mode 100644 index 00000000000..d10ecb242f5 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/karate.yaml @@ -0,0 +1,26 @@ +name: karate +file_type: .csv +description: + The graph "karate" contains the network of friendships between the 34 members + of a karate club at a US university, as described by Wayne Zachary in 1977. +author: Zachary W. +refs: + W. W. Zachary, An information flow model for conflict and fission in small groups, + Journal of Anthropological Research 33, 452-473 (1977). 
+  This is a disjoint variant of the Karate dataset. The original dataset, which
+delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 312 +number_of_nodes: 68 +url: https://data.rapids.ai/cugraph/datasets/karate-disjoint.csv diff --git a/python/cugraph/cugraph/datasets/metadata/netscience.yaml b/python/cugraph/cugraph/datasets/metadata/netscience.yaml new file mode 100644 index 00000000000..4233da4bc7d --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/netscience.yaml @@ -0,0 +1,24 @@ +name: netscience +file_type: .csv +description: + The graph netscience contains a coauthorship network of scientists working + on network theory and experiment, as compiled by M. Newman in May 2006. +author: Newman, Mark E.J. +refs: Finding community structure in networks using the eigenvectors of matrices. +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 5484 +number_of_nodes: 1461 +url: https://data.rapids.ai/cugraph/datasets/netscience.csv diff --git a/python/cugraph/cugraph/datasets/metadata/polbooks.yaml b/python/cugraph/cugraph/datasets/metadata/polbooks.yaml new file mode 100644 index 00000000000..fbeb529ef8a --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/polbooks.yaml @@ -0,0 +1,24 @@ +name: polbooks +file_type: .csv +description: + A network of books about U.S. politics published close to the 2004 U.S. presidential election, and sold by Amazon.com. Edges between books represent frequent copurchasing of those books by the same buyers. +author: V. Krebs +refs: + V. 
Krebs, "The political books network", unpublished, https://doi.org/10.2307/40124305 [@sci-hub] +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 882 +number_of_nodes: 105 +url: https://data.rapids.ai/cugraph/datasets/polbooks.csv diff --git a/python/cugraph/cugraph/datasets/metadata/small_line.yaml b/python/cugraph/cugraph/datasets/metadata/small_line.yaml new file mode 100644 index 00000000000..825e829f16b --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/small_line.yaml @@ -0,0 +1,24 @@ +name: small_line +file_type: .csv +description: + The `small_line` dataset was created by Nvidia for testing and demonstration + purposes, and consists of a small (10 nodes) path/linear graph. +author: Nvidia +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 9 +number_of_nodes: 10 +url: https://data.rapids.ai/cugraph/datasets/small_line.csv diff --git a/python/cugraph/cugraph/datasets/metadata/small_tree.yaml b/python/cugraph/cugraph/datasets/metadata/small_tree.yaml new file mode 100644 index 00000000000..30df37c4d47 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/small_tree.yaml @@ -0,0 +1,24 @@ +name: small_tree +file_type: .csv +description: + The `small_tree` dataset was created by Nvidia for testing/demonstration + purposes, and consists of a small (9 nodes) directed tree. 
+author: Nvidia +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 11 +number_of_nodes: 9 +url: https://data.rapids.ai/cugraph/datasets/small_tree.csv diff --git a/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml b/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml new file mode 100644 index 00000000000..afe85c01a4e --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/toy_graph.yaml @@ -0,0 +1,24 @@ +name: toy_graph +file_type: .csv +description: + The `toy_graph` dataset was created by Nvidia for testing and demonstration + purposes, and consists of a small (6 nodes) directed graph. +author: null +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 16 +number_of_nodes: 6 +url: https://data.rapids.ai/cugraph/datasets/toy_graph.csv diff --git a/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml b/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml new file mode 100644 index 00000000000..20c1a56df9a --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/toy_graph_undirected.yaml @@ -0,0 +1,24 @@ +name: toy_graph_undirected +file_type: .csv +description: + The `toy_graph_undirected` dataset was created by Nvidia for testing and + demonstration purposes, and consists of a small (6 nodes) undirected graph. 
+author: Nvidia +refs: null +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - int32 + - int32 + - float32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 8 +number_of_nodes: 6 +url: https://data.rapids.ai/cugraph/datasets/toy_graph_undirected.csv diff --git a/python/cugraph/cugraph/experimental/__init__.py b/python/cugraph/cugraph/experimental/__init__.py index 2adfb84868e..b96b760e634 100644 --- a/python/cugraph/cugraph/experimental/__init__.py +++ b/python/cugraph/cugraph/experimental/__init__.py @@ -48,8 +48,6 @@ experimental_warning_wrapper(EXPERIMENTAL__find_bicliques) ) -from cugraph.experimental.datasets.dataset import Dataset - from cugraph.experimental.link_prediction.jaccard import ( EXPERIMENTAL__jaccard, EXPERIMENTAL__jaccard_coefficient, diff --git a/python/cugraph/cugraph/experimental/datasets/__init__.py b/python/cugraph/cugraph/experimental/datasets/__init__.py index a1dd45b3d9f..18220243df1 100644 --- a/python/cugraph/cugraph/experimental/datasets/__init__.py +++ b/python/cugraph/cugraph/experimental/datasets/__init__.py @@ -22,9 +22,18 @@ from cugraph.experimental.datasets import metadata from pathlib import Path +from cugraph.utilities.api_tools import promoted_experimental_warning_wrapper + + +Dataset = promoted_experimental_warning_wrapper(Dataset) +load_all = promoted_experimental_warning_wrapper(load_all) +set_download_dir = promoted_experimental_warning_wrapper(set_download_dir) +get_download_dir = promoted_experimental_warning_wrapper(get_download_dir) meta_path = Path(__file__).parent / "metadata" + +# individual dataset objects karate = Dataset(meta_path / "karate.yaml") karate_data = Dataset(meta_path / "karate_data.yaml") karate_undirected = Dataset(meta_path / "karate_undirected.yaml") @@ -41,6 +50,8 @@ email_Eu_core = Dataset(meta_path / "email-Eu-core.yaml") ktruss_polbooks = Dataset(meta_path / "ktruss_polbooks.yaml") + +# batches of datasets 
DATASETS_UNDIRECTED = [karate, dolphins] DATASETS_UNDIRECTED_WEIGHTS = [netscience] diff --git a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py index 29f2f3ffe16..2eba73b3824 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py @@ -80,8 +80,8 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): you can get the interesting (non-zero) values that are part of the networkx solution by doing the following: - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> pairs = G.get_two_hop_neighbors() >>> df = cugraph.jaccard(G, pairs) @@ -130,9 +130,9 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import jaccard as exp_jaccard - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_jaccard(G) """ @@ -230,9 +230,9 @@ def EXPERIMENTAL__jaccard_coefficient(G, ebunch=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import jaccard_coefficient as exp_jaccard_coefficient - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_jaccard_coefficient(G) """ diff --git a/python/cugraph/cugraph/experimental/link_prediction/overlap.py b/python/cugraph/cugraph/experimental/link_prediction/overlap.py index f0c320be26b..0981ced4835 100644 --- 
a/python/cugraph/cugraph/experimental/link_prediction/overlap.py +++ b/python/cugraph/cugraph/experimental/link_prediction/overlap.py @@ -86,9 +86,9 @@ def EXPERIMENTAL__overlap_coefficient(G, ebunch=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import overlap_coefficient as exp_overlap_coefficient - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_overlap_coefficient(G) """ vertex_pair = None @@ -164,9 +164,9 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import overlap as exp_overlap - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_overlap(G) """ diff --git a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py index c6fdc1ee422..ed27e4813d3 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py @@ -98,9 +98,9 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import sorensen as exp_sorensen - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_sorensen(G) """ @@ -196,9 +196,9 @@ def EXPERIMENTAL__sorensen_coefficient(G, ebunch=None, use_weight=False): Examples -------- - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> from cugraph.experimental import 
sorensen_coefficient as exp_sorensen_coef - >>> G = karate.get_graph(fetch=True, ignore_weights=True) + >>> G = karate.get_graph(download=True, ignore_weights=True) >>> df = exp_sorensen_coef(G) """ diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py index 2bd01e5b5c7..90d290cbf0f 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py @@ -145,7 +145,7 @@ def add_batches( -------- >>> import cudf >>> from cugraph.experimental.gnn import BulkSampler - >>> from cugraph.experimental.datasets import karate + >>> from cugraph.datasets import karate >>> import tempfile >>> df = cudf.DataFrame({ ... "start_vid": [0, 4, 2, 3, 9, 11], @@ -155,7 +155,7 @@ def add_batches( >>> bulk_sampler = BulkSampler( ... batch_size=3, ... output_path=output_tempdir.name, - ... graph=karate.get_graph(fetch=True)) + ... graph=karate.get_graph(download=True)) >>> bulk_sampler.add_batches( ... df, ... start_col_name="start_vid", diff --git a/python/cugraph/cugraph/layout/force_atlas2.py b/python/cugraph/cugraph/layout/force_atlas2.py index fb000feea89..0e15eee718f 100644 --- a/python/cugraph/cugraph/layout/force_atlas2.py +++ b/python/cugraph/cugraph/layout/force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -123,8 +123,8 @@ def on_train_end(self, positions): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> pos = cugraph.force_atlas2(G) """ diff --git a/python/cugraph/cugraph/link_analysis/hits.py b/python/cugraph/cugraph/link_analysis/hits.py index fd3313ef86c..c826efb6095 100644 --- a/python/cugraph/cugraph/link_analysis/hits.py +++ b/python/cugraph/cugraph/link_analysis/hits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -75,8 +75,8 @@ def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> hits = cugraph.hits(G, max_iter = 50) """ diff --git a/python/cugraph/cugraph/link_analysis/pagerank.py b/python/cugraph/cugraph/link_analysis/pagerank.py index d2b827fa7c8..3b39ac597ab 100644 --- a/python/cugraph/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/link_analysis/pagerank.py @@ -207,8 +207,8 @@ def pagerank( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05) """ diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index dd411fa889d..f1b488c8cca 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -58,8 +58,8 @@ 
def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): you can get the interesting (non-zero) values that are part of the networkx solution by doing the following: - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> pairs = G.get_two_hop_neighbors() >>> df = cugraph.jaccard(G, pairs) @@ -107,8 +107,8 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.jaccard(G) """ @@ -187,8 +187,8 @@ def jaccard_coefficient(G, ebunch=None, do_expensive_check=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.jaccard_coefficient(G) """ diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index e05e0c944fe..9bb7b76b0ca 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -97,8 +97,8 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.overlap(G) """ diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 0f35f868b7c..1d43adb51cd 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -79,8 +79,8 @@ def sorensen(input_graph, vertex_pair=None, 
do_expensive_check=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.sorensen(G) """ @@ -160,8 +160,8 @@ def sorensen_coefficient(G, ebunch=None, do_expensive_check=True): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.sorensen_coefficient(G) """ diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index fc6edae8d3e..e3486473fe5 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -78,8 +78,8 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): Examples -------- >>> import random - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> # Create a dataframe containing the vertices with their >>> # corresponding weight >>> weights = cudf.DataFrame() diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 27fb7d608ca..d7ebc5fc684 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -80,8 +80,8 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): Examples -------- >>> import random - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> # Create a dataframe containing the vertices with their >>> # corresponding weight >>> weights = cudf.DataFrame() diff --git 
a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index c27e4f66a02..8337b4602de 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -76,8 +76,8 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): Examples -------- >>> import random - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> # Create a dataframe containing the vertices with their >>> # corresponding weight >>> weights = cudf.DataFrame() diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index 247989648f3..bc9b88250af 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -78,8 +78,8 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0) Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> start_vertices = cudf.Series([0, 2], dtype=np.int32) >>> paths, weights, path_sizes = cugraph.node2vec(G, start_vertices, 3, ... 
True, 0.8, 0.5) diff --git a/python/cugraph/cugraph/sampling/random_walks.py b/python/cugraph/cugraph/sampling/random_walks.py index a5e2a0371b3..015c05d1b08 100644 --- a/python/cugraph/cugraph/sampling/random_walks.py +++ b/python/cugraph/cugraph/sampling/random_walks.py @@ -114,8 +114,8 @@ def random_walks( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> M = karate.get_edgelist(fetch=True) + >>> from cugraph.datasets import karate + >>> M = karate.get_edgelist(download=True) >>> G = karate.get_graph() >>> start_vertices = G.nodes()[:4] >>> _, _, _ = cugraph.random_walks(G, "uniform", start_vertices, 3) diff --git a/python/cugraph/cugraph/testing/__init__.py b/python/cugraph/cugraph/testing/__init__.py index db1c574de21..db841a9a865 100644 --- a/python/cugraph/cugraph/testing/__init__.py +++ b/python/cugraph/cugraph/testing/__init__.py @@ -11,6 +11,46 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.testing.utils import ( - RAPIDS_DATASET_ROOT_DIR_PATH, +from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH, RAPIDS_DATASET_ROOT_DIR +from cugraph.datasets import ( + cyber, + dolphins, + karate, + karate_disjoint, + polbooks, + netscience, + small_line, + small_tree, + email_Eu_core, + toy_graph, + toy_graph_undirected, ) + +# +# Moved Dataset Batches +# + +UNDIRECTED_DATASETS = [karate, dolphins] +SMALL_DATASETS = [karate, dolphins, polbooks] +WEIGHTED_DATASETS = [ + dolphins, + karate, + karate_disjoint, + netscience, + polbooks, + small_line, + small_tree, +] +ALL_DATASETS = [ + dolphins, + karate, + karate_disjoint, + polbooks, + netscience, + small_line, + small_tree, + email_Eu_core, + toy_graph, + toy_graph_undirected, +] +DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint] diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py index 3d1bbc5d67d..8ccbbfc9ec5 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_betweenness_centrality_mg.py @@ -17,7 +17,7 @@ import numpy as np from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.experimental.datasets import karate +from cugraph.datasets import karate from test_betweenness_centrality import ( calc_betweenness_centrality, diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py index dedf85a034b..154477a1a67 100644 --- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py @@ -17,8 +17,7 @@ import numpy as np from cugraph.dask.common.mg_utils import is_single_gpu - -from cugraph.experimental.datasets 
import karate, netscience +from cugraph.datasets import karate, netscience # Get parameters from standard betwenness_centrality_test # As tests directory is not a module, we need to add it to the path diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index c9e31e804d4..3e4dd3af4fc 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -14,16 +14,15 @@ import gc import pytest - -import cugraph -from cugraph.testing import utils import random import numpy as np +import networkx as nx + import cudf import cupy - -import networkx as nx -from cugraph.experimental.datasets import DATASETS_SMALL, DATASETS_UNRENUMBERED +import cugraph +from cugraph.datasets import karate_disjoint +from cugraph.testing import utils, SMALL_DATASETS # ============================================================================= @@ -113,7 +112,9 @@ def calc_betweenness_centrality( edge_attr = None G = graph_file.get_graph( - create_using=cugraph.Graph(directed=directed), ignore_weights=not edgevals + download=True, + create_using=cugraph.Graph(directed=directed), + ignore_weights=not edgevals, ) M = G.to_pandas_edgelist().rename( @@ -304,7 +305,7 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON): # Tests # ============================================================================= @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", [False, True]) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -339,7 +340,7 @@ def test_betweenness_centrality( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", 
SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -384,7 +385,7 @@ def test_betweenness_centrality_k_full( # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("graph_file", [karate_disjoint]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -423,7 +424,7 @@ def test_betweenness_centrality_fixed_sample( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -464,7 +465,7 @@ def test_betweenness_centrality_weight_except( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index e36e50c91aa..930f80c1bfa 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -11,16 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cugraph.dask as dcg import gc + import pytest -import cugraph + import dask_cudf import cupy import cudf - - -# from cugraph.dask.common.mg_utils import is_single_gpu +import cugraph +import cugraph.dask as dcg from cugraph.testing import utils from pylibcugraph.testing import gen_fixture_params_product diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality.py index b1bf033aff4..921b419c3ae 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality.py @@ -14,13 +14,11 @@ import gc import pytest +import networkx as nx import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED - -import networkx as nx +from cugraph.testing import utils, UNDIRECTED_DATASETS # ============================================================================= @@ -37,7 +35,7 @@ def topKVertices(degree, col, k): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_degree_centrality_nx(graph_file): dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) @@ -69,7 +67,7 @@ def test_degree_centrality_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_degree_centrality_multi_column(graph_file): dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index ba49a131d91..a46f4b9463b 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -12,14 +12,15 @@ # 
limitations under the License. import gc + import pytest + import cudf import dask_cudf -from cudf.testing import assert_series_equal - import cugraph from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cudf.testing import assert_series_equal # ============================================================================= # Pytest Setup / Teardown - called for each test function diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py index 12e9dd4c0a5..6b464f057f4 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py @@ -14,25 +14,16 @@ import gc import pytest - -import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_SMALL, DATASETS_UNRENUMBERED import random +import networkx as nx import numpy as np + import cupy import cudf +import cugraph +from cugraph.datasets import karate_disjoint +from cugraph.testing import utils, SMALL_DATASETS -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx # NOTE: Endpoint parameter is not currently being tested, there could be a test # to verify that python raise an error if it is used @@ -322,7 +313,7 @@ def generate_upper_triangle(dataframe): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -352,7 +343,7 @@ def test_edge_betweenness_centrality( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -391,7 +382,7 @@ def test_edge_betweenness_centrality_k_full( # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("graph_file", [karate_disjoint]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -425,7 +416,7 @@ def test_edge_betweenness_centrality_fixed_sample( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -461,7 +452,7 @@ def test_edge_betweenness_centrality_weight_except( @pytest.mark.sg 
-@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -494,7 +485,7 @@ def test_edge_betweenness_invalid_dtype( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality.py index 2b348314c20..006cb89b79c 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality.py @@ -14,17 +14,12 @@ import gc import pytest +import networkx as nx import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import ( - toy_graph, - karate, - DATASETS_UNDIRECTED, - DATASETS, -) +from cugraph.testing import utils, UNDIRECTED_DATASETS, DEFAULT_DATASETS +from cugraph.datasets import toy_graph, karate -import networkx as nx # This toy graph is used in multiple tests throughout libcugraph_c and pylib. 
TOY = toy_graph @@ -46,7 +41,7 @@ def topKVertices(eigen, col, k): def calc_eigenvector(graph_file): dataset_path = graph_file.get_path() G = graph_file.get_graph( - create_using=cugraph.Graph(directed=True), ignore_weights=True + download=True, create_using=cugraph.Graph(directed=True), ignore_weights=True ) k_df = cugraph.eigenvector_centrality(G, max_iter=1000) @@ -62,7 +57,7 @@ def calc_eigenvector(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_eigenvector_centrality(graph_file): eigen_scores = calc_eigenvector(graph_file) @@ -73,7 +68,7 @@ def test_eigenvector_centrality(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_eigenvector_centrality_nx(graph_file): dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) @@ -141,7 +136,7 @@ def test_eigenvector_centrality_multi_column(graph_file): @pytest.mark.parametrize("graph_file", [TOY]) def test_eigenvector_centrality_toy(graph_file): # This test is based off of libcugraph_c and pylibcugraph tests - G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) + G = graph_file.get_graph(download=True, create_using=cugraph.Graph(directed=True)) tol = 1e-6 max_iter = 200 diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index f91ac418ef0..6828dd3cbd2 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -11,13 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# import numpy as np -import pytest -import cugraph.dask as dcg import gc -import cugraph -import dask_cudf + +import pytest + import cudf +import dask_cudf +import cugraph +import cugraph.dask as dcg from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import DATASETS diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality.py index 5c34866e0d0..3c1371b8eff 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality.py @@ -14,27 +14,17 @@ import gc import pytest +import networkx as nx import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import ( - toy_graph_undirected, - karate, - DATASETS, - DATASETS_UNDIRECTED, +from cugraph.testing import ( + utils, + DEFAULT_DATASETS, + UNDIRECTED_DATASETS, ) +from cugraph.datasets import toy_graph_undirected, karate -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx # This toy graph is used in multiple tests throughout libcugraph_c and pylib. 
TOY = toy_graph_undirected @@ -75,7 +65,7 @@ def calc_katz(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_katz_centrality(graph_file): katz_scores = calc_katz(graph_file) @@ -86,7 +76,7 @@ def test_katz_centrality(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_katz_centrality_nx(graph_file): dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) @@ -118,7 +108,7 @@ def test_katz_centrality_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_katz_centrality_multi_column(graph_file): dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) @@ -161,7 +151,7 @@ def test_katz_centrality_multi_column(graph_file): @pytest.mark.parametrize("graph_file", [TOY]) def test_katz_centrality_toy(graph_file): # This test is based off of libcugraph_c and pylibcugraph tests - G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) + G = graph_file.get_graph(create_using=cugraph.Graph(directed=True), download=True) alpha = 0.01 beta = 1.0 tol = 0.000001 diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index 9a6ee2d2668..1dcbcbae3cd 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -11,13 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# import numpy as np -import pytest -import cugraph.dask as dcg import gc -import cugraph -import dask_cudf + +import pytest + import cudf +import dask_cudf +import cugraph +import cugraph.dask as dcg from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index cebb97923ee..d4b33641c1a 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -11,16 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import gc + import pytest import cugraph.dask as dcg -import gc -# import pytest -import cugraph -import dask_cudf import cudf - -# from cugraph.dask.common.mg_utils import is_single_gpu +import dask_cudf +import cugraph from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH # ============================================================================= diff --git a/python/cugraph/cugraph/tests/community/test_balanced_cut.py b/python/cugraph/cugraph/tests/community/test_balanced_cut.py index 5beca07dfb7..0a95a1846ce 100644 --- a/python/cugraph/cugraph/tests/community/test_balanced_cut.py +++ b/python/cugraph/cugraph/tests/community/test_balanced_cut.py @@ -12,14 +12,15 @@ # limitations under the License. 
import gc -import random +import random import pytest import networkx as nx import pandas as pd + import cudf import cugraph -from cugraph.experimental.datasets import DATASETS +from cugraph.testing import DEFAULT_DATASETS def cugraph_call(G, partitions): @@ -57,7 +58,7 @@ def random_call(G, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering(graph_file, partitions): gc.collect() @@ -78,7 +79,7 @@ def test_edge_cut_clustering(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals(graph_file, partitions): gc.collect() @@ -101,7 +102,7 @@ def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DATASETS[2]]) +@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): gc.collect() diff --git a/python/cugraph/cugraph/tests/community/test_ecg.py b/python/cugraph/cugraph/tests/community/test_ecg.py index 5536d562402..4440973df83 100644 --- a/python/cugraph/cugraph/tests/community/test_ecg.py +++ b/python/cugraph/cugraph/tests/community/test_ecg.py @@ -12,15 +12,14 @@ # limitations under the License. 
import gc +from pathlib import PurePath import pytest import networkx as nx -import cugraph +import cugraph from cugraph.testing import utils -from cugraph.experimental.datasets import karate, dolphins, netscience - -from pathlib import PurePath +from cugraph.datasets import karate, dolphins, netscience def cugraph_call(G, min_weight, ensemble_size): diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 34cbf73aae6..3a6a6e0d409 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -12,17 +12,17 @@ # limitations under the License. import gc + import pytest -import dask_cudf +import cudf from cudf.testing.testing import assert_frame_equal -from pylibcugraph.testing import gen_fixture_params_product - +import dask_cudf import cugraph import cugraph.dask as dcg from cugraph.testing import utils from cugraph.dask.common.mg_utils import is_single_gpu -import cudf +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index 56c719ce7da..b0dcc2ede3d 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -14,31 +14,20 @@ import gc import pytest +import networkx as nx +import numpy as np import cugraph from cugraph.testing import utils - -import numpy as np +from cugraph.datasets import polbooks, karate_asymmetric from numba import cuda -from cugraph.experimental.datasets import DATASETS_KTRUSS, karate_asymmetric - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 
'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - -print("Networkx version : {} ".format(nx.__version__)) # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() @@ -91,8 +80,7 @@ def test_unsupported_cuda_version(): """ k = 5 - graph_file = DATASETS_KTRUSS[0][0] - G = graph_file.get_graph() + G = polbooks.get_graph(download=True) if __cuda_version == __unsupported_cuda_version: with pytest.raises(NotImplementedError): cugraph.k_truss(G, k) @@ -105,13 +93,11 @@ def test_unsupported_cuda_version(): (__cuda_version == __unsupported_cuda_version), reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.", ) -@pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) -def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): +@pytest.mark.parametrize("_, nx_ground_truth", utils.DATASETS_KTRUSS) +def test_ktruss_subgraph_Graph(_, nx_ground_truth): k = 5 - cu_M = utils.read_csv_file(graph_file) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + G = polbooks.get_graph(download=True, create_using=cugraph.Graph(directed=False)) k_subgraph = cugraph.ktruss_subgraph(G, k) compare_k_truss(k_subgraph, k, nx_ground_truth) @@ -122,11 +108,9 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): (__cuda_version == __unsupported_cuda_version), reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.", ) -@pytest.mark.parametrize("graph_file, nx_ground_truth", DATASETS_KTRUSS) -def 
test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): - +def test_ktruss_subgraph_Graph_nx(): k = 5 - dataset_path = graph_file.get_path() + dataset_path = polbooks.get_path() M = utils.read_csv_for_nx(dataset_path, read_weights_in_sp=True) G = nx.from_pandas_edgelist( M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() @@ -146,7 +130,9 @@ def test_ktruss_subgraph_directed_Graph(): k = 5 edgevals = True G = karate_asymmetric.get_graph( - create_using=cugraph.Graph(directed=True), ignore_weights=not edgevals + download=True, + create_using=cugraph.Graph(directed=True), + ignore_weights=not edgevals, ) with pytest.raises(ValueError): cugraph.k_truss(G, k) diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 9cbe0df2532..a06b0dd22c5 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++ b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -15,12 +15,12 @@ import time import pytest - import networkx as nx + import cugraph import cudf -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, karate_asymmetric +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.datasets import karate_asymmetric from cudf.testing.testing import assert_series_equal @@ -179,7 +179,7 @@ def cugraph_louvain(G): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_leiden(graph_file): edgevals = True @@ -192,7 +192,7 @@ def test_leiden(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_leiden_nx(graph_file): dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py index 
e76696e5769..69fccdae260 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden_mg.py +++ b/python/cugraph/cugraph/tests/community/test_leiden_mg.py @@ -13,13 +13,12 @@ import pytest -import cugraph.dask as dcg -import cugraph import dask_cudf +import cugraph +import cugraph.dask as dcg from cugraph.testing import utils -# from cugraph.dask.common.mg_utils import is_single_gpu try: from rapids_pytest_benchmark import setFixtureParamNames diff --git a/python/cugraph/cugraph/tests/community/test_louvain.py b/python/cugraph/cugraph/tests/community/test_louvain.py index 6c0dcef8c4a..183be071a44 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain.py +++ b/python/cugraph/cugraph/tests/community/test_louvain.py @@ -12,26 +12,17 @@ # limitations under the License. import gc -import time +import time import pytest +import networkx as nx import cugraph import cupyx import cudf -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, karate_asymmetric - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, these import community and import networkx need to be -# relocated in the third-party group once this gets fixed. 
-import warnings +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.datasets import karate_asymmetric -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx try: import community @@ -82,7 +73,7 @@ def networkx_call(M): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_louvain(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) @@ -116,7 +107,7 @@ def test_louvain_directed_graph(): @pytest.mark.sg @pytest.mark.parametrize("is_weighted", [True, False]) def test_louvain_csr_graph(is_weighted): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() M = cupyx.scipy.sparse.coo_matrix( diff --git a/python/cugraph/cugraph/tests/community/test_louvain_mg.py b/python/cugraph/cugraph/tests/community/test_louvain_mg.py index a766f804673..5318262fe26 100644 --- a/python/cugraph/cugraph/tests/community/test_louvain_mg.py +++ b/python/cugraph/cugraph/tests/community/test_louvain_mg.py @@ -19,7 +19,6 @@ import dask_cudf from cugraph.testing import utils -# from cugraph.dask.common.mg_utils import is_single_gpu try: from rapids_pytest_benchmark import setFixtureParamNames diff --git a/python/cugraph/cugraph/tests/community/test_modularity.py b/python/cugraph/cugraph/tests/community/test_modularity.py index 07fa2718ee1..ac44b6c89c1 100644 --- a/python/cugraph/cugraph/tests/community/test_modularity.py +++ b/python/cugraph/cugraph/tests/community/test_modularity.py @@ -12,17 +12,15 @@ # limitations under the License. 
import gc -import random +import random import pytest +import networkx as nx import cudf import cugraph -from cugraph.testing import utils +from cugraph.testing import utils, DEFAULT_DATASETS from cugraph.utilities import ensure_cugraph_obj_for_nx -from cugraph.experimental.datasets import DATASETS - -import networkx as nx def cugraph_call(G, partitions): @@ -55,7 +53,7 @@ def random_call(G, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): gc.collect() @@ -77,7 +75,7 @@ def test_modularity_clustering(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering_nx(graph_file, partitions): # Read in the graph and get a cugraph object @@ -108,7 +106,7 @@ def test_modularity_clustering_nx(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering_multi_column(graph_file, partitions): # Read in the graph and get a cugraph object diff --git a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py index 5b115be81e0..8abab3179fe 100644 --- a/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py +++ b/python/cugraph/cugraph/tests/community/test_subgraph_extraction.py @@ -19,8 +19,8 @@ import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS, karate +from cugraph.testing import utils, DEFAULT_DATASETS +from cugraph.datasets import karate 
############################################################################### @@ -66,7 +66,7 @@ def nx_call(M, verts, directed=True): ############################################################################### @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_subgraph_extraction_DiGraph(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) @@ -80,7 +80,7 @@ def test_subgraph_extraction_DiGraph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_subgraph_extraction_Graph(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) @@ -94,7 +94,7 @@ def test_subgraph_extraction_Graph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DATASETS[2]]) +@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) def test_subgraph_extraction_Graph_nx(graph_file): directed = False verts = np.zeros(3, dtype=np.int32) @@ -123,7 +123,7 @@ def test_subgraph_extraction_Graph_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_subgraph_extraction_multi_column(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count.py b/python/cugraph/cugraph/tests/community/test_triangle_count.py index 3705ffbf8ed..a4d267719ba 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count.py @@ -12,27 +12,16 @@ # limitations under the License. 
import gc -import random +import random +import networkx as nx import pytest -import cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, karate_asymmetric - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.datasets import karate_asymmetric +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= @@ -45,7 +34,7 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= -datasets = DATASETS_UNDIRECTED +datasets = UNDIRECTED_DATASETS fixture_params = gen_fixture_params_product( (datasets, "graph_file"), ([True, False], "edgevals"), diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 4127b6ea4bd..2cf0525d2ad 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -11,17 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import random import gc +import random import pytest + import cudf import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product - import cugraph -from cugraph.testing import utils import cugraph.dask as dcg +from cugraph.testing import utils +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/components/test_connectivity.py b/python/cugraph/cugraph/tests/components/test_connectivity.py index 71bec2e15c1..df45e055c5e 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity.py @@ -12,35 +12,29 @@ # limitations under the License. import gc + import time from collections import defaultdict import pytest import cupy as cp import numpy as np +import networkx as nx from cupyx.scipy.sparse import coo_matrix as cp_coo_matrix from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix from cupyx.scipy.sparse import csc_matrix as cp_csc_matrix from scipy.sparse import coo_matrix as sp_coo_matrix from scipy.sparse import csr_matrix as sp_csr_matrix from scipy.sparse import csc_matrix as sp_csc_matrix -from cugraph.experimental.datasets import DATASETS, STRONGDATASETS from cugraph.utilities import is_nx_graph_type import cudf import cugraph -from cugraph.testing import utils +from cugraph.testing import utils, DEFAULT_DATASETS +from cugraph.datasets import dolphins, netscience, email_Eu_core -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +DATASETS_BATCH = [dolphins, netscience, email_Eu_core] print("Networkx version : {} ".format(nx.__version__)) @@ -270,22 +264,22 @@ def assert_scipy_api_compat(G, dataset_path, api_type): # ============================================================================= # Pytest fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS) +@pytest.fixture(scope="module", params=DEFAULT_DATASETS) def dataset_nxresults_weak(request): return networkx_weak_call(request.param) -@pytest.fixture(scope="module", params=[DATASETS[0]]) +@pytest.fixture(scope="module", params=[DEFAULT_DATASETS[0]]) def single_dataset_nxresults_weak(request): return networkx_weak_call(request.param) -@pytest.fixture(scope="module", params=STRONGDATASETS) +@pytest.fixture(scope="module", params=DATASETS_BATCH) def dataset_nxresults_strong(request): return networkx_strong_call(request.param) -@pytest.fixture(scope="module", params=[STRONGDATASETS[0]]) +@pytest.fixture(scope="module", params=[DATASETS_BATCH[0]]) def single_dataset_nxresults_strong(request): return networkx_strong_call(request.param) @@ -440,9 +434,9 @@ def test_scipy_api_compat_strong(single_dataset_nxresults_strong): @pytest.mark.parametrize("connection_type", ["strong", "weak"]) def test_scipy_api_compat(connection_type): if connection_type == "strong": - graph_file = STRONGDATASETS[0] + graph_file = DATASETS_BATCH[0] else: - graph_file = DATASETS[0] + graph_file = DEFAULT_DATASETS[0] input_cugraph_graph = graph_file.get_graph() diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index 691e85e51cd..e809ab66438 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ 
b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -11,18 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest -import cugraph.dask as dcg import gc -# import pytest -import cugraph -import dask_cudf -import cudf +import pytest -# from cugraph.dask.common.mg_utils import is_single_gpu +import cudf +import dask_cudf +import cugraph +import cugraph.dask as dcg from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH + # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= diff --git a/python/cugraph/cugraph/tests/core/test_core_number.py b/python/cugraph/cugraph/tests/core/test_core_number.py index 6a8fedfe11c..a01b837ff61 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number.py +++ b/python/cugraph/cugraph/tests/core/test_core_number.py @@ -14,13 +14,12 @@ import gc import pytest -import cudf -from pylibcugraph.testing.utils import gen_fixture_params_product import networkx as nx +import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED +from cugraph.testing import utils, UNDIRECTED_DATASETS +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= @@ -33,11 +32,10 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= -datasets = DATASETS_UNDIRECTED degree_type = ["incoming", "outgoing"] fixture_params = gen_fixture_params_product( - (datasets, "graph_file"), + (UNDIRECTED_DATASETS, "graph_file"), (degree_type, "degree_type"), ) diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py 
b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index cff2ae11ef3..23214b5f51b 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -14,12 +14,12 @@ import gc import pytest -import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import dask_cudf import cugraph -from cugraph.testing import utils import cugraph.dask as dcg +from cugraph.testing import utils +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/core/test_k_core.py b/python/cugraph/cugraph/tests/core/test_k_core.py index 1ea3bdd8a0a..f0169238ece 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core.py +++ b/python/cugraph/cugraph/tests/core/test_k_core.py @@ -14,21 +14,10 @@ import gc import pytest +import networkx as nx import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cugraph.testing import utils, UNDIRECTED_DATASETS print("Networkx version : {} ".format(nx.__version__)) @@ -73,7 +62,7 @@ def compare_edges(cg, nxg): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_k_core_Graph(graph_file): cu_kcore, nx_kcore = calc_k_cores(graph_file, False) @@ -82,7 +71,7 @@ def test_k_core_Graph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_k_core_Graph_nx(graph_file): dataset_path = graph_file.get_path() NM = utils.read_csv_for_nx(dataset_path) @@ -94,7 +83,7 @@ def test_k_core_Graph_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_k_core_corenumber_multicolumn(graph_file): dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) @@ -133,7 +122,7 @@ def test_k_core_corenumber_multicolumn(graph_file): @pytest.mark.sg def test_k_core_invalid_input(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] G = karate.get_graph(create_using=cugraph.Graph(directed=True)) with pytest.raises(ValueError): cugraph.k_core(G) diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index d8e7ef98d24..c68108ce241 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -14,14 +14,14 @@ import gc import pytest -import dask_cudf -from cudf.testing.testing import assert_frame_equal -from pylibcugraph.testing import gen_fixture_params_product +import dask_cudf import cugraph -from cugraph.testing import utils import cugraph.dask as 
dcg +from cugraph.testing import utils +from cudf.testing.testing import assert_frame_equal from cugraph.structure.symmetrize import symmetrize_df +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py index 2b7a5a2b1c9..2d1537d11e3 100644 --- a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py +++ b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py @@ -11,10 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # Import FeatureStore class -from cugraph.gnn import FeatureStore + +import pytest import numpy as np + import cudf -import pytest +from cugraph.gnn import FeatureStore @pytest.mark.sg diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index c5c382df2eb..a33d4f753db 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -17,11 +17,16 @@ import pytest import pandas as pd import numpy as np + import cudf import cupy as cp +import cugraph +from cugraph.generators import rmat +from cugraph.datasets import cyber from cudf.testing import assert_frame_equal, assert_series_equal from pylibcugraph.testing.utils import gen_fixture_params_product + # If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" # fixture will be available automatically. 
Check that this fixture is available # by trying to import rapids_pytest_benchmark, and if that fails, set @@ -44,10 +49,6 @@ SettingWithCopyWarning as pandas_SettingWithCopyWarning, ) -import cugraph -from cugraph.generators import rmat -from cugraph.experimental.datasets import cyber - def type_is_categorical(pG): return ( diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py b/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py index 8bc2da37e89..dd48fc72e36 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph_mg.py @@ -10,22 +10,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import gc -import dask_cudf import pytest import pandas as pd +import numpy as np + import cudf +import cugraph +import dask_cudf import cupy as cp -import numpy as np -from cudf.testing import assert_frame_equal, assert_series_equal +import cugraph.dask as dcg from cupy.testing import assert_array_equal +from cudf.testing import assert_frame_equal, assert_series_equal from pylibcugraph.testing.utils import gen_fixture_params_product from cugraph.dask.common.mg_utils import is_single_gpu - -import cugraph.dask as dcg -from cugraph.experimental.datasets import cyber -from cugraph.experimental.datasets import netscience +from cugraph.datasets import cyber, netscience # If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" # fixture will be available automatically. 
Check that this fixture is available @@ -39,8 +40,6 @@ gpubenchmark = pytest_benchmark.plugin.benchmark -import cugraph - def type_is_categorical(pG): return ( diff --git a/python/cugraph/cugraph/tests/generators/test_rmat.py b/python/cugraph/cugraph/tests/generators/test_rmat.py index 876e9727b37..9b8353a4ca5 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat.py @@ -15,9 +15,8 @@ import pytest import cudf - -from cugraph.generators import rmat import cugraph +from cugraph.generators import rmat from cupy.sparse import coo_matrix, triu, tril import numpy as np import cupy as cp diff --git a/python/cugraph/cugraph/tests/generators/test_rmat_mg.py b/python/cugraph/cugraph/tests/generators/test_rmat_mg.py index d5d6db4d70f..0e1808d2f80 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat_mg.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat_mg.py @@ -15,7 +15,8 @@ import pytest import dask_cudf - +import cugraph +from cugraph.generators import rmat from cugraph.testing.mg_utils import ( start_dask_client, stop_dask_client, @@ -23,8 +24,6 @@ from cugraph.dask.common.mg_utils import ( is_single_gpu, ) -from cugraph.generators import rmat -import cugraph ############################################################################## diff --git a/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler.py b/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler.py index 8d94aa6137d..eeb9d06b162 100644 --- a/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler.py +++ b/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler.py @@ -11,12 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cudf +import pytest import pandas as pd import numpy as np -import cupy as cp -import pytest +import cudf +import cupy as cp from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import DGLUniformSampler diff --git a/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler_mg.py b/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler_mg.py index 45a64a1f4ca..bc36e5f0631 100644 --- a/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler_mg.py +++ b/python/cugraph/cugraph/tests/gnn/test_dgl_uniform_sampler_mg.py @@ -10,13 +10,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import dask_cudf -import cudf + +import pytest import pandas as pd import numpy as np -import cupy as cp -import pytest +import dask_cudf +import cudf +import cupy as cp from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import DGLUniformSampler diff --git a/python/cugraph/cugraph/tests/internals/test_renumber.py b/python/cugraph/cugraph/tests/internals/test_renumber.py index 4526770ec2a..cd27dfecfe9 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber.py @@ -15,14 +15,13 @@ import gc -import pandas as pd import pytest +import pandas as pd + import cudf from cudf.testing import assert_series_equal - from cugraph.structure.number_map import NumberMap -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS +from cugraph.testing import utils, DEFAULT_DATASETS @pytest.mark.sg @@ -108,7 +107,7 @@ def test_renumber_negative_col(): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_renumber_files_col(graph_file): gc.collect() dataset_path = graph_file.get_path() @@ -150,7 +149,7 @@ def test_renumber_files_col(graph_file): @pytest.mark.sg 
-@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_renumber_files_multi_col(graph_file): gc.collect() dataset_path = graph_file.get_path() diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py index cc7ee0368a5..c0abc61b050 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py @@ -18,17 +18,17 @@ import pandas import numpy as np -import dask_cudf + import dask import cudf -from cudf.testing import assert_frame_equal, assert_series_equal - +import dask_cudf import cugraph.dask as dcg import cugraph from cugraph.testing import utils from cugraph.structure.number_map import NumberMap from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cudf.testing import assert_frame_equal, assert_series_equal # ============================================================================= diff --git a/python/cugraph/cugraph/tests/internals/test_symmetrize.py b/python/cugraph/cugraph/tests/internals/test_symmetrize.py index 8d772abe822..654ed3296d9 100644 --- a/python/cugraph/cugraph/tests/internals/test_symmetrize.py +++ b/python/cugraph/cugraph/tests/internals/test_symmetrize.py @@ -14,11 +14,11 @@ import gc import pytest - import pandas as pd + import cudf import cugraph -from cugraph.experimental.datasets import DATASETS +from cugraph.testing import DEFAULT_DATASETS @pytest.mark.sg @@ -155,7 +155,7 @@ def compare(src1, dst1, val1, src2, dst2, val2): @pytest.mark.sg @pytest.mark.skip("debugging") -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_symmetrize_unweighted(graph_file): gc.collect() @@ -178,7 +178,7 @@ def test_symmetrize_unweighted(graph_file): @pytest.mark.sg @pytest.mark.skip("debugging") 
-@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_symmetrize_weighted(graph_file): gc.collect() cu_M = graph_file.get_edgelist() diff --git a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py index d7b59e4f73a..05cc06e6282 100644 --- a/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_symmetrize_mg.py @@ -15,11 +15,11 @@ import pytest import pandas as pd -import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import dask_cudf import cugraph from cugraph.testing import utils +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py index 12d0a4e3aa6..495a2d945c0 100644 --- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py +++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py @@ -13,19 +13,13 @@ import time import pytest +import scipy.io +from sklearn.manifold import trustworthiness import cudf import cugraph from cugraph.internals import GraphBasedDimRedCallback -from sklearn.manifold import trustworthiness -import scipy.io -from cugraph.experimental.datasets import karate, polbooks, dolphins, netscience - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, these import fa2 and import networkx need to be -# relocated in the third-party group once this gets fixed. 
+from cugraph.datasets import karate, polbooks, dolphins, netscience def cugraph_call( diff --git a/python/cugraph/cugraph/tests/link_analysis/test_hits.py b/python/cugraph/cugraph/tests/link_analysis/test_hits.py index 16d89f20e80..1c5a135e944 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_hits.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_hits.py @@ -16,12 +16,12 @@ import pytest import networkx as nx import pandas as pd -import cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core, karate +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.datasets import email_Eu_core, karate +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= @@ -34,7 +34,7 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = UNDIRECTED_DATASETS + [email_Eu_core] fixture_params = gen_fixture_params_product( (datasets, "graph_file"), ([50], "max_iter"), diff --git a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py index 9885d47b24a..bfb33ccd619 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py @@ -12,16 +12,14 @@ # limitations under the License. 
import gc + import pytest import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product - import cugraph import cugraph.dask as dcg - -# from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing import utils +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index b7487ae329c..8e8ab13574d 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -13,26 +13,15 @@ import gc import time -import numpy as np import pytest +import numpy as np +import networkx as nx import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS, karate - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cugraph.testing import utils, DEFAULT_DATASETS +from cugraph.datasets import karate print("Networkx version : {} ".format(nx.__version__)) @@ -158,7 +147,7 @@ def setup_function(): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @pytest.mark.parametrize("alpha", ALPHA) @@ -224,7 +213,7 @@ def test_pagerank( @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @pytest.mark.parametrize("alpha", ALPHA) @@ -269,7 +258,7 @@ def test_pagerank_nx(graph_file, max_iter, tol, alpha, personalization_perc, has @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @pytest.mark.parametrize("alpha", ALPHA) diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py index 14a512c59e5..d68aeda4a2f 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py @@ -10,23 +10,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np -import pytest -import cugraph.dask as dcg + import gc + +import pytest +import numpy as np + +import cudf import cugraph +import cugraph.dask as dcg import dask_cudf from cugraph.testing import utils -import cudf - from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH # The function selects personalization_perc% of accessible vertices in graph M # and randomly assigns them personalization values - - def personalize(vertices, personalization_perc): personalization = None if personalization_perc != 0: diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index 43077126827..cd883fb88f2 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -12,27 +12,17 @@ # limitations under the License. import gc + import pytest +import networkx as nx import cudf -from cudf.testing import assert_series_equal, assert_frame_equal - import cugraph -from cugraph.testing import utils -from cugraph.experimental import jaccard_coefficient as exp_jaccard_coefficient +from cugraph.datasets import netscience +from cugraph.testing import utils, UNDIRECTED_DATASETS from cugraph.experimental import jaccard as exp_jaccard -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, netscience - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cudf.testing import assert_series_equal, assert_frame_equal +from cugraph.experimental import jaccard_coefficient as exp_jaccard_coefficient print("Networkx version : {} ".format(nx.__version__)) @@ -140,7 +130,7 @@ def networkx_call(M, benchmark_callable=None): # ============================================================================= # Pytest Fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", params=UNDIRECTED_DATASETS) def read_csv(request): """ Read csv file for both networkx and cugraph @@ -318,7 +308,7 @@ def test_jaccard_multi_column(read_csv): @pytest.mark.sg def test_weighted_exp_jaccard(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] G = karate.get_graph() with pytest.raises(ValueError): exp_jaccard(G) @@ -331,7 +321,7 @@ def test_weighted_exp_jaccard(): @pytest.mark.sg def test_invalid_datasets_jaccard(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py index 35f17d99184..1f7c0a9cadb 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py @@ -15,11 +15,12 @@ import random import pytest + import dask_cudf -from pylibcugraph.testing import gen_fixture_params_product -import cugraph.dask as dcg import cugraph +import cugraph.dask as dcg from cugraph.testing import utils +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= diff --git 
a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py index 03bee451f3c..586d534cd42 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py @@ -12,18 +12,16 @@ # limitations under the License. import gc + import pytest import numpy as np import scipy import cudf -from cudf.testing import assert_series_equal, assert_frame_equal - -from cugraph.experimental import overlap as exp_overlap - import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.experimental import overlap as exp_overlap +from cudf.testing import assert_series_equal, assert_frame_equal # ============================================================================= @@ -113,7 +111,7 @@ def cpu_call(M, first, second): # ============================================================================= # Pytest Fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", params=UNDIRECTED_DATASETS) def read_csv(request): """ Read csv file for both networkx and cugraph @@ -171,7 +169,7 @@ def test_overlap_edge_vals(gpubenchmark, read_csv, extract_two_hop): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_overlap_multi_column(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) @@ -216,7 +214,7 @@ def test_overlap_multi_column(graph_file): @pytest.mark.sg def test_weighted_exp_overlap(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] G = karate.get_graph() with pytest.raises(ValueError): exp_overlap(G) @@ -229,7 +227,7 @@ def test_weighted_exp_overlap(): 
@pytest.mark.sg def test_invalid_datasets_overlap(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py index 541e3123e78..220b90cbb47 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py @@ -15,12 +15,12 @@ import random import pytest -import dask_cudf -from pylibcugraph.testing import gen_fixture_params_product import cugraph +import dask_cudf import cugraph.dask as dcg from cugraph.testing import utils +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py index 14d84784161..3da33a3e853 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py @@ -12,26 +12,16 @@ # limitations under the License. import gc + import pytest +import networkx as nx import cudf -from cudf.testing import assert_series_equal, assert_frame_equal - import cugraph -from cugraph.testing import utils +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cugraph.datasets import netscience from cugraph.experimental import sorensen as exp_sorensen -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, netscience - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cudf.testing import assert_series_equal, assert_frame_equal print("Networkx version : {} ".format(nx.__version__)) @@ -145,7 +135,7 @@ def networkx_call(M, benchmark_callable=None): # ============================================================================= # Pytest Fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", params=UNDIRECTED_DATASETS) def read_csv(request): """ Read csv file for both networkx and cugraph @@ -280,7 +270,7 @@ def test_sorensen_multi_column(read_csv): @pytest.mark.sg def test_weighted_exp_sorensen(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] G = karate.get_graph() with pytest.raises(ValueError): exp_sorensen(G) @@ -293,7 +283,7 @@ def test_weighted_exp_sorensen(): @pytest.mark.sg def test_invalid_datasets_sorensen(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py index 7c84fce989b..d9d013c7e35 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py @@ -12,16 +12,16 @@ # limitations under the License. 
import gc + import random import pytest +import cugraph import dask_cudf -from pylibcugraph.testing import gen_fixture_params_product -from cugraph.dask.common.mg_utils import is_single_gpu - import cugraph.dask as dcg -import cugraph from cugraph.testing import utils +from cugraph.dask.common.mg_utils import is_single_gpu +from pylibcugraph.testing import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py index 2bc39b877ea..36a21df46b8 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py @@ -13,27 +13,15 @@ import gc -import numpy as np import pytest +import numpy as np +import networkx as nx import cudf -from cudf.testing import assert_series_equal - import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings +from cugraph.testing import utils, UNDIRECTED_DATASETS +from cudf.testing import assert_series_equal -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx print("Networkx version : {} ".format(nx.__version__)) @@ -97,7 +85,7 @@ def networkx_call(M, benchmark_callable=None): # ============================================================================= # Pytest Fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", params=UNDIRECTED_DATASETS) def read_csv(request): """ Read csv file for both networkx and cugraph @@ -180,7 +168,7 @@ def test_wjaccard_multi_column(read_csv): @pytest.mark.sg def test_invalid_datasets_jaccard_w(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py index 5e35bb66f07..1dffb9fca41 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py @@ -16,12 +16,11 @@ import pytest import scipy import numpy as np -import cudf -from cudf.testing import assert_series_equal -from cugraph.experimental.datasets import DATASETS_UNDIRECTED +import cudf import cugraph -from cugraph.testing import utils +from cudf.testing import assert_series_equal +from cugraph.testing import utils, UNDIRECTED_DATASETS # ============================================================================= @@ -94,7 +93,7 @@ def cpu_call(M, first, second): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_woverlap(gpubenchmark, graph_file): dataset_path = 
graph_file.get_path() Mnx = utils.read_csv_for_nx(dataset_path) @@ -122,7 +121,7 @@ def test_woverlap(gpubenchmark, graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_woverlap_multi_column(graph_file): dataset_path = graph_file.get_path() M = utils.read_csv_for_nx(dataset_path) @@ -163,7 +162,7 @@ def test_woverlap_multi_column(graph_file): @pytest.mark.sg def test_invalid_datasets_overlap_w(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py index cca2363d2d6..8d09b3e25b3 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py @@ -13,27 +13,15 @@ import gc -import numpy as np import pytest +import numpy as np +import networkx as nx import cudf -from cudf.testing import assert_series_equal - import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings +from cudf.testing import assert_series_equal +from cugraph.testing import utils, UNDIRECTED_DATASETS -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx print("Networkx version : {} ".format(nx.__version__)) @@ -101,7 +89,7 @@ def networkx_call(M, benchmark_callable=None): # ============================================================================= # Pytest Fixtures # ============================================================================= -@pytest.fixture(scope="module", params=DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", params=UNDIRECTED_DATASETS) def read_csv(request): """ Read csv file for both networkx and cugraph @@ -184,7 +172,7 @@ def test_wsorensen_multi_column(read_csv): @pytest.mark.sg def test_invalid_datasets_sorensen_w(): - karate = DATASETS_UNDIRECTED[0] + karate = UNDIRECTED_DATASETS[0] df = karate.get_edgelist() df = df.add(1) G = cugraph.Graph(directed=False) diff --git a/python/cugraph/cugraph/tests/nx/test_compat_algo.py b/python/cugraph/cugraph/tests/nx/test_compat_algo.py index 2a074d588eb..4b9160da416 100644 --- a/python/cugraph/cugraph/tests/nx/test_compat_algo.py +++ b/python/cugraph/cugraph/tests/nx/test_compat_algo.py @@ -11,8 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cugraph.experimental.compat.nx as nx import pytest +import cugraph.experimental.compat.nx as nx @pytest.mark.sg diff --git a/python/cugraph/cugraph/tests/nx/test_compat_pr.py b/python/cugraph/cugraph/tests/nx/test_compat_pr.py index a8dc6f7bb22..9be3912a33f 100644 --- a/python/cugraph/cugraph/tests/nx/test_compat_pr.py +++ b/python/cugraph/cugraph/tests/nx/test_compat_pr.py @@ -22,10 +22,10 @@ import pytest import numpy as np -from pylibcugraph.testing.utils import gen_fixture_params_product from cugraph.testing import utils from cugraph.experimental.datasets import karate +from pylibcugraph.testing.utils import gen_fixture_params_product MAX_ITERATIONS = [100, 200] diff --git a/python/cugraph/cugraph/tests/nx/test_nx_convert.py b/python/cugraph/cugraph/tests/nx/test_nx_convert.py index ee14bfe361c..58b89a4bda9 100644 --- a/python/cugraph/cugraph/tests/nx/test_nx_convert.py +++ b/python/cugraph/cugraph/tests/nx/test_nx_convert.py @@ -11,25 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd import pytest -import cudf +import pandas as pd +import networkx as nx +import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cugraph.testing import utils, DEFAULT_DATASETS def _compare_graphs(nxG, cuG, has_wt=True): @@ -70,7 +58,7 @@ def _compare_graphs(nxG, cuG, has_wt=True): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs # Read in the graph @@ -97,7 +85,7 @@ def test_networkx_compatibility(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_nx_convert_undirected(graph_file): # read data and create a Nx Graph dataset_path = graph_file.get_path() @@ -114,7 +102,7 @@ def test_nx_convert_undirected(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_nx_convert_directed(graph_file): # read data and create a Nx DiGraph dataset_path = graph_file.get_path() @@ -130,7 +118,7 @@ def test_nx_convert_directed(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_nx_convert_weighted(graph_file): # read data and create a Nx DiGraph dataset_path = graph_file.get_path() @@ -147,7 +135,7 @@ def test_nx_convert_weighted(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_nx_convert_multicol(graph_file): # read data and create a Nx Graph dataset_path = graph_file.get_path() diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 553cd5cf788..cbd8321a338 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ 
b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -12,10 +12,11 @@ # limitations under the License. import pytest + import cudf import cupy import cugraph -from cugraph.experimental.datasets import karate +from cugraph.datasets import karate from cugraph.experimental.gnn import BulkSampler from cugraph.utilities.utils import create_directory_with_overwrite diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py index ffbba74f229..0b4b9fa73de 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py @@ -11,11 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest -import cudf import os import shutil +import pytest + +import cudf from cugraph.gnn.data_loading.bulk_sampler_io import write_samples from cugraph.utilities.utils import create_directory_with_overwrite diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py index 7c96c899ce1..4115eca1c1f 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py @@ -11,14 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import shutil + import pytest import cudf import dask_cudf - -import os -import shutil - from cugraph.gnn.data_loading.bulk_sampler_io import write_samples from cugraph.utilities.utils import create_directory_with_overwrite diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py index e20b6883209..b7cd4b0822b 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py @@ -11,19 +11,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import shutil +import re + import pytest + import cudf -import dask_cudf import cupy import cugraph -from cugraph.experimental.datasets import karate +import dask_cudf +from cugraph.datasets import karate from cugraph.experimental import BulkSampler from cugraph.utilities.utils import create_directory_with_overwrite -import os -import shutil -import re - @pytest.mark.mg def test_bulk_sampler_simple(dask_client, scratch_dir): diff --git a/python/cugraph/cugraph/tests/sampling/test_egonet.py b/python/cugraph/cugraph/tests/sampling/test_egonet.py index 2af31438a13..1ae7fcc0c88 100644 --- a/python/cugraph/cugraph/tests/sampling/test_egonet.py +++ b/python/cugraph/cugraph/tests/sampling/test_egonet.py @@ -12,23 +12,14 @@ # limitations under the License. import gc + import pytest +import networkx as nx import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings +from cugraph.testing import utils, DEFAULT_DATASETS -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx print("Networkx version : {} ".format(nx.__version__)) @@ -37,7 +28,7 @@ @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) def test_ego_graph_nx(graph_file, seed, radius): @@ -58,7 +49,7 @@ def test_ego_graph_nx(graph_file, seed, radius): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) def test_batched_ego_graphs(graph_file, seeds, radius): @@ -83,7 +74,7 @@ def test_batched_ego_graphs(graph_file, seeds, radius): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) def test_multi_column_ego_graph(graph_file, seed, radius): diff --git a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py index 172296c07f9..7f5891abdd3 100644 --- a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py @@ -12,16 +12,16 @@ # limitations under the License. 
import gc -import pytest -import dask_cudf -from cudf.testing.testing import assert_frame_equal, assert_series_equal -from pylibcugraph.testing import gen_fixture_params_product +import pytest import cugraph +import dask_cudf import cugraph.dask as dcg from cugraph.testing import utils from cugraph.dask.common.mg_utils import is_single_gpu +from pylibcugraph.testing import gen_fixture_params_product +from cudf.testing.testing import assert_frame_equal, assert_series_equal # ============================================================================= diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index 60b937cc1b2..0bfdd460cae 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -13,12 +13,13 @@ import gc import random + import pytest -from cugraph.testing import utils -import cugraph import cudf -from cugraph.experimental.datasets import small_line, karate, DATASETS_SMALL +import cugraph +from cugraph.datasets import small_line, karate +from cugraph.testing import utils, SMALL_DATASETS # ============================================================================= @@ -75,7 +76,7 @@ def calc_node2vec(G, start_vertices, max_depth, compress_result, p=1.0, q=1.0): @pytest.mark.sg @pytest.mark.parametrize(*_get_param_args("graph_file", [KARATE])) def test_node2vec_invalid(graph_file): - G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) + G = graph_file.get_graph(download=True, create_using=cugraph.Graph(directed=True)) k = random.randint(1, 10) start_vertices = cudf.Series( random.sample(range(G.number_of_vertices()), k), dtype="int32" @@ -135,7 +136,9 @@ def test_node2vec_invalid(graph_file): @pytest.mark.parametrize(*_get_param_args("graph_file", [LINE])) @pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS)) def test_node2vec_line(graph_file, directed): - G = 
graph_file.get_graph(create_using=cugraph.Graph(directed=directed)) + G = graph_file.get_graph( + download=True, create_using=cugraph.Graph(directed=directed) + ) max_depth = 3 start_vertices = cudf.Series([0, 3, 6], dtype="int32") df, seeds = calc_node2vec( @@ -144,7 +147,7 @@ def test_node2vec_line(graph_file, directed): @pytest.mark.sg -@pytest.mark.parametrize(*_get_param_args("graph_file", DATASETS_SMALL)) +@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS)) @pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS)) @pytest.mark.parametrize(*_get_param_args("compress", COMPRESSED)) def test_node2vec( diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py index 508f927c296..48629fa03a6 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py @@ -15,21 +15,22 @@ import random import pytest -from cudf.testing import assert_series_equal +import networkx as nx -import cugraph import cudf -import networkx as nx +import cugraph +from cudf.testing import assert_series_equal from cugraph.utilities import ensure_cugraph_obj_for_nx -from cugraph.experimental.datasets import DATASETS, DATASETS_SMALL +from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS + # ============================================================================= # Parameters # ============================================================================= DIRECTED_GRAPH_OPTIONS = [False, True] WEIGHTED_GRAPH_OPTIONS = [False, True] -DATASETS = [pytest.param(d) for d in DATASETS] -DATASETS_SMALL = [pytest.param(d) for d in DATASETS_SMALL] +DATASETS = [pytest.param(d) for d in DEFAULT_DATASETS] +SMALL_DATASETS = [pytest.param(d) for d in SMALL_DATASETS] # ============================================================================= @@ -207,7 +208,7 @@ def check_random_walks_padded(G, path_data, seeds, 
max_depth, legacy_result_type @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("max_depth", [None]) def test_random_walks_invalid_max_dept(graph_file, directed, max_depth): @@ -219,7 +220,7 @@ def test_random_walks_invalid_max_dept(graph_file, directed, max_depth): @pytest.mark.sg @pytest.mark.cugraph_ops -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks_coalesced(graph_file, directed): max_depth = random.randint(2, 10) @@ -243,7 +244,7 @@ def test_random_walks_coalesced(graph_file, directed): @pytest.mark.sg @pytest.mark.cugraph_ops -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks_padded_0(graph_file, directed): max_depth = random.randint(2, 10) @@ -291,7 +292,7 @@ def test_random_walks_padded_1(): @pytest.mark.sg @pytest.mark.cugraph_ops -@pytest.mark.parametrize("graph_file", DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) def test_random_walks_nx(graph_file): G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py index df1db0a95a9..a8aa34710ec 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py @@ -15,14 +15,13 @@ import random import pytest -import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product import cugraph - -# from cugraph.dask.common.mg_utils import is_single_gpu +import dask_cudf import cugraph.dask as dcg -from 
cugraph.experimental.datasets import DATASETS_SMALL, karate_asymmetric +from cugraph.testing import SMALL_DATASETS +from cugraph.datasets import karate_asymmetric +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= @@ -41,7 +40,7 @@ def setup_function(): # Pytest fixtures # ============================================================================= -datasets = DATASETS_SMALL + [karate_asymmetric] +datasets = SMALL_DATASETS + [karate_asymmetric] fixture_params = gen_fixture_params_product( (datasets, "graph_file"), diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 49fce5dbe61..62599291d04 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -14,12 +14,13 @@ import random import pytest -import cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import cudf import cugraph from cugraph import uniform_neighbor_sample -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core, small_tree +from cugraph.testing import UNDIRECTED_DATASETS +from cugraph.datasets import email_Eu_core, small_tree +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= @@ -34,7 +35,7 @@ def setup_function(): # ============================================================================= IS_DIRECTED = [True, False] -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = UNDIRECTED_DATASETS + [email_Eu_core] fixture_params = gen_fixture_params_product( (datasets, "graph_file"), diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 
f1003a8a75b..9d87c097287 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -10,22 +10,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import gc import random import os import pytest + import cupy import cudf +import cugraph import dask_cudf -from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.dask.common.mg_utils import is_single_gpu - import cugraph.dask as dcg -import cugraph - +from cugraph.testing import UNDIRECTED_DATASETS from cugraph.dask import uniform_neighbor_sample -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core, small_tree +from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.datasets import email_Eu_core, small_tree +from pylibcugraph.testing.utils import gen_fixture_params_product # If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" # fixture will be available automatically. Check that this fixture is available @@ -53,7 +54,7 @@ def setup_function(): # ============================================================================= IS_DIRECTED = [True, False] -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = UNDIRECTED_DATASETS + [email_Eu_core] fixture_params = gen_fixture_params_product( (datasets, "graph_file"), diff --git a/python/cugraph/cugraph/tests/structure/test_convert_matrix.py b/python/cugraph/cugraph/tests/structure/test_convert_matrix.py index cc044d774af..2158cae3ab2 100644 --- a/python/cugraph/cugraph/tests/structure/test_convert_matrix.py +++ b/python/cugraph/cugraph/tests/structure/test_convert_matrix.py @@ -12,21 +12,13 @@ # limitations under the License. 
import gc + import pytest -import cugraph -from cugraph.testing import utils import numpy as np +import networkx as nx -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +import cugraph +from cugraph.testing import utils # ============================================================================= diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index 1b883b91e92..a80c47662e2 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -12,43 +12,29 @@ # limitations under the License. 
import gc - import time -import pandas as pd import pytest - +import pandas as pd import scipy +import networkx as nx + +import cupy import cudf -from cudf.testing.testing import assert_frame_equal import cugraph from cugraph.testing import utils from cudf.testing import assert_series_equal - -import cupy +from cudf.testing.testing import assert_frame_equal # MG +import dask_cudf import cugraph.dask as dcg -from cugraph.dask.common.mg_utils import is_single_gpu -from dask_cuda import LocalCUDACluster from dask.distributed import Client -import dask_cudf - -from pylibcugraph import bfs as pylibcugraph_bfs +from dask_cuda import LocalCUDACluster from pylibcugraph import ResourceHandle - +from pylibcugraph import bfs as pylibcugraph_bfs from cugraph.dask.traversal.bfs import convert_to_cudf - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +from cugraph.dask.common.mg_utils import is_single_gpu # ============================================================================= diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index ebaae38a8a4..707b195dfa8 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -12,23 +12,24 @@ # limitations under the License. 
import gc import random -import copy + import pytest +import copy + import cupy -from dask.distributed import wait import cudf -import dask_cudf -from pylibcugraph import bfs as pylibcugraph_bfs -from pylibcugraph import ResourceHandle -from pylibcugraph.testing.utils import gen_fixture_params_product -from cudf.testing.testing import assert_frame_equal - import cugraph +import dask_cudf import cugraph.dask as dcg +import cugraph.dask.comms.comms as Comms from cugraph.testing import utils +from dask.distributed import wait +from pylibcugraph import ResourceHandle +from pylibcugraph import bfs as pylibcugraph_bfs +from cudf.testing.testing import assert_frame_equal from cugraph.dask.traversal.bfs import convert_to_cudf -import cugraph.dask.comms.comms as Comms from cugraph.dask.common.input_utils import get_distributed_data +from pylibcugraph.testing.utils import gen_fixture_params_product # ============================================================================= diff --git a/python/cugraph/cugraph/tests/structure/test_multigraph.py b/python/cugraph/cugraph/tests/structure/test_multigraph.py index e317a935cfc..af78c238d4e 100644 --- a/python/cugraph/cugraph/tests/structure/test_multigraph.py +++ b/python/cugraph/cugraph/tests/structure/test_multigraph.py @@ -14,12 +14,11 @@ import gc import pytest -import networkx as nx import numpy as np +import networkx as nx import cugraph -from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS +from cugraph.testing import utils, DEFAULT_DATASETS # ============================================================================= @@ -30,7 +29,7 @@ def setup_function(): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_multigraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available G = graph_file.get_graph(create_using=cugraph.MultiGraph(directed=True)) @@ -61,7 +60,7 @@ def 
test_multigraph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_Graph_from_MultiGraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available GM = graph_file.get_graph(create_using=cugraph.MultiGraph()) @@ -92,7 +91,7 @@ def test_Graph_from_MultiGraph(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) def test_multigraph_sssp(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available G = graph_file.get_graph(create_using=cugraph.MultiGraph(directed=True)) diff --git a/python/cugraph/cugraph/tests/traversal/test_bfs.py b/python/cugraph/cugraph/tests/traversal/test_bfs.py index 7446b32ee5d..89b00e66baa 100644 --- a/python/cugraph/cugraph/tests/traversal/test_bfs.py +++ b/python/cugraph/cugraph/tests/traversal/test_bfs.py @@ -15,33 +15,22 @@ import random import pytest -import pandas as pd import cupy as cp import numpy as np -from cupyx.scipy.sparse import coo_matrix as cp_coo_matrix -from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix -from cupyx.scipy.sparse import csc_matrix as cp_csc_matrix +import pandas as pd +import networkx as nx +import networkx.algorithms.centrality.betweenness as nxacb from scipy.sparse import coo_matrix as sp_coo_matrix from scipy.sparse import csr_matrix as sp_csr_matrix from scipy.sparse import csc_matrix as sp_csc_matrix -import cudf -from pylibcugraph.testing.utils import gen_fixture_params_product +import cudf import cugraph -from cugraph.testing import utils -from cugraph.experimental import datasets - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. 
Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - import networkx.algorithms.centrality.betweenness as nxacb +from cupyx.scipy.sparse import coo_matrix as cp_coo_matrix +from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix +from cupyx.scipy.sparse import csc_matrix as cp_csc_matrix +from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.testing import utils, DEFAULT_DATASETS, SMALL_DATASETS # ============================================================================= @@ -291,8 +280,8 @@ def get_cu_graph_nx_results_and_params(seed, depth_limit, G, dataset, directed, # ============================================================================= SEEDS = [pytest.param(s) for s in SUBSET_SEED_OPTIONS] DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] -DATASETS = [pytest.param(d) for d in datasets.DATASETS] -DATASETS_SMALL = [pytest.param(d) for d in datasets.DATASETS_SMALL] +DATASETS = [pytest.param(d) for d in DEFAULT_DATASETS] +SMALL_DATASETS = [pytest.param(d) for d in SMALL_DATASETS] DEPTH_LIMIT = [pytest.param(d) for d in DEPTH_LIMITS] # Call gen_fixture_params_product() to caluculate the cartesian product of @@ -309,7 +298,7 @@ def get_cu_graph_nx_results_and_params(seed, depth_limit, G, dataset, directed, ) small_graph_fixture_params = gen_fixture_params_product( - (DATASETS_SMALL, "ds"), (DIRECTED, "dirctd") + (SMALL_DATASETS, "ds"), (DIRECTED, "dirctd") ) # The single param list variants are used when only 1 param combination is @@ -320,7 +309,7 @@ def get_cu_graph_nx_results_and_params(seed, depth_limit, G, dataset, directed, ) single_small_graph_fixture_params = gen_fixture_params_product( - ([DATASETS_SMALL[0]], "ds"), (DIRECTED, "dirctd") + ([SMALL_DATASETS[0]], "ds"), (DIRECTED, "dirctd") ) @@ -449,7 +438,7 @@ def 
test_bfs_invalid_start( @pytest.mark.sg def test_scipy_api_compat(): - graph_file = datasets.DATASETS[0] + graph_file = DEFAULT_DATASETS[0] dataset_path = graph_file.get_path() input_cugraph_graph = graph_file.get_graph(ignore_weights=True) diff --git a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py index 4b5c270034f..38b5a2734d6 100644 --- a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py @@ -11,16 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest -import cugraph.dask as dcg import gc -# import pytest +import pytest + +import cudf import cugraph import dask_cudf -import cudf - -# from cugraph.dask.common.mg_utils import is_single_gpu +import cugraph.dask as dcg from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH # ============================================================================= diff --git a/python/cugraph/cugraph/tests/traversal/test_filter_unreachable.py b/python/cugraph/cugraph/tests/traversal/test_filter_unreachable.py index 495e1a4ec11..c9a44eea1e6 100644 --- a/python/cugraph/cugraph/tests/traversal/test_filter_unreachable.py +++ b/python/cugraph/cugraph/tests/traversal/test_filter_unreachable.py @@ -13,18 +13,13 @@ import gc import time + import pytest import numpy as np +import networkx as nx import cugraph -from cugraph.experimental.datasets import DATASETS - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings +from cugraph.testing import DEFAULT_DATASETS # ============================================================================= @@ -34,18 +29,13 @@ def setup_function(): gc.collect() -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - - print("Networkx version : {} ".format(nx.__version__)) SOURCES = [1] @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_filter_unreachable(graph_file, source): G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) diff --git a/python/cugraph/cugraph/tests/traversal/test_paths.py b/python/cugraph/cugraph/tests/traversal/test_paths.py index 8938ae74553..8a751ba8840 100644 --- a/python/cugraph/cugraph/tests/traversal/test_paths.py +++ b/python/cugraph/cugraph/tests/traversal/test_paths.py @@ -15,13 +15,13 @@ from tempfile import NamedTemporaryFile import math -import cudf -from cupyx.scipy.sparse import coo_matrix as cupy_coo_matrix -import cupy -import networkx as nx import pytest +import networkx as nx +import cudf +import cupy import cugraph +from cupyx.scipy.sparse import coo_matrix as cupy_coo_matrix CONNECTED_GRAPH = """1,5,3 diff --git a/python/cugraph/cugraph/tests/traversal/test_sssp.py b/python/cugraph/cugraph/tests/traversal/test_sssp.py index 1c99123f866..0d2646b29be 100644 --- a/python/cugraph/cugraph/tests/traversal/test_sssp.py +++ b/python/cugraph/cugraph/tests/traversal/test_sssp.py @@ -14,36 +14,24 @@ import gc import time -import numpy as np import pytest +import numpy as np import pandas as pd -import cupy as cp +import networkx as nx + +import cudf import cupyx +import cugraph +import cupy as cp from cupyx.scipy.sparse import coo_matrix as cp_coo_matrix from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix from cupyx.scipy.sparse import csc_matrix as cp_csc_matrix from scipy.sparse 
import coo_matrix as sp_coo_matrix from scipy.sparse import csr_matrix as sp_csr_matrix from scipy.sparse import csc_matrix as sp_csc_matrix -import cudf from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.experimental.datasets import DATASETS_UNDIRECTED - -import cugraph -from cugraph.testing import utils -from cugraph.experimental import datasets - - -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings +from cugraph.testing import utils, UNDIRECTED_DATASETS, SMALL_DATASETS -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx print("Networkx version : {} ".format(nx.__version__)) @@ -186,7 +174,7 @@ def networkx_call(graph_file, source, edgevals=True): # FIXME: tests with datasets like 'netscience' which has a weight column different # than than 1's fail because it looks like netwokX doesn't consider weights during # the computation. 
-DATASETS = [pytest.param(d) for d in datasets.DATASETS_SMALL] +DATASETS = [pytest.param(d) for d in SMALL_DATASETS] SOURCES = [pytest.param(1)] fixture_params = gen_fixture_params_product((DATASETS, "ds"), (SOURCES, "src")) fixture_params_single_dataset = gen_fixture_params_product( @@ -396,7 +384,7 @@ def test_sssp_networkx_edge_attr(): @pytest.mark.sg def test_scipy_api_compat(): - graph_file = datasets.DATASETS[0] + graph_file = SMALL_DATASETS[0] dataset_path = graph_file.get_path() input_cugraph_graph = graph_file.get_graph() input_coo_matrix = utils.create_obj_from_csv( @@ -465,7 +453,7 @@ def test_scipy_api_compat(): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", UNDIRECTED_DATASETS) def test_sssp_csr_graph(graph_file): df = graph_file.get_edgelist() @@ -507,7 +495,7 @@ def test_sssp_csr_graph(graph_file): @pytest.mark.sg def test_sssp_unweighted_graph(): - karate = DATASETS_UNDIRECTED[0] + karate = SMALL_DATASETS[0] G = karate.get_graph(ignore_weights=True) error_msg = ( diff --git a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py index 867f125ea6f..1720a051ee7 100644 --- a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py @@ -11,18 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest -import cugraph.dask as dcg import gc -# import pytest +import pytest + +import cudf import cugraph import dask_cudf -import cudf - -# from cugraph.dask.common.mg_utils import is_single_gpu +import cugraph.dask as dcg from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH + # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= diff --git a/python/cugraph/cugraph/tests/tree/test_maximum_spanning_tree.py b/python/cugraph/cugraph/tests/tree/test_maximum_spanning_tree.py index 8a3852595fc..824239dfea7 100644 --- a/python/cugraph/cugraph/tests/tree/test_maximum_spanning_tree.py +++ b/python/cugraph/cugraph/tests/tree/test_maximum_spanning_tree.py @@ -14,33 +14,26 @@ import time import gc +import rmm import pytest import numpy as np -import rmm -import cudf +import networkx as nx +import cudf import cugraph from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED_WEIGHTS +from cugraph.datasets import netscience -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx - print("Networkx version : {} ".format(nx.__version__)) +UNDIRECTED_WEIGHTED_DATASET = [netscience] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() @@ -55,7 +48,7 @@ def _get_param_args(param_name, param_values): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize("graph_file", UNDIRECTED_WEIGHTED_DATASET) def test_maximum_spanning_tree_nx(graph_file): # cugraph G = graph_file.get_graph() @@ -86,7 +79,7 @@ def test_maximum_spanning_tree_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize("graph_file", UNDIRECTED_WEIGHTED_DATASET) @pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False])) def test_maximum_spanning_tree_graph_repr_compat(graph_file, use_adjlist): G = graph_file.get_graph() diff --git a/python/cugraph/cugraph/tests/tree/test_minimum_spanning_tree.py b/python/cugraph/cugraph/tests/tree/test_minimum_spanning_tree.py index 6f36864f552..cef963af445 100644 --- a/python/cugraph/cugraph/tests/tree/test_minimum_spanning_tree.py +++ b/python/cugraph/cugraph/tests/tree/test_minimum_spanning_tree.py @@ -14,32 +14,26 @@ import time import gc -import pytest -import numpy as np import rmm import cudf +import pytest +import numpy as np +import networkx as nx import cugraph from cugraph.testing import utils -from cugraph.experimental.datasets import DATASETS_UNDIRECTED_WEIGHTS +from cugraph.datasets import netscience -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' 
is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx print("Networkx version : {} ".format(nx.__version__)) +UNDIRECTED_WEIGHTED_DATASET = [netscience] # ============================================================================= # Pytest Setup / Teardown - called for each test function # ============================================================================= + + def setup_function(): gc.collect() @@ -54,7 +48,7 @@ def _get_param_args(param_name, param_values): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize("graph_file", UNDIRECTED_WEIGHTED_DATASET) def test_minimum_spanning_tree_nx(graph_file): # cugraph G = graph_file.get_graph() @@ -83,7 +77,7 @@ def test_minimum_spanning_tree_nx(graph_file): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", DATASETS_UNDIRECTED_WEIGHTS) +@pytest.mark.parametrize("graph_file", UNDIRECTED_WEIGHTED_DATASET) @pytest.mark.parametrize(*_get_param_args("use_adjlist", [True, False])) def test_minimum_spanning_tree_graph_repr_compat(graph_file, use_adjlist): G = graph_file.get_graph() diff --git a/python/cugraph/cugraph/tests/utils/mg_context.py b/python/cugraph/cugraph/tests/utils/mg_context.py index 25ad4ee5e15..af147c7633d 100644 --- a/python/cugraph/cugraph/tests/utils/mg_context.py +++ b/python/cugraph/cugraph/tests/utils/mg_context.py @@ -16,11 +16,10 @@ import pytest +import cugraph.dask.comms as Comms from dask.distributed import Client - -from cugraph.dask.common.mg_utils import get_visible_devices from dask_cuda import LocalCUDACluster as CUDACluster -import cugraph.dask.comms as Comms +from cugraph.dask.common.mg_utils import get_visible_devices # Maximal number of verifications of the number of 
workers diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index e72de2ecf8a..643d0468d46 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -11,22 +11,24 @@ # See the License for the specific language governing permissions and # limitations under the License. - import os +import gc +import sys +import warnings from pathlib import Path from tempfile import TemporaryDirectory -import gc import pytest +import cudf from cugraph.structure import Graph -from cugraph.testing import RAPIDS_DATASET_ROOT_DIR_PATH -from cugraph.experimental.datasets import ( +from cugraph.testing import ( + RAPIDS_DATASET_ROOT_DIR_PATH, ALL_DATASETS, - ALL_DATASETS_WGT, + WEIGHTED_DATASETS, SMALL_DATASETS, ) -from cugraph.experimental import datasets +from cugraph import datasets # Add the sg marker to all tests in this module. pytestmark = pytest.mark.sg @@ -72,6 +74,51 @@ def setup(tmpdir): gc.collect() +@pytest.fixture() +def setup_deprecation_warning_tests(): + """ + Fixture used to set warning filters to 'default' and reload + experimental.datasets module if it has been previously + imported. 
Tests that import this fixture are expected to + import cugraph.experimental.datasets + """ + warnings.filterwarnings("default") + + if "cugraph.experimental.datasets" in sys.modules: + del sys.modules["cugraph.experimental.datasets"] + + yield + + +############################################################################### +# Helpers + +# check if there is a row where src == dst +def has_loop(df): + df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True) + res = df.where(df["src"] == df["dst"]) + + return res.notnull().values.any() + + +# check if dataset object is symmetric +def is_symmetric(dataset): + # undirected graphs are symmetric + if not dataset.metadata["is_directed"]: + return True + else: + df = dataset.get_edgelist(download=True) + df_a = df.sort_values("src") + df_b = df_a[["dst", "src", "wgt"]] + df_b.rename(columns={"dst": "src", "src": "dst"}, inplace=True) + # created a df by appending the two + res = cudf.concat([df_a, df_b]) + # sort/unique + res = res.drop_duplicates().sort_values("src") + + return len(df_a) == len(res) + + ############################################################################### # Tests @@ -103,8 +150,8 @@ def test_set_download_dir(): @pytest.mark.parametrize("dataset", ALL_DATASETS) -def test_fetch(dataset): - E = dataset.get_edgelist(fetch=True) +def test_download(dataset): + E = dataset.get_edgelist(download=True) assert E is not None assert dataset.get_path().is_file() @@ -112,13 +159,13 @@ def test_fetch(dataset): @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_edgelist(dataset): - E = dataset.get_edgelist(fetch=True) + E = dataset.get_edgelist(download=True) assert E is not None @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_graph(dataset): - G = dataset.get_graph(fetch=True) + G = dataset.get_graph(download=True) assert G is not None @@ -133,32 +180,32 @@ def test_metadata(dataset): def test_get_path(dataset): tmpd = TemporaryDirectory() 
datasets.set_download_dir(tmpd.name) - dataset.get_edgelist(fetch=True) + dataset.get_edgelist(download=True) assert dataset.get_path().is_file() tmpd.cleanup() -@pytest.mark.parametrize("dataset", ALL_DATASETS_WGT) +@pytest.mark.parametrize("dataset", WEIGHTED_DATASETS) def test_weights(dataset): - G = dataset.get_graph(fetch=True) + G = dataset.get_graph(download=True) assert G.is_weighted() - G = dataset.get_graph(fetch=True, ignore_weights=True) + G = dataset.get_graph(download=True, ignore_weights=True) assert not G.is_weighted() @pytest.mark.parametrize("dataset", SMALL_DATASETS) def test_create_using(dataset): - G = dataset.get_graph(fetch=True) + G = dataset.get_graph(download=True) assert not G.is_directed() - G = dataset.get_graph(fetch=True, create_using=Graph) + G = dataset.get_graph(download=True, create_using=Graph) assert not G.is_directed() - G = dataset.get_graph(fetch=True, create_using=Graph(directed=True)) + G = dataset.get_graph(download=True, create_using=Graph(directed=True)) assert G.is_directed() def test_ctor_with_datafile(): - from cugraph.experimental.datasets import karate + from cugraph.datasets import karate karate_csv = RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv" @@ -185,9 +232,9 @@ def test_ctor_with_datafile(): csv_col_dtypes=["int32", "int32", "float32"], ) - expected_karate_edgelist = karate.get_edgelist(fetch=True) + expected_karate_edgelist = karate.get_edgelist(download=True) - # test with file path as string, ensure fetch=True does not break + # test with file path as string, ensure download=True does not break ds = datasets.Dataset( csv_file=karate_csv.as_posix(), csv_col_names=["src", "dst", "wgt"], @@ -239,3 +286,72 @@ def test_unload(): assert ds._edgelist is not None ds.unload() assert ds._edgelist is None + + +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_node_and_edge_count(dataset): + dataset_is_directed = dataset.metadata["is_directed"] + G = dataset.get_graph( + download=True, 
create_using=Graph(directed=dataset_is_directed) + ) + + assert G.number_of_nodes() == dataset.metadata["number_of_nodes"] + assert G.number_of_edges() == dataset.metadata["number_of_edges"] + + +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_is_directed(dataset): + dataset_is_directed = dataset.metadata["is_directed"] + G = dataset.get_graph( + download=True, create_using=Graph(directed=dataset_is_directed) + ) + + assert G.is_directed() == dataset.metadata["is_directed"] + + +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_has_loop(dataset): + df = dataset.get_edgelist(download=True) + + assert has_loop(df) == dataset.metadata["has_loop"] + + +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_is_symmetric(dataset): + assert is_symmetric(dataset) == dataset.metadata["is_symmetric"] + + +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_is_multigraph(dataset): + G = dataset.get_graph(download=True) + + assert G.is_multigraph() == dataset.metadata["is_multigraph"] + + +# +# Test experimental for DeprecationWarnings +# +def test_experimental_dataset_import(setup_deprecation_warning_tests): + with pytest.deprecated_call(): + from cugraph.experimental.datasets import karate + + # unload() is called to pass flake8 + karate.unload() + + +def test_experimental_method_warnings(setup_deprecation_warning_tests): + from cugraph.experimental.datasets import ( + load_all, + set_download_dir, + get_download_dir, + ) + + warnings.filterwarnings("default") + tmpd = TemporaryDirectory() + + with pytest.deprecated_call(): + set_download_dir(tmpd.name) + get_download_dir() + load_all() + + tmpd.cleanup() diff --git a/python/cugraph/cugraph/tests/utils/test_replication_mg.py b/python/cugraph/cugraph/tests/utils/test_replication_mg.py index 95e7c1c7dbd..2f9c0d0189b 100644 --- a/python/cugraph/cugraph/tests/utils/test_replication_mg.py +++ b/python/cugraph/cugraph/tests/utils/test_replication_mg.py @@ -11,16 +11,17 @@ # See the License for 
the specific language governing permissions and # limitations under the License. -import pytest import gc +import pytest -import cudf -from cudf.testing import assert_series_equal, assert_frame_equal +import cudf import cugraph +import cugraph.testing.utils as utils import cugraph.dask.structure.replication as replication from cugraph.dask.common.mg_utils import is_single_gpu -import cugraph.testing.utils as utils +from cudf.testing import assert_series_equal, assert_frame_equal + DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] diff --git a/python/cugraph/cugraph/tests/utils/test_utils.py b/python/cugraph/cugraph/tests/utils/test_utils.py index c6956cfbfcf..9d35b326f6d 100644 --- a/python/cugraph/cugraph/tests/utils/test_utils.py +++ b/python/cugraph/cugraph/tests/utils/test_utils.py @@ -14,12 +14,12 @@ import gc import pytest +import numpy as np -import cugraph import cudf +import cugraph from cugraph.testing import utils -from cugraph.experimental.datasets import karate -import numpy as np +from cugraph.datasets import karate @pytest.mark.sg diff --git a/python/cugraph/cugraph/tests/utils/test_utils_mg.py b/python/cugraph/cugraph/tests/utils/test_utils_mg.py index 68e76302616..23ff17aa00b 100644 --- a/python/cugraph/cugraph/tests/utils/test_utils_mg.py +++ b/python/cugraph/cugraph/tests/utils/test_utils_mg.py @@ -11,21 +11,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import cugraph.dask as dcg -from dask.distributed import default_client, futures_of, wait import gc -import cugraph -import dask_cudf -import pytest -from cugraph.dask.common.part_utils import concat_within_workers -from cugraph.dask.common.read_utils import get_n_workers -from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH - import os import time + +import pytest import numpy as np + +import cugraph +import dask_cudf +import cugraph.dask as dcg from cugraph.testing import utils +from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.read_utils import get_n_workers +from dask.distributed import default_client, futures_of, wait +from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.dask.common.part_utils import concat_within_workers # ============================================================================= diff --git a/python/cugraph/cugraph/traversal/bfs.py b/python/cugraph/cugraph/traversal/bfs.py index f2c1f5c5662..cad96947f8b 100644 --- a/python/cugraph/cugraph/traversal/bfs.py +++ b/python/cugraph/cugraph/traversal/bfs.py @@ -197,8 +197,8 @@ def bfs( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.bfs(G, 0) """ @@ -315,8 +315,8 @@ def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> df = cugraph.bfs_edges(G, 0) """ diff --git a/python/cugraph/cugraph/traversal/sssp.py b/python/cugraph/cugraph/traversal/sssp.py index c2705b70383..5ab97e60390 100644 --- a/python/cugraph/cugraph/traversal/sssp.py +++ b/python/cugraph/cugraph/traversal/sssp.py @@ -198,8 +198,8 
@@ def sssp( Examples -------- - >>> from cugraph.experimental.datasets import karate - >>> G = karate.get_graph(fetch=True) + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) >>> distances = cugraph.sssp(G, 0) >>> distances distance vertex predecessor diff --git a/python/cugraph/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/cugraph/tree/minimum_spanning_tree.py index 821e5b38fec..b297042f199 100644 --- a/python/cugraph/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/cugraph/tree/minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -91,8 +91,8 @@ def minimum_spanning_tree(G, weight=None, algorithm="boruvka", ignore_nan=False) Examples -------- - >>> from cugraph.experimental.datasets import netscience - >>> G = netscience.get_graph(fetch=True) + >>> from cugraph.datasets import netscience + >>> G = netscience.get_graph(download=True) >>> G_mst = cugraph.minimum_spanning_tree(G) """ @@ -134,8 +134,8 @@ def maximum_spanning_tree(G, weight=None, algorithm="boruvka", ignore_nan=False) Examples -------- - >>> from cugraph.experimental.datasets import netscience - >>> G = netscience.get_graph(fetch=True) + >>> from cugraph.datasets import netscience + >>> G = netscience.get_graph(download=True) >>> G_mst = cugraph.maximum_spanning_tree(G) """ From 9a60e51c81ee75b50207de74a268feda7027e200 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Fri, 28 Jul 2023 09:25:45 -0400 Subject: [PATCH 02/10] Fix `build.yaml` workflow (#3756) This PR fixes the `build.yaml` workflow. The `node_type` reusable workflow input was placed incorrectly in #3712. Skipping CI since this file isn't tested in PRs. 
[skip ci] --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 74838271093..e257fcc90a8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,7 +27,6 @@ concurrency: jobs: cpp-build: - node_type: cpu32 secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.08 with: @@ -35,6 +34,7 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + node_type: cpu32 python-build: needs: [cpp-build] secrets: inherit From f2b49123947ecef3d3e48348c67c1013586a72b0 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:36:49 -0500 Subject: [PATCH 03/10] Fixes options for `--pydevelop` to remove unneeded CWD path ("."), restores use of `setup.py` temporarily for develop builds (#3747) This PR adds the following: * Removes the now unnecessary "." path used in the install options when `--pydevelop` is used, which resulted in an error when called. * Temporarily adds a call to `setup.py build_ext --inplace` only when `--pydevelop` is used to work around a [bug in scikit-build](https://github.com/scikit-build/scikit-build/issues/981) when `setuptools>=64` is installed. Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cugraph/pull/3747 --- build.sh | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 18359229822..a4232bdaed0 100755 --- a/build.sh +++ b/build.sh @@ -175,7 +175,7 @@ if hasArg --cmake_default_generator; then CMAKE_GENERATOR_OPTION="" fi if hasArg --pydevelop; then - PYTHON_ARGS_FOR_INSTALL="-m pip install --no-build-isolation --no-deps -e ."
+ PYTHON_ARGS_FOR_INSTALL="-m pip install --no-build-isolation --no-deps -e" fi # Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. @@ -302,6 +302,22 @@ if buildAll || hasArg pylibcugraph; then if hasArg --clean; then cleanPythonDir ${REPODIR}/python/pylibcugraph else + # FIXME: skbuild with setuptools>=64 has a bug when called from a "pip + # install -e" command, resulting in a broken editable wheel. Continue + # to use "setup.py build_ext --inplace" for a develop build until + # https://github.com/scikit-build/scikit-build/issues/981 is closed. + if hasArg --pydevelop; then + cd ${REPODIR}/python/pylibcugraph + python setup.py build_ext \ + --inplace \ + -- \ + -DFIND_CUGRAPH_CPP=ON \ + -DUSE_CUGRAPH_OPS=${BUILD_WITH_CUGRAPHOPS} \ + -Dcugraph_ROOT=${LIBCUGRAPH_BUILD_DIR} \ + -- \ + -j${PARALLEL_LEVEL:-1} + cd - + fi SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS} -DUSE_CUGRAPH_OPS=${BUILD_WITH_CUGRAPHOPS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \ python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/pylibcugraph @@ -313,6 +329,22 @@ if buildAll || hasArg cugraph; then if hasArg --clean; then cleanPythonDir ${REPODIR}/python/cugraph else + # FIXME: skbuild with setuptools>=64 has a bug when called from a "pip + # install -e" command, resulting in a broken editable wheel. Continue + # to use "setup.py build_ext --inplace" for a develop build until + # https://github.com/scikit-build/scikit-build/issues/981 is closed.
+ if hasArg --pydevelop; then + cd ${REPODIR}/python/cugraph + python setup.py build_ext \ + --inplace \ + -- \ + -DFIND_CUGRAPH_CPP=ON \ + -DUSE_CUGRAPH_OPS=${BUILD_WITH_CUGRAPHOPS} \ + -Dcugraph_ROOT=${LIBCUGRAPH_BUILD_DIR} \ + -- \ + -j${PARALLEL_LEVEL:-1} + cd - + fi SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS} -DUSE_CUGRAPH_OPS=${BUILD_WITH_CUGRAPHOPS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \ python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/cugraph From be2a63ed77f8bfa7f3cd2588cc0953853acdc06f Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 28 Jul 2023 21:37:53 +0200 Subject: [PATCH 04/10] Support MFG creation on sampling gpus for cugraph dgl (#3742) This PR builds on top of https://github.com/rapidsai/cugraph/pull/3734 Merge after https://github.com/rapidsai/cugraph/pull/3734 Todo tests for: - [x] _get_renumber_map - [x] _get_tensor_d_from_sampled_df - [x] create_homogeneous_sampled_graphs_from_dataframe - [x] pytests pass - [x] dataset_from_disk_cudf.ipynb (`obgn_products_sampling/`) - [x] Verify training benchmark Authors: - Vibhu Jawa (https://github.com/VibhuJawa) - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) - Alex Barghi (https://github.com/alexbarghi-nv) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/3742 --- .../cugraph_dgl/dataloading/dataloader.py | 12 + .../cugraph_dgl/dataloading/dataset.py | 5 +- .../dataloading/utils/sampling_helpers.py | 221 +++++++++++------- .../examples/dataset_from_disk_cudf.ipynb | 14 +- python/cugraph-dgl/tests/test_dataset.py | 19 +- python/cugraph-dgl/tests/test_utils.py | 121 +++++++++- 6 files changed, 298 insertions(+), 94 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index e3358f1dca6..0480f61807a 100644 --- 
a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -210,6 +210,15 @@ def __iter__(self): output_dir = os.path.join( self._sampling_output_dir, "epoch_" + str(self.epoch_number) ) + if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): + deduplicate_sources = True + prior_sources_behavior = "carryover" + renumber = True + else: + deduplicate_sources = False + prior_sources_behavior = None + renumber = False + bs = BulkSampler( output_path=output_dir, batch_size=self._batch_size, @@ -218,6 +227,9 @@ def __iter__(self): seeds_per_call=self._seeds_per_call, fanout_vals=self.graph_sampler._reversed_fanout_vals, with_replacement=self.graph_sampler.replace, + deduplicate_sources=deduplicate_sources, + prior_sources_behavior=prior_sources_behavior, + renumber=renumber, ) if self.shuffle: self.tensorized_indices_ds.shuffle() diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py index a4d848de66f..57f7db3be01 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py @@ -33,6 +33,9 @@ def __init__( total_number_of_nodes: int, edge_dir: str, ): + # TODO: Deprecate `total_number_of_nodes` + # as it is no longer needed + # in the next release self.total_number_of_nodes = total_number_of_nodes self.edge_dir = edge_dir self._current_batch_fn = None @@ -52,7 +55,7 @@ def __getitem__(self, idx: int): if fn != self._current_batch_fn: df = _load_sampled_file(dataset_obj=self, fn=fn) self._current_batches = create_homogeneous_sampled_graphs_from_dataframe( - df, self.total_number_of_nodes, self.edge_dir + df, self.edge_dir ) current_offset = idx - batch_offset return self._current_batches[current_offset] diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 
051464f08bb..9fc0f6a559b 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -29,130 +29,189 @@ def cast_to_tensor(ser: cudf.Series): return torch.as_tensor(ser.values, device="cuda") -def _get_tensor_ls_from_sampled_df(df): +def _split_tensor(t, split_indices): + """ + Split a tensor into a list of tensors based on split_indices. + """ + # TODO: Switch to something below + # return [t[i:j] for i, j in zip(split_indices[:-1], split_indices[1:])] + if split_indices.device.type != "cpu": + split_indices = split_indices.to("cpu") + return torch.tensor_split(t, split_indices) + + +def _get_renumber_map(df): + map = df["map"] + df.drop(columns=["map"], inplace=True) + + map_starting_offset = map.iloc[0] + renumber_map = map[map_starting_offset:].dropna().reset_index(drop=True) + renumber_map_batch_indices = map[1 : map_starting_offset - 1].reset_index(drop=True) + renumber_map_batch_indices = renumber_map_batch_indices - map_starting_offset + + # Drop all rows with NaN values + df.dropna(axis=0, how="all", inplace=True) + df.reset_index(drop=True, inplace=True) + + return df, cast_to_tensor(renumber_map), cast_to_tensor(renumber_map_batch_indices) + + +def _get_tensor_d_from_sampled_df(df): """ Converts a sampled cuDF DataFrame into a list of tensors. Args: df (cudf.DataFrame): The sampled cuDF DataFrame containing columns - 'batch_id', 'sources', 'destinations', 'edge_id', and 'hop_id'. - Returns: - list: A list of tuples, where each tuple contains three tensors: - 'sources', 'destinations', and 'edge_id'. - The tensors are split based on 'batch_id' and 'hop_id'. - + dict: A dictionary of tensors, keyed by batch_id and hop_id. 
""" + df, renumber_map, renumber_map_batch_indices = _get_renumber_map(df) batch_id_tensor = cast_to_tensor(df["batch_id"]) + batch_id_min = batch_id_tensor.min() + batch_id_max = batch_id_tensor.max() batch_indices = torch.arange( - start=batch_id_tensor.min() + 1, - end=batch_id_tensor.max() + 1, + start=batch_id_min + 1, + end=batch_id_max + 1, device=batch_id_tensor.device, ) - batch_indices = torch.searchsorted(batch_id_tensor, batch_indices) - - split_d = {} - - for column in ["sources", "destinations", "edge_id", "hop_id"]: - if column in df.columns: - tensor = cast_to_tensor(df[column]) - split_d[column] = torch.tensor_split(tensor, batch_indices.cpu()) + # TODO: Fix below + # batch_indices = _get_id_tensor_boundaries(batch_id_tensor) + batch_indices = torch.searchsorted(batch_id_tensor, batch_indices).to("cpu") + split_d = {i: {} for i in range(batch_id_min, batch_id_max + 1)} + + for column in df.columns: + if column != "batch_id": + t = cast_to_tensor(df[column]) + split_t = _split_tensor(t, batch_indices) + for bid, batch_t in zip(split_d.keys(), split_t): + split_d[bid][column] = batch_t + + split_t = _split_tensor(renumber_map, renumber_map_batch_indices) + for bid, batch_t in zip(split_d.keys(), split_t): + split_d[bid]["map"] = batch_t + del df + result_tensor_d = {} + for batch_id, batch_d in split_d.items(): + hop_id_tensor = batch_d["hop_id"] + hop_id_min = hop_id_tensor.min() + hop_id_max = hop_id_tensor.max() - result_tensor_ls = [] - for i, hop_id_tensor in enumerate(split_d["hop_id"]): hop_indices = torch.arange( - start=hop_id_tensor.min() + 1, - end=hop_id_tensor.max() + 1, + start=hop_id_min + 1, + end=hop_id_max + 1, device=hop_id_tensor.device, ) - hop_indices = torch.searchsorted(hop_id_tensor, hop_indices) - s = torch.tensor_split(split_d["sources"][i], hop_indices.cpu()) - d = torch.tensor_split(split_d["destinations"][i], hop_indices.cpu()) - if "edge_id" in split_d: - eid = torch.tensor_split(split_d["edge_id"][i], 
hop_indices.cpu()) - else: - eid = [None] * len(s) - - result_tensor_ls.append((x, y, z) for x, y, z in zip(s, d, eid)) - - return result_tensor_ls + # TODO: Fix below + # hop_indices = _get_id_tensor_boundaries(hop_id_tensor) + hop_indices = torch.searchsorted(hop_id_tensor, hop_indices).to("cpu") + hop_split_d = {i: {} for i in range(hop_id_min, hop_id_max + 1)} + for column, t in batch_d.items(): + if column not in ["hop_id", "map"]: + split_t = _split_tensor(t, hop_indices) + for hid, ht in zip(hop_split_d.keys(), split_t): + hop_split_d[hid][column] = ht + + result_tensor_d[batch_id] = hop_split_d + if "map" in batch_d: + result_tensor_d[batch_id]["map"] = batch_d["map"] + return result_tensor_d def create_homogeneous_sampled_graphs_from_dataframe( sampled_df: cudf.DataFrame, - total_number_of_nodes: int, edge_dir: str = "in", ): """ This helper function creates DGL MFGS for homogeneous graphs from cugraph sampled dataframe + + Args: + sampled_df (cudf.DataFrame): The sampled cuDF DataFrame containing + columns `sources`, `destinations`, `edge_id`, `batch_id` and + `hop_id`. + edge_dir (str): Direction of edges from samples + Returns: + list: A list containing three elements: + - input_nodes: The input nodes for the batch. + - output_nodes: The output nodes for the batch. + - graph_per_hop_ls: A list of DGL MFGS for each hop. 
""" - result_tensor_ls = _get_tensor_ls_from_sampled_df(sampled_df) + result_tensor_d = _get_tensor_d_from_sampled_df(sampled_df) + del sampled_df result_mfgs = [ _create_homogeneous_sampled_graphs_from_tensors_perhop( - tensors_perhop_ls, total_number_of_nodes, edge_dir + tensors_batch_d, edge_dir ) - for tensors_perhop_ls in result_tensor_ls + for tensors_batch_d in result_tensor_d.values() ] - del result_tensor_ls + del result_tensor_d return result_mfgs -def _create_homogeneous_sampled_graphs_from_tensors_perhop( - tensors_perhop_ls, total_number_of_nodes, edge_dir -): +def _create_homogeneous_sampled_graphs_from_tensors_perhop(tensors_batch_d, edge_dir): + """ + This helper function creates sampled DGL MFGS for + homogeneous graphs from tensors per hop for a single + batch + + Args: + tensors_batch_d (dict): A dictionary of tensors, keyed by hop_id. + edge_dir (str): Direction of edges from samples + Returns: + tuple: A tuple of three elements: + - input_nodes: The input nodes for the batch. + - output_nodes: The output nodes for the batch. + - graph_per_hop_ls: A list of DGL MFGS for each hop. 
+ """ if edge_dir not in ["in", "out"]: raise ValueError(f"Invalid edge_dir {edge_dir} provided") if edge_dir == "out": raise ValueError("Outwards edges not supported yet") graph_per_hop_ls = [] - output_nodes = None - seed_nodes = None - for src_ids, dst_ids, edge_ids in tensors_perhop_ls: - # print("Creating block", flush=True) - block = create_homogeneous_dgl_block_from_tensors_ls( - src_ids=src_ids, - dst_ids=dst_ids, - edge_ids=edge_ids, - seed_nodes=seed_nodes, - total_number_of_nodes=total_number_of_nodes, - ) - seed_nodes = block.srcdata[dgl.NID] - if output_nodes is None: - output_nodes = block.dstdata[dgl.NID] - graph_per_hop_ls.append(block) + seednodes = None + for hop_id, tensor_per_hop_d in tensors_batch_d.items(): + if hop_id != "map": + block = _create_homogeneous_dgl_block_from_tensor_d( + tensor_per_hop_d, tensors_batch_d["map"], seednodes + ) + seednodes = torch.concat( + [tensor_per_hop_d["sources"], tensor_per_hop_d["destinations"]] + ) + graph_per_hop_ls.append(block) # default DGL behavior if edge_dir == "in": graph_per_hop_ls.reverse() - return seed_nodes, output_nodes, graph_per_hop_ls - - -def create_homogeneous_dgl_block_from_tensors_ls( - src_ids: torch.Tensor, - dst_ids: torch.Tensor, - edge_ids: Optional[torch.Tensor], - seed_nodes: Optional[torch.Tensor], - total_number_of_nodes: int, -): - sampled_graph = dgl.graph( - (src_ids, dst_ids), - num_nodes=total_number_of_nodes, - ) - if edge_ids is not None: - sampled_graph.edata[dgl.EID] = edge_ids - # TODO: Check if unique is needed - if seed_nodes is None: - seed_nodes = dst_ids.unique() - block = dgl.to_block( - sampled_graph, - dst_nodes=seed_nodes, - src_nodes=src_ids.unique(), - include_dst_in_src=True, + input_nodes = graph_per_hop_ls[0].srcdata[dgl.NID] + output_nodes = graph_per_hop_ls[-1].dstdata[dgl.NID] + return input_nodes, output_nodes, graph_per_hop_ls + + +def _create_homogeneous_dgl_block_from_tensor_d(tensor_d, renumber_map, seednodes=None): + rs = tensor_d["sources"] + 
rd = tensor_d["destinations"] + + max_src_nodes = rs.max() + max_dst_nodes = rd.max() + if seednodes is not None: + # If we have isolated vertices + # sources can be missing from seednodes + # so we add them + # to ensure all the blocks are + # lined up correctly + max_dst_nodes = max(max_dst_nodes, seednodes.max()) + + data_dict = {("_N", "_E", "_N"): (rs, rd)} + num_src_nodes = {"_N": max_src_nodes.item() + 1} + num_dst_nodes = {"_N": max_dst_nodes.item() + 1} + block = dgl.create_block( + data_dict=data_dict, num_src_nodes=num_src_nodes, num_dst_nodes=num_dst_nodes ) - if edge_ids is not None: - block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] + if "edge_id" in tensor_d: + block.edata[dgl.EID] = tensor_d["edge_id"] + block.srcdata[dgl.NID] = renumber_map[block.srcnodes()] + block.dstdata[dgl.NID] = renumber_map[block.dstnodes()] return block diff --git a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb b/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb index 97fefddb5ef..15708f5dea6 100644 --- a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb +++ b/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb @@ -24,7 +24,7 @@ "import torch\n", "from rmm.allocators.torch import rmm_torch_allocator\n", "rmm.reinitialize(initial_pool_size=15e9)\n", - "#Switch to async pool in case of memory issues due to fragmentation of the pool\n", + "#Switch to async pool in case of memory issues due to fragmentation of the pool\n", "#rmm.mr.set_current_device_resource(rmm.mr.CudaAsyncMemoryResource(initial_pool_size=15e9))\n", "torch.cuda.memory.change_current_allocator(rmm_torch_allocator)" ] @@ -106,7 +106,7 @@ "g, train_idx = load_dgl_dataset()\n", "g = cugraph_dgl.cugraph_storage_from_heterograph(g, single_gpu=single_gpu)\n", "\n", - "batch_size = 1024\n", + "batch_size = 1024*2\n", "fanout_vals=[25, 25]\n", "sampler = cugraph_dgl.dataloading.NeighborSampler(fanout_vals)\n", "dataloader = cugraph_dgl.dataloading.DataLoader(\n", @@ -135,7 +135,7 @@
"name": "stdout", "output_type": "stream", "text": [ - "7.25 s ± 916 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "7.08 s ± 596 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -190,8 +190,8 @@ "outputs": [], "source": [ "g, train_idx = load_dgl_dataset()\n", - "batch_size = 1024\n", - "fanout_vals=[25, 25]\n", + "batch_size = 1024*2\n", + "fanout_vals = [25, 25]\n", "sampler = dgl.dataloading.MultiLayerNeighborSampler(fanout_vals)\n", "dataloader = dgl.dataloading.DataLoader(\n", " g, \n", @@ -217,7 +217,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4.22 s ± 345 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "7.34 s ± 353 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], @@ -256,7 +256,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/python/cugraph-dgl/tests/test_dataset.py b/python/cugraph-dgl/tests/test_dataset.py index b50aeba9f72..a1da77721a3 100644 --- a/python/cugraph-dgl/tests/test_dataset.py +++ b/python/cugraph-dgl/tests/test_dataset.py @@ -49,6 +49,7 @@ def create_dgl_mfgs(g, seed_nodes, fanout): def create_cugraph_dgl_homogenous_mfgs(g, seed_nodes, fanout): df_ls = [] + unique_vertices_ls = [] for hop_id, fanout in enumerate(reversed(fanout)): frontier = g.sample_neighbors(seed_nodes, fanout) # Set include_dst_in_src to match cugraph behavior @@ -57,9 +58,11 @@ def create_cugraph_dgl_homogenous_mfgs(g, seed_nodes, fanout): seed_nodes = block.srcdata[dgl.NID] block = block.to("cpu") src, dst, eid = block.edges("all") - src = block.srcdata[dgl.NID][src] - dst = block.dstdata[dgl.NID][dst] eid = block.edata[dgl.EID][eid] + unique_vertices = pd.concat( + [pd.Series(dst.numpy()), pd.Series(src.numpy())] + ).drop_duplicates(keep="first") + unique_vertices_ls.append(unique_vertices) df = cudf.DataFrame( { "sources": cp.asarray(src), @@ -71,7 +74,17 @@ def 
create_cugraph_dgl_homogenous_mfgs(g, seed_nodes, fanout): df_ls.append(df) df = cudf.concat(df_ls, ignore_index=True) df["batch_id"] = 0 - return create_homogeneous_sampled_graphs_from_dataframe(df, g.num_nodes())[0] + + # Add map column + # to the dataframe + renumbered_map = pd.concat(unique_vertices_ls).drop_duplicates(keep="first").values + offsets = np.asarray([2, 2 + len(renumbered_map)]) + map_ar = np.concatenate([offsets, renumbered_map]) + map_ser = cudf.Series(map_ar) + # Have to reindex because map_ser can be of larger length than df + df = df.reindex(df.index.union(map_ser.index)) + df["map"] = map_ser + return create_homogeneous_sampled_graphs_from_dataframe(df)[0] @pytest.mark.parametrize("seed_node", [3, 4, 5]) diff --git a/python/cugraph-dgl/tests/test_utils.py b/python/cugraph-dgl/tests/test_utils.py index cee255d85b0..fd75b1537b5 100644 --- a/python/cugraph-dgl/tests/test_utils.py +++ b/python/cugraph-dgl/tests/test_utils.py @@ -13,9 +13,18 @@ import cudf import cupy as cp -import torch +import numpy as np +from cugraph_dgl.dataloading.utils.sampling_helpers import ( + cast_to_tensor, + _get_renumber_map, + _split_tensor, + _get_tensor_d_from_sampled_df, + create_homogeneous_sampled_graphs_from_dataframe, +) +from cugraph.utilities.utils import import_optional -from cugraph_dgl.dataloading.utils.sampling_helpers import cast_to_tensor +dgl = import_optional("dgl") +torch = import_optional("torch") def test_casting_empty_array(): @@ -23,3 +32,111 @@ def test_casting_empty_array(): ser = cudf.Series(ar) output_tensor = cast_to_tensor(ser) assert output_tensor.dtype == torch.int32 + + +def get_dummy_sampled_df(): + df = cudf.DataFrame() + df["sources"] = [0, 0, 1, 0, 0, 1, 0, 0, 2] + [np.nan] * 4 + df["destinations"] = [1, 2, 0, 1, 2, 1, 2, 0, 1] + [np.nan] * 4 + df["batch_id"] = [0, 0, 0, 1, 1, 1, 2, 2, 2] + [np.nan] * 4 + df["hop_id"] = [0, 1, 1, 0, 1, 1, 0, 1, 1] + [np.nan] * 4 + df["map"] = [4, 7, 10, 13, 10, 11, 12, 13, 14, 15, 16, 17, 18] + df =
df.astype("int32") + df["hop_id"] = df["hop_id"].astype("uint8") + df["map"] = df["map"].astype("int64") + return df + + +def test_get_renumber_map(): + + sampled_df = get_dummy_sampled_df() + + df, renumber_map, renumber_map_batch_indices = _get_renumber_map(sampled_df) + + # Ensure that map was dropped + assert "map" not in df.columns + + expected_map = torch.as_tensor( + [10, 11, 12, 13, 14, 15, 16, 17, 18], dtype=torch.int32, device="cuda" + ) + assert torch.equal(renumber_map, expected_map) + + expected_batch_indices = torch.as_tensor([3, 6], dtype=torch.int32, device="cuda") + assert torch.equal(renumber_map_batch_indices, expected_batch_indices) + + # Ensure we dropped the Nans for rows corresponding to the renumber_map + assert len(df) == 9 + + t_ls = _split_tensor(renumber_map, renumber_map_batch_indices) + assert torch.equal( + t_ls[0], torch.as_tensor([10, 11, 12], dtype=torch.int64, device="cuda") + ) + assert torch.equal( + t_ls[1], torch.as_tensor([13, 14, 15], dtype=torch.int64, device="cuda") + ) + assert torch.equal( + t_ls[2], torch.as_tensor([16, 17, 18], dtype=torch.int64, device="cuda") + ) + + +def test_get_tensor_d_from_sampled_df(): + df = get_dummy_sampled_df() + tensor_d = _get_tensor_d_from_sampled_df(df) + + expected_maps = {} + expected_maps[0] = torch.as_tensor([10, 11, 12], dtype=torch.int64, device="cuda") + expected_maps[1] = torch.as_tensor([13, 14, 15], dtype=torch.int64, device="cuda") + expected_maps[2] = torch.as_tensor([16, 17, 18], dtype=torch.int64, device="cuda") + + for batch_id, batch_td in tensor_d.items(): + batch_df = df[df["batch_id"] == batch_id] + for hop_id, hop_t in batch_td.items(): + if hop_id != "map": + hop_df = batch_df[batch_df["hop_id"] == hop_id] + assert torch.equal(hop_t["sources"], cast_to_tensor(hop_df["sources"])) + assert torch.equal( + hop_t["destinations"], cast_to_tensor(hop_df["destinations"]) + ) + + assert torch.equal(batch_td["map"], expected_maps[batch_id]) + + +def 
test_create_homogeneous_sampled_graphs_from_dataframe(): + sampler = dgl.dataloading.MultiLayerNeighborSampler([2, 2]) + g = dgl.graph(([0, 10, 20], [0, 0, 10])).to("cuda") + dgl_input_nodes, dgl_output_nodes, dgl_blocks = sampler.sample_blocks( + g, torch.as_tensor([0]).to("cuda") + ) + + # Directions are reversed in dgl + s1, d1 = dgl_blocks[0].edges() + s0, d0 = dgl_blocks[1].edges() + srcs = cp.concatenate([cp.asarray(s0), cp.asarray(s1)]) + dsts = cp.concatenate([cp.asarray(d0), cp.asarray(d1)]) + + nids = dgl_blocks[0].srcdata[dgl.NID] + nids = cp.concatenate( + [cp.asarray([2]), cp.asarray([len(nids) + 2]), cp.asarray(nids)] + ) + + df = cudf.DataFrame() + df["sources"] = srcs + df["destinations"] = dsts + df["hop_id"] = [0] * len(s0) + [1] * len(s1) + df["batch_id"] = 0 + df["map"] = nids + + ( + cugraph_input_nodes, + cugraph_output_nodes, + cugraph_blocks, + ) = create_homogeneous_sampled_graphs_from_dataframe(df)[0] + + assert torch.equal(dgl_input_nodes, cugraph_input_nodes) + assert torch.equal(dgl_output_nodes, cugraph_output_nodes) + + for c_block, d_block in zip(cugraph_blocks, dgl_blocks): + ce, cd = c_block.edges() + de, dd = d_block.edges() + assert torch.equal(ce, de) + assert torch.equal(cd, dd) From 14862c66501a5c8e076ab9b545032b86968418be Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Mon, 31 Jul 2023 16:02:48 +0200 Subject: [PATCH 05/10] Update primitive to compute weighted Jaccard, Sorensen and Overlap similarity (#3728) This PR - changes `per_v_pair_transform_dst_nbr_intersection` to support computing weighted intersection - updates implementation of `similarity`, `jaccard_coefficients`, `sorensen_coefficients`, `overlap_coefficients` for weighted graphs NOTE: current implementation doesn't support computing similarity for multi-edge graphs. 
closes #2748 closes #3477 Authors: - Naim (https://github.com/naimnv) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/3728 --- cpp/src/c_api/similarity.cpp | 2 +- cpp/src/link_prediction/jaccard_impl.cuh | 37 +- cpp/src/link_prediction/overlap_impl.cuh | 37 +- cpp/src/link_prediction/similarity_impl.cuh | 105 ++- cpp/src/link_prediction/sorensen_impl.cuh | 37 +- .../detail/extract_transform_v_frontier_e.cuh | 78 +-- cpp/src/prims/detail/nbr_intersection.cuh | 634 +++++++++++++++--- .../detail/optional_dataframe_buffer.hpp | 102 +++ ..._v_pair_transform_dst_nbr_intersection.cuh | 130 +++- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 7 + cpp/tests/CMakeLists.txt | 14 + .../mg_weighted_similarity_test.cpp | 298 ++++++++ .../link_prediction/similarity_compare.cpp | 213 +++++- .../link_prediction/similarity_compare.hpp | 46 +- .../weighted_similarity_test.cpp | 338 ++++++++++ ...r_v_pair_transform_dst_nbr_intersection.cu | 11 +- ...transform_dst_nbr_weighted_intersection.cu | 402 +++++++++++ cpp/tests/utilities/test_utilities.hpp | 16 + 18 files changed, 2202 insertions(+), 305 deletions(-) create mode 100644 cpp/src/prims/detail/optional_dataframe_buffer.hpp create mode 100644 cpp/tests/link_prediction/mg_weighted_similarity_test.cpp create mode 100644 cpp/tests/link_prediction/weighted_similarity_test.cpp create mode 100644 cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index 3241018bfbd..730416abd7b 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index b9675e3a578..bd4e2d5e58e 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -24,22 +24,15 @@ namespace cugraph { namespace detail { struct jaccard_functor_t { - template - weight_t __device__ compute_score(weight_t cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const - { - return cardinality_a_intersect_b / (cardinality_a + cardinality_b - cardinality_a_intersect_b); - } -}; - -struct weighted_jaccard_functor_t { template weight_t __device__ compute_score(weight_t weight_a, weight_t weight_b, - weight_t min_weight_a_intersect_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b); + return weight_a_union_b <= std::numeric_limits::min() + ? weight_t{0} + : weight_a_intersect_b / weight_a_union_b; } }; @@ -55,20 +48,12 @@ rmm::device_uvector jaccard_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::jaccard_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_jaccard_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::jaccard_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index 4c001a8f243..1810df2f76b 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -24,22 +24,15 @@ namespace cugraph { namespace detail { struct overlap_functor_t { - template - weight_t __device__ compute_score(weight_t 
cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const - { - return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b); - } -}; - -struct weighted_overlap_functor_t { template weight_t __device__ compute_score(weight_t weight_a, weight_t weight_b, - weight_t min_weight_a_intersect_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return min_weight_a_intersect_b / std::min(weight_a, weight_b); + return std::min(weight_a, weight_b) <= std::numeric_limits::min() + ? weight_t{0} + : weight_a_intersect_b / std::min(weight_a, weight_b); } }; @@ -55,20 +48,12 @@ rmm::device_uvector overlap_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::overlap_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_overlap_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::overlap_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 97c8017c668..55e8f5c88d7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -15,9 +15,11 @@ */ #pragma once +#include #include #include +#include #include #include @@ -51,33 +53,106 @@ rmm::device_uvector similarity( auto vertex_pairs_begin = thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data()); + if (do_expensive_check) { + auto num_invalids = detail::count_invalid_vertex_pairs( + handle, graph_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input arguments: there are invalid input 
vertex pairs."); + + if (edge_weight_view) { + auto num_negative_edge_weights = + count_if_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; }); + CUGRAPH_EXPECTS( + num_negative_edge_weights == 0, + "Invalid input argument: input edge weights should have non-negative values."); + } + } + if (edge_weight_view) { - // FIXME: need implementation, similar to unweighted - // Use compute_out_weight_sums instead of compute_out_degrees - // Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and - // max weight((u,a), (a,v)). - // Use these to compute weighted score - // - CUGRAPH_FAIL("weighted similarity computations are not supported in this release"); + rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); + rmm::device_uvector weighted_out_degrees = + compute_out_weight_sums(handle, graph_view, *edge_weight_view); + + per_v_pair_transform_dst_nbr_intersection( + handle, + graph_view, + *edge_weight_view, + vertex_pairs_begin, + vertex_pairs_begin + num_vertex_pairs, + weighted_out_degrees.begin(), + [functor] __device__(auto a, + auto b, + auto weight_a, + auto weight_b, + auto intersection, + auto intersected_properties_a, + auto intersected_properties_b) { + weight_t sum_of_min_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_max_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_intersected_a = weight_t{0}; + weight_t sum_of_intersected_b = weight_t{0}; + + auto pair_first = thrust::make_zip_iterator(intersected_properties_a.data(), + intersected_properties_b.data()); + thrust::tie(sum_of_min_weight_a_intersect_b, + sum_of_max_weight_a_intersect_b, + sum_of_intersected_a, + sum_of_intersected_b) = + thrust::transform_reduce( + thrust::seq, + pair_first, + pair_first + intersected_properties_a.size(), + [] __device__(auto property_pair) { + auto prop_a = 
thrust::get<0>(property_pair); + auto prop_b = thrust::get<1>(property_pair); + return thrust::make_tuple(min(prop_a, prop_b), max(prop_a, prop_b), prop_a, prop_b); + }, + thrust::make_tuple(weight_t{0}, weight_t{0}, weight_t{0}, weight_t{0}), + [] __device__(auto lhs, auto rhs) { + return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), + thrust::get<1>(lhs) + thrust::get<1>(rhs), + thrust::get<2>(lhs) + thrust::get<2>(rhs), + thrust::get<3>(lhs) + thrust::get<3>(rhs)); + }); + + weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; + weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; + + sum_of_max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; + + return functor.compute_score(static_cast(weight_a), + static_cast(weight_b), + static_cast(sum_of_min_weight_a_intersect_b), + static_cast(sum_of_max_weight_a_intersect_b)); + }, + similarity_score.begin(), + do_expensive_check); + + return similarity_score; } else { rmm::device_uvector similarity_score(num_vertex_pairs, handle.get_stream()); - // - // Compute vertex_degree for all vertices, then distribute to each GPU. 
- // Need to use this instead of the dummy properties below - // auto out_degrees = graph_view.compute_out_degrees(handle); per_v_pair_transform_dst_nbr_intersection( handle, graph_view, + cugraph::edge_dummy_property_t{}.view(), vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs, out_degrees.begin(), - [functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) { - return functor.compute_score(static_cast(v1_degree), - static_cast(v2_degree), - static_cast(intersection.size())); + [functor] __device__( + auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) { + return functor.compute_score( + static_cast(v1_degree), + static_cast(v2_degree), + static_cast(intersection.size()), + static_cast(v1_degree + v2_degree - intersection.size())); }, similarity_score.begin(), do_expensive_check); diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index ac84358049a..00c9a8107f3 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -24,22 +24,15 @@ namespace cugraph { namespace detail { struct sorensen_functor_t { - template - weight_t __device__ compute_score(weight_t cardinality_a, - weight_t cardinality_b, - weight_t cardinality_a_intersect_b) const - { - return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b); - } -}; - -struct weighted_sorensen_functor_t { template weight_t __device__ compute_score(weight_t weight_a, weight_t weight_b, - weight_t min_weight_a_intersect_b) const + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return (2 * min_weight_a_intersect_b) / (weight_a + weight_b); + return (weight_a + weight_b) <= std::numeric_limits::min() + ? 
weight_t{0} + : (2 * weight_a_intersect_b) / (weight_a + weight_b); } }; @@ -55,20 +48,12 @@ rmm::device_uvector sorensen_coefficients( { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (!edge_weight_view) - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::sorensen_functor_t{}, - do_expensive_check); - else - return detail::similarity(handle, - graph_view, - edge_weight_view, - vertex_pairs, - detail::weighted_sorensen_functor_t{}, - do_expensive_check); + return detail::similarity(handle, + graph_view, + edge_weight_view, + vertex_pairs, + detail::sorensen_functor_t{}, + do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index febdf61943b..2d77d64e1ff 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -60,83 +61,6 @@ namespace detail { int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512; -// we cannot use thrust::iterator_traits::value_type if Iterator is void* (reference to -// void is not allowed) -template -struct optional_dataframe_buffer_value_type_t; - -template -struct optional_dataframe_buffer_value_type_t>> { - using value = typename thrust::iterator_traits::value_type; -}; - -template -struct optional_dataframe_buffer_value_type_t>> { - using value = void; -}; - -template >* = nullptr> -std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) -{ - return std::byte{0}; // dummy -} - -template >* = nullptr> -auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) -{ - return allocate_dataframe_buffer(size, stream); -} - -template >* = nullptr> -void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer) -{ - return 
static_cast(nullptr); -} - -template >* = nullptr> -auto get_optional_dataframe_buffer_begin( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer) -{ - return get_dataframe_buffer_begin(optional_dataframe_buffer); -} - -template >* = nullptr> -void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - size_t new_buffer_size, - rmm::cuda_stream_view stream_view) -{ - return; -} - -template >* = nullptr> -void resize_optional_dataframe_buffer( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, - size_t new_buffer_size, - rmm::cuda_stream_view stream_view) -{ - return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); -} - -template >* = nullptr> -void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return; -} - -template >* = nullptr> -void shrink_to_fit_optional_dataframe_buffer( - std::add_lvalue_reference_t( - size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); -} - template diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 98453d46c3f..f4c4745b14c 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -15,9 +15,11 @@ */ #pragma once +#include #include #include +#include #include #include #include @@ -168,12 +170,17 @@ struct update_rx_major_local_degree_t { } }; -template +template struct update_rx_major_local_nbrs_t { int major_comm_size{}; int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; + edge_partition_e_input_device_view_t edge_partition_e_value_input{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -182,12 +189,13 @@ struct update_rx_major_local_nbrs_t { 
raft::device_span rx_group_firsts{nullptr}; raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; - raft::device_span local_nbrs_for_rx_majors{}; + optional_property_buffer_view_t local_nbrs_properties_for_rx_majors{}; - __device__ void operator()(size_t idx) const + __device__ void operator()(size_t idx) { - auto it = thrust::upper_bound( + using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; + auto it = thrust::upper_bound( thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx); auto major_comm_rank = static_cast(thrust::distance(rx_reordered_group_lasts.begin(), it)); auto offset_in_local_edge_partition = @@ -214,14 +222,22 @@ struct update_rx_major_local_nbrs_t { // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - thrust::copy( - thrust::seq, - indices, - indices + local_degree, - local_nbrs_for_rx_majors.begin() + - local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + - local_edge_partition_idx] + - offset_in_local_edge_partition]); + + size_t start_offset = + local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + + local_edge_partition_idx] + + offset_in_local_edge_partition]; + thrust::copy(thrust::seq, + indices, + indices + local_degree, + local_nbrs_for_rx_majors.begin() + start_offset); + + if constexpr (!std::is_same_v) { + thrust::copy(thrust::seq, + edge_partition_e_value_input.value_first() + edge_offset, + edge_partition_e_value_input.value_first() + (edge_offset + local_degree), + local_nbrs_properties_for_rx_majors.begin() + start_offset); + } } }; @@ -317,30 +333,43 @@ template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; raft::device_span first_element_offsets{}; raft::device_span 
first_element_indices{nullptr}; + optional_property_buffer_view_t first_element_properties{}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; + optional_property_buffer_view_t second_element_properties{}; edge_partition_device_view_t edge_partition{}; + edge_partition_e_input_device_view_t edge_partition_e_value_input{}; VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; + optional_property_buffer_view_t nbr_intersection_properties0{}; + optional_property_buffer_view_t nbr_intersection_properties1{}; vertex_t invalid_id{}; - - __device__ edge_t operator()(size_t i) const + __device__ edge_t operator()(size_t i) { - auto pair = *(vertex_pair_first + i); + using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; + using optional_const_property_buffer_view_t = + std::conditional_t, + raft::device_span, + std::byte /* dummy */>; + auto pair = *(vertex_pair_first + i); vertex_t const* indices0{nullptr}; - [[maybe_unused]] edge_t local_edge_offset0{0}; + optional_const_property_buffer_view_t properties0{}; + + edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { vertex_t major = thrust::get<0>(pair); @@ -362,14 +391,27 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { thrust::tie(indices0, local_edge_offset0, local_degree0) = edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } + + if constexpr (!std::is_same_v) { + properties0 = raft::device_span( + edge_partition_e_value_input.value_first() + local_edge_offset0, local_degree0); + } + } else { - auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); - local_degree0 = - static_cast(first_element_offsets[idx + 1] - first_element_offsets[idx]); - indices0 = first_element_indices.begin() + 
first_element_offsets[idx]; + auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); + local_edge_offset0 = first_element_offsets[idx]; + local_degree0 = static_cast(first_element_offsets[idx + 1] - local_edge_offset0); + indices0 = first_element_indices.begin() + local_edge_offset0; + + if constexpr (!std::is_same_v) { + properties0 = raft::device_span( + first_element_properties.begin() + local_edge_offset0, local_degree0); + } } vertex_t const* indices1{nullptr}; + optional_const_property_buffer_view_t properties1{}; + [[maybe_unused]] edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { @@ -392,31 +434,71 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { thrust::tie(indices1, local_edge_offset1, local_degree1) = edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); } + + if constexpr (!std::is_same_v) { + properties1 = raft::device_span( + edge_partition_e_value_input.value_first() + local_edge_offset1, local_degree1); + } + } else { - auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); - local_degree1 = - static_cast(second_element_offsets[idx + 1] - second_element_offsets[idx]); - indices1 = second_element_indices.begin() + second_element_offsets[idx]; + auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); + local_edge_offset1 = second_element_offsets[idx]; + local_degree1 = static_cast(second_element_offsets[idx + 1] - local_edge_offset1); + indices1 = second_element_indices.begin() + local_edge_offset1; + + if constexpr (!std::is_same_v) { + properties1 = raft::device_span( + second_element_properties.begin() + local_edge_offset1, local_degree1); + } } // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - auto it = - thrust::set_intersection(thrust::seq, - indices0, - indices0 + local_degree0, - indices1, - indices1 + 
local_degree1, - nbr_intersection_indices.begin() + nbr_intersection_offsets[i]); + auto nbr_intersection_first = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; + + auto nbr_intersection_last = thrust::set_intersection(thrust::seq, + indices0, + indices0 + local_degree0, + indices1, + indices1 + local_degree1, + nbr_intersection_first); thrust::fill(thrust::seq, - it, + nbr_intersection_last, nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], invalid_id); - return static_cast( - thrust::distance(nbr_intersection_indices.begin() + nbr_intersection_offsets[i], it)); + auto insection_size = + static_cast(thrust::distance(nbr_intersection_first, nbr_intersection_last)); + if constexpr (!std::is_same_v) { + auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; + + // copy edge properties from first vertex to common neighbors + thrust::transform(thrust::seq, + nbr_intersection_first, + nbr_intersection_last, + ip0_start, + [indices0, local_degree0, properties0] __device__(auto v) { + auto position = + thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v); + return properties0[thrust::distance(indices0, position)]; + }); + + auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; + + // copy edge properties from second vertex to common neighbors + thrust::transform(thrust::seq, + nbr_intersection_first, + nbr_intersection_last, + ip1_start, + [indices1, local_degree1, properties1] __device__(auto v) { + auto position = + thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v); + return properties1[thrust::distance(indices1, position)]; + }); + } + return insection_size; } }; @@ -436,7 +518,9 @@ struct strided_accumulate_t { } }; -template +template struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -444,9 +528,13 @@ struct gatherv_indices_t { raft::device_span gathered_intersection_offsets{}; raft::device_span 
gathered_intersection_indices{}; raft::device_span combined_nbr_intersection_offsets{}; - raft::device_span combined_nbr_intersection_indices{}; + optional_property_buffer_view_t gathered_nbr_intersection_properties0{}; + optional_property_buffer_view_t gathered_nbr_intersection_properties1{}; + optional_property_buffer_view_t combined_nbr_intersection_properties0{}; + optional_property_buffer_view_t combined_nbr_intersection_properties1{}; + __device__ void operator()(size_t i) const { auto output_offset = combined_nbr_intersection_offsets[i]; @@ -455,12 +543,29 @@ struct gatherv_indices_t { // in a single warp (better optimize if this becomes a performance bottleneck) for (int j = 0; j < minor_comm_size; ++j) { - thrust::copy( - thrust::seq, - gathered_intersection_indices.begin() + gathered_intersection_offsets[output_size * j + i], - gathered_intersection_indices.begin() + - gathered_intersection_offsets[output_size * j + i + 1], - combined_nbr_intersection_indices.begin() + output_offset); + if constexpr (!std::is_same_v) { + auto zipped_gathered_begin = thrust::make_zip_iterator( + thrust::make_tuple(gathered_intersection_indices.begin(), + gathered_nbr_intersection_properties0.begin(), + gathered_nbr_intersection_properties1.begin())); + + auto zipped_combined_begin = thrust::make_zip_iterator( + thrust::make_tuple(combined_nbr_intersection_indices.begin(), + combined_nbr_intersection_properties0.begin(), + combined_nbr_intersection_properties1.begin())); + + thrust::copy(thrust::seq, + zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], + zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1], + zipped_combined_begin + output_offset); + } else { + thrust::copy(thrust::seq, + gathered_intersection_indices.begin() + + gathered_intersection_offsets[output_size * j + i], + gathered_intersection_indices.begin() + + gathered_intersection_offsets[output_size * j + i + 1], + combined_nbr_intersection_indices.begin() + 
output_offset); + } output_offset += gathered_intersection_offsets[output_size * j + i + 1] - gathered_intersection_offsets[output_size * j + i]; } @@ -553,10 +658,17 @@ size_t count_invalid_vertex_pairs(raft::handle_t const& handle, // thrust::distance(vertex_pair_first, vertex_pair_last) should be comparable across the global // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID % // number of groups" is recommended for load-balancing. -template -std::tuple, rmm::device_uvector> +template +std::conditional_t< + !std::is_same_v, + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector>, + std::tuple, rmm::device_uvector>> nbr_intersection(raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeValueInputIterator edge_value_input, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, std::array intersect_dst_nbr, @@ -565,6 +677,31 @@ nbr_intersection(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; + using edge_property_value_t = typename EdgeValueInputIterator::value_type; + + using edge_partition_e_input_device_view_t = + std::conditional_t, + detail::edge_partition_edge_dummy_property_device_view_t, + detail::edge_partition_edge_property_device_view_t< + edge_t, + typename EdgeValueInputIterator::value_iterator, + edge_property_value_t>>; + + using optional_property_buffer_value_type = + std::conditional_t, + edge_property_value_t, + void>; + + using optional_property_buffer_view_t = + std::conditional_t, + raft::device_span, + std::byte /* dummy */>; + + using optional_nbr_intersected_edge_partitions_t = + std::conditional_t, + std::vector>, + std::byte /* dummy */>; + static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -601,6 +738,11 @@ nbr_intersection(raft::handle_t const& handle, std::optional> major_nbr_offsets{std::nullopt}; std::optional> 
major_nbr_indices{std::nullopt}; + [[maybe_unused]] auto major_nbr_properties = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + optional_property_buffer_view_t optional_major_nbr_properties{}; + if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { auto& comm = handle.get_comms(); @@ -716,6 +858,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); + + [[maybe_unused]] auto local_nbrs_properties_for_rx_majors = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + std::vector local_nbr_counts{}; { rmm::device_uvector rx_reordered_group_counts( @@ -788,10 +935,23 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); + + optional_property_buffer_view_t optional_local_nbrs_properties{}; + + if constexpr (!std::is_same_v) { + local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), + handle.get_stream()); + optional_local_nbrs_properties = raft::device_span( + local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size()); + } + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + + auto edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, i); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? 
size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -801,10 +961,15 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(reordered_idx_first), thrust::make_counting_iterator(reordered_idx_last), - update_rx_major_local_nbrs_t{ + update_rx_major_local_nbrs_t{ major_comm_size, minor_comm_size, edge_partition, + edge_partition_e_value_input, reordered_idx_first, i, raft::device_span( @@ -814,7 +979,8 @@ nbr_intersection(raft::handle_t const& handle, raft::device_span(local_nbr_offsets_for_rx_majors.data(), local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), - local_nbrs_for_rx_majors.size())}); + local_nbrs_for_rx_majors.size()), + optional_local_nbrs_properties}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -860,6 +1026,17 @@ nbr_intersection(raft::handle_t const& handle, std::tie(*major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); + if constexpr (!std::is_same_v) { + std::tie(major_nbr_properties, std::ignore) = + shuffle_values(major_comm, + local_nbrs_properties_for_rx_majors.begin(), + local_nbr_counts, + handle.get_stream()); + + optional_major_nbr_properties = raft::device_span( + major_nbr_properties.data(), major_nbr_properties.size()); + } + major_to_idx_map_ptr = std::make_unique>( unique_majors.begin(), unique_majors.end(), @@ -887,6 +1064,15 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); + + [[maybe_unused]] auto nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + + [[maybe_unused]] auto nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + if 
constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_rank = minor_comm.get_rank(); @@ -929,6 +1115,19 @@ nbr_intersection(raft::handle_t const& handle, std::vector> edge_partition_nbr_intersection_indices{}; edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); + + [[maybe_unused]] optional_nbr_intersected_edge_partitions_t + edge_partition_nbr_intersection_property0{}; + [[maybe_unused]] optional_nbr_intersected_edge_partitions_t + edge_partition_nbr_intersection_property1{}; + + if constexpr (!std::is_same_v) { + edge_partition_nbr_intersection_property0.reserve( + graph_view.number_of_local_edge_partitions()); + edge_partition_nbr_intersection_property1.reserve( + graph_view.number_of_local_edge_partitions()); + } + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto rx_v_pair_counts = host_scalar_allgather(minor_comm, input_counts[i], handle.get_stream()); @@ -944,6 +1143,15 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_sizes(size_t{0}, handle.get_stream()); rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); + + [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + + [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); + std::vector rx_v_pair_nbr_intersection_index_tx_counts(size_t{0}); { auto vertex_pair_buffer = allocate_dataframe_buffer>( @@ -966,6 +1174,9 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + + auto edge_partition_e_value_input 
= + edge_partition_e_input_device_view_t(edge_value_input, i); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); rx_v_pair_nbr_intersection_sizes.resize( @@ -1003,6 +1214,25 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_indices.resize( rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + + optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties0{}; + optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties1{}; + + if constexpr (!std::is_same_v) { + rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + + rx_v_pair_optional_nbr_intersection_properties0 = + raft::device_span(rx_v_pair_nbr_intersection_properties0.data(), + rx_v_pair_nbr_intersection_properties0.size()); + + rx_v_pair_optional_nbr_intersection_properties1 = + raft::device_span(rx_v_pair_nbr_intersection_properties1.data(), + rx_v_pair_nbr_intersection_properties1.size()); + } + if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto second_element_to_idx_map = detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); @@ -1016,33 +1246,70 @@ nbr_intersection(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer)), vertex_t, edge_t, + edge_partition_e_input_device_view_t, + optional_property_buffer_view_t, true>{nullptr, raft::device_span(), raft::device_span(), + optional_property_buffer_view_t{}, second_element_to_idx_map, raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), raft::device_span((*major_nbr_indices).data(), (*major_nbr_indices).size()), + optional_major_nbr_properties, edge_partition, + edge_partition_e_value_input, get_dataframe_buffer_begin(vertex_pair_buffer), 
raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), rx_v_pair_nbr_intersection_offsets.size()), raft::device_span(rx_v_pair_nbr_intersection_indices.data(), rx_v_pair_nbr_intersection_indices.size()), + rx_v_pair_optional_nbr_intersection_properties0, + rx_v_pair_optional_nbr_intersection_properties1, + invalid_vertex_id::value}); + } else { CUGRAPH_FAIL("unimplemented."); } - rx_v_pair_nbr_intersection_indices.resize( - thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), - thrust::remove(handle.get_thrust_policy(), - rx_v_pair_nbr_intersection_indices.begin(), - rx_v_pair_nbr_intersection_indices.end(), - invalid_vertex_id::value)), - handle.get_stream()); - rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + if constexpr (std::is_same_v) { + rx_v_pair_nbr_intersection_indices.resize( + thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), + thrust::remove(handle.get_thrust_policy(), + rx_v_pair_nbr_intersection_indices.begin(), + rx_v_pair_nbr_intersection_indices.end(), + invalid_vertex_id::value)), + handle.get_stream()); + rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + } else { + auto common_nbr_and_properties_begin = thrust::make_zip_iterator( + thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(), + rx_v_pair_nbr_intersection_properties0.begin(), + rx_v_pair_nbr_intersection_properties1.begin())); + + auto last = thrust::remove_if( + handle.get_thrust_policy(), + common_nbr_and_properties_begin, + common_nbr_and_properties_begin + rx_v_pair_nbr_intersection_indices.size(), + [] __device__(auto nbr_p0_p1) { + return thrust::get<0>(nbr_p0_p1) == invalid_vertex_id::value; + }); + + rx_v_pair_nbr_intersection_indices.resize( + thrust::distance(common_nbr_and_properties_begin, last), handle.get_stream()); + + rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); + + rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), + 
handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + + rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), + handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); + } thrust::inclusive_scan(handle.get_thrust_policy(), rx_v_pair_nbr_intersection_sizes.begin(), @@ -1159,6 +1426,15 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); + + [[maybe_unused]] auto combined_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + size_t{0}, handle.get_stream()); + + [[maybe_unused]] auto combined_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + size_t{0}, handle.get_stream()); + { std::vector ranks(minor_comm_size); std::iota(ranks.begin(), ranks.end(), int{0}); @@ -1194,26 +1470,108 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ - rx_v_pair_counts[minor_comm_rank], - minor_comm_size, - raft::device_span(gathered_nbr_intersection_offsets.data(), - gathered_nbr_intersection_offsets.size()), - raft::device_span(gathered_nbr_intersection_indices.data(), - gathered_nbr_intersection_indices.size()), - raft::device_span(combined_nbr_intersection_offsets.data(), - combined_nbr_intersection_offsets.size()), - raft::device_span(combined_nbr_intersection_indices.data(), - combined_nbr_intersection_indices.size())}); + [[maybe_unused]] auto gathered_nbr_intersection_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + 
handle.get_stream()); + + [[maybe_unused]] auto gathered_nbr_intersection_properties1 = + cugraph::detail::allocate_optional_dataframe_buffer( + rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), + handle.get_stream()); + + if constexpr (!std::is_same_v) { + device_multicast_sendrecv(minor_comm, + rx_v_pair_nbr_intersection_properties0.begin(), + rx_v_pair_nbr_intersection_index_tx_counts, + tx_displacements, + ranks, + gathered_nbr_intersection_properties0.begin(), + gathered_nbr_intersection_index_rx_counts, + rx_displacements, + ranks, + handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + + combined_nbr_intersection_properties0.resize(gathered_nbr_intersection_properties0.size(), + handle.get_stream()); + + device_multicast_sendrecv(minor_comm, + rx_v_pair_nbr_intersection_properties1.begin(), + rx_v_pair_nbr_intersection_index_tx_counts, + tx_displacements, + ranks, + gathered_nbr_intersection_properties1.begin(), + gathered_nbr_intersection_index_rx_counts, + rx_displacements, + ranks, + handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); + combined_nbr_intersection_properties1.resize(gathered_nbr_intersection_properties1.size(), + handle.get_stream()); + } + + if constexpr (!std::is_same_v) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), + gatherv_indices_t{ + rx_v_pair_counts[minor_comm_rank], + minor_comm_size, + raft::device_span(gathered_nbr_intersection_offsets.data(), + gathered_nbr_intersection_offsets.size()), + raft::device_span(gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size()), + 
raft::device_span(combined_nbr_intersection_offsets.data(), + combined_nbr_intersection_offsets.size()), + raft::device_span(combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size()), + raft::device_span( + gathered_nbr_intersection_properties0.data(), + gathered_nbr_intersection_properties0.size()), + raft::device_span( + gathered_nbr_intersection_properties1.data(), + gathered_nbr_intersection_properties1.size()), + raft::device_span( + combined_nbr_intersection_properties0.data(), + combined_nbr_intersection_properties0.size()), + raft::device_span( + combined_nbr_intersection_properties1.data(), + combined_nbr_intersection_properties1.size())}); + + } else { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), + gatherv_indices_t{ + rx_v_pair_counts[minor_comm_rank], + minor_comm_size, + raft::device_span(gathered_nbr_intersection_offsets.data(), + gathered_nbr_intersection_offsets.size()), + raft::device_span(gathered_nbr_intersection_indices.data(), + gathered_nbr_intersection_indices.size()), + raft::device_span(combined_nbr_intersection_offsets.data(), + combined_nbr_intersection_offsets.size()), + raft::device_span(combined_nbr_intersection_indices.data(), + combined_nbr_intersection_indices.size()) + + }); + } } edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); + if constexpr (!std::is_same_v) { + edge_partition_nbr_intersection_property0.push_back( + std::move(combined_nbr_intersection_properties0)); + edge_partition_nbr_intersection_property1.push_back( + std::move(combined_nbr_intersection_properties1)); + } } rmm::device_uvector nbr_intersection_sizes(input_size, handle.get_stream()); @@ -1222,6 +1580,10 @@ nbr_intersection(raft::handle_t const& handle, 
num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); + if constexpr (!std::is_same_v) { + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + } size_t size_offset{0}; size_t index_offset{0}; for (size_t i = 0; i < edge_partition_nbr_intersection_sizes.size(); ++i) { @@ -1234,6 +1596,19 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices[i].begin(), edge_partition_nbr_intersection_indices[i].end(), nbr_intersection_indices.begin() + index_offset); + + if constexpr (!std::is_same_v) { + thrust::copy(handle.get_thrust_policy(), + edge_partition_nbr_intersection_property0[i].begin(), + edge_partition_nbr_intersection_property0[i].end(), + nbr_intersection_properties0.begin() + index_offset); + + thrust::copy(handle.get_thrust_policy(), + edge_partition_nbr_intersection_property1[i].begin(), + edge_partition_nbr_intersection_property1[i].end(), + nbr_intersection_properties1.begin() + index_offset); + } + index_offset += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_offsets.resize(nbr_intersection_sizes.size() + size_t{1}, handle.get_stream()); @@ -1244,11 +1619,13 @@ nbr_intersection(raft::handle_t const& handle, size_first, size_first + nbr_intersection_sizes.size(), nbr_intersection_offsets.begin() + 1); + } else { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(size_t{0})); + auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0); rmm::device_uvector nbr_intersection_sizes( input_size, handle.get_stream()); // initially store minimum degrees (upper bound for intersection sizes) @@ -1278,6 +1655,21 @@ nbr_intersection(raft::handle_t const& handle, 
nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); + + optional_property_buffer_view_t optional_nbr_intersection_properties0{}; + optional_property_buffer_view_t optional_nbr_intersection_properties1{}; + + if constexpr (!std::is_same_v) { + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + + optional_nbr_intersection_properties0 = raft::device_span( + nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); + + optional_nbr_intersection_properties1 = raft::device_span( + nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); + } + if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { thrust::tabulate( handle.get_thrust_policy(), @@ -1288,19 +1680,26 @@ nbr_intersection(raft::handle_t const& handle, decltype(vertex_pair_first), vertex_t, edge_t, + edge_partition_e_input_device_view_t, + optional_property_buffer_view_t, false>{ nullptr, raft::device_span(), raft::device_span(), + optional_property_buffer_view_t{}, nullptr, raft::device_span(), raft::device_span(), + optional_property_buffer_view_t{}, edge_partition, + edge_partition_e_value_input, vertex_pair_first, raft::device_span(nbr_intersection_offsets.data(), nbr_intersection_offsets.size()), raft::device_span(nbr_intersection_indices.data(), nbr_intersection_indices.size()), + optional_nbr_intersection_properties0, + optional_nbr_intersection_properties1, invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); @@ -1314,31 +1713,87 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.end(), detail::not_equal_t{invalid_vertex_id::value}), handle.get_stream()); + + [[maybe_unused]] auto tmp_properties0 = + cugraph::detail::allocate_optional_dataframe_buffer( + tmp_indices.size(), handle.get_stream()); + + [[maybe_unused]] auto tmp_properties1 
= + cugraph::detail::allocate_optional_dataframe_buffer( + tmp_indices.size(), handle.get_stream()); + size_t num_copied{0}; size_t num_scanned{0}; + while (num_scanned < nbr_intersection_indices.size()) { size_t this_scan_size = std::min( - size_t{1} << 30, + size_t{1} << 27, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); - num_copied += static_cast(thrust::distance( - tmp_indices.begin() + num_copied, - thrust::copy_if(handle.get_thrust_policy(), - nbr_intersection_indices.begin() + num_scanned, - nbr_intersection_indices.begin() + num_scanned + this_scan_size, - tmp_indices.begin() + num_copied, - detail::not_equal_t{invalid_vertex_id::value}))); + if constexpr (std::is_same_v) { + num_copied += static_cast(thrust::distance( + tmp_indices.begin() + num_copied, + thrust::copy_if(handle.get_thrust_policy(), + nbr_intersection_indices.begin() + num_scanned, + nbr_intersection_indices.begin() + num_scanned + this_scan_size, + tmp_indices.begin() + num_copied, + detail::not_equal_t{invalid_vertex_id::value}))); + } else { + auto zipped_itr_to_indices_and_properties_begin = + thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), + nbr_intersection_properties0.begin(), + nbr_intersection_properties1.begin())); + + auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( + tmp_indices.begin(), tmp_properties0.begin(), tmp_properties1.begin())); + + num_copied += static_cast(thrust::distance( + zipped_itr_to_tmps_begin + num_copied, + thrust::copy_if(handle.get_thrust_policy(), + zipped_itr_to_indices_and_properties_begin + num_scanned, + zipped_itr_to_indices_and_properties_begin + num_scanned + this_scan_size, + zipped_itr_to_tmps_begin + num_copied, + [] __device__(auto nbr_p0_p1) { + auto nbr = thrust::get<0>(nbr_p0_p1); + auto p0 = thrust::get<1>(nbr_p0_p1); + auto p1 = thrust::get<2>(nbr_p0_p1); + return thrust::get<0>(nbr_p0_p1) != 
invalid_vertex_id::value; + }))); + } num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); + if constexpr (!std::is_same_v) { + nbr_intersection_properties0 = std::move(tmp_properties0); + nbr_intersection_properties1 = std::move(tmp_properties1); + } + #else - nbr_intersection_indices.resize( - thrust::distance(nbr_intersection_indices.begin(), - thrust::remove(handle.get_thrust_policy(), - nbr_intersection_indices.begin(), - nbr_intersection_indices.end(), - invalid_vertex_id::value)), - handle.get_stream()); + + if constexpr (std::is_same_v) { + nbr_intersection_indices.resize( + thrust::distance(nbr_intersection_indices.begin(), + thrust::remove(handle.get_thrust_policy(), + nbr_intersection_indices.begin(), + nbr_intersection_indices.end(), + invalid_vertex_id::value)), + handle.get_stream()); + } else { + nbr_intersection_indices.resize( + thrust::distance(zipped_itr_to_indices_and_properties_begin, + thrust::remove_if(handle.get_thrust_policy(), + zipped_itr_to_indices_and_properties_begin, + zipped_itr_to_indices_and_properties_begin + + nbr_intersection_indices.size(), + [] __device__(auto nbr_p0_p1) { + return thrust::get<0>(nbr_p0_p1) == + invalid_vertex_id::value; + })), + handle.get_stream()); + + nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + } #endif thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1349,7 +1804,16 @@ nbr_intersection(raft::handle_t const& handle, // 5. 
Return - return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices)); + if constexpr (std::is_same_v) { + return std::make_tuple(std::move(nbr_intersection_offsets), + std::move(nbr_intersection_indices)); + + } else { + return std::make_tuple(std::move(nbr_intersection_offsets), + std::move(nbr_intersection_indices), + std::move(nbr_intersection_properties0), + std::move(nbr_intersection_properties1)); + } } } // namespace detail diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp new file mode 100644 index 00000000000..dd40e6932e4 --- /dev/null +++ b/cpp/src/prims/detail/optional_dataframe_buffer.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace cugraph { + +namespace detail { + +// we cannot use thrust::iterator_traits::value_type if Iterator is void* (reference to +// void is not allowed) +template +struct optional_dataframe_buffer_value_type_t; + +template +struct optional_dataframe_buffer_value_type_t>> { + using value = typename thrust::iterator_traits::value_type; +}; + +template +struct optional_dataframe_buffer_value_type_t>> { + using value = void; +}; + +template >* = nullptr> +std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) +{ + return std::byte{0}; // dummy +} + +template >* = nullptr> +auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) +{ + return allocate_dataframe_buffer(size, stream); +} + +template >* = nullptr> +void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer) +{ + return static_cast(nullptr); +} + +template >* = nullptr> +auto get_optional_dataframe_buffer_begin( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer) +{ + return get_dataframe_buffer_begin(optional_dataframe_buffer); +} + +template >* = nullptr> +void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, + size_t new_buffer_size, + rmm::cuda_stream_view stream_view) +{ + return; +} + +template >* = nullptr> +void resize_optional_dataframe_buffer( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer, + size_t new_buffer_size, + rmm::cuda_stream_view stream_view) +{ + return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); +} + +template >* = nullptr> +void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, + rmm::cuda_stream_view stream_view) +{ + return; +} + +template >* = nullptr> +void shrink_to_fit_optional_dataframe_buffer( + std::add_lvalue_reference_t( + size_t{0}, rmm::cuda_stream_view{}))> 
optional_dataframe_buffer, + rmm::cuda_stream_view stream_view) +{ + return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); +} +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index d69bb8af25e..640c3c04bfd 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct indirection_compare_less_t { template ::value_type; + using edge_property_value_t = + typename thrust::iterator_traits::value_type; auto index = *(major_minor_pair_index_first + i); auto pair = *(major_minor_pair_first + index); @@ -128,6 +133,25 @@ struct call_intersection_op_t { auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); + std::conditional_t, + raft::device_span, + std::byte /* dummy */> + properties0{}; + + std::conditional_t, + raft::device_span, + std::byte /* dummy */> + properties1{}; + + if constexpr (!std::is_same_v) { + properties0 = raft::device_span( + nbr_intersection_properties0 + nbr_offsets[i], + nbr_intersection_properties0 + +nbr_offsets[i + 1]); + properties1 = raft::device_span( + nbr_intersection_properties1 + nbr_offsets[i], + nbr_intersection_properties1 + +nbr_offsets[i + 1]); + } + property_t src_prop{}; property_t dst_prop{}; if (unique_vertices) { @@ -149,8 +173,9 @@ struct call_intersection_op_t { src_prop = *(vertex_property_first + src_offset); dst_prop = *(vertex_property_first + dst_offset); } + *(major_minor_pair_value_output_first + index) = - intersection_op(src, dst, src_prop, dst_prop, intersection); + intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); } }; @@ -165,7 +190,8 @@ struct call_intersection_op_t { * * @tparam 
GraphViewType Type of the passed non-owning graph object. * @tparam VertexPairIterator Type of the iterator for input vertex pairs. - * @tparam VertexValueInputWrapper Type of the wrapper for vertex property values. + * @tparam VertexValueInputIterator Type of the iterator for vertex property values. + * @tparam EdgeValueInputIterator Type of the iterator for edge property values. * @tparam IntersectionOp Type of the quinary per intersection operator. * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property * variables. @@ -176,6 +202,10 @@ struct call_intersection_op_t { * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair. * @param vertex_src_value_input Wrapper used to access vertex input property values (for the * vertices assigned to this process in multi-GPU). + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p intersection_op + * needs to access edge property values) or cugraph::edge_dummy_property_t::view() (if @p + * intersection_op does not access edge property values). 
* @param intersection_op quinary operator takes first vertex of the pair, second vertex of the * pair, property values for the first vertex, property values for the second vertex, and a list of * vertices in the intersection of the first & second vertices' destination neighbors and returns an @@ -188,11 +218,13 @@ struct call_intersection_op_t { template void per_v_pair_transform_dst_nbr_intersection( raft::handle_t const& handle, GraphViewType const& graph_view, + EdgeValueInputIterator edge_value_input, VertexPairIterator vertex_pair_first, VertexPairIterator vertex_pair_last, VertexValueInputIterator vertex_value_input_first, @@ -205,7 +237,8 @@ void per_v_pair_transform_dst_nbr_intersection( using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using property_t = typename thrust::iterator_traits::value_type; - using result_t = typename thrust::iterator_traits::value_type; + using edge_property_value_t = typename EdgeValueInputIterator::value_type; + using result_t = typename thrust::iterator_traits::value_type; CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -344,16 +377,40 @@ void per_v_pair_transform_dst_nbr_intersection( // FIXME: better restrict detail::nbr_intersection input vertex pairs to a single edge // partition? This may provide additional performance improvement opportunities??? 
+ auto chunk_vertex_pair_first = thrust::make_transform_iterator( chunk_vertex_pair_index_first, detail::indirection_t{vertex_pair_first}); - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - graph_view, - chunk_vertex_pair_first, - chunk_vertex_pair_first + this_chunk_size, - std::array{true, true}, - do_expensive_check); + + rmm::device_uvector intersection_offsets(size_t{0}, handle.get_stream()); + rmm::device_uvector intersection_indices(size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties0( + size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties1( + size_t{0}, handle.get_stream()); + + if constexpr (!std::is_same_v) { + std::tie(intersection_offsets, + intersection_indices, + r_nbr_intersection_properties0, + r_nbr_intersection_properties1) = + detail::nbr_intersection(handle, + graph_view, + edge_value_input, + chunk_vertex_pair_first, + chunk_vertex_pair_first + this_chunk_size, + std::array{true, true}, + do_expensive_check); + } else { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + graph_view, + edge_value_input, + chunk_vertex_pair_first, + chunk_vertex_pair_first + this_chunk_size, + std::array{true, true}, + do_expensive_check); + } if (unique_vertices) { auto vertex_value_input_for_unique_vertices_first = @@ -362,38 +419,45 @@ void per_v_pair_transform_dst_nbr_intersection( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t{ - edge_partition, - thrust::make_optional>((*unique_vertices).data(), - (*unique_vertices).size()), - vertex_value_input_for_unique_vertices_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + detail::call_intersection_op_t< + 
GraphViewType, + decltype(vertex_value_input_for_unique_vertices_first), + typename decltype(r_nbr_intersection_properties0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{edge_partition, + thrust::make_optional>( + (*unique_vertices).data(), (*unique_vertices).size()), + vertex_value_input_for_unique_vertices_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_properties0.data(), + r_nbr_intersection_properties1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } else { thrust::for_each(handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t{ + detail::call_intersection_op_t< + GraphViewType, + VertexValueInputIterator, + typename decltype(r_nbr_intersection_properties0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ edge_partition, thrust::optional>{thrust::nullopt}, vertex_value_input_first, intersection_op, intersection_offsets.data(), intersection_indices.data(), + r_nbr_intersection_properties0.data(), + r_nbr_intersection_properties1.data(), chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index b5cfdf4b16b..f773a102959 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -65,6 +65,7 @@ struct compute_chunk_id_t { template struct call_intersection_op_t { @@ -77,6 +78,8 @@ struct call_intersection_op_t { IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; 
typename GraphViewType::vertex_type const* nbr_indices{nullptr}; + EdgeValueInputIterator nbr_intersection_properties0{nullptr}; + EdgeValueInputIterator nbr_intersection_properties1{nullptr}; VertexPairIterator major_minor_pair_first{}; __device__ auto operator()(size_t i) const @@ -342,6 +345,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, + cugraph::edge_dummy_property_t{}.view(), chunk_vertex_pair_first, chunk_vertex_pair_first + this_chunk_size, std::array{true, true}, @@ -362,6 +366,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( detail::call_intersection_op_t{ edge_partition, @@ -370,6 +375,8 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( intersection_op, intersection_offsets.data(), intersection_indices.data(), + nullptr, + nullptr, chunk_vertex_pair_first}); rmm::device_uvector endpoint_vertices(size_t{0}, handle.get_stream()); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e91b7e71537..da1e0e50919 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -384,6 +384,10 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo # - SIMILARITY tests ------------------------------------------------------------------------------ ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp) +################################################################################################### +# - WEIGHTED_SIMILARITY tests ------------------------------------------------------------------------------ +ConfigureTest(WEIGHTED_SIMILARITY_TEST link_prediction/weighted_similarity_test.cpp) + ################################################################################################### # - RANDOM_WALKS tests ---------------------------------------------------------------------------- # FIXME: Rename to random_walks_test.cu once the legacy 
implementation is deleted @@ -627,6 +631,12 @@ if(BUILD_CUGRAPH_MG_TESTS) prims/mg_per_v_pair_transform_dst_nbr_intersection.cu) target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST PRIVATE cuco::cuco) + ############################################################################################### + # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION tests ------------------------- + ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST + prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu) + target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST PRIVATE cuco::cuco) + ############################################################################################### # - MG NBR SAMPLING tests --------------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu) @@ -636,6 +646,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG RANDOM_WALKS tests --------------------------------------------------------------------- ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) + ############################################################################################### + # - MG WEIGHTED_SIMILARITY tests ----------------------------------------------------------------------- + ConfigureTestMG(MG_WEIGHTED_SIMILARITY_TEST link_prediction/mg_weighted_similarity_test.cpp) + ############################################################################################### # - MG SIMILARITY tests ----------------------------------------------------------------------- ConfigureTestMG(MG_SIMILARITY_TEST link_prediction/mg_similarity_test.cpp) diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp new file mode 100644 index 00000000000..cf3179d51a3 --- /dev/null +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ 
-0,0 +1,298 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +struct Weighted_Similarity_Usecase { + bool use_weights{true}; + size_t max_seeds{std::numeric_limits::max()}; + bool check_correctness{true}; +}; + +template +class Tests_MGSimilarity + : public ::testing::TestWithParam> { + public: + Tests_MGSimilarity() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test( + std::tuple param, + test_functor_t const& test_functor) + { + auto [similarity_usecase, input_usecase] = param; + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. 
create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + auto [mg_graph, mg_edge_weights, d_mg_renumber_map_labels] = + cugraph::test::construct_graph( + *handle_, input_usecase, true, true, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 2. run similarity + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; + + rmm::device_uvector d_start_vertices( + std::min( + static_cast(mg_graph_view.local_vertex_partition_range_size()), + similarity_usecase.max_seeds / comm_size + + (static_cast(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)), + handle_->get_stream()); + + cugraph::test::populate_vertex_ids( + *handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first()); + + auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs( + *handle_, + mg_graph_view, + raft::device_span(d_start_vertices.data(), d_start_vertices.size()), + 2); + + auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices); + auto h_offsets = cugraph::test::to_host(*handle_, d_offsets); + + std::vector h_v1(h_offsets.back()); + for (size_t i = 0; i < h_start_vertices.size(); ++i) { + std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]); + } + + auto d_v1 = cugraph::test::to_device(*handle_, h_v1); + auto d_v2 = std::move(two_hop_nbrs); + + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + edge_t, + weight_t, + int32_t>(*handle_, + std::move(d_v1), + 
std::move(d_v2), + std::nullopt, + std::nullopt, + std::nullopt, + mg_graph_view.vertex_partition_range_lasts()); + + std::tuple, raft::device_span> vertex_pairs{ + {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG similarity test"); + } + + auto result_score = test_functor.run( + *handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. compare SG & MG results + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = + cugraph::test::graph_to_host_coo(*handle_, mg_graph_view, mg_edge_weight_view); + + d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size()); + d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size()); + result_score = + cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size()); + + if (d_v1.size() > 0) { + auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1); + auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2); + auto h_result_score = cugraph::test::to_host(*handle_, result_score); + + if (wgt && similarity_usecase.use_weights) { + weighted_similarity_compare(mg_graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair1, h_vertex_pair2), + h_result_score, + test_functor); + } else { + similarity_compare(mg_graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair1, h_vertex_pair2), + h_result_score, + test_functor); + } + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGSimilarity::handle_ = nullptr; + +using 
Tests_MGWeightedSimilarity_File = Tests_MGSimilarity; +using Tests_MGWeightedSimilarity_Rmat = Tests_MGSimilarity; + +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatFloatJaccard) +{ + auto param = GetParam(); + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + 
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGWeightedSimilarity_File, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Weighted_Similarity_Usecase{true, 20, true}, + //: Weighted_Similarity_Usecase{false, 20, true}), + ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGWeightedSimilarity_Rmat, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Weighted_Similarity_Usecase{true, 20, true}, + // Weighted_Similarity_Usecase{false, 20, true}), + ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGWeightedSimilarity_Rmat, + 
::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(Weighted_Similarity_Usecase{true, 20, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_prediction/similarity_compare.cpp b/cpp/tests/link_prediction/similarity_compare.cpp index f005b4ddcef..b39ee983fa7 100644 --- a/cpp/tests/link_prediction/similarity_compare.cpp +++ b/cpp/tests/link_prediction/similarity_compare.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,6 +37,159 @@ struct intersection_count_t { namespace cugraph { namespace test { +template +void weighted_similarity_compare( + vertex_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& similarity_score, + test_t const& test_functor) +{ + auto& [graph_src, graph_dst, graph_wgt] = edge_list; + auto& [v1, v2] = vertex_pairs; + + auto compare_pairs = [](thrust::tuple lhs, + thrust::tuple rhs) { + return ((thrust::get<0>(lhs) < thrust::get<0>(rhs)) || + ((thrust::get<0>(lhs) == thrust::get<0>(rhs)) && + (thrust::get<1>(lhs) < thrust::get<1>(rhs)))); + }; + + std::sort(thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()), + thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()), + compare_pairs); + + std::vector vertex_degrees(static_cast(num_vertices), size_t{0}); + std::vector weighted_vertex_degrees(static_cast(num_vertices), weight_t{0}); + + std::for_each( + graph_src.begin(), graph_src.end(), [&vertex_degrees](auto v) { ++vertex_degrees[v]; }); + + std::for_each( + thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()), + 
thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()), + [&weighted_vertex_degrees](thrust::tuple src_dst_wgt) { + auto src = thrust::get<0>(src_dst_wgt); + auto dst = thrust::get<1>(src_dst_wgt); + auto wgt = thrust::get<2>(src_dst_wgt); + + weighted_vertex_degrees[src] += wgt / weight_t{2}; + weighted_vertex_degrees[dst] += wgt / weight_t{2}; + }); + + auto compare_functor = cugraph::test::nearly_equal{ + weight_t{1e-3}, weight_t{(weight_t{1} / static_cast(num_vertices)) * weight_t{1e-3}}}; + + if (graph_wgt) { + assert(true); + } else { + assert(false); + } + + auto graph_wgt_first = (*graph_wgt).begin(); + std::for_each( + thrust::make_zip_iterator(v1.begin(), v2.begin(), similarity_score.begin()), + thrust::make_zip_iterator(v1.end(), v2.end(), similarity_score.end()), + [compare_functor, + test_functor, + &vertex_degrees, + &weighted_vertex_degrees, + &graph_src, + &graph_dst, + &graph_wgt_first](auto tuple) { + auto v1 = thrust::get<0>(tuple); + auto v2 = thrust::get<1>(tuple); + auto score = thrust::get<2>(tuple); + + auto v1_begin = + std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v1)); + auto v1_end = + std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v1)); + + auto v2_begin = + std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v2)); + auto v2_end = + std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v2)); + + std::vector intersection(std::min((v1_end - v1_begin), (v2_end - v2_begin))); + + auto intersection_end = std::set_intersection(graph_dst.begin() + v1_begin, + graph_dst.begin() + v1_end, + graph_dst.begin() + v2_begin, + graph_dst.begin() + v2_end, + intersection.begin()); + + auto intersection_size = + static_cast(std::distance(intersection.begin(), intersection_end)); + + std::vector intersected_weights_v1(static_cast(intersection_size), + weight_t{0}); + + std::vector 
intersected_weights_v2(static_cast(intersection_size), + weight_t{0}); + + int intersected_weight_idx = 0; + + std::for_each( + intersection.begin(), + intersection_end, + [&graph_dst, + &graph_wgt_first, + &v1_begin, + &v1_end, + &v2_begin, + &v2_end, + &intersected_weights_v1, + &intersected_weights_v2, + &intersected_weight_idx](auto inbr) { + auto lower = + std::lower_bound(graph_dst.begin() + v1_begin, graph_dst.begin() + v1_end, inbr); + auto offset = std::distance(graph_dst.begin() + v1_begin, lower); + + intersected_weights_v1[intersected_weight_idx] = + static_cast(graph_wgt_first[v1_begin + offset]); + + lower = std::lower_bound(graph_dst.begin() + v2_begin, graph_dst.begin() + v2_end, inbr); + + offset = std::distance(graph_dst.begin() + v2_begin, lower); + + intersected_weights_v2[intersected_weight_idx] = + static_cast(graph_wgt_first[v2_begin + offset]); + + ++intersected_weight_idx; + }); + + weight_t sum_intersected_weights_v1 = + std::accumulate(intersected_weights_v1.begin(), intersected_weights_v1.end(), 0.0); + weight_t sum_intersected_weights_v2 = + std::accumulate(intersected_weights_v2.begin(), intersected_weights_v2.end(), 0.0); + + weight_t sum_of_uniq_weights_v1 = weighted_vertex_degrees[v1] - sum_intersected_weights_v1; + weight_t sum_of_uniq_weights_v2 = weighted_vertex_degrees[v2] - sum_intersected_weights_v2; + + weight_t min_weight_v1_intersect_v2 = weight_t{0}; + weight_t max_weight_v1_intersect_v2 = weight_t{0}; + + std::for_each( + thrust::make_zip_iterator(intersected_weights_v1.begin(), intersected_weights_v2.begin()), + thrust::make_zip_iterator(intersected_weights_v1.end(), intersected_weights_v2.end()), + [&min_weight_v1_intersect_v2, + &max_weight_v1_intersect_v2](thrust::tuple w1_w2) { + min_weight_v1_intersect_v2 += std::min(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2)); + max_weight_v1_intersect_v2 += std::max(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2)); + }); + + max_weight_v1_intersect_v2 += (sum_of_uniq_weights_v1 + 
sum_of_uniq_weights_v2); + auto expected_score = test_functor.compute_score(weighted_vertex_degrees[v1], + weighted_vertex_degrees[v2], + min_weight_v1_intersect_v2, + max_weight_v1_intersect_v2); + EXPECT_TRUE(compare_functor(score, expected_score)) + << "score mismatch, got " << score << ", expected " << expected_score; + }); +} + template void similarity_compare( vertex_t num_vertices, @@ -96,9 +249,11 @@ void similarity_compare( intersection.begin()); auto expected_score = test_functor.compute_score( - vertex_degrees[v1], - vertex_degrees[v2], - static_cast(std::distance(intersection.begin(), intersection_end))); + static_cast(vertex_degrees[v1]), + static_cast(vertex_degrees[v2]), + static_cast(std::distance(intersection.begin(), intersection_end)), + static_cast(vertex_degrees[v1] + vertex_degrees[v2] - + std::distance(intersection.begin(), intersection_end))); EXPECT_TRUE(compare_functor(score, expected_score)) << "score mismatch, got " << score << ", expected " << expected_score; @@ -153,5 +308,55 @@ template void similarity_compare( std::vector& result_score, test_overlap_t const& test_functor); +//// + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_jaccard_t const& test_functor); + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_sorensen_t const& test_functor); + +template void weighted_similarity_compare( + int32_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_overlap_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, 
std::vector&> vertex_pairs, + std::vector& result_score, + test_jaccard_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_sorensen_t const& test_functor); + +template void weighted_similarity_compare( + int64_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& result_score, + test_overlap_t const& test_functor); + } // namespace test } // namespace cugraph diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index 0fbb3b40b39..5c312a768d0 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -29,10 +29,17 @@ struct test_jaccard_t { std::string testname{"Jaccard"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return static_cast(intersection_count) / - static_cast(u_size + v_size - intersection_count); + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return weight_t{0}; + } else { + return weight_a_intersect_b / weight_a_union_b; + } } template @@ -51,9 +58,17 @@ struct test_sorensen_t { std::string testname{"Sorensen"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return static_cast(2 * intersection_count) / static_cast(u_size + v_size); + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return 
weight_t{0}; + } else { + return (2 * weight_a_intersect_b) / (weight_a + weight_b); + } } template @@ -72,10 +87,17 @@ struct test_overlap_t { std::string testname{"Overlap"}; template - weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const + weight_t compute_score(weight_t weight_a, + weight_t weight_b, + weight_t weight_a_intersect_b, + weight_t weight_a_union_b) const { - return static_cast(intersection_count) / - static_cast(std::min(u_size, v_size)); + if (std::abs(static_cast(weight_a_union_b) - double{0}) < + double{2} / std::numeric_limits::max()) { + return weight_t{0}; + } else { + return weight_a_intersect_b / std::min(weight_a, weight_b); + } } template @@ -99,5 +121,13 @@ void similarity_compare( std::vector& similarity_score, test_t const& test_functor); +template +void weighted_similarity_compare( + vertex_t num_vertices, + std::tuple&, std::vector&, std::optional>&> + edge_list, + std::tuple&, std::vector&> vertex_pairs, + std::vector& similarity_score, + test_t const& test_functor); } // namespace test } // namespace cugraph diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp new file mode 100644 index 00000000000..ca644b76c5a --- /dev/null +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +struct Similarity_Usecase { + bool use_weights{false}; + bool check_correctness{true}; + size_t max_seeds{std::numeric_limits::max()}; + size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; +}; + +template +class Tests_Similarity + : public ::testing::TestWithParam> { + public: + Tests_Similarity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param, + test_functor_t const& test_functor) + { + constexpr bool renumber = true; + auto [similarity_usecase, input_usecase] = param; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 2. create SG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, similarity_usecase.use_weights, renumber, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. run similarity + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Similarity test"); + } + + // + // FIXME: Don't currently have an MG implementation of 2-hop neighbors. 
+ // For now we'll do that on the CPU (really slowly, so keep max_seed + // small) + // + rmm::device_uvector d_v1(0, handle.get_stream()); + rmm::device_uvector d_v2(0, handle.get_stream()); + + { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t max_vertices = std::min(static_cast(graph_view.number_of_vertices()), + similarity_usecase.max_seeds); + std::vector h_v1; + std::vector h_v2; + std::vector one_hop_v1; + std::vector one_hop_v2; + + for (size_t seed = 0; seed < max_vertices; ++seed) { + std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + [&one_hop_v1, &one_hop_v2, seed](auto t) { + auto u = thrust::get<0>(t); + auto v = thrust::get<1>(t); + if (u == seed) { + one_hop_v1.push_back(u); + one_hop_v2.push_back(v); + } + }); + } + + std::for_each(thrust::make_zip_iterator(one_hop_v1.begin(), one_hop_v2.begin()), + thrust::make_zip_iterator(one_hop_v1.end(), one_hop_v2.end()), + [&](auto t1) { + auto seed = thrust::get<0>(t1); + auto neighbor = thrust::get<1>(t1); + std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + [&](auto t2) { + auto u = thrust::get<0>(t2); + auto v = thrust::get<1>(t2); + if (u == neighbor) { + h_v1.push_back(seed); + h_v2.push_back(v); + } + }); + }); + + std::sort(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), + thrust::make_zip_iterator(h_v1.end(), h_v2.end())); + + auto end_iter = std::unique(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), + thrust::make_zip_iterator(h_v1.end(), h_v2.end()), + [](auto t1, auto t2) { + return (thrust::get<0>(t1) == thrust::get<0>(t2)) && + (thrust::get<1>(t1) == thrust::get<1>(t2)); + }); + + h_v1.resize( + thrust::distance(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), end_iter)); + h_v2.resize(h_v1.size()); + + d_v1.resize(h_v1.size(), handle.get_stream()); + d_v2.resize(h_v2.size(), 
handle.get_stream()); + + raft::update_device(d_v1.data(), h_v1.data(), h_v1.size(), handle.get_stream()); + raft::update_device(d_v2.data(), h_v2.data(), h_v2.size(), handle.get_stream()); + } + + // FIXME: Need to add some tests that specify actual vertex pairs + // FIXME: Need to a variation that calls call the two hop neighbors function + // FIXME: Debugging state as of EOD 9/28: + // 1) Tested case of no vertex pairs... works great :-) + // 2) Don't have a 2-hop on GPU yet. Perhaps write a 2-hop on CPU + // for now? We could then use that for testing the 2-hop function + // later. + std::tuple, raft::device_span> vertex_pairs{ + {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}}; + + auto result_score = test_functor.run( + handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t check_size = std::min(d_v1.size(), similarity_usecase.max_vertex_pairs_to_check); + + // + // FIXME: Need to reorder here. thrust::shuffle on the tuples (vertex_pairs_1, + // vertex_pairs_2, result_score) would + // be sufficient. 
+ // + std::vector h_vertex_pair_1(check_size); + std::vector h_vertex_pair_2(check_size); + std::vector h_result_score(check_size); + + raft::update_host( + h_vertex_pair_1.data(), std::get<0>(vertex_pairs).data(), check_size, handle.get_stream()); + raft::update_host( + h_vertex_pair_2.data(), std::get<1>(vertex_pairs).data(), check_size, handle.get_stream()); + raft::update_host( + h_result_score.data(), result_score.data(), check_size, handle.get_stream()); + + if (wgt && similarity_usecase.use_weights) { + weighted_similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } else { + similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } + } + } +}; + +using Tests_Similarity_File = Tests_Similarity; +using Tests_Similarity_Rmat = Tests_Similarity; + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + 
override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Similarity_File, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, + //: 100}), + ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Similarity_Rmat, + ::testing::Combine( + // enable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{true, true, 20, 100}, + //: Similarity_Usecase{false,true,20,100}), + 
::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Similarity_File, + ::testing::Combine( + // disable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Similarity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 0ff0a041a71..a7cd8a989b0 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -50,7 +50,10 @@ 
struct intersection_op_t { vertex_t v1, edge_t v0_prop /* out degree */, edge_t v1_prop /* out degree */, - raft::device_span intersection) const + raft::device_span intersection, + std::byte, /* dummy */ + std::byte /* dummy */ + ) const { return thrust::make_tuple(v0_prop + v1_prop, static_cast(intersection.size())); } @@ -160,6 +163,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, + cugraph::edge_dummy_property_t{}.view(), cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), mg_out_degrees.begin(), @@ -227,6 +231,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, sg_graph_view, + cugraph::edge_dummy_property_t{}.view(), cugraph::get_dataframe_buffer_begin( mg_aggregate_vertex_pair_buffer /* now unrenumbered */), cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered */), @@ -324,9 +329,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( ::testing::Values(Prims_Usecase{size_t{1024}, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), - cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), - cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_MGPerVPairTransformDstNbrIntersection_Rmat, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu new file mode 100644 index 00000000000..3b6a6b9c4c5 --- /dev/null +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +template +struct intersection_op_t { + __device__ thrust::tuple operator()( + vertex_t a, + vertex_t b, + weight_t weight_a /* weighted out degree */, + weight_t weight_b /* weighted out degree */, + raft::device_span intersection, + raft::device_span intersected_properties_a, + raft::device_span intersected_properties_b) const + { + weight_t min_weight_a_intersect_b = weight_t{0}; + weight_t max_weight_a_intersect_b = weight_t{0}; + weight_t sum_of_intersected_a = weight_t{0}; + weight_t sum_of_intersected_b = weight_t{0}; + + for (size_t k = 0; k < intersection.size(); k++) { + min_weight_a_intersect_b += min(intersected_properties_a[k], intersected_properties_b[k]); + max_weight_a_intersect_b += max(intersected_properties_a[k], intersected_properties_b[k]); + sum_of_intersected_a += intersected_properties_a[k]; + sum_of_intersected_b += intersected_properties_b[k]; + } + + weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; + weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b; + + max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b; + + return thrust::make_tuple(min_weight_a_intersect_b, 
max_weight_a_intersect_b); + } +}; + +struct Prims_Usecase { + size_t num_vertex_pairs{0}; + bool check_correctness{true}; +}; + +template +class Tests_MGPerVPairTransformDstNbrIntersection + : public ::testing::TestWithParam> { + public: + Tests_MGPerVPairTransformDstNbrIntersection() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + constexpr bool store_transposed = false; + constexpr bool multi_gpu = true; + + cugraph::graph_t mg_graph(*handle_); + std::optional< + cugraph::edge_property_t, + weight_t>> + mg_edge_weight{std::nullopt}; + + std::optional> mg_renumber_map{std::nullopt}; + + constexpr bool test_weighted = true; + constexpr bool renumber = true; + constexpr bool drop_self_loops = false; + constexpr bool drop_multi_edges = true; + + std::tie(mg_graph, mg_edge_weight, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges); + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = (*mg_edge_weight).view(); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 2. 
run MG per_v_pair_transform_dst_nbr_intersection primitive + + ASSERT_TRUE( + mg_graph_view.number_of_vertices() > + vertex_t{0}); // the code below to generate vertex pairs is invalid for an empty graph. + + auto mg_vertex_pair_buffer = + cugraph::allocate_dataframe_buffer>( + prims_usecase.num_vertex_pairs / comm_size + + (static_cast(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0), + handle_->get_stream()); + + thrust::tabulate( + handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) { + cuco::detail::MurmurHash3_32 + hash_func{}; // use hash_func to generate arbitrary vertex pairs + auto v0 = static_cast(hash_func(i + comm_rank) % num_vertices); + auto v1 = static_cast(hash_func(i + num_vertices + comm_rank) % num_vertices); + return thrust::make_tuple(v0, v1); + }); + + auto h_vertex_partition_range_lasts = mg_graph_view.vertex_partition_range_lasts(); + std::tie(std::get<0>(mg_vertex_pair_buffer), + std::get<1>(mg_vertex_pair_buffer), + std::ignore, + std::ignore, + std::ignore) = + cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + edge_t, + weight_t, + int32_t>(*handle_, + std::move(std::get<0>(mg_vertex_pair_buffer)), + std::move(std::get<1>(mg_vertex_pair_buffer)), + std::nullopt, + std::nullopt, + std::nullopt, + h_vertex_partition_range_lasts); + + auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); + auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection"); + } + + rmm::device_uvector 
mg_out_weight_sums = + compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); + + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + mg_graph_view, + mg_edge_weight_view, + cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer), + cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer), + mg_out_weight_sums.begin(), + intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(mg_result_buffer)); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. validate MG results + + if (prims_usecase.check_correctness) { + cugraph::unrenumber_int_vertices( + *handle_, + std::get<0>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*mg_renumber_map).data(), + h_vertex_partition_range_lasts); + cugraph::unrenumber_int_vertices( + *handle_, + std::get<1>(mg_vertex_pair_buffer).data(), + cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), + (*mg_renumber_map).data(), + h_vertex_partition_range_lasts); + + auto mg_aggregate_vertex_pair_buffer = + cugraph::allocate_dataframe_buffer>( + 0, handle_->get_stream()); + std::get<0>(mg_aggregate_vertex_pair_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<0>(mg_vertex_pair_buffer).data(), + std::get<0>(mg_vertex_pair_buffer).size()); + std::get<1>(mg_aggregate_vertex_pair_buffer) = + cugraph::test::device_gatherv(*handle_, + std::get<1>(mg_vertex_pair_buffer).data(), + std::get<1>(mg_vertex_pair_buffer).size()); + + auto mg_aggregate_result_buffer = + cugraph::allocate_dataframe_buffer>( + 0, handle_->get_stream()); + std::get<0>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( + *handle_, std::get<0>(mg_result_buffer).data(), std::get<0>(mg_result_buffer).size()); + std::get<1>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv( + *handle_, 
std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); + + cugraph::graph_t sg_graph(*handle_); + + std::optional< + cugraph::edge_property_t, + weight_t>> + sg_edge_weight{std::nullopt}; + + std::tie(sg_graph, sg_edge_weight, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight + ? std::make_optional(mg_edge_weight_view) + : std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + auto sg_result_buffer = + cugraph::allocate_dataframe_buffer>( + cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); + + rmm::device_uvector sg_out_weight_sums = + compute_out_weight_sums(*handle_, sg_graph_view, (*sg_edge_weight).view()); + + cugraph::per_v_pair_transform_dst_nbr_intersection( + *handle_, + sg_graph_view, + (*sg_edge_weight).view(), + cugraph::get_dataframe_buffer_begin( + mg_aggregate_vertex_pair_buffer /* now unrenumbered */), + cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered + */), sg_out_weight_sums.begin(), intersection_op_t{}, + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + bool valid = thrust::equal(handle_->get_thrust_policy(), + cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), + cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), + cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + + ASSERT_TRUE(valid); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGPerVPairTransformDstNbrIntersection::handle_ = nullptr; + +using Tests_MGPerVPairTransformDstNbrIntersection_File = + Tests_MGPerVPairTransformDstNbrIntersection; +using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = + Tests_MGPerVPairTransformDstNbrIntersection; + 
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGPerVPairTransformDstNbrIntersection_File, + ::testing::Combine( + 
::testing::Values(Prims_Usecase{size_t{10}, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 1fa869ac2df..0eff8dedc8f 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -154,6 +154,21 @@ read_edgelist_from_csv_file(raft::handle_t const& handle, bool store_transposed, bool multi_gpu); +template +std::tuple, + std::optional< + cugraph::edge_property_t, + weight_t>>, + std::optional>> +read_graph_from_csv_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + // alias for easy customization for debug purposes: // template @@ -521,6 +536,7 @@ mg_graph_to_sg_graph( bool renumber); // Only the rank 0 GPU holds the valid data + template std::tuple>, rmm::device_uvector> mg_vertex_property_values_to_sg_vertex_property_values( From 
b6de2676368501eaa3973d603e957248b689b951 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Mon, 31 Jul 2023 11:45:45 -0400 Subject: [PATCH 06/10] Fix bug discovered in Jaccard testing (#3758) A customer identified an issue trying to run Jaccard. In MG calls they were seeing failed memory allocation calls. Vertices were being shuffled incorrectly in the C API, so we were getting vertices processed on the wrong GPU, resulting in out-of-bounds memory references. Moved the shuffle before renumbering, which puts vertices to be on proper GPU Closes #3746 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/3758 --- cpp/src/c_api/graph_functions.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index 679b7475ef4..aedb8f8e287 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -146,6 +146,12 @@ struct two_hop_neighbors_functor : public cugraph::c_api::abstract_functor { start_vertices_->size_, handle_.get_stream()); + if constexpr (multi_gpu) { + start_vertices = + cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( + handle_, std::move(start_vertices)); + } + cugraph::renumber_ext_vertices( handle_, start_vertices.data(), @@ -155,11 +161,6 @@ struct two_hop_neighbors_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_last(), do_expensive_check_); - if constexpr (multi_gpu) { - start_vertices = - cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( - handle_, std::move(start_vertices)); - } } else { start_vertices.resize(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); cugraph::detail::sequence_fill(handle_.get_stream(), From f4627f868e6b0dbc0caffaa710287daa7ba988d1 
Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 2 Aug 2023 14:14:24 -0500 Subject: [PATCH 07/10] Pin `dask` and `distributed` for `23.08` release (#3761) This PR pins `dask` & `distributed` to `2023.7.1` version for `23.08` release. xref: https://github.com/rapidsai/cudf/pull/13802 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Brad Rees (https://github.com/BradReesWork) - Ray Douglass (https://github.com/raydouglass) - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/cugraph/pull/3761 --- ci/test_wheel_cugraph.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 6 +++--- conda/environments/all_cuda-120_arch-x86_64.yaml | 6 +++--- conda/recipes/cugraph-pyg/meta.yaml | 2 +- conda/recipes/cugraph-service/meta.yaml | 2 +- conda/recipes/cugraph/meta.yaml | 6 +++--- dependencies.yaml | 6 +++--- python/cugraph-service/server/pyproject.toml | 4 ++-- python/cugraph/pyproject.toml | 4 ++-- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ci/test_wheel_cugraph.sh b/ci/test_wheel_cugraph.sh index 90af5313619..e7175fbde88 100755 --- a/ci/test_wheel_cugraph.sh +++ b/ci/test_wheel_cugraph.sh @@ -9,7 +9,7 @@ RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-whe python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08 +python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.08 # Only download test data for x86 arch=$(uname -m) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 16a4d4f0dbc..94dab3814ae 100644 --- 
a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -18,11 +18,11 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=0.29,<0.30 -- dask-core>=2023.5.1 +- dask-core==2023.7.1 - dask-cuda==23.8.* - dask-cudf==23.8.* -- dask>=2023.5.1 -- distributed>=2023.5.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen - fsspec[http]>=0.6.0 - gcc_linux-64=11.* diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index f053184c93a..4018e9c8b2f 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -18,11 +18,11 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=0.29,<0.30 -- dask-core>=2023.5.1 +- dask-core==2023.7.1 - dask-cuda==23.8.* - dask-cudf==23.8.* -- dask>=2023.5.1 -- distributed>=2023.5.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen - fsspec[http]>=0.6.0 - gcc_linux-64=11.* diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 6ff0fa01c96..0202c5d98c9 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -26,7 +26,7 @@ requirements: - python - scikit-build >=0.13.1 run: - - distributed >=2023.5.1 + - distributed ==2023.7.1 - numba >=0.57 - numpy >=1.21 - python diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml index 4d7f013cac9..d928bae1de4 100644 --- a/conda/recipes/cugraph-service/meta.yaml +++ b/conda/recipes/cugraph-service/meta.yaml @@ -57,7 +57,7 @@ outputs: - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - - distributed >=2023.5.1 + - distributed ==2023.7.1 - numba >=0.57 - numpy >=1.21 - python diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index e2b9d38c181..8ca8773bd01 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -75,9 +75,9 @@ 
requirements: - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - - dask >=2023.5.1 - - dask-core >=2023.5.1 - - distributed >=2023.5.1 + - dask ==2023.7.1 + - dask-core ==2023.7.1 + - distributed ==2023.7.1 - libcugraph ={{ version }} - pylibcugraph ={{ version }} - pylibraft ={{ minor_version }} diff --git a/dependencies.yaml b/dependencies.yaml index 1af82ef8ebe..e5838ce5a4d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -322,8 +322,8 @@ dependencies: - output_types: [conda, pyproject] packages: - &cudf cudf==23.8.* - - &dask dask>=2023.5.1 - - &distributed distributed>=2023.5.1 + - &dask dask==2023.7.1 + - &distributed distributed==2023.7.1 - &dask_cuda dask-cuda==23.8.* - &dask_cudf dask-cudf==23.8.* - &numba numba>=0.57 @@ -333,7 +333,7 @@ dependencies: - output_types: conda packages: - &cupy cupy>=12.0.0 - - &dask-core dask-core>=2023.5.1 + - &dask-core dask-core==2023.7.1 - libcudf==23.8.* - nccl>=2.9.9 - ucx-proc=*=gpu diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml index 680811512c3..c0f789fea5f 100644 --- a/python/cugraph-service/server/pyproject.toml +++ b/python/cugraph-service/server/pyproject.toml @@ -25,8 +25,8 @@ dependencies = [ "cupy-cuda11x>=12.0.0", "dask-cuda==23.8.*", "dask-cudf==23.8.*", - "dask>=2023.5.1", - "distributed>=2023.5.1", + "dask==2023.7.1", + "distributed==2023.7.1", "numba>=0.57", "numpy>=1.21", "rmm==23.8.*", diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index 8dac14db659..034a38d2c4a 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -33,8 +33,8 @@ dependencies = [ "cupy-cuda11x>=12.0.0", "dask-cuda==23.8.*", "dask-cudf==23.8.*", - "dask>=2023.5.1", - "distributed>=2023.5.1", + "dask==2023.7.1", + "distributed==2023.7.1", "numba>=0.57", "pylibcugraph==23.8.*", "raft-dask==23.8.*", From f6543f635a95c481046c5f3bec777139590705a2 Mon Sep 17 00:00:00 2001 From: Seunghwa 
Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 3 Aug 2023 14:56:56 -0700 Subject: [PATCH 08/10] Change the renumber_sampled_edgelist function behavior. (#3762) There was a misalignment between the `renumber_sampled_edgelist` function behavior and what PyG and DGL need. This PR fixes this. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/3762 --- cpp/include/cugraph/graph_functions.hpp | 25 +- cpp/src/c_api/uniform_neighbor_sampling.cpp | 2 +- .../renumber_sampled_edgelist_impl.cuh | 680 +++++++++--------- .../sampling/renumber_sampled_edgelist_sg.cu | 4 +- .../renumber_sampled_edgelist_test.cu | 245 ++++--- 5 files changed, 521 insertions(+), 435 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index caffef60076..200ee725b7a 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -922,15 +922,16 @@ rmm::device_uvector select_random_vertices( * This function renumbers sampling function (e.g. uniform_neighbor_sample) outputs satisfying the * following requirements. * - * 1. Say @p edgelist_srcs has N unique vertices. These N unique vertices will be mapped to [0, N). - * 2. Among the N unique vertices, an original vertex with a smaller attached hop number will be - * renumbered to a smaller vertex ID than any other original vertices with a larger attached hop - * number (if @p edgelist_hops.has_value() is true). If a single vertex is attached to multiple hop - * numbers, the minimum hop number is used. - * 3. Say @p edgelist_dsts has M unique vertices that appear only in @p edgelist_dsts (the set of M - * unique vertices does not include any vertices that appear in @p edgelist_srcs). Then, these M - * unique vertices will be mapped to [N, N + M). - * 4. 
If label_offsets.has_value() is ture, edge lists for different labels will be renumbered + * 1. If @p edgelist_hops is valid, we can consider (vertex ID, flag=src, hop) triplets for each + * vertex ID in @p edgelist_srcs and (vertex ID, flag=dst, hop) triplets for each vertex ID in @p + * edgelist_dsts. From these triplets, we can find the minimum (hop, flag) pairs for every unique + * vertex ID (hop is the primary key and flag is the secondary key, flag=src is considered smaller + * than flag=dst if hop numbers are same). Vertex IDs with smaller (hop, flag) pairs precede vertex + * IDs with larger (hop, flag) pairs in renumbering. Ordering can be arbitrary among the vertices + * with the same (hop, flag) pairs. + * 2. If @p edgelist_hops is invalid, unique vertex IDs in @p edgelist_srcs precede vertex IDs that + * appear only in @p edgelist_dsts. + * 3. If label_offsets.has_value() is ture, edge lists for different labels will be renumbered * separately. * * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). @@ -940,10 +941,10 @@ rmm::device_uvector select_random_vertices( * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param edgelist_srcs A vector storing original edgelist source vertices. - * @param edgelist_hops An optional pointer to the array storing hops for each edge list source - * vertices (size = @p edgelist_srcs.size()). * @param edgelist_dsts A vector storing original edgelist destination vertices (size = @p * edgelist_srcs.size()). + * @param edgelist_hops An optional pointer to the array storing hops for each edge list (source, + * destination) pairs (size = @p edgelist_srcs.size() if valid). * @param label_offsets An optional tuple of unique labels and the input edge list (@p * edgelist_srcs, @p edgelist_hops, and @p edgelist_dsts) offsets for the labels (siez = # unique * labels + 1). 
@@ -962,8 +963,8 @@ std::tuple, renumber_sampled_edgelist( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, - std::optional> edgelist_hops, rmm::device_uvector&& edgelist_dsts, + std::optional> edgelist_hops, std::optional, raft::device_span>> label_offsets, bool do_expensive_check = false); diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index ff6a6c49437..caaba8e9c8d 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -236,9 +236,9 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct std::tie(src, dst, renumber_map, renumber_map_offsets) = cugraph::renumber_sampled_edgelist( handle_, std::move(src), + std::move(dst), hop ? std::make_optional(raft::device_span{hop->data(), hop->size()}) : std::nullopt, - std::move(dst), std::make_optional(std::make_tuple( raft::device_span{edge_label->data(), edge_label->size()}, raft::device_span{offsets->data(), offsets->size()})), diff --git a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh index a4a6d64029a..6fdb1c887f2 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh @@ -45,260 +45,70 @@ namespace cugraph { namespace { +// output sorted by (primary key:label_index, secondary key:vertex) template -std::tuple, std::optional>> -compute_renumber_map(raft::handle_t const& handle, - raft::device_span edgelist_srcs, - std::optional> edgelist_hops, - raft::device_span edgelist_dsts, - std::optional> label_offsets) +std::tuple> /* label indices */, + rmm::device_uvector /* vertices */, + std::optional> /* minimum hops for the vertices */, + std::optional> /* label offsets for the output */> +compute_min_hop_for_unique_label_vertex_pairs( + raft::handle_t const& handle, + raft::device_span vertices, + std::optional> hops, + std::optional> 
label_indices, + std::optional> label_offsets) { auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - std::optional> edgelist_label_indices{std::nullopt}; - if (label_offsets) { - edgelist_label_indices = - detail::expand_sparse_offsets(*label_offsets, label_index_t{0}, handle.get_stream()); - } + if (label_indices) { + auto num_labels = (*label_offsets).size() - 1; - std::optional> unique_label_src_pair_label_indices{ - std::nullopt}; - rmm::device_uvector unique_label_src_pair_vertices( - 0, handle.get_stream()); // sorted by (label, hop, src) - std::optional> sorted_srcs{ - std::nullopt}; // sorted by (label, src), relevant only when edgelist_hops is valid - { - if (label_offsets) { - rmm::device_uvector label_indices((*edgelist_label_indices).size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*edgelist_label_indices).begin(), - (*edgelist_label_indices).end(), - label_indices.begin()); + rmm::device_uvector tmp_label_indices((*label_indices).size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*label_indices).begin(), + (*label_indices).end(), + tmp_label_indices.begin()); - if (edgelist_hops) { - rmm::device_uvector srcs(edgelist_srcs.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), edgelist_srcs.begin(), edgelist_srcs.end(), srcs.begin()); - - rmm::device_uvector hops((*edgelist_hops).size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*edgelist_hops).begin(), - (*edgelist_hops).end(), - hops.begin()); - auto triplet_first = - thrust::make_zip_iterator(label_indices.begin(), srcs.begin(), hops.begin()); - thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + srcs.size()); - auto num_uniques = static_cast( - thrust::distance(triplet_first, - thrust::unique(handle.get_thrust_policy(), - triplet_first, - triplet_first + srcs.size(), 
- [] __device__(auto lhs, auto rhs) { - return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && - (thrust::get<1>(lhs) == thrust::get<1>(rhs)); - }))); - label_indices.resize(num_uniques, handle.get_stream()); - srcs.resize(num_uniques, handle.get_stream()); - hops.resize(num_uniques, handle.get_stream()); - label_indices.shrink_to_fit(handle.get_stream()); - srcs.shrink_to_fit(handle.get_stream()); - hops.shrink_to_fit(handle.get_stream()); - - auto num_labels = (*label_offsets).size() - 1; - rmm::device_uvector tmp_label_offsets(num_labels + 1, handle.get_stream()); - tmp_label_offsets.set_element_to_zero_async(0, handle.get_stream()); - thrust::upper_bound(handle.get_thrust_policy(), - label_indices.begin(), - label_indices.end(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(num_labels), - tmp_label_offsets.begin() + 1); - - unique_label_src_pair_label_indices = std::move(label_indices); - sorted_srcs = rmm::device_uvector(srcs.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), srcs.begin(), srcs.end(), (*sorted_srcs).begin()); - - rmm::device_uvector segment_sorted_srcs(srcs.size(), handle.get_stream()); - - rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( - handle, - tmp_label_offsets.data(), - static_cast(tmp_label_offsets.size() - 1), - hops.size(), - approx_edges_to_sort_per_iteration); - auto num_chunks = h_label_offsets.size() - 1; - size_t max_chunk_size{0}; - for (size_t i = 0; i < num_chunks; ++i) { - max_chunk_size = std::max(max_chunk_size, - static_cast(h_edge_offsets[i + 1] - h_edge_offsets[i])); - } - rmm::device_uvector segment_sorted_hops(max_chunk_size, handle.get_stream()); - - for (size_t i = 0; i < num_chunks; ++i) { - size_t tmp_storage_bytes{0}; - - auto offset_first = - thrust::make_transform_iterator(tmp_label_offsets.data() + h_label_offsets[i], - 
detail::shift_left_t{h_edge_offsets[i]}); - cub::DeviceSegmentedSort::SortPairs(static_cast(nullptr), - tmp_storage_bytes, - hops.begin() + h_edge_offsets[i], - segment_sorted_hops.begin(), - srcs.begin() + h_edge_offsets[i], - segment_sorted_srcs.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - - if (tmp_storage_bytes > d_tmp_storage.size()) { - d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); - } - - cub::DeviceSegmentedSort::SortPairs(d_tmp_storage.data(), - tmp_storage_bytes, - hops.begin() + h_edge_offsets[i], - segment_sorted_hops.begin(), - srcs.begin() + h_edge_offsets[i], - segment_sorted_srcs.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - } + rmm::device_uvector tmp_vertices(0, handle.get_stream()); + std::optional> tmp_hops{std::nullopt}; - unique_label_src_pair_vertices = std::move(segment_sorted_srcs); - } else { - rmm::device_uvector segment_sorted_srcs(edgelist_srcs.size(), - handle.get_stream()); - - rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( - handle, - (*label_offsets).data(), - static_cast((*label_offsets).size() - 1), - edgelist_srcs.size(), - approx_edges_to_sort_per_iteration); - auto num_chunks = h_label_offsets.size() - 1; - - for (size_t i = 0; i < num_chunks; ++i) { - size_t tmp_storage_bytes{0}; - - auto offset_first = - thrust::make_transform_iterator((*label_offsets).data() + h_label_offsets[i], - detail::shift_left_t{h_edge_offsets[i]}); - cub::DeviceSegmentedSort::SortKeys(static_cast(nullptr), - tmp_storage_bytes, - edgelist_srcs.begin() + h_edge_offsets[i], - segment_sorted_srcs.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - 
h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - - if (tmp_storage_bytes > d_tmp_storage.size()) { - d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); - } - - cub::DeviceSegmentedSort::SortKeys(d_tmp_storage.data(), - tmp_storage_bytes, - edgelist_srcs.begin() + h_edge_offsets[i], - segment_sorted_srcs.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - } - d_tmp_storage.resize(0, handle.get_stream()); - d_tmp_storage.shrink_to_fit(handle.get_stream()); - - auto pair_first = - thrust::make_zip_iterator(label_indices.begin(), segment_sorted_srcs.begin()); - auto num_uniques = static_cast(thrust::distance( - pair_first, - thrust::unique( - handle.get_thrust_policy(), pair_first, pair_first + label_indices.size()))); - label_indices.resize(num_uniques, handle.get_stream()); - segment_sorted_srcs.resize(num_uniques, handle.get_stream()); - label_indices.shrink_to_fit(handle.get_stream()); - segment_sorted_srcs.shrink_to_fit(handle.get_stream()); - - unique_label_src_pair_label_indices = std::move(label_indices); - unique_label_src_pair_vertices = std::move(segment_sorted_srcs); - } - } else { - rmm::device_uvector srcs(edgelist_srcs.size(), handle.get_stream()); + if (hops) { + tmp_vertices.resize(vertices.size(), handle.get_stream()); thrust::copy( - handle.get_thrust_policy(), edgelist_srcs.begin(), edgelist_srcs.end(), srcs.begin()); - - if (edgelist_hops) { - rmm::device_uvector hops((*edgelist_hops).size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*edgelist_hops).begin(), - (*edgelist_hops).end(), - hops.begin()); - - auto pair_first = thrust::make_zip_iterator( - srcs.begin(), hops.begin()); // src is a primary key, hop is a secondary key - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first 
+ srcs.size()); - srcs.resize( - thrust::distance(srcs.begin(), - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), srcs.begin(), srcs.end(), hops.begin()))), - handle.get_stream()); - hops.resize(srcs.size(), handle.get_stream()); - - sorted_srcs = rmm::device_uvector(srcs.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), srcs.begin(), srcs.end(), (*sorted_srcs).begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), hops.begin(), hops.end(), srcs.begin()); - } else { - thrust::sort(handle.get_thrust_policy(), srcs.begin(), srcs.end()); - srcs.resize( - thrust::distance(srcs.begin(), - thrust::unique(handle.get_thrust_policy(), srcs.begin(), srcs.end())), - handle.get_stream()); - srcs.shrink_to_fit(handle.get_stream()); - } - - unique_label_src_pair_vertices = std::move(srcs); - } - } - - std::optional> unique_label_dst_pair_label_indices{ - std::nullopt}; - rmm::device_uvector unique_label_dst_pair_vertices(0, handle.get_stream()); - { - rmm::device_uvector dsts(edgelist_dsts.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), edgelist_dsts.begin(), edgelist_dsts.end(), dsts.begin()); - if (label_offsets) { - rmm::device_uvector label_indices((*edgelist_label_indices).size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*edgelist_label_indices).begin(), - (*edgelist_label_indices).end(), - label_indices.begin()); - - rmm::device_uvector segment_sorted_dsts(dsts.size(), handle.get_stream()); + handle.get_thrust_policy(), vertices.begin(), vertices.end(), tmp_vertices.begin()); + tmp_hops = rmm::device_uvector((*hops).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), (*tmp_hops).begin()); + + auto triplet_first = thrust::make_zip_iterator( + tmp_label_indices.begin(), tmp_vertices.begin(), (*tmp_hops).begin()); + thrust::sort( + handle.get_thrust_policy(), triplet_first, triplet_first + tmp_label_indices.size()); + 
auto key_first = thrust::make_zip_iterator(tmp_label_indices.begin(), tmp_vertices.begin()); + auto num_uniques = static_cast( + thrust::distance(key_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + key_first, + key_first + tmp_label_indices.size(), + (*tmp_hops).begin())))); + tmp_label_indices.resize(num_uniques, handle.get_stream()); + tmp_vertices.resize(num_uniques, handle.get_stream()); + (*tmp_hops).resize(num_uniques, handle.get_stream()); + tmp_label_indices.shrink_to_fit(handle.get_stream()); + tmp_vertices.shrink_to_fit(handle.get_stream()); + (*tmp_hops).shrink_to_fit(handle.get_stream()); + } else { + rmm::device_uvector segment_sorted_vertices(vertices.size(), handle.get_stream()); rmm::device_uvector d_tmp_storage(0, handle.get_stream()); auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks(handle, (*label_offsets).data(), - static_cast((*label_offsets).size() - 1), - dsts.size(), + num_labels, + vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; @@ -310,8 +120,8 @@ compute_renumber_map(raft::handle_t const& handle, detail::shift_left_t{h_edge_offsets[i]}); cub::DeviceSegmentedSort::SortKeys(static_cast(nullptr), tmp_storage_bytes, - dsts.begin() + h_edge_offsets[i], - segment_sorted_dsts.begin() + h_edge_offsets[i], + vertices.begin() + h_edge_offsets[i], + segment_sorted_vertices.begin() + h_edge_offsets[i], h_edge_offsets[i + 1] - h_edge_offsets[i], h_label_offsets[i + 1] - h_label_offsets[i], offset_first, @@ -324,121 +134,329 @@ compute_renumber_map(raft::handle_t const& handle, cub::DeviceSegmentedSort::SortKeys(d_tmp_storage.data(), tmp_storage_bytes, - dsts.begin() + h_edge_offsets[i], - segment_sorted_dsts.begin() + h_edge_offsets[i], + vertices.begin() + h_edge_offsets[i], + segment_sorted_vertices.begin() + h_edge_offsets[i], h_edge_offsets[i + 1] - h_edge_offsets[i], h_label_offsets[i + 1] - h_label_offsets[i], offset_first, 
offset_first + 1, handle.get_stream()); } - dsts.resize(0, handle.get_stream()); d_tmp_storage.resize(0, handle.get_stream()); - dsts.shrink_to_fit(handle.get_stream()); d_tmp_storage.shrink_to_fit(handle.get_stream()); auto pair_first = - thrust::make_zip_iterator(label_indices.begin(), segment_sorted_dsts.begin()); + thrust::make_zip_iterator(tmp_label_indices.begin(), segment_sorted_vertices.begin()); auto num_uniques = static_cast(thrust::distance( pair_first, - thrust::unique(handle.get_thrust_policy(), pair_first, pair_first + label_indices.size()))); - label_indices.resize(num_uniques, handle.get_stream()); - segment_sorted_dsts.resize(num_uniques, handle.get_stream()); - label_indices.shrink_to_fit(handle.get_stream()); - segment_sorted_dsts.shrink_to_fit(handle.get_stream()); - - unique_label_dst_pair_label_indices = std::move(label_indices); - unique_label_dst_pair_vertices = std::move(segment_sorted_dsts); + thrust::unique( + handle.get_thrust_policy(), pair_first, pair_first + tmp_label_indices.size()))); + tmp_label_indices.resize(num_uniques, handle.get_stream()); + segment_sorted_vertices.resize(num_uniques, handle.get_stream()); + tmp_label_indices.shrink_to_fit(handle.get_stream()); + segment_sorted_vertices.shrink_to_fit(handle.get_stream()); + + tmp_vertices = std::move(segment_sorted_vertices); + } + + rmm::device_uvector tmp_label_offsets(num_labels + 1, handle.get_stream()); + tmp_label_offsets.set_element_to_zero_async(0, handle.get_stream()); + thrust::upper_bound(handle.get_thrust_policy(), + tmp_label_indices.begin(), + tmp_label_indices.end(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_labels), + tmp_label_offsets.begin() + 1); + + return std::make_tuple(std::move(tmp_label_indices), + std::move(tmp_vertices), + std::move(tmp_hops), + std::move(tmp_label_offsets)); + } else { + rmm::device_uvector tmp_vertices(vertices.size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), 
vertices.begin(), vertices.end(), tmp_vertices.begin()); + + if (hops) { + rmm::device_uvector tmp_hops((*hops).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), tmp_hops.begin()); + + auto pair_first = thrust::make_zip_iterator( + tmp_vertices.begin(), tmp_hops.begin()); // vertex is a primary key, hop is a secondary key + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_vertices.size()); + tmp_vertices.resize( + thrust::distance(tmp_vertices.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + tmp_hops.begin()))), + handle.get_stream()); + tmp_hops.resize(tmp_vertices.size(), handle.get_stream()); + + return std::make_tuple( + std::nullopt, std::move(tmp_vertices), std::move(tmp_hops), std::nullopt); } else { - thrust::sort(handle.get_thrust_policy(), dsts.begin(), dsts.end()); - dsts.resize( - thrust::distance(dsts.begin(), - thrust::unique(handle.get_thrust_policy(), dsts.begin(), dsts.end())), + thrust::sort(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end()); + tmp_vertices.resize( + thrust::distance( + tmp_vertices.begin(), + thrust::unique(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end())), handle.get_stream()); - dsts.shrink_to_fit(handle.get_stream()); + tmp_vertices.shrink_to_fit(handle.get_stream()); - unique_label_dst_pair_vertices = std::move(dsts); + return std::make_tuple(std::nullopt, std::move(tmp_vertices), std::nullopt, std::nullopt); } } +} + +template +std::tuple, std::optional>> +compute_renumber_map(raft::handle_t const& handle, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts, + std::optional> edgelist_hops, + std::optional> label_offsets) +{ + auto approx_edges_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + std::optional> 
edgelist_label_indices{std::nullopt}; + if (label_offsets) { + edgelist_label_indices = + detail::expand_sparse_offsets(*label_offsets, label_index_t{0}, handle.get_stream()); + } + + auto [unique_label_src_pair_label_indices, + unique_label_src_pair_vertices, + unique_label_src_pair_hops, + unique_label_src_pair_label_offsets] = + compute_min_hop_for_unique_label_vertex_pairs( + handle, + edgelist_srcs, + edgelist_hops, + edgelist_label_indices ? std::make_optional>( + (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) + : std::nullopt, + label_offsets); + + auto [unique_label_dst_pair_label_indices, + unique_label_dst_pair_vertices, + unique_label_dst_pair_hops, + unique_label_dst_pair_label_offsets] = + compute_min_hop_for_unique_label_vertex_pairs( + handle, + edgelist_dsts, + edgelist_hops, + edgelist_label_indices ? std::make_optional>( + (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) + : std::nullopt, + label_offsets); edgelist_label_indices = std::nullopt; if (label_offsets) { - auto label_src_pair_first = thrust::make_zip_iterator( - (*unique_label_src_pair_label_indices).begin(), - edgelist_hops ? 
(*sorted_srcs).begin() : unique_label_src_pair_vertices.begin()); - auto label_dst_pair_first = thrust::make_zip_iterator( - (*unique_label_dst_pair_label_indices).begin(), unique_label_dst_pair_vertices.begin()); - rmm::device_uvector output_label_indices( - (*unique_label_dst_pair_label_indices).size(), handle.get_stream()); - rmm::device_uvector output_vertices((*unique_label_dst_pair_label_indices).size(), - handle.get_stream()); - auto output_label_dst_pair_first = - thrust::make_zip_iterator(output_label_indices.begin(), output_vertices.begin()); - auto output_label_dst_pair_last = - thrust::set_difference(handle.get_thrust_policy(), - label_dst_pair_first, - label_dst_pair_first + (*unique_label_dst_pair_label_indices).size(), - label_src_pair_first, - label_src_pair_first + (*unique_label_src_pair_label_indices).size(), - output_label_dst_pair_first); - - sorted_srcs = std::nullopt; - output_label_indices.resize( - thrust::distance(output_label_dst_pair_first, output_label_dst_pair_last), - handle.get_stream()); - output_vertices.resize(output_label_indices.size(), handle.get_stream()); - output_label_indices.shrink_to_fit(handle.get_stream()); - output_vertices.shrink_to_fit(handle.get_stream()); - unique_label_dst_pair_label_indices = std::move(output_label_indices); - unique_label_dst_pair_vertices = std::move(output_vertices); + auto num_labels = (*label_offsets).size() - 1; - rmm::device_uvector merged_label_indices( + rmm::device_uvector renumber_map(0, handle.get_stream()); + rmm::device_uvector renumber_map_label_indices(0, handle.get_stream()); + + renumber_map.reserve( (*unique_label_src_pair_label_indices).size() + (*unique_label_dst_pair_label_indices).size(), handle.get_stream()); - rmm::device_uvector merged_vertices(merged_label_indices.size(), handle.get_stream()); - auto label_src_triplet_first = - thrust::make_zip_iterator((*unique_label_src_pair_label_indices).begin(), - thrust::make_constant_iterator(uint8_t{0}), - 
unique_label_src_pair_vertices.begin()); - auto label_dst_triplet_first = - thrust::make_zip_iterator((*unique_label_dst_pair_label_indices).begin(), - thrust::make_constant_iterator(uint8_t{1}), - unique_label_dst_pair_vertices.begin()); - thrust::merge( - handle.get_thrust_policy(), - label_src_triplet_first, - label_src_triplet_first + (*unique_label_src_pair_label_indices).size(), - label_dst_triplet_first, - label_dst_triplet_first + (*unique_label_dst_pair_label_indices).size(), - thrust::make_zip_iterator( - merged_label_indices.begin(), thrust::make_discard_iterator(), merged_vertices.begin())); - - return std::make_tuple(std::move(merged_vertices), std::move(merged_label_indices)); + renumber_map_label_indices.reserve(renumber_map.capacity(), handle.get_stream()); + + auto num_chunks = (edgelist_srcs.size() + (approx_edges_to_sort_per_iteration - 1)) / + approx_edges_to_sort_per_iteration; + auto chunk_size = (num_chunks > 0) ? ((num_labels + (num_chunks - 1)) / num_chunks) : 0; + + size_t copy_offset{0}; + for (size_t i = 0; i < num_chunks; ++i) { + auto src_start_offset = + (*unique_label_src_pair_label_offsets).element(chunk_size * i, handle.get_stream()); + auto src_end_offset = + (*unique_label_src_pair_label_offsets) + .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); + auto dst_start_offset = + (*unique_label_dst_pair_label_offsets).element(chunk_size * i, handle.get_stream()); + auto dst_end_offset = + (*unique_label_dst_pair_label_offsets) + .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); + + rmm::device_uvector merged_label_indices( + (src_end_offset - src_start_offset) + (dst_end_offset - dst_start_offset), + handle.get_stream()); + rmm::device_uvector merged_vertices(merged_label_indices.size(), + handle.get_stream()); + rmm::device_uvector merged_flags(merged_label_indices.size(), handle.get_stream()); + + if (edgelist_hops) { + rmm::device_uvector merged_hops(merged_label_indices.size(), 
handle.get_stream()); + auto src_quad_first = + thrust::make_zip_iterator((*unique_label_src_pair_label_indices).begin(), + unique_label_src_pair_vertices.begin(), + (*unique_label_src_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto dst_quad_first = + thrust::make_zip_iterator((*unique_label_dst_pair_label_indices).begin(), + unique_label_dst_pair_vertices.begin(), + (*unique_label_dst_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + src_quad_first + src_start_offset, + src_quad_first + src_end_offset, + dst_quad_first + dst_start_offset, + dst_quad_first + dst_end_offset, + thrust::make_zip_iterator(merged_label_indices.begin(), + merged_vertices.begin(), + merged_hops.begin(), + merged_flags.begin())); + + auto unique_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_vertices.begin()); + merged_label_indices.resize( + thrust::distance( + unique_key_first, + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + unique_key_first, + unique_key_first + merged_label_indices.size(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); + merged_hops.resize(merged_label_indices.size(), handle.get_stream()); + merged_flags.resize(merged_label_indices.size(), handle.get_stream()); + auto sort_key_first = thrust::make_zip_iterator( + merged_label_indices.begin(), merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } else { + auto src_triplet_first = + thrust::make_zip_iterator((*unique_label_src_pair_label_indices).begin(), + unique_label_src_pair_vertices.begin(), + thrust::make_constant_iterator(int8_t{0})); + auto dst_triplet_first = + 
thrust::make_zip_iterator((*unique_label_dst_pair_label_indices).begin(), + unique_label_dst_pair_vertices.begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge( + handle.get_thrust_policy(), + src_triplet_first + src_start_offset, + src_triplet_first + src_end_offset, + dst_triplet_first + dst_start_offset, + dst_triplet_first + dst_end_offset, + thrust::make_zip_iterator( + merged_label_indices.begin(), merged_vertices.begin(), merged_flags.begin())); + + auto unique_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_vertices.begin()); + merged_label_indices.resize( + thrust::distance( + unique_key_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + unique_key_first, + unique_key_first + merged_label_indices.size(), + merged_flags.begin()))), + handle.get_stream()); + merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); + merged_flags.resize(merged_label_indices.size(), handle.get_stream()); + auto sort_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } + + renumber_map.resize(copy_offset + merged_vertices.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + renumber_map.begin() + copy_offset); + renumber_map_label_indices.resize(copy_offset + merged_label_indices.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + merged_label_indices.begin(), + merged_label_indices.end(), + renumber_map_label_indices.begin() + copy_offset); + + copy_offset += merged_vertices.size(); + } + + renumber_map.shrink_to_fit(handle.get_stream()); + renumber_map_label_indices.shrink_to_fit(handle.get_stream()); + + return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_indices)); } else { - 
rmm::device_uvector output_vertices(unique_label_dst_pair_vertices.size(), - handle.get_stream()); - auto output_last = thrust::set_difference( - handle.get_thrust_policy(), - unique_label_dst_pair_vertices.begin(), - unique_label_dst_pair_vertices.end(), - edgelist_hops ? (*sorted_srcs).begin() : unique_label_src_pair_vertices.begin(), - edgelist_hops ? (*sorted_srcs).end() : unique_label_src_pair_vertices.end(), - output_vertices.begin()); - - sorted_srcs = std::nullopt; - - auto num_unique_srcs = unique_label_src_pair_vertices.size(); - auto renumber_map = std::move(unique_label_src_pair_vertices); - renumber_map.resize( - renumber_map.size() + thrust::distance(output_vertices.begin(), output_last), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - output_vertices.begin(), - output_last, - renumber_map.begin() + num_unique_srcs); + if (edgelist_hops) { + rmm::device_uvector merged_vertices( + unique_label_src_pair_vertices.size() + unique_label_dst_pair_vertices.size(), + handle.get_stream()); + rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); + rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); + auto src_triplet_first = thrust::make_zip_iterator(unique_label_src_pair_vertices.begin(), + (*unique_label_src_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto dst_triplet_first = thrust::make_zip_iterator(unique_label_dst_pair_vertices.begin(), + (*unique_label_dst_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + src_triplet_first, + src_triplet_first + unique_label_src_pair_vertices.size(), + dst_triplet_first, + dst_triplet_first + unique_label_dst_pair_vertices.size(), + thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); + + unique_label_src_pair_vertices.resize(0, handle.get_stream()); + unique_label_src_pair_vertices.shrink_to_fit(handle.get_stream()); + 
unique_label_src_pair_hops = std::nullopt; + unique_label_dst_pair_vertices.resize(0, handle.get_stream()); + unique_label_dst_pair_vertices.shrink_to_fit(handle.get_stream()); + unique_label_dst_pair_hops = std::nullopt; + + merged_vertices.resize( + thrust::distance(merged_vertices.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_hops.resize(merged_vertices.size(), handle.get_stream()); + merged_flags.resize(merged_vertices.size(), handle.get_stream()); + + auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + merged_vertices.begin()); + + return std::make_tuple(std::move(merged_vertices), std::nullopt); + } else { + rmm::device_uvector output_vertices(unique_label_dst_pair_vertices.size(), + handle.get_stream()); + auto output_last = thrust::set_difference(handle.get_thrust_policy(), + unique_label_dst_pair_vertices.begin(), + unique_label_dst_pair_vertices.end(), + unique_label_src_pair_vertices.begin(), + unique_label_src_pair_vertices.end(), + output_vertices.begin()); + + auto num_unique_srcs = unique_label_src_pair_vertices.size(); + auto renumber_map = std::move(unique_label_src_pair_vertices); + renumber_map.resize( + renumber_map.size() + thrust::distance(output_vertices.begin(), output_last), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + output_vertices.begin(), + output_last, + renumber_map.begin() + num_unique_srcs); - return std::make_tuple(std::move(renumber_map), std::nullopt); + return std::make_tuple(std::move(renumber_map), std::nullopt); + } } } @@ -452,8 +470,8 @@ std::tuple, renumber_sampled_edgelist( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, - std::optional> 
edgelist_hops, rmm::device_uvector&& edgelist_dsts, + std::optional> edgelist_hops, std::optional, raft::device_span>> label_offsets, bool do_expensive_check) @@ -504,8 +522,8 @@ renumber_sampled_edgelist( auto [renumber_map, renumber_map_label_indices] = compute_renumber_map( handle, raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - edgelist_hops, raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), + edgelist_hops, label_offsets ? std::make_optional>(std::get<1>(*label_offsets)) : std::nullopt); diff --git a/cpp/src/sampling/renumber_sampled_edgelist_sg.cu b/cpp/src/sampling/renumber_sampled_edgelist_sg.cu index 629fa45e1f9..46e2264a0c1 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_sg.cu +++ b/cpp/src/sampling/renumber_sampled_edgelist_sg.cu @@ -27,8 +27,8 @@ template std::tuple, renumber_sampled_edgelist( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, - std::optional> edgelist_hops, rmm::device_uvector&& edgelist_dsts, + std::optional> edgelist_hops, std::optional, raft::device_span>> label_offsets, bool do_expensive_check); @@ -40,8 +40,8 @@ template std::tuple, renumber_sampled_edgelist( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, - std::optional> edgelist_hops, rmm::device_uvector&& edgelist_dsts, + std::optional> edgelist_hops, std::optional, raft::device_span>> label_offsets, bool do_expensive_check); diff --git a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu b/cpp/tests/sampling/renumber_sampled_edgelist_test.cu index 6d944314605..96c8d6173e7 100644 --- a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu +++ b/cpp/tests/sampling/renumber_sampled_edgelist_test.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -25,9 +26,12 @@ #include +#include #include #include +#include #include +#include #include #include @@ -147,10 +151,10 @@ class Tests_RenumberSampledEdgelist cugraph::renumber_sampled_edgelist( handle, std::move(renumbered_edgelist_srcs), + 
std::move(renumbered_edgelist_dsts), edgelist_hops ? std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) : std::nullopt, - std::move(renumbered_edgelist_dsts), label_offsets ? std::make_optional< std::tuple, raft::device_span>>( @@ -173,6 +177,8 @@ class Tests_RenumberSampledEdgelist size_t edgelist_end_offset = label_offsets ? std::get<1>(*label_offsets).element(i + 1, handle.get_stream()) : usecase.num_sampled_edges; + if (edgelist_start_offset == edgelist_end_offset) continue; + auto this_label_org_edgelist_srcs = raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, edgelist_end_offset - edgelist_start_offset); @@ -229,11 +235,11 @@ class Tests_RenumberSampledEdgelist }); ASSERT_TRUE(num_renumber_errors == 0) << "Renumber error in edge list destinations."; - // check the invariants in renumber_map (1. vertices appeared in edge list sources should - // have a smaller renumbered vertex ID than the vertices appear only in edge list - // destinations, 2. edge list source vertices with a smaller minimum hop number should have - // a smaller renumbered vertex ID than the edge list source vertices with a larger hop - // number) + // Check the invariants in renumber_map + // Say we found the minimum (primary key:hop, secondary key:flag) pairs for every unique + // vertices, where flag is 0 for sources and 1 for destinations. Then, vertices with smaller + // (hop, flag) pairs should be renumbered to smaller numbers than vertices with larger (hop, + // flag) pairs. 
rmm::device_uvector unique_srcs(this_label_org_edgelist_srcs.size(), handle.get_stream()); @@ -277,27 +283,35 @@ class Tests_RenumberSampledEdgelist this_label_org_edgelist_dsts.begin(), this_label_org_edgelist_dsts.end(), unique_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end()); - unique_dsts.resize( - thrust::distance( - unique_dsts.begin(), - thrust::unique(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end())), - handle.get_stream()); + std::optional> unique_dst_hops = + this_label_edgelist_hops ? std::make_optional>( + (*this_label_edgelist_hops).size(), handle.get_stream()) + : std::nullopt; + if (this_label_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*this_label_edgelist_hops).begin(), + (*this_label_edgelist_hops).end(), + (*unique_dst_hops).begin()); - unique_dsts.resize( - thrust::distance( - unique_dsts.begin(), - thrust::remove_if(handle.get_thrust_policy(), - unique_dsts.begin(), - unique_dsts.end(), - [sorted_unique_srcs = raft::device_span( - unique_srcs.data(), unique_srcs.size())] __device__(auto dst) { - return thrust::binary_search(thrust::seq, - sorted_unique_srcs.begin(), - sorted_unique_srcs.end(), - dst); - })), - handle.get_stream()); + auto pair_first = + thrust::make_zip_iterator(unique_dsts.begin(), (*unique_dst_hops).begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_dsts.size()); + unique_dsts.resize( + thrust::distance(unique_dsts.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + unique_dsts.begin(), + unique_dsts.end(), + (*unique_dst_hops).begin()))), + handle.get_stream()); + (*unique_dst_hops).resize(unique_dsts.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end()); + unique_dsts.resize( + thrust::distance( + unique_dsts.begin(), + thrust::unique(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end())), + 
handle.get_stream()); + } rmm::device_uvector sorted_org_vertices(this_label_renumber_map.size(), handle.get_stream()); @@ -316,51 +330,56 @@ class Tests_RenumberSampledEdgelist sorted_org_vertices.end(), matching_renumbered_vertices.begin()); - auto max_src_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_srcs.begin(), - unique_srcs.end(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t src) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, - std::numeric_limits::lowest(), - thrust::maximum{}); - - auto min_dst_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_dsts.begin(), - unique_dsts.end(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t dst) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), dst); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, - std::numeric_limits::max(), - thrust::minimum{}); - - ASSERT_TRUE(max_src_renumbered_vertex < min_dst_renumbered_vertex) - << "Invariants violated, a source vertex is renumbered to a non-smaller value than a " - "vertex that appear only in the edge list destinations."; - if (this_label_edgelist_hops) { + rmm::device_uvector merged_vertices(unique_srcs.size() + unique_dsts.size(), + handle.get_stream()); + rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); + 
rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); + + auto src_triplet_first = + thrust::make_zip_iterator(unique_srcs.begin(), + (*unique_src_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto dst_triplet_first = + thrust::make_zip_iterator(unique_dsts.begin(), + (*unique_dst_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + src_triplet_first, + src_triplet_first + unique_srcs.size(), + dst_triplet_first, + dst_triplet_first + unique_dsts.size(), + thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); + merged_vertices.resize( + thrust::distance( + merged_vertices.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_hops.resize(merged_vertices.size(), handle.get_stream()); + merged_flags.resize(merged_vertices.size(), handle.get_stream()); + + auto sort_key_first = + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); thrust::sort_by_key(handle.get_thrust_policy(), - (*unique_src_hops).begin(), - (*unique_src_hops).end(), - unique_srcs.begin()); - rmm::device_uvector min_vertices(usecase.num_hops, handle.get_stream()); - rmm::device_uvector max_vertices(usecase.num_hops, handle.get_stream()); - auto unique_renumbered_src_first = thrust::make_transform_iterator( - unique_srcs.begin(), + sort_key_first, + sort_key_first + merged_hops.size(), + merged_vertices.begin()); + + auto num_unique_keys = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(merged_hops.size()), + cugraph::detail::is_first_in_run_t{sort_key_first}); + rmm::device_uvector min_vertices(num_unique_keys, handle.get_stream()); + rmm::device_uvector max_vertices(num_unique_keys, 
handle.get_stream()); + + auto renumbered_merged_vertex_first = thrust::make_transform_iterator( + merged_vertices.begin(), [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), matching_renumbered_vertices = raft::device_span( @@ -372,32 +391,27 @@ class Tests_RenumberSampledEdgelist it)]; }); - auto this_label_num_unique_hops = static_cast( - thrust::distance(min_vertices.begin(), - thrust::get<1>(thrust::reduce_by_key(handle.get_thrust_policy(), - (*unique_src_hops).begin(), - (*unique_src_hops).end(), - unique_renumbered_src_first, - thrust::make_discard_iterator(), - min_vertices.begin(), - thrust::equal_to{}, - thrust::minimum{})))); - min_vertices.resize(this_label_num_unique_hops, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - (*unique_src_hops).begin(), - (*unique_src_hops).end(), - unique_renumbered_src_first, + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, + thrust::make_discard_iterator(), + min_vertices.begin(), + thrust::equal_to>{}, + thrust::minimum{}); + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, thrust::make_discard_iterator(), max_vertices.begin(), - thrust::equal_to{}, + thrust::equal_to>{}, thrust::maximum{}); - max_vertices.resize(this_label_num_unique_hops, handle.get_stream()); auto num_violations = thrust::count_if(handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{1}), - thrust::make_counting_iterator(this_label_num_unique_hops), + thrust::make_counting_iterator(min_vertices.size()), [min_vertices = raft::device_span(min_vertices.data(), min_vertices.size()), max_vertices = raft::device_span( @@ -406,8 +420,61 @@ class Tests_RenumberSampledEdgelist }); ASSERT_TRUE(num_violations == 0) - << "Invariant violated, a vertex with a smaller hop is renumbered to a non-smaller " - "value than a vertex with a larger hop."; + 
<< "Invariant violated, a vertex with a smaller (hop,flag) pair is renumbered to a " + "larger value than a vertex with a larger (hop, flag) pair."; + } else { + unique_dsts.resize( + thrust::distance( + unique_dsts.begin(), + thrust::remove_if(handle.get_thrust_policy(), + unique_dsts.begin(), + unique_dsts.end(), + [sorted_unique_srcs = raft::device_span( + unique_srcs.data(), unique_srcs.size())] __device__(auto dst) { + return thrust::binary_search(thrust::seq, + sorted_unique_srcs.begin(), + sorted_unique_srcs.end(), + dst); + })), + handle.get_stream()); + + auto max_src_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + unique_srcs.begin(), + unique_srcs.end(), + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t src) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), + it)]; + }, + std::numeric_limits::lowest(), + thrust::maximum{}); + + auto min_dst_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + unique_dsts.begin(), + unique_dsts.end(), + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t dst) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), dst); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), + it)]; + }, + std::numeric_limits::max(), + thrust::minimum{}); + + ASSERT_TRUE(max_src_renumbered_vertex < min_dst_renumbered_vertex) + << "Invariants violated, a source vertex is 
renumbered to a non-smaller value than a " + "vertex that appear only in the edge list destinations."; } } } From 62ecea2e0539f97742ab6f217eaca960753fc8f0 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:22:34 +0100 Subject: [PATCH 09/10] fix inconsistent graph properties between the SG and the MG API (#3757) Several graph methods are failing, some being an effect of migrating away from cython.cu renumbering. This PR fixes couple graph methods and fixes the inconsistency in results returned by the SG and MG API closes #3740 closes #3766 Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3757 --- .../cugraph/structure/graph_classes.py | 7 +- .../simpleDistributedGraph.py | 138 +++++++++++------- .../graph_implementation/simpleGraph.py | 121 +++++++++++++-- .../cugraph/cugraph/structure/number_map.py | 55 ++++++- .../cugraph/cugraph/structure/symmetrize.py | 1 + python/cugraph/cugraph/testing/utils.py | 2 +- .../centrality/test_betweenness_centrality.py | 2 +- .../test_edge_betweenness_centrality_mg.py | 4 +- .../tests/community/test_balanced_cut.py | 4 +- .../community/test_induced_subgraph_mg.py | 4 +- .../tests/community/test_k_truss_subgraph.py | 8 +- .../tests/community/test_triangle_count_mg.py | 4 +- .../cugraph/tests/core/test_k_core_mg.py | 12 +- .../tests/link_analysis/test_pagerank.py | 4 +- .../cugraph/tests/nx/test_nx_convert.py | 6 +- .../tests/sampling/test_random_walks.py | 41 +++--- .../cugraph/tests/structure/test_graph.py | 127 ++++++++++++++-- .../cugraph/tests/structure/test_graph_mg.py | 53 ++++++- .../tests/structure/test_multigraph.py | 2 +- .../cugraph/cugraph/utilities/nx_factory.py | 8 +- python/pylibcugraph/pylibcugraph/graphs.pxd | 1 + python/pylibcugraph/pylibcugraph/graphs.pyx | 17 +-- .../pylibcugraph/uniform_random_walks.pyx | 
11 +- 23 files changed, 498 insertions(+), 134 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index b89ada9bf50..6f6c7e5a26c 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -68,11 +68,14 @@ def __init__(self, m_graph=None, directed=False): if isinstance(m_graph, MultiGraph): elist = m_graph.view_edge_list() if m_graph.is_weighted(): - weights = "weights" + weights = m_graph.weight_column else: weights = None self.from_cudf_edgelist( - elist, source="src", destination="dst", edge_attr=weights + elist, + source=m_graph.source_columns, + destination=m_graph.destination_columns, + edge_attr=weights, ) else: raise TypeError( diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index ae2c57f5ef3..90db2c6b1f5 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -79,6 +79,8 @@ def __init__(self, properties): self.properties = simpleDistributedGraphImpl.Properties(properties) self.source_columns = None self.destination_columns = None + self.weight_column = None + self.vertex_columns = None def _make_plc_graph( sID, @@ -175,6 +177,7 @@ def __from_edgelist( "and destination parameters" ) ddf_columns = s_col + d_col + self.vertex_columns = ddf_columns.copy() _client = default_client() workers = _client.scheduler_info()["workers"] # Repartition to 2 partitions per GPU for memory efficient process @@ -214,10 +217,11 @@ def __from_edgelist( # The symmetrize step may add additional edges with unknown # ids and types for an undirected graph. Therefore, only # directed graphs may be used with ids and types. 
+ # FIXME: Drop the check in symmetrize.py as it is redundant if len(edge_attr) == 3: if not self.properties.directed: raise ValueError( - "User-provided edge ids and edge " + "User-provided edge ids and/or edge " "types are not permitted for an " "undirected graph." ) @@ -285,6 +289,7 @@ def __from_edgelist( self.properties.renumber = renumber self.source_columns = source self.destination_columns = destination + self.weight_column = weight # If renumbering is not enabled, this function will only create # the edgelist_df and not do any renumbering. @@ -316,7 +321,6 @@ def __from_edgelist( ddf = ddf.map_partitions(lambda df: df.copy()) ddf = persist_dask_df_equal_parts_per_worker(ddf, _client) num_edges = len(ddf) - self._number_of_edges = num_edges ddf = get_persisted_df_worker_map(ddf, _client) delayed_tasks_d = { w: delayed(simpleDistributedGraphImpl._make_plc_graph)( @@ -356,6 +360,8 @@ def renumbered(self): def view_edge_list(self): """ + FIXME: Should this also return the edge ids and types? + Display the edge list. Compute it if needed. NOTE: If the graph is of type Graph() then the displayed undirected edges are the same as displayed by networkx Graph(), but the direction @@ -386,7 +392,59 @@ def view_edge_list(self): """ if self.edgelist is None: raise RuntimeError("Graph has no Edgelist.") - return self.edgelist.edgelist_df + + edgelist_df = self.input_df + is_string_dtype = False + is_multi_column = False + wgtCol = simpleDistributedGraphImpl.edgeWeightCol + if not self.properties.directed: + srcCol = self.source_columns + dstCol = self.destination_columns + if self.renumber_map.unrenumbered_id_type == "object": + # FIXME: Use the renumbered vertices instead and then un-renumber. + # This operation can be expensive. 
+ is_string_dtype = True + edgelist_df = self.edgelist.edgelist_df + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + + if isinstance(srcCol, list): + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + edgelist_df = self.edgelist.edgelist_df + # unrenumber before extracting the upper triangular part + if len(self.source_columns) == 1: + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + else: + is_multi_column = True + + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ + [srcCol, dstCol] + ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) + + edgelist_df = edgelist_df.groupby(by=[srcCol, dstCol]).sum().reset_index() + if wgtCol in edgelist_df.columns: + # FIXME: This breaks if there are are multi edges as those will + # be dropped during the symmetrization step and the original 'weight' + # will be halved. + edgelist_df[wgtCol] /= 2 + + if is_string_dtype or is_multi_column: + # unrenumber the vertices + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + + if self.properties.renumbered: + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + + # If there is no 'wgt' column, nothing will happen + edgelist_df = edgelist_df.rename(columns={wgtCol: self.weight_column}) + + self.properties.edge_count = len(edgelist_df) + return edgelist_df def delete_edge_list(self): """ @@ -405,23 +463,7 @@ def number_of_vertices(self): Get the number of nodes in the graph. 
""" if self.properties.node_count is None: - if self.edgelist is not None: - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? - else: - src_col_name = "src" - dst_col_name = "dst" - - ddf = self.edgelist.edgelist_df[[src_col_name, dst_col_name]] - # ddf = self.edgelist.edgelist_df[["src", "dst"]] - self.properties.node_count = ddf.max().max().compute() + 1 - else: - raise RuntimeError("Graph is Empty") + self.properties.node_count = len(self.nodes()) return self.properties.node_count def number_of_nodes(self): @@ -434,10 +476,16 @@ def number_of_edges(self, directed_edges=False): """ Get the number of edges in the graph. """ - if self.edgelist is not None: - return self._number_of_edges - else: - raise RuntimeError("Graph is Empty") + + if directed_edges and self.edgelist is not None: + return len(self.edgelist.edgelist_df) + + if self.properties.edge_count is None: + if self.edgelist is not None: + self.view_edge_list() + else: + raise RuntimeError("Graph is Empty") + return self.properties.edge_count def in_degree(self, vertex_subset=None): """ @@ -1021,19 +1069,8 @@ def edges(self): sources and destinations. It does not return the edge weights. For viewing edges with weights use view_edge_list() """ - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? 
- else: - src_col_name = "src" - dst_col_name = "dst" - # return self.view_edge_list()[["src", "dst"]] - return self.view_edge_list()[[src_col_name, dst_col_name]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ @@ -1045,23 +1082,26 @@ def nodes(self): a dataframe and do 'renumber_map.unrenumber' or 'G.unrenumber' """ - if self.renumbered: - # FIXME: This relies on current implementation - # of NumberMap, should not really expose - # this, perhaps add a method to NumberMap + if self.edgelist is not None: + if self.renumbered: + # FIXME: This relies on current implementation + # of NumberMap, should not really expose + # this, perhaps add a method to NumberMap - df = self.renumber_map.implementation.ddf.drop(columns="global_id") + df = self.renumber_map.implementation.ddf.drop(columns="global_id") - if len(df.columns) > 1: - return df - else: - return df[df.columns[0]] + if len(df.columns) > 1: + return df + else: + return df[df.columns[0]] + else: + df = self.input_df + return dask_cudf.concat( + [df[self.source_columns], df[self.destination_columns]] + ).drop_duplicates() else: - df = self.input_df - return dask_cudf.concat( - [df[self.source_columns], df[self.destination_columns]] - ).drop_duplicates() + raise RuntimeError("Graph is Empty") def neighbors(self, n): if self.edgelist is None: diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index d0c0ded5eb4..2690ab88c13 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -47,8 +47,8 @@ class simpleGraphImpl: class EdgeList: def __init__( self, - source: str, - destination: str, + source: cudf.Series, + destination: cudf.Series, edge_attr: Union[cudf.DataFrame, Dict[str, cudf.DataFrame]] = None, ): self.edgelist_df = cudf.DataFrame() @@ -96,6 +96,7 @@ def __init__(self, properties): 
def __init__(self, properties): # Structure self.edgelist = None + self.input_df = None self.adjlist = None self.transposedadjlist = None self.renumber_map = None @@ -109,6 +110,11 @@ def __init__(self, properties): self.batch_adjlists = None self.batch_transposed_adjlists = None + self.source_columns = None + self.destination_columns = None + self.vertex_columns = None + self.weight_column = None + # Functions # FIXME: Change to public function # FIXME: Make function more modular @@ -149,6 +155,7 @@ def __from_edgelist( "destination parameters" ) df_columns = s_col + d_col + self.vertex_columns = df_columns.copy() if edge_attr is not None: if weight is not None or edge_id is not None or edge_type is not None: @@ -212,9 +219,11 @@ def __from_edgelist( elist = input_df.compute().reset_index(drop=True) else: raise TypeError("input should be a cudf.DataFrame or a dask_cudf dataFrame") - - # Original, unmodified input dataframe. + # initial, unmodified input dataframe. self.input_df = elist + self.weight_column = weight + self.source_columns = source + self.destination_columns = destination # Renumbering self.renumber_map = None @@ -233,6 +242,8 @@ def __from_edgelist( # Use renumber_map to figure out if the python renumbering occured self.properties.renumbered = renumber_map.is_renumbered self.renumber_map = renumber_map + self.renumber_map.implementation.src_col_names = simpleGraphImpl.srcCol + self.renumber_map.implementation.dst_col_names = simpleGraphImpl.dstCol else: if type(source) is list and type(destination) is list: raise ValueError("set renumber to True for multi column ids") @@ -405,24 +416,104 @@ def view_edge_list(self): src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) - edgelist_df = self.edgelist.edgelist_df + srcCol = self.source_columns + dstCol = self.destination_columns + """ + Only use the initial input dataframe if the graph is directed with: + 1) single vertex column names with 
integer vertex type + 2) list of vertex column names of size 1 with integer vertex type + """ + use_initial_input_df = True + + if self.input_df is not None: + if type(srcCol) is list and type(dstCol) is list: + if len(srcCol) == 1: + srcCol = srcCol[0] + dstCol = dstCol[0] + if self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + # hypergraph case + use_initial_input_df = False + else: + use_initial_input_df = False + + elif self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + use_initial_input_df = False + else: + use_initial_input_df = False - if self.properties.renumbered: + if use_initial_input_df and self.properties.directed: + edgelist_df = self.input_df + else: + edgelist_df = self.edgelist.edgelist_df + if srcCol is None and dstCol is None: + srcCol = simpleGraphImpl.srcCol + dstCol = simpleGraphImpl.dstCol + + if use_initial_input_df and not self.properties.directed: + # unrenumber before extracting the upper triangular part + # case when the vertex column name is of size 1 + if self.properties.renumbered: + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.srcCol + ) + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.dstCol + ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + # extract the upper triangular part + edgelist_df = edgelist_df[edgelist_df[srcCol] <= edgelist_df[dstCol]] + else: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] + elif not use_initial_input_df and self.properties.renumbered: + # Do not unrenumber the vertices if the initial input df was used + if not self.properties.directed: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] edgelist_df = 
self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.dstCol ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) - # FIXME: revisit this approach - if not self.properties.directed: - edgelist_df = edgelist_df[ - edgelist_df[simpleGraphImpl.srcCol] - <= edgelist_df[simpleGraphImpl.dstCol] - ] - edgelist_df = edgelist_df.reset_index(drop=True) - self.properties.edge_count = len(edgelist_df) + if self.vertex_columns is not None and len(self.vertex_columns) == 2: + # single column vertices internally renamed to 'simpleGraphImpl.srcCol' + # and 'simpleGraphImpl.dstCol'. + if not set(self.vertex_columns).issubset(set(edgelist_df.columns)): + # Get the initial column names passed by the user. + if srcCol is not None and dstCol is not None: + edgelist_df = edgelist_df.rename( + columns={ + simpleGraphImpl.srcCol: srcCol, + simpleGraphImpl.dstCol: dstCol, + } + ) + + # FIXME: When renumbered, the MG API uses renumbered col names which + # is not consistant with the SG API. + + self.properties.edge_count = len(edgelist_df) + + wgtCol = simpleGraphImpl.edgeWeightCol + edgelist_df = edgelist_df.rename( + columns={wgtCol: self.weight_column} + ).reset_index(drop=True) return edgelist_df @@ -1175,7 +1266,7 @@ def edges(self): sources and destinations. It does not return the edge weights. 
For viewing edges with weights use view_edge_list() """ - return self.view_edge_list()[[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ diff --git a/python/cugraph/cugraph/structure/number_map.py b/python/cugraph/cugraph/structure/number_map.py index 481f99b9060..d7da20f9d84 100644 --- a/python/cugraph/cugraph/structure/number_map.py +++ b/python/cugraph/cugraph/structure/number_map.py @@ -25,6 +25,8 @@ class NumberMap: class SingleGPU: def __init__(self, df, src_col_names, dst_col_names, id_type, store_transposed): self.col_names = NumberMap.compute_vals(src_col_names) + # FIXME: rename the next two attributes to its singular conterpart as there + # is only one 'src' and 'dst' col name self.src_col_names = src_col_names self.dst_col_names = dst_col_names self.df = df @@ -141,6 +143,8 @@ def __init__( self, ddf, src_col_names, dst_col_names, id_type, store_transposed ): self.col_names = NumberMap.compute_vals(src_col_names) + self.src_col_names = src_col_names + self.dst_col_names = dst_col_names self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) self.val_types["count"] = np.int32 self.id_type = id_type @@ -258,6 +262,7 @@ def __init__( # The column name 'id' contains the renumbered vertices and the other column(s) # contain the original vertices self.df_internal_to_external = None + self.internal_to_external_col_names = {} @staticmethod def compute_vals_types(df, column_names): @@ -480,7 +485,15 @@ def renumber_and_segment( # For columns with mismatch dtypes, set the renumbered # id_type to either 'int32' or 'int64' - if df.dtypes.nunique() > 1: + if isinstance(src_col_names, list): + vertex_col_names = src_col_names.copy() + else: + vertex_col_names = [src_col_names] + if isinstance(dst_col_names, list): + vertex_col_names += dst_col_names + else: + vertex_col_names += [dst_col_names] + if df[vertex_col_names].dtypes.nunique() > 1: # can't determine the edgelist input type 
unrenumbered_id_type = None else: @@ -503,7 +516,9 @@ def renumber_and_segment( renumbered = True renumber_map = NumberMap(renumber_id_type, unrenumbered_id_type, renumbered) - if not isinstance(src_col_names, list): + renumber_map.input_src_col_names = src_col_names + renumber_map.input_dst_col_names = dst_col_names + if not isinstance(renumber_map.input_src_col_names, list): src_col_names = [src_col_names] dst_col_names = [dst_col_names] @@ -512,6 +527,10 @@ def renumber_and_segment( # renumbered_dst_col_name) renumber_map.set_renumbered_col_names(src_col_names, dst_col_names, df.columns) + # FIXME: Remove 'src_col_names' and 'dst_col_names' from this initialization as + # those will capture 'simpleGraph.srcCol' and 'simpleGraph.dstCol'. + # In fact the input src and dst col names are already captured in + # 'renumber_map.input_src_col_names' and 'renumber_map.input_dst_col_names'. if isinstance(df, cudf.DataFrame): renumber_map.implementation = NumberMap.SingleGPU( df, @@ -648,6 +667,35 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal mapping[nm] = nm + "_" + column_name col_names = list(mapping.values()) + if isinstance(self.input_src_col_names, list): + input_src_col_names = self.input_src_col_names.copy() + input_dst_col_names = self.input_dst_col_names.copy() + else: + # Assuming the src and dst columns are of the same length + # if they are lists. 
+ input_src_col_names = [self.input_src_col_names] + input_dst_col_names = [self.input_dst_col_names] + if not isinstance(col_names, list): + col_names = [col_names] + + if column_name in [ + self.renumbered_src_col_name, + self.implementation.src_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_src_col_names)) + ) + elif column_name in [ + self.renumbered_dst_col_name, + self.implementation.dst_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_dst_col_names)) + ) + + if len(self.implementation.col_names) == 1: + col_names = col_names[0] + if preserve_order: index_name = NumberMap.generate_unused_column_name(df) df[index_name] = df.index @@ -665,6 +713,9 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal df = df.map_partitions(lambda df: df.rename(columns=mapping, copy=False)) else: df = df.rename(columns=mapping, copy=False) + # FIXME: This parameter is not working as expected as it oesn't return + # the unrenumbered column names: leverage 'self.internal_to_external_col_names' + # instead. 
if get_column_names: return df, col_names else: diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 15011fa8dbc..4c00e68344d 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -230,6 +230,7 @@ def symmetrize( """ + # FIXME: Redundant check that should be done at the graph creation if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") diff --git a/python/cugraph/cugraph/testing/utils.py b/python/cugraph/cugraph/testing/utils.py index 0dae17ed14e..6d58076e6fe 100644 --- a/python/cugraph/cugraph/testing/utils.py +++ b/python/cugraph/cugraph/testing/utils.py @@ -407,7 +407,7 @@ def compare_mst(mst_cugraph, mst_nx): pass # check total weight - cg_sum = edgelist_df["weights"].sum() + cg_sum = edgelist_df[mst_cugraph.weight_column].sum() nx_sum = mst_nx_df["weight"].sum() print(cg_sum) print(nx_sum) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index 3e4dd3af4fc..db34c68a054 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -118,7 +118,7 @@ def calc_betweenness_centrality( ) M = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": edge_attr} ) Gnx = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index aa41f8e1c82..97e503e5428 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -16,7 +16,7 @@ import dask_cudf from 
pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core +from cugraph.experimental.datasets import DATASETS_UNDIRECTED import cugraph import cugraph.dask as dcg @@ -41,7 +41,7 @@ def setup_function(): # email_Eu_core is too expensive to test -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = DATASETS_UNDIRECTED # ============================================================================= diff --git a/python/cugraph/cugraph/tests/community/test_balanced_cut.py b/python/cugraph/cugraph/tests/community/test_balanced_cut.py index 0a95a1846ce..f6c1a741011 100644 --- a/python/cugraph/cugraph/tests/community/test_balanced_cut.py +++ b/python/cugraph/cugraph/tests/community/test_balanced_cut.py @@ -102,7 +102,7 @@ def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): gc.collect() @@ -111,7 +111,7 @@ def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): # read_weights_in_sp=True => value column dtype is float32 G = graph_file.get_graph() NM = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": "weight"} ) G = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 3a6a6e0d409..d93fa3b547d 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -90,8 +90,8 @@ def input_expected_output(input_combo): # Sample k vertices from the cuGraph graph # FIXME: Leverage the method 'select_random_vertices' instead 
- srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()["0"] + dsts = G.view_edge_list()["1"] vertices = cudf.concat([srcs, dsts]).drop_duplicates() vertices = vertices.sample(num_seeds).astype("int32") diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index b0dcc2ede3d..c1f8f4c3546 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -39,7 +39,7 @@ def setup_function(): # currently in networkx master and will hopefully will make it to a release # soon. def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weights", float),)) + G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weight", float),)) df = nx.to_pandas_edgelist(G) return df @@ -50,18 +50,18 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): edgelist_df = k_truss_cugraph.view_edge_list() src = edgelist_df["src"] dst = edgelist_df["dst"] - wgt = edgelist_df["weights"] + wgt = edgelist_df["weight"] assert len(edgelist_df) == len(k_truss_nx) for i in range(len(src)): has_edge = ( (k_truss_nx["source"] == src[i]) & (k_truss_nx["target"] == dst[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() has_opp_edge = ( (k_truss_nx["source"] == dst[i]) & (k_truss_nx["target"] == src[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() assert has_edge or has_opp_edge return True diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 2cf0525d2ad..0f7bb14581f 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -69,8 +69,8 @@ def 
input_expected_output(dask_client, input_combo): if start_list: # sample k nodes from the cuGraph graph k = random.randint(1, 10) - srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()[G.source_columns] + dsts = G.view_edge_list()[G.destination_columns] nodes = cudf.concat([srcs, dsts]).drop_duplicates() start_list = nodes.sample(k) else: diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index c68108ce241..7f4eeeb69d5 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -83,9 +83,12 @@ def input_expected_output(dask_client, input_combo): ) sg_k_core_results = sg_k_core_graph.view_edge_list() # FIXME: The result will come asymetric. Symmetrize the results + srcCol = sg_k_core_graph.source_columns + dstCol = sg_k_core_graph.destination_columns + wgtCol = sg_k_core_graph.weight_column sg_k_core_results = ( - symmetrize_df(sg_k_core_results, "src", "dst", "weights") - .sort_values(["src", "dst"]) + symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol) + .sort_values([srcCol, dstCol]) .reset_index(drop=True) ) @@ -144,7 +147,10 @@ def test_dask_k_core(dask_client, benchmark, input_expected_output): expected_k_core_results = input_expected_output["sg_k_core_results"] k_core_results = ( - k_core_results.compute().sort_values(["src", "dst"]).reset_index(drop=True) + k_core_results.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"weights": "weight"}) ) assert_frame_equal( diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index 8e8ab13574d..9d9572b88b2 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -187,11 +187,11 @@ def test_pagerank( G = 
graph_file.get_graph(create_using=cugraph.Graph(directed=True)) if has_precomputed_vertex_out_weight == 1: - df = G.view_edge_list()[["src", "weights"]] + df = G.view_edge_list()[["src", "wgt"]] pre_vtx_o_wgt = ( df.groupby(["src"], as_index=False) .sum() - .rename(columns={"src": "vertex", "weights": "sums"}) + .rename(columns={"src": "vertex", "wgt": "sums"}) ) cugraph_pr = cugraph_call( diff --git a/python/cugraph/cugraph/tests/nx/test_nx_convert.py b/python/cugraph/cugraph/tests/nx/test_nx_convert.py index 58b89a4bda9..e20897572d0 100644 --- a/python/cugraph/cugraph/tests/nx/test_nx_convert.py +++ b/python/cugraph/cugraph/tests/nx/test_nx_convert.py @@ -25,8 +25,9 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert nxG.number_of_edges() == cuG.number_of_edges() cu_df = cuG.view_edge_list().to_pandas() + cu_df = cu_df.rename(columns={"0": "src", "1": "dst"}) if has_wt is True: - cu_df = cu_df.drop(columns=["weights"]) + cu_df = cu_df.drop(columns=["weight"]) out_of_order = cu_df[cu_df["src"] > cu_df["dst"]] if len(out_of_order) > 0: @@ -72,12 +73,11 @@ def test_networkx_compatibility(graph_file): # create a cuGraph Directed Graph gdf = cudf.from_pandas(M) - gdf = gdf.rename(columns={"weight": "weights"}) cuG = cugraph.from_cudf_edgelist( gdf, source="0", destination="1", - edge_attr="weights", + edge_attr="weight", create_using=cugraph.Graph(directed=True), ) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py index 48629fa03a6..9c94e036683 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py @@ -76,7 +76,7 @@ def calc_random_walks(G, max_depth=None, use_padding=False, legacy_result_type=T """ assert G is not None - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") k = random.randint(1, 6) @@ -136,8 +136,9 @@ def 
check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type e_wgt_paths = path_data[1] e_wgt_idx = 0 - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") df_G = G.input_df + if "weight" in df_G.columns: df_G = df_G.rename(columns={"weight": "wgt"}) @@ -176,17 +177,18 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type else: # check valid edge wgt - expected_wgt = edge["wgt"].iloc[0] - result_wgt = e_wgt_paths.iloc[e_wgt_idx] - - if expected_wgt != result_wgt: - print( - "[ERR] Invalid edge wgt: " - "The edge src {} dst {} has wgt {} but got {}".format( - src, dst, expected_wgt, result_wgt + if G.is_weighted(): + expected_wgt = edge["wgt"].iloc[0] + result_wgt = e_wgt_paths.iloc[e_wgt_idx] + + if expected_wgt != result_wgt: + print( + "[ERR] Invalid edge wgt: " + "The edge src {} dst {} has wgt {} but got {}".format( + src, dst, expected_wgt, result_wgt + ) ) - ) - invalid_edge_wgt += 1 + invalid_edge_wgt += 1 e_wgt_idx += 1 if src != -1 and dst == -1: @@ -195,9 +197,10 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type assert invalid_seeds == 0 assert invalid_edge == 0 - assert invalid_edge_wgt == 0 assert len(v_paths) == (max_depth) * len(seeds) - assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) + if G.is_weighted(): + assert invalid_edge_wgt == 0 + assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) if legacy_result_type: sizes = path_data[2] @@ -298,11 +301,15 @@ def test_random_walks_nx(graph_file): M = G.to_pandas_edgelist() + source = G.source_columns + target = G.destination_columns + edge_attr = G.weight_column + Gnx = nx.from_pandas_edgelist( M, - source="src", - target="dst", - edge_attr="weights", + source=source, + target=target, + edge_attr=edge_attr, create_using=nx.DiGraph(), ) max_depth = random.randint(2, 10) diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py 
b/python/cugraph/cugraph/tests/structure/test_graph.py index a80c47662e2..de306309ca4 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -62,8 +62,8 @@ def compare_graphs(nx_graph, cu_graph): edgelist_df = cu_graph.view_edge_list().reset_index(drop=True) df = cudf.DataFrame() - df["source"] = edgelist_df["src"] - df["target"] = edgelist_df["dst"] + df["source"] = edgelist_df["source"] + df["target"] = edgelist_df["target"] if len(edgelist_df.columns) > 2: df["weight"] = edgelist_df["weights"] cu_to_nx_graph = nx.from_pandas_edgelist( @@ -319,10 +319,10 @@ def test_edges_for_Graph(graph_file): edges.append([edge[1], edge[0]]) else: edges.append([edge[0], edge[1]]) - nx_edge_list = cudf.DataFrame(list(edges), columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(list(edges), columns=["0", "1"]) assert_frame_equal( - nx_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), - cu_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), + nx_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), + cu_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), check_dtype=False, ) @@ -344,7 +344,8 @@ def test_view_edge_list_for_Graph(graph_file): G = cugraph.from_cudf_edgelist( cu_M, source="0", destination="1", create_using=cugraph.Graph ) - cu_edge_list = G.view_edge_list().sort_values(["src", "dst"]) + + cu_edge_list = G.view_edge_list().sort_values(["0", "1"]) # Check if number of Edges is same assert len(nx_edges) == len(cu_edge_list) @@ -359,12 +360,12 @@ def test_view_edge_list_for_Graph(graph_file): edges.append([edge[0], edge[1]]) edges = list(edges) edges.sort() - nx_edge_list = cudf.DataFrame(edges, columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(edges, columns=["0", "1"]) # Compare nx and cugraph edges when viewing edgelist # assert cu_edge_list.equals(nx_edge_list) - assert (cu_edge_list["src"].to_numpy() == nx_edge_list["src"].to_numpy()).all() - 
assert (cu_edge_list["dst"].to_numpy() == nx_edge_list["dst"].to_numpy()).all() + assert (cu_edge_list["0"].to_numpy() == nx_edge_list["0"].to_numpy()).all() + assert (cu_edge_list["1"].to_numpy() == nx_edge_list["1"].to_numpy()).all() # Test @@ -682,8 +683,8 @@ def test_to_pandas_edgelist(graph_file): G = cugraph.Graph() G.from_cudf_edgelist(cu_M, source="0", destination="1") - assert "s" in G.to_pandas_edgelist("s", "d").columns - assert "s" in G.to_pandas_edgelist(source="s", destination="d").columns + assert "0" in G.to_pandas_edgelist("0", "1").columns + assert "0" in G.to_pandas_edgelist(source="0", destination="1").columns @pytest.mark.sg @@ -877,3 +878,107 @@ def test_graph_creation_edge_properties(graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +def test_graph_creation_edges(graph_file, directed, renumber): + # Verifies that the input dataframe passed the user is the same + # retrieved from the graph when the graph is directed + srcCol = "source" + dstCol = "target" + wgtCol = "weight" + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + + G = cugraph.Graph(directed=directed) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + 
.reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + else: + # If the graph is undirected, ensures that only the upper triangular + # matrix of the adjacency matrix is returned + if isinstance(srcCol, list): + srcCol = srcCol[0] + dstCol = dstCol[0] + is_upper_triangular = edge_list_view[srcCol] <= edge_list_view[dstCol] + is_upper_triangular = list(set(is_upper_triangular.values_host)) + assert len(is_upper_triangular) == 1 + assert is_upper_triangular[0] + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +def test_graph_creation_edges_multi_col_vertices(graph_file, directed): + srcCol = ["src_0", "src_1"] + dstCol = ["dst_0", "dst_1"] + wgtCol = "weight" + vertexCol = srcCol + dstCol + columns = vertexCol.copy() + columns.append(wgtCol) + + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol[0], dstCol[0], wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + input_df["src_1"] = input_df["src_0"] + 1000 + input_df["dst_1"] = input_df["dst_0"] + 1000 + + G = cugraph.Graph(directed=directed) + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + input_df = input_df.loc[:, columns] + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + .reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) diff --git 
a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index 707b195dfa8..3024e50402a 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -338,7 +338,7 @@ def test_mg_select_random_vertices( assert len(join) == len(sampled_vertices) -@pytest.mark.sg +@pytest.mark.mg @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize( "edge_props", @@ -363,3 +363,54 @@ def test_graph_creation_edge_properties(dask_client, graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_dask_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +@pytest.mark.parametrize("graph_file", datasets) +def test_graph_creation_properties(dask_client, graph_file, directed, renumber): + srcCol = "src" + dstCol = "dst" + wgtCol = "wgt" + df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + ddf = dask_cudf.from_cudf(df, npartitions=2) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + + sG = cugraph.Graph(directed=directed) + mG = cugraph.Graph(directed=directed) + sG.from_cudf_edgelist(df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + mG.from_dask_cudf_edgelist(ddf, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + sG_edgelist_view = ( + sG.view_edge_list() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + mG_edgelist_view = ( + mG.view_edge_list() + .compute() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + + assert sG.number_of_nodes() == 
mG.number_of_nodes() + assert sG.number_of_edges() == mG.number_of_edges() + assert_frame_equal(sG_edgelist_view, mG_edgelist_view, check_dtype=False) diff --git a/python/cugraph/cugraph/tests/structure/test_multigraph.py b/python/cugraph/cugraph/tests/structure/test_multigraph.py index af78c238d4e..a9ea617fdb8 100644 --- a/python/cugraph/cugraph/tests/structure/test_multigraph.py +++ b/python/cugraph/cugraph/tests/structure/test_multigraph.py @@ -47,7 +47,7 @@ def test_multigraph(graph_file): assert G.number_of_nodes() == Gnx.number_of_nodes() cuedges = cugraph.to_pandas_edgelist(G) cuedges.rename( - columns={"src": "source", "dst": "target", "weights": "weight"}, inplace=True + columns={"src": "source", "dst": "target", "wgt": "weight"}, inplace=True ) cuedges["weight"] = cuedges["weight"].round(decimals=3) nxedges = nx.to_pandas_edgelist(Gnx).astype( diff --git a/python/cugraph/cugraph/utilities/nx_factory.py b/python/cugraph/cugraph/utilities/nx_factory.py index 2448a511229..d07d17978d7 100644 --- a/python/cugraph/cugraph/utilities/nx_factory.py +++ b/python/cugraph/cugraph/utilities/nx_factory.py @@ -236,11 +236,15 @@ def cugraph_to_nx(G): pdf = G.view_edge_list().to_pandas() num_col = len(pdf.columns) + source = G.source_columns + target = G.destination_columns + if num_col == 2: - Gnx = nx.from_pandas_edgelist(pdf, source="src", target="dst") + Gnx = nx.from_pandas_edgelist(pdf, source=source, target=target) else: + edge_attr = G.weight_column Gnx = nx.from_pandas_edgelist( - pdf, source="src", target="dst", edge_attr="weights" + pdf, source=source, target=target, edge_attr=edge_attr ) return Gnx diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd index 4e52ed557ed..a2df44ba26e 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pxd +++ b/python/pylibcugraph/pylibcugraph/graphs.pxd @@ -25,6 +25,7 @@ from pylibcugraph._cugraph_c.graph cimport ( cdef class _GPUGraph: cdef cugraph_graph_t* c_graph_ptr cdef 
cugraph_type_erased_device_array_view_t* edge_id_view_ptr + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr cdef class SGGraph(_GPUGraph): pass diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index fb4692bf3a8..33a8a09c6f4 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -166,11 +166,10 @@ cdef class SGGraph(_GPUGraph): dst_or_index_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( + + self.weights_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) - self.edge_id_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( edge_id_array @@ -187,7 +186,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -205,7 +204,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -224,7 +223,7 @@ cdef class SGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_or_offsets_view_ptr) cugraph_type_erased_device_array_view_free(dsts_or_indices_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: @@ -337,7 +336,7 @@ cdef class MGGraph(_GPUGraph): create_cugraph_type_erased_device_array_view_from_py_obj( dst_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ + self.weights_view_ptr = \ 
create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) @@ -355,7 +354,7 @@ cdef class MGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_view_ptr, dsts_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -369,7 +368,7 @@ cdef class MGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_view_ptr) cugraph_type_erased_device_array_view_free(dsts_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx index 4a2b8a70189..1570523beb8 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx @@ -96,6 +96,8 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef uintptr_t cai_start_ptr = \ start_vertices.__cuda_array_interface__["data"][0] + cdef cugraph_type_erased_device_array_view_t* weights_ptr + cdef cugraph_type_erased_device_array_view_t* start_ptr = \ cugraph_type_erased_device_array_view_create( cai_start_ptr, @@ -113,14 +115,17 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* path_ptr = \ cugraph_random_walk_result_get_paths(result_ptr) - cdef cugraph_type_erased_device_array_view_t* weights_ptr = \ - cugraph_random_walk_result_get_weights(result_ptr) + + if input_graph.weights_view_ptr is NULL: + cupy_weights = None + else: + weights_ptr = cugraph_random_walk_result_get_weights(result_ptr) + cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) max_path_length = \ cugraph_random_walk_result_get_max_path_length(result_ptr) cupy_paths = 
copy_to_cupy_array(c_resource_handle_ptr, path_ptr) - cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) cugraph_random_walk_result_free(result_ptr) cugraph_type_erased_device_array_view_free(start_ptr) From 5204c36757e3f3c61af8ada52fef4779a663d756 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 9 Aug 2023 14:16:01 -0500 Subject: [PATCH 10/10] Updates latest dask versions needed for testing 23.10. --- ci/test_wheel_cugraph.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_wheel_cugraph.sh b/ci/test_wheel_cugraph.sh index a117e00b8a2..1c356ba3073 100755 --- a/ci/test_wheel_cugraph.sh +++ b/ci/test_wheel_cugraph.sh @@ -9,7 +9,7 @@ RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-whe python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 # Only download test data for x86 arch=$(uname -m)