Merge branch 'branch-24.04' of https://github.com/rapidsai/cugraph into enh_edge_mask_perf
rapidsai · Feb 6, 2024 · 8116905 · 8116905
2 parents 6ee6ad9 + bf5aa60
commit 8116905
Show file tree

Hide file tree

Showing 12 changed files with 62 additions and 38 deletions.
diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
@@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g')
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
+# Download wheels built during this job.
+RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
+RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
+python -m pip install ./local-deps/*.whl
+
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names

diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh
@@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g')
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
+# Download wheels built during this job.
+RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
+RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
+python -m pip install ./local-deps/*.whl
+
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names

diff --git a/cpp/libcugraph_etl/CMakeLists.txt b/cpp/libcugraph_etl/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -190,7 +190,7 @@ rapids_export(INSTALL cugraph_etl
 
 ################################################################################
 # - build export ---------------------------------------------------------------
-rapids_export(BUILD cugraph
+rapids_export(BUILD cugraph_etl
     EXPORT_SET cugraph_etl-exports
     GLOBAL_TARGETS cugraph cugraph_c cugraph_etl
     NAMESPACE cugraph::

diff --git a/cpp/libcugraph_etl/src/renumbering.cu b/cpp/libcugraph_etl/src/renumbering.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -776,15 +776,15 @@ struct renumber_functor {
     for (int i = 0; i < src_view.num_columns(); i++) {
       auto str_col_view = cudf::strings_column_view(src_view.column(i));
       src_vertex_chars_ptrs.push_back(
-        const_cast<char_type*>(str_col_view.chars().data<char_type>()));
+        const_cast<char_type*>(str_col_view.parent().data<char_type>()));
       src_vertex_offset_ptrs.push_back(
         const_cast<str_offset_type*>(str_col_view.offsets().data<str_offset_type>()));
     }
 
     for (int i = 0; i < dst_view.num_columns(); i++) {
       auto str_col_view = cudf::strings_column_view(dst_view.column(i));
       dst_vertex_chars_ptrs.push_back(
-        const_cast<char_type*>(str_col_view.chars().data<char_type>()));
+        const_cast<char_type*>(str_col_view.parent().data<char_type>()));
       dst_vertex_offset_ptrs.push_back(
         const_cast<str_offset_type*>(str_col_view.offsets().data<str_offset_type>()));
     }
@@ -970,13 +970,14 @@ struct renumber_functor {
                                      std::move(unrenumber_col1_chars),
                                      rmm::device_buffer{},
                                      0);
+    auto str_col_1_contents = str_col_1->release();
 
     renumber_table_vectors.push_back(
       cudf::make_strings_column(size_type(key_value_count),
                                 std::move(offset_col_1),
-                                std::move(str_col_1),
+                                std::move(*str_col_1_contents.data),
                                 0,
-                                rmm::device_buffer(size_type(0), exec_strm)));
+                                std::move(*str_col_1_contents.null_mask)));
 
     auto offset_col_2 =
       std::make_unique<cudf::column>(cudf::data_type(cudf::type_id::INT32),
@@ -991,13 +992,14 @@ struct renumber_functor {
                                      std::move(unrenumber_col2_chars),
                                      rmm::device_buffer{},
                                      0);
+    auto str_col_2_contents = str_col_2->release();
 
     renumber_table_vectors.push_back(
       cudf::make_strings_column(size_type(key_value_count),
                                 std::move(offset_col_2),
-                                std::move(str_col_2),
+                                std::move(*str_col_2_contents.data),
                                 0,
-                                rmm::device_buffer(size_type(0), exec_strm)));
+                                std::move(*str_col_2_contents.null_mask)));
 
     // make table from string columns - did at the end
 

diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -446,7 +446,7 @@ def _process_sampled_df_csc(
     major_offsets = cast_to_tensor(df.major_offsets.dropna())
     label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna())
     renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna())
-    renumber_map = cast_to_tensor(df.map.dropna())
+    renumber_map = cast_to_tensor(df["map"].dropna())
     minors = cast_to_tensor(df.minors.dropna())
 
     n_batches = len(renumber_map_offsets) - 1

diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -181,7 +181,9 @@ def _write_samples_to_parquet_csr(
             [
                 cudf.Series(minors_array[results_start:results_end], name="minors"),
                 cudf.Series(
-                    renumber_map.map.values[renumber_map_start:renumber_map_end],
+                    renumber_map.renumber_map.values[
+                        renumber_map_start:renumber_map_end
+                    ],
                     name="map",
                 ),
                 label_hop_offsets_current_partition,
@@ -299,7 +301,7 @@ def _write_samples_to_parquet_coo(
             else:
                 renumber_map_end_ix = offsets_z.renumber_map_offsets.iloc[0]
 
-            renumber_map_p = renumber_map.map.iloc[
+            renumber_map_p = renumber_map.renumber_map.iloc[
                 renumber_map_start_ix:renumber_map_end_ix
             ]
 

diff --git a/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py b/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -120,9 +120,9 @@ def sample_neighbors(
             return self._get_edgeid_type_d(sampled_df)
         else:
             return (
-                sampled_df[src_n].values,
-                sampled_df[dst_n].values,
-                sampled_df["indices"].values,
+                sampled_df[src_n].astype("float").values,
+                sampled_df[dst_n].astype("float").values,
+                sampled_df["indices"].astype("float").values,
             )
 
     def _get_edgeid_type_d(self, df):
@@ -134,7 +134,11 @@ def _get_edgeid_type_d(self, df):
             for etype, etype_id in self.etype_id_dict.items()
         }
         return {
-            etype: (df[src_n].values, df[dst_n].values, df["indices"].values)
+            etype: (
+                df[src_n].astype("float").values,
+                df[dst_n].astype("float").values,
+                df["indices"].astype("float").values,
+            )
             for etype, df in result_d.items()
         }
 

diff --git a/python/cugraph/cugraph/sampling/sampling_utilities.py b/python/cugraph/cugraph/sampling/sampling_utilities.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -74,7 +74,7 @@ def sampling_results_from_cupy_array_dict(
         if renumber:
             renumber_df = cudf.DataFrame(
                 {
-                    "map": cupy_array_dict["renumber_map"],
+                    "renumber_map": cupy_array_dict["renumber_map"],
                 }
             )
 

diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -222,7 +222,7 @@ def test_bulk_sampler_partitions(scratch_dir):
         ]
 
         recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-        recovered_map = recovered_samples.map
+        recovered_map = recovered_samples["map"]
         recovered_samples = recovered_samples.drop("map", axis=1).dropna()
 
         for current_batch_id in range(start_batch_id, end_batch_id + 1):

diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -166,7 +166,7 @@ def test_bulk_sampler_partitions(dask_client, scratch_dir, mg_input):
         ]
 
         recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
-        recovered_map = recovered_samples.map
+        recovered_map = recovered_samples["map"]
         recovered_samples = recovered_samples.drop("map", axis=1).dropna()
 
         for current_batch_id in range(start_batch_id, end_batch_id + 1):

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -796,7 +796,9 @@ def test_uniform_neighbor_sample_renumber(hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
     assert (renumber_map.batch_id == 0).all()
 
@@ -854,7 +856,9 @@ def test_uniform_neighbor_sample_offset_renumber(hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
 
     renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
@@ -902,8 +906,8 @@ def test_uniform_neighbor_sample_csr_csc_global(hops, seed):
     minors = sampling_results["minors"].dropna()
     assert len(majors) == len(minors)
 
-    majors = renumber_map.map.iloc[majors]
-    minors = renumber_map.map.iloc[minors]
+    majors = renumber_map.renumber_map.iloc[majors]
+    minors = renumber_map.renumber_map.iloc[minors]
 
     for i in range(len(majors)):
         assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
@@ -952,8 +956,8 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed):
         majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
         majors = majors.repeat(cupy.diff(major_offsets))
 
-        majors = renumber_map.map.iloc[majors]
-        minors = renumber_map.map.iloc[minors]
+        majors = renumber_map.renumber_map.iloc[majors]
+        minors = renumber_map.renumber_map.iloc[minors]
 
         for i in range(len(majors)):
             assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py
@@ -1015,7 +1015,7 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops):
 
     assert (renumber_map.batch_id == 0).all()
     assert (
-        renumber_map.map.nunique()
+        renumber_map.renumber_map.nunique()
         == cudf.concat(
             [sources_hop_0, sampling_results_renumbered.destinations]
         ).nunique()
@@ -1091,7 +1091,9 @@ def test_uniform_neighbor_sample_offset_renumber(dask_client, hops):
         expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
 
         assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
-            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+            renumber_map.renumber_map[
+                0 : len(expected_renumber_map)
+            ].values_host.tolist()
         )
 
     renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
@@ -1153,8 +1155,8 @@ def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed):
     minors = sampling_results["minors"].dropna()
     assert len(majors) == len(minors)
 
-    majors = renumber_map.map.iloc[majors]
-    minors = renumber_map.map.iloc[minors]
+    majors = renumber_map.renumber_map.iloc[majors]
+    minors = renumber_map.renumber_map.iloc[minors]
 
     for i in range(len(majors)):
         assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
@@ -1221,8 +1223,8 @@ def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed):
         majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
         majors = majors.repeat(cupy.diff(major_offsets))
 
-        majors = renumber_map.map.iloc[majors]
-        minors = renumber_map.map.iloc[minors]
+        majors = renumber_map.renumber_map.iloc[majors]
+        minors = renumber_map.renumber_map.iloc[minors]
 
         for i in range(len(majors)):
             assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])