Skip to content

Commit

Permalink
Merge branch 'branch-24.04' of https://github.com/rapidsai/cugraph into enh_edge_mask_perf
Browse files Browse the repository at this point in the history
  • Loading branch information
seunghwak committed Feb 6, 2024
2 parents 6ee6ad9 + bf5aa60 commit 8116905
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 38 deletions.
5 changes: 5 additions & 0 deletions ci/test_wheel_cugraph-dgl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g')
mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

# Download wheels built during this job.
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

# use 'ls' to expand wildcard before adding `[extra]` requires for pip
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
# pip creates wheels using python package names
Expand Down
5 changes: 5 additions & 0 deletions ci/test_wheel_cugraph-pyg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g')
mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

# Download wheels built during this job.
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

# use 'ls' to expand wildcard before adding `[extra]` requires for pip
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
# pip creates wheels using python package names
Expand Down
4 changes: 2 additions & 2 deletions cpp/libcugraph_etl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,7 +190,7 @@ rapids_export(INSTALL cugraph_etl

################################################################################
# - build export ---------------------------------------------------------------
rapids_export(BUILD cugraph
rapids_export(BUILD cugraph_etl
EXPORT_SET cugraph_etl-exports
GLOBAL_TARGETS cugraph cugraph_c cugraph_etl
NAMESPACE cugraph::
Expand Down
16 changes: 9 additions & 7 deletions cpp/libcugraph_etl/src/renumbering.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -776,15 +776,15 @@ struct renumber_functor {
for (int i = 0; i < src_view.num_columns(); i++) {
auto str_col_view = cudf::strings_column_view(src_view.column(i));
src_vertex_chars_ptrs.push_back(
const_cast<char_type*>(str_col_view.chars().data<char_type>()));
const_cast<char_type*>(str_col_view.parent().data<char_type>()));
src_vertex_offset_ptrs.push_back(
const_cast<str_offset_type*>(str_col_view.offsets().data<str_offset_type>()));
}

for (int i = 0; i < dst_view.num_columns(); i++) {
auto str_col_view = cudf::strings_column_view(dst_view.column(i));
dst_vertex_chars_ptrs.push_back(
const_cast<char_type*>(str_col_view.chars().data<char_type>()));
const_cast<char_type*>(str_col_view.parent().data<char_type>()));
dst_vertex_offset_ptrs.push_back(
const_cast<str_offset_type*>(str_col_view.offsets().data<str_offset_type>()));
}
Expand Down Expand Up @@ -970,13 +970,14 @@ struct renumber_functor {
std::move(unrenumber_col1_chars),
rmm::device_buffer{},
0);
auto str_col_1_contents = str_col_1->release();

renumber_table_vectors.push_back(
cudf::make_strings_column(size_type(key_value_count),
std::move(offset_col_1),
std::move(str_col_1),
std::move(*str_col_1_contents.data),
0,
rmm::device_buffer(size_type(0), exec_strm)));
std::move(*str_col_1_contents.null_mask)));

auto offset_col_2 =
std::make_unique<cudf::column>(cudf::data_type(cudf::type_id::INT32),
Expand All @@ -991,13 +992,14 @@ struct renumber_functor {
std::move(unrenumber_col2_chars),
rmm::device_buffer{},
0);
auto str_col_2_contents = str_col_2->release();

renumber_table_vectors.push_back(
cudf::make_strings_column(size_type(key_value_count),
std::move(offset_col_2),
std::move(str_col_2),
std::move(*str_col_2_contents.data),
0,
rmm::device_buffer(size_type(0), exec_strm)));
std::move(*str_col_2_contents.null_mask)));

// make table from string columns - did at the end

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -446,7 +446,7 @@ def _process_sampled_df_csc(
major_offsets = cast_to_tensor(df.major_offsets.dropna())
label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna())
renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna())
renumber_map = cast_to_tensor(df.map.dropna())
renumber_map = cast_to_tensor(df["map"].dropna())
minors = cast_to_tensor(df.minors.dropna())

n_batches = len(renumber_map_offsets) - 1
Expand Down
8 changes: 5 additions & 3 deletions python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -181,7 +181,9 @@ def _write_samples_to_parquet_csr(
[
cudf.Series(minors_array[results_start:results_end], name="minors"),
cudf.Series(
renumber_map.map.values[renumber_map_start:renumber_map_end],
renumber_map.renumber_map.values[
renumber_map_start:renumber_map_end
],
name="map",
),
label_hop_offsets_current_partition,
Expand Down Expand Up @@ -299,7 +301,7 @@ def _write_samples_to_parquet_coo(
else:
renumber_map_end_ix = offsets_z.renumber_map_offsets.iloc[0]

renumber_map_p = renumber_map.map.iloc[
renumber_map_p = renumber_map.renumber_map.iloc[
renumber_map_start_ix:renumber_map_end_ix
]

Expand Down
14 changes: 9 additions & 5 deletions python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -120,9 +120,9 @@ def sample_neighbors(
return self._get_edgeid_type_d(sampled_df)
else:
return (
sampled_df[src_n].values,
sampled_df[dst_n].values,
sampled_df["indices"].values,
sampled_df[src_n].astype("float").values,
sampled_df[dst_n].astype("float").values,
sampled_df["indices"].astype("float").values,
)

def _get_edgeid_type_d(self, df):
Expand All @@ -134,7 +134,11 @@ def _get_edgeid_type_d(self, df):
for etype, etype_id in self.etype_id_dict.items()
}
return {
etype: (df[src_n].values, df[dst_n].values, df["indices"].values)
etype: (
df[src_n].astype("float").values,
df[dst_n].astype("float").values,
df["indices"].astype("float").values,
)
for etype, df in result_d.items()
}

Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/sampling/sampling_utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -74,7 +74,7 @@ def sampling_results_from_cupy_array_dict(
if renumber:
renumber_df = cudf.DataFrame(
{
"map": cupy_array_dict["renumber_map"],
"renumber_map": cupy_array_dict["renumber_map"],
}
)

Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_bulk_sampler_partitions(scratch_dir):
]

recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
recovered_map = recovered_samples.map
recovered_map = recovered_samples["map"]
recovered_samples = recovered_samples.drop("map", axis=1).dropna()

for current_batch_id in range(start_batch_id, end_batch_id + 1):
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -166,7 +166,7 @@ def test_bulk_sampler_partitions(dask_client, scratch_dir, mg_input):
]

recovered_samples = cudf.read_parquet(os.path.join(samples_path, file))
recovered_map = recovered_samples.map
recovered_map = recovered_samples["map"]
recovered_samples = recovered_samples.drop("map", axis=1).dropna()

for current_batch_id in range(start_batch_id, end_batch_id + 1):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -796,7 +796,9 @@ def test_uniform_neighbor_sample_renumber(hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
renumber_map.renumber_map[
0 : len(expected_renumber_map)
].values_host.tolist()
)
assert (renumber_map.batch_id == 0).all()

Expand Down Expand Up @@ -854,7 +856,9 @@ def test_uniform_neighbor_sample_offset_renumber(hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
renumber_map.renumber_map[
0 : len(expected_renumber_map)
].values_host.tolist()
)

renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
Expand Down Expand Up @@ -902,8 +906,8 @@ def test_uniform_neighbor_sample_csr_csc_global(hops, seed):
minors = sampling_results["minors"].dropna()
assert len(majors) == len(minors)

majors = renumber_map.map.iloc[majors]
minors = renumber_map.map.iloc[minors]
majors = renumber_map.renumber_map.iloc[majors]
minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down Expand Up @@ -952,8 +956,8 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed):
majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
majors = majors.repeat(cupy.diff(major_offsets))

majors = renumber_map.map.iloc[majors]
minors = renumber_map.map.iloc[minors]
majors = renumber_map.renumber_map.iloc[majors]
minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops):

assert (renumber_map.batch_id == 0).all()
assert (
renumber_map.map.nunique()
renumber_map.renumber_map.nunique()
== cudf.concat(
[sources_hop_0, sampling_results_renumbered.destinations]
).nunique()
Expand Down Expand Up @@ -1091,7 +1091,9 @@ def test_uniform_neighbor_sample_offset_renumber(dask_client, hops):
expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()

assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
renumber_map.renumber_map[
0 : len(expected_renumber_map)
].values_host.tolist()
)

renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
Expand Down Expand Up @@ -1153,8 +1155,8 @@ def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed):
minors = sampling_results["minors"].dropna()
assert len(majors) == len(minors)

majors = renumber_map.map.iloc[majors]
minors = renumber_map.map.iloc[minors]
majors = renumber_map.renumber_map.iloc[majors]
minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down Expand Up @@ -1221,8 +1223,8 @@ def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed):
majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
majors = majors.repeat(cupy.diff(major_offsets))

majors = renumber_map.map.iloc[majors]
minors = renumber_map.map.iloc[minors]
majors = renumber_map.renumber_map.iloc[majors]
minors = renumber_map.renumber_map.iloc[minors]

for i in range(len(majors)):
assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
Expand Down

0 comments on commit 8116905

Please sign in to comment.