Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Address primitives performance regression with the added edge masking support (when edge masking is not in use) #4119

Merged
merged 29 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a36eb62
update tests to include edge masking
seunghwak Jan 6, 2024
8bdc17e
remove unnecessary multiply by 1.0
seunghwak Jan 7, 2024
9751332
update to support edge masking
seunghwak Jan 10, 2024
a2e99fb
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 10, 2024
a3adff9
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 18, 2024
b646a10
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 20, 2024
4e4b72b
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 22, 2024
050b78a
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 22, 2024
3782e1b
rename call_e_op_t to transform_reduce_v_frontier_call_e_op_t to avoi…
seunghwak Jan 23, 2024
23874af
fewer checks for edge_partition_e_mask.has_value() if edge masking …
seunghwak Jan 24, 2024
5e611c6
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 24, 2024
65104df
copyright year
seunghwak Jan 24, 2024
55927e9
reduce code repetition
seunghwak Jan 24, 2024
58799d9
remove debug printout
seunghwak Jan 24, 2024
a421a76
get large value by reference
seunghwak Jan 24, 2024
a413978
copyright year
seunghwak Jan 24, 2024
304b5c0
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 24, 2024
b2308d6
code refactor for re-use for other primitives
seunghwak Jan 24, 2024
d6653db
code cleanup
seunghwak Jan 25, 2024
022be70
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 25, 2024
e9c3b7e
fewer edge_partition_e_mask.has_value() checks if edge masking is disabled
seunghwak Jan 25, 2024
71fa51d
Merge branch 'branch-24.02' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 26, 2024
fe2bf35
minimize performance regression in transform_reduce_e with the edge m…
seunghwak Jan 26, 2024
be93f4f
update to reduce performance overhead with edge masking support
seunghwak Jan 30, 2024
061f2fa
copyright year
seunghwak Jan 30, 2024
4692c02
Merge branch 'branch-24.04' of https://github.com/rapidsai/cugraph in…
seunghwak Jan 30, 2024
127af88
code cosmetics
seunghwak Jan 30, 2024
6ee6ad9
Merge branch 'branch-24.04' of https://github.com/rapidsai/cugraph in…
seunghwak Feb 2, 2024
8116905
Merge branch 'branch-24.04' of https://github.com/rapidsai/cugraph in…
seunghwak Feb 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 48 additions & 25 deletions cpp/src/prims/detail/nbr_intersection.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ struct pick_min_degree_t {
}
};

template <typename InputKeyIterator0,
template <bool check_edge_mask,
typename InputKeyIterator0,
typename InputKeyIterator1,
typename InputValueIterator0, // should be void* if invalid
typename InputValueIterator1, // should be void* if invalid
Expand Down Expand Up @@ -356,10 +357,14 @@ __device__ edge_t set_intersection_by_key_with_mask(InputKeyIterator0 input_key_
auto output_idx = output_start_offset;
while ((idx0 < (input_start_offset0 + input_size0)) &&
(idx1 < (input_start_offset1 + input_size1))) {
bool valid0 = apply_mask0 ? check_bit_set(idx0) : true;
bool valid1 = apply_mask1 ? check_bit_set(idx1) : true;
if (!valid0) { ++idx0; }
if (!valid1) { ++idx1; }
bool valid0 = true;
bool valid1 = true;
if constexpr (check_edge_mask) {
valid0 = apply_mask0 ? check_bit_set(idx0) : true;
valid1 = apply_mask1 ? check_bit_set(idx1) : true;
if (!valid0) { ++idx0; }
if (!valid1) { ++idx1; }
}

if (valid0 && valid1) {
auto key0 = *(input_key_first0 + idx0);
Expand Down Expand Up @@ -522,24 +527,42 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
// vertices in a single warp (better optimize if this becomes a performance
// bottleneck)

auto mask_first = edge_partition_e_mask ? (*edge_partition_e_mask).value_first()
: static_cast<uint32_t const*>(nullptr);
auto intersection_size = set_intersection_by_key_with_mask(
indices0,
indices1,
edge_property_values0,
edge_property_values1,
mask_first,
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
(std::is_same_v<FirstElementToIdxMap, void*> && edge_partition_e_mask),
local_edge_offset1,
local_degree1,
(std::is_same_v<SecondElementToIdxMap, void*> && edge_partition_e_mask),
nbr_intersection_offsets[i]);
edge_t intersection_size{};
if (edge_partition_e_mask) {
intersection_size =
set_intersection_by_key_with_mask<true>(indices0,
indices1,
edge_property_values0,
edge_property_values1,
(*edge_partition_e_mask).value_first(),
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
std::is_same_v<FirstElementToIdxMap, void*>,
local_edge_offset1,
local_degree1,
std::is_same_v<SecondElementToIdxMap, void*>,
nbr_intersection_offsets[i]);
} else {
intersection_size =
set_intersection_by_key_with_mask<false>(indices0,
indices1,
edge_property_values0,
edge_property_values1,
static_cast<uint32_t const*>(nullptr),
nbr_intersection_indices.begin(),
nbr_intersection_e_property_values0,
nbr_intersection_e_property_values1,
local_edge_offset0,
local_degree0,
false,
local_edge_offset1,
local_degree1,
false,
nbr_intersection_offsets[i]);
}

thrust::fill(
thrust::seq,
Expand Down Expand Up @@ -714,7 +737,7 @@ nbr_intersection(raft::handle_t const& handle,
auto edge_mask_view = graph_view.edge_mask_view();

std::optional<std::unique_ptr<kv_store_t<vertex_t, vertex_t, false>>> major_to_idx_map_ptr{
std::nullopt};
std::nullopt}; // idx to major_nbr_offsets
std::optional<rmm::device_uvector<edge_t>> major_nbr_offsets{std::nullopt};
std::optional<rmm::device_uvector<vertex_t>> major_nbr_indices{std::nullopt};

Expand Down Expand Up @@ -1041,7 +1064,7 @@ nbr_intersection(raft::handle_t const& handle,
// 3. Collect neighbor list for minors (for the neighbors within the minor range for this GPU)

std::optional<std::unique_ptr<kv_store_t<vertex_t, vertex_t, false>>> minor_to_idx_map_ptr{
std::nullopt};
std::nullopt}; // idx to minor_nbr_offsets
std::optional<rmm::device_uvector<size_t>> minor_nbr_offsets{std::nullopt};
std::optional<rmm::device_uvector<vertex_t>> minor_nbr_indices{std::nullopt};

Expand Down
Loading
Loading