Skip to content

Commit

Permalink
delete machine specific/benchmark specific tunings
Browse files (browse the repository at this point in the history)
  • Loading branch information
seunghwak committed Nov 14, 2024
1 parent dd7357f commit b9a0ed6
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 57 deletions.
3 changes: 1 addition & 2 deletions cpp/include/cugraph/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ namespace detail {
// use (key, value) pairs to store source/destination properties if (unique edge
// sources/destinations) over (V / major_comm_size|minor_comm_size) is smaller than the threshold
// value
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold =
0.0; // FIXME: just for benchmarking
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = 0.1;

// FIXME: threshold values require tuning
// use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller
Expand Down
25 changes: 0 additions & 25 deletions cpp/include/cugraph/partition_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,35 +163,10 @@ class partition_manager {
int row_idx = rank / gpu_row_comm_size;
int col_idx = rank % gpu_row_comm_size;

#if 1 // FIXME: a trick to use InfiniBand SHARP in a sub-communicator (currently, a GPU can
// participate in only one SHARP accelerated communicator)
comm.barrier(); // to enforce initialization in comm
std::cerr << "start intializing node_comm" << std::endl;
std::cerr << "start intializing major_comm" << std::endl;
handle.set_subcomm("gpu_row_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(row_idx, col_idx)));
auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
major_comm.barrier(); /// to enforce initialization in major_comm
std::cerr << "major_comm initialized" << std::endl;
#if 1 // for EOS
auto ret = setenv("NCCL_COLLNET_ENABLE", "1", 1);
if (ret != 0)
std::cerr << "setenv(\"NCCL_COLLNET_ENABLE\", \"1\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_DISABLE", "0", 1);
if (ret != 0)
std::cerr << "setenv(\"NCCL_SHARP_DISABLE\", \"0\", 1) returned " << ret << std::endl;
#endif
handle.set_subcomm("gpu_col_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(col_idx, row_idx)));
auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
minor_comm.barrier(); /// to enforce initialization in minor_comm
std::cerr << "minor_comm initialized" << std::endl;
#else
handle.set_subcomm("gpu_row_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(row_idx, col_idx)));
handle.set_subcomm("gpu_col_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(col_idx, row_idx)));
#endif
};
};

Expand Down
27 changes: 0 additions & 27 deletions cpp/tests/traversal/mg_graph500_bfs_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,33 +72,6 @@ class Tests_GRAPH500_MGBFS

static void SetUpTestCase()
{
#if 1
auto ret = setenv("NCCL_DEBUG", "WARN", 1);
if (ret != 0) std::cout << "setenv(\"NCCL_DEBUG\", \"TRACE\", 1) returned " << ret << std::endl;
#endif
#if 0 // workstation
// nothing
#else
#if 0 // for CW
ret = setenv("NCCL_NET", "IB", 1);
if (ret != 0) std::cout << "setenv(\"NCCL_NET\", \"IB\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SOCKET_IFNAME", "enp90s0f0np0", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SOCKET_IFNAME\", \"enp90s0f0np0\", 1) returned " << ret
<< std::endl;
#else // for EOS
ret = setenv("NCCL_COLLNET_ENABLE", "0", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_COLLNET_ENABLE\", \"0\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_DISABLE", "1", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SHARP_DISABLE\", \"1\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_GROUP_SIZE_THRESH", "8", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SHARP_GROUP_SIZE_THRESH\", \"8\", 1) returned " << ret
<< std::endl;
#endif
#endif
size_t pool_size =
16; // note that CUDA_DEVICE_MAX_CONNECTIONS (default: 8) should be set to a value larger
// than pool_size to avoid false dependency among different streams
Expand Down
4 changes: 1 addition & 3 deletions cpp/tests/utilities/base_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,13 @@ inline auto make_pool(bool use_max = false)
// effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt`
// `_CUGRAPH_TEST_PERCENT` default value will need to be audited.
auto const [free, total] = rmm::available_device_memory();
double init_alloc_ratio{0.92};
auto const init_alloc =
use_max ? rmm::align_down(std::min(free, static_cast<size_t>(total * init_alloc_ratio)), rmm::CUDA_ALLOCATION_ALIGNMENT)
use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT)
: rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT);
std::optional<size_t> max_alloc{};
if (use_max) {
max_alloc = init_alloc;
}
std::cout << "init_alloc ratio=" << static_cast<double>(init_alloc) / static_cast<double>(total) << std::endl;
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), init_alloc, max_alloc);
}

Expand Down

0 comments on commit b9a0ed6

Please sign in to comment.