From b9a0ed646b585d141c167bac5dd65cbe7b331ea3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 14 Nov 2024 11:33:06 -0800 Subject: [PATCH] delete machine specific/benchmark specific tunings --- cpp/include/cugraph/graph_view.hpp | 3 +-- cpp/include/cugraph/partition_manager.hpp | 25 ------------------- cpp/tests/traversal/mg_graph500_bfs_test.cu | 27 --------------------- cpp/tests/utilities/base_fixture.hpp | 4 +-- 4 files changed, 2 insertions(+), 57 deletions(-) diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 6d3da3740b..d109fbdac9 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -248,8 +248,7 @@ namespace detail { // use (key, value) pairs to store source/destination properties if (unique edge // sources/destinations) over (V / major_comm_size|minor_comm_size) is smaller than the threshold // value -double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = - 0.0; // FIXME: just for benchmarking +double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = 0.1; // FIXME: threshold values require tuning // use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller diff --git a/cpp/include/cugraph/partition_manager.hpp b/cpp/include/cugraph/partition_manager.hpp index 377fd0a4de..682eb5b151 100644 --- a/cpp/include/cugraph/partition_manager.hpp +++ b/cpp/include/cugraph/partition_manager.hpp @@ -163,35 +163,10 @@ class partition_manager { int row_idx = rank / gpu_row_comm_size; int col_idx = rank % gpu_row_comm_size; -#if 1 // FIXME: a trick to use InfiniBand SHARP in a sub-communicator (currently, a GPU can - // participate in only one SHARP accelerated communicator) - comm.barrier(); // to enforce initialization in comm - std::cerr << "start intializing node_comm" << std::endl; - std::cerr << "start intializing major_comm" << std::endl; - handle.set_subcomm("gpu_row_comm", - std::make_shared(comm.comm_split(row_idx, col_idx))); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - major_comm.barrier(); /// to enforce initialization in major_comm - std::cerr << "major_comm initialized" << std::endl; -#if 1 // for EOS - auto ret = setenv("NCCL_COLLNET_ENABLE", "1", 1); - if (ret != 0) - std::cerr << "setenv(\"NCCL_COLLNET_ENABLE\", \"1\", 1) returned " << ret << std::endl; - ret = setenv("NCCL_SHARP_DISABLE", "0", 1); - if (ret != 0) - std::cerr << "setenv(\"NCCL_SHARP_DISABLE\", \"0\", 1) returned " << ret << std::endl; -#endif - handle.set_subcomm("gpu_col_comm", - std::make_shared(comm.comm_split(col_idx, row_idx))); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - minor_comm.barrier(); /// to enforce initialization in minor_comm - std::cerr << "minor_comm initialized" << std::endl; -#else handle.set_subcomm("gpu_row_comm", std::make_shared(comm.comm_split(row_idx, col_idx))); handle.set_subcomm("gpu_col_comm", std::make_shared(comm.comm_split(col_idx, row_idx))); -#endif }; }; diff --git a/cpp/tests/traversal/mg_graph500_bfs_test.cu b/cpp/tests/traversal/mg_graph500_bfs_test.cu index ca51ae8aa1..88708cfd47 100644 --- a/cpp/tests/traversal/mg_graph500_bfs_test.cu +++ b/cpp/tests/traversal/mg_graph500_bfs_test.cu @@ -72,33 +72,6 @@ class Tests_GRAPH500_MGBFS static void SetUpTestCase() { -#if 1 - auto ret = setenv("NCCL_DEBUG", "WARN", 1); - if (ret != 0) std::cout << "setenv(\"NCCL_DEBUG\", \"TRACE\", 1) returned " << ret << std::endl; -#endif -#if 0 // workstation - // nothing -#else -#if 0 // for CW - ret = setenv("NCCL_NET", "IB", 1); - if (ret != 0) std::cout << "setenv(\"NCCL_NET\", \"IB\", 1) returned " << ret << std::endl; - ret = setenv("NCCL_SOCKET_IFNAME", "enp90s0f0np0", 1); - if (ret != 0) - std::cout << "setenv(\"NCCL_SOCKET_IFNAME\", \"enp90s0f0np0\", 1) returned " << ret - << std::endl; -#else // for EOS - ret = setenv("NCCL_COLLNET_ENABLE", "0", 1); - if (ret != 0) - std::cout << "setenv(\"NCCL_COLLNET_ENABLE\", \"0\", 1) returned " << ret << std::endl; - ret = setenv("NCCL_SHARP_DISABLE", "1", 1); - if (ret != 0) - std::cout << "setenv(\"NCCL_SHARP_DISABLE\", \"1\", 1) returned " << ret << std::endl; - ret = setenv("NCCL_SHARP_GROUP_SIZE_THRESH", "8", 1); - if (ret != 0) - std::cout << "setenv(\"NCCL_SHARP_GROUP_SIZE_THRESH\", \"8\", 1) returned " << ret - << std::endl; -#endif -#endif size_t pool_size = 16; // note that CUDA_DEVICE_MAX_CONNECTIONS (default: 8) should be set to a value larger // than pool_size to avoid false dependency among different streams diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index b7065c1c36..46d8f7041e 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -77,15 +77,13 @@ inline auto make_pool(bool use_max = false) // effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt` // `_CUGRAPH_TEST_PERCENT` default value will need to be audited. auto const [free, total] = rmm::available_device_memory(); - double init_alloc_ratio{0.92}; auto const init_alloc = - use_max ? rmm::align_down(std::min(free, static_cast(total * init_alloc_ratio)), rmm::CUDA_ALLOCATION_ALIGNMENT) + use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT) : rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT); std::optional max_alloc{}; if (use_max) { max_alloc = init_alloc; } - std::cout << "init_alloc ratio=" << static_cast(init_alloc) / static_cast(total) << std::endl; return rmm::mr::make_owning_wrapper(make_cuda(), init_alloc, max_alloc); }