Skip to content

Commit

Permalink
delete machine specific/benchmark specific tunings
Browse files (browse the repository at this point in the history)
  • Loading branch information
seunghwak committed Nov 14, 2024
1 parent dd7357f commit b9a0ed6
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 57 deletions.
3 changes: 1 addition & 2 deletions cpp/include/cugraph/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ namespace detail {
// use (key, value) pairs to store source/destination properties if (unique edge
// sources/destinations) over (V / major_comm_size|minor_comm_size) is smaller than the threshold
// value
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold =
0.0; // FIXME: just for benchmarking
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = 0.1;

// FIXME: threshold values require tuning
// use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller
Expand Down
25 changes: 0 additions & 25 deletions cpp/include/cugraph/partition_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,35 +163,10 @@ class partition_manager {
int row_idx = rank / gpu_row_comm_size;
int col_idx = rank % gpu_row_comm_size;

#if 1 // FIXME: a trick to use InfiniBand SHARP in a sub-communicator (currently, a GPU can
// participate in only one SHARP accelerated communicator)
comm.barrier(); // to enforce initialization in comm
std::cerr << "start intializing node_comm" << std::endl;
std::cerr << "start intializing major_comm" << std::endl;
handle.set_subcomm("gpu_row_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(row_idx, col_idx)));
auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
major_comm.barrier(); /// to enforce initialization in major_comm
std::cerr << "major_comm initialized" << std::endl;
#if 1 // for EOS
auto ret = setenv("NCCL_COLLNET_ENABLE", "1", 1);
if (ret != 0)
std::cerr << "setenv(\"NCCL_COLLNET_ENABLE\", \"1\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_DISABLE", "0", 1);
if (ret != 0)
std::cerr << "setenv(\"NCCL_SHARP_DISABLE\", \"0\", 1) returned " << ret << std::endl;
#endif
handle.set_subcomm("gpu_col_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(col_idx, row_idx)));
auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
minor_comm.barrier(); /// to enforce initialization in minor_comm
std::cerr << "minor_comm initialized" << std::endl;
#else
handle.set_subcomm("gpu_row_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(row_idx, col_idx)));
handle.set_subcomm("gpu_col_comm",
std::make_shared<raft::comms::comms_t>(comm.comm_split(col_idx, row_idx)));
#endif
};
};

Expand Down
27 changes: 0 additions & 27 deletions cpp/tests/traversal/mg_graph500_bfs_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,33 +72,6 @@ class Tests_GRAPH500_MGBFS

static void SetUpTestCase()
{
#if 1
auto ret = setenv("NCCL_DEBUG", "WARN", 1);
if (ret != 0) std::cout << "setenv(\"NCCL_DEBUG\", \"TRACE\", 1) returned " << ret << std::endl;
#endif
#if 0 // workstation
// nothing
#else
#if 0 // for CW
ret = setenv("NCCL_NET", "IB", 1);
if (ret != 0) std::cout << "setenv(\"NCCL_NET\", \"IB\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SOCKET_IFNAME", "enp90s0f0np0", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SOCKET_IFNAME\", \"enp90s0f0np0\", 1) returned " << ret
<< std::endl;
#else // for EOS
ret = setenv("NCCL_COLLNET_ENABLE", "0", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_COLLNET_ENABLE\", \"0\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_DISABLE", "1", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SHARP_DISABLE\", \"1\", 1) returned " << ret << std::endl;
ret = setenv("NCCL_SHARP_GROUP_SIZE_THRESH", "8", 1);
if (ret != 0)
std::cout << "setenv(\"NCCL_SHARP_GROUP_SIZE_THRESH\", \"8\", 1) returned " << ret
<< std::endl;
#endif
#endif
size_t pool_size =
16; // note that CUDA_DEVICE_MAX_CONNECTIONS (default: 8) should be set to a value larger
// than pool_size to avoid false dependency among different streams
Expand Down
4 changes: 1 addition & 3 deletions cpp/tests/utilities/base_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,13 @@ inline auto make_pool(bool use_max = false)
// effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt`
// `_CUGRAPH_TEST_PERCENT` default value will need to be audited.
auto const [free, total] = rmm::available_device_memory();
double init_alloc_ratio{0.92};
auto const init_alloc =
use_max ? rmm::align_down(std::min(free, static_cast<size_t>(total * init_alloc_ratio)), rmm::CUDA_ALLOCATION_ALIGNMENT)
use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT)
: rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT);
std::optional<size_t> max_alloc{};
if (use_max) {
max_alloc = init_alloc;
}
std::cout << "init_alloc ratio=" << static_cast<double>(init_alloc) / static_cast<double>(total) << std::endl;
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), init_alloc, max_alloc);
}

Expand Down

0 comments on commit b9a0ed6

Please sign in to comment.