Merge remote-tracking branch 'upstream/branch-24.12' into feat/logger

rapidsai · Nov 14, 2024 · ebdd741 · ebdd741
2 parents 5d11768 + 52d61c5
commit ebdd741
Show file tree

Hide file tree

Showing 26 changed files with 222 additions and 128 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -41,8 +41,8 @@ rapids_cmake_build_type(Release)
 option(RMM_NVTX "Build RMM with NVTX support" OFF)
 option(BUILD_TESTS "Configure CMake to build tests" ON)
 option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
-# This is mostly so that dependent libraries, such as fmt, are configured in shared mode for
-# downstream dependents of RMM that get their common dependencies transitively.
+# This is mostly so that dependent libraries are configured in shared mode for downstream dependents
+# of RMM that get their common dependencies transitively.
 option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON)
 set(RMM_LOGGING_LEVEL
     "INFO"
@@ -121,7 +121,6 @@ endif()
 
 target_link_libraries(rmm INTERFACE rmm_logger)
 target_link_libraries(rmm INTERFACE CCCL::CCCL)
-target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
 target_link_libraries(rmm INTERFACE dl)
 target_link_libraries(rmm INTERFACE nvtx3::nvtx3-cpp)
 target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -42,9 +42,8 @@ function(ConfigureBench BENCH_NAME)
     target_compile_definitions(${BENCH_NAME} PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM)
   endif()
 
-  target_compile_options(
-    ${BENCH_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror
-                         -Wno-error=deprecated-declarations -Wno-unknown-pragmas>)
+  target_compile_options(${BENCH_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror
+                                              -Wno-unknown-pragmas>)
 
   # When not building in compatibility mode, each benchmark has to recompile the logger.
   if(TARGET rmm_bench_logger)

diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
@@ -16,6 +16,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
+#include <rmm/logger.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/binning_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
@@ -176,11 +177,7 @@ struct replay_benchmark {
   void SetUp(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-#ifdef RMM_BACKWARDS_COMPATIBILITY
-      rmm::detail::logger().log(spdlog::level::info, "------ Start of Benchmark -----");
-#else
-      rmm::default_logger().log(rmm::level_enum::info, "------ Start of Benchmark -----");
-#endif
+      RMM_LOG_INFO("------ Start of Benchmark -----");
       mr_ = factory_(simulated_size_);
     }
   }
@@ -189,11 +186,7 @@ struct replay_benchmark {
   void TearDown(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-#ifdef RMM_BACKWARDS_COMPATIBILITY
-      rmm::detail::logger().log(spdlog::level::info, "------ End of Benchmark -----");
-#else
-      rmm::default_logger().log(rmm::level_enum::info, "------ End of Benchmark -----");
-#endif
+      RMM_LOG_INFO("------ End of Benchmark -----");
       // clean up any leaked allocations
       std::size_t total_leaked{0};
       std::size_t num_leaked{0};

diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -151,7 +151,7 @@ inline std::vector<event> parse_csv(std::string const& filename)
 
   auto parse_pointer = [](std::string const& str, uintptr_t& ptr) {
     auto const base{16};
-    ptr = std::stoll(str, nullptr, base);
+    // "(nil)" (the text glibc's %p emits for a null pointer) maps to address 0. Everything else
+    // is parsed as unsigned hex: std::stoll throws std::out_of_range for addresses above
+    // 0x7fffffffffffffff, which are still valid uintptr_t values.
+    ptr = (str == "(nil)") ? 0 : std::stoull(str, nullptr, base);
   };
 
   std::vector<uintptr_t> pointers = csv.GetColumn<uintptr_t>("Pointer", parse_pointer);

diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh
@@ -22,4 +22,7 @@ sccache --show-adv-stats
 
 python -m pip install wheel
 python -m wheel tags --platform any dist/* --remove
+
+../../ci/validate_wheel.sh dist
+
 RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp dist
diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh
@@ -32,6 +32,8 @@ sccache --show-adv-stats
 mkdir -p final_dist
 python -m auditwheel repair -w final_dist dist/*
 
+../../ci/validate_wheel.sh final_dist
+
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
 
 # switch back to the root of the repo and check symbol visibility

diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Validate every wheel in a directory with 'pydistcheck' and 'twine check'.
+#
+# Usage: validate_wheel.sh <wheel_dir>
+#   <wheel_dir>  directory containing the *.whl files, relative to the caller's working directory
+
+set -euo pipefail
+
+wheel_dir_relative_path=${1:?"usage: $0 <wheel_dir>"}
+
+# Expand the glob straight into an array so that each wheel is passed as its own argument and
+# paths containing whitespace survive intact.
+wheels=("${wheel_dir_relative_path}"/*.whl)
+if [[ ! -e "${wheels[0]}" ]]; then
+    echo "no wheels found in '${wheel_dir_relative_path}'" >&2
+    exit 1
+fi
+
+rapids-logger "validate packages with 'pydistcheck'"
+
+pydistcheck \
+    --inspect \
+    "${wheels[@]}"
+
+rapids-logger "validate packages with 'twine'"
+
+twine check \
+    --strict \
+    "${wheels[@]}"
diff --git a/cmake/thirdparty/get_fmt.cmake b/cmake/thirdparty/get_fmt.cmake
diff --git a/cmake/thirdparty/get_spdlog.cmake b/cmake/thirdparty/get_spdlog.cmake
@@ -40,8 +40,6 @@ function(find_and_configure_spdlog)
   #endif()
 # cmake-format: on
   rapids_cpm_spdlog(
-    # TODO: We can drop this when we stop using header-only spdlog.
-    FMT_OPTION "EXTERNAL_FMT_HO"
     INSTALL_EXPORT_SET rmm-exports
     BUILD_EXPORT_SET rmm-exports # cmake-format: off
     #OPTIONS "SPDLOG_BUILD_SHARED OFF" "BUILD_SHARED_LIBS OFF"

diff --git a/include/rmm/detail/format.hpp b/include/rmm/detail/format.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <array>
+#include <cstdio>
+#include <ios>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace RMM_NAMESPACE {
+namespace detail {
+
+/**
+ * @brief Format a message string with printf-style formatting
+ *
+ * This function performs printf-style formatting to avoid the need for fmt
+ * or spdlog's own templated APIs (which would require exposing spdlog
+ * symbols publicly) and returns the formatted message as a `std::string`.
+ *
+ * `std::string` arguments are handed to the formatter as C strings so that they
+ * can be used with `%s`. Every other argument is passed to `std::snprintf`
+ * as-is and must match its conversion specifier.
+ *
+ * @param format The printf-style format string
+ * @param args The format arguments
+ * @return The formatted message (empty if formatting produces no characters)
+ * @throw rmm::logic_error if an error occurs during formatting
+ */
+template <typename... Args>
+std::string formatted_log(std::string const& format, Args&&... args)
+{
+  // snprintf cannot take a std::string through its varargs: unwrap it to a C string and pass
+  // everything else through by value.
+  auto to_printf_arg = [](auto const& arg) {
+    if constexpr (std::is_same_v<std::decay_t<decltype(arg)>, std::string>) {
+      return arg.c_str();
+    } else {
+      return arg;
+    }
+  };
+
+  // The pack is expanded twice below, so the arguments are deliberately passed as lvalues
+  // instead of being forwarded (forwarding the same argument twice is a use-after-forward).
+  // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg)
+  // First pass: a null buffer makes snprintf report the required length without writing.
+  auto const length = std::snprintf(nullptr, 0, format.c_str(), to_printf_arg(args)...);
+  RMM_EXPECTS(length >= 0, "Error during formatting.");
+  if (length == 0) { return {}; }
+
+  // Second pass: format straight into the result. snprintf writes `length` characters followed
+  // by a '\0' that lands on the string's own terminator slot.
+  std::string message(static_cast<std::size_t>(length), '\0');
+  std::snprintf(message.data(), message.size() + 1, format.c_str(), to_printf_arg(args)...);
+  // NOLINTEND(cppcoreguidelines-pro-type-vararg)
+  return message;
+}
+
+/**
+ * @brief Explicit specialization of the formatter for calls without format arguments
+ *
+ * With an empty argument pack there is nothing to substitute, so a copy of
+ * `format` is returned unchanged.
+ */
+template <>
+inline std::string formatted_log<>(std::string const& format)
+{
+  return std::string(format);
+}
+
+/**
+ * @brief Stringify a size in bytes to a human-readable value
+ *
+ * The value is divided by 1024 until it is below 1024 (or the largest unit in the table is
+ * reached) and rendered with `std::to_string`, e.g. 1536 -> "1.500000 KiB".
+ *
+ * @param value The size in bytes
+ * @return The scaled size, a space, and the matching binary unit suffix
+ */
+inline std::string format_bytes(std::size_t value)
+{
+  static constexpr std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
+
+  std::size_t index = 0;
+  auto size         = static_cast<double>(value);
+  // `>=` so that an exact power of 1024 rolls over to the next unit (1024 B -> 1 KiB); the
+  // index bound keeps the lookup inside the table regardless of the input.
+  while (size >= 1024 && index + 1 < units.size()) {
+    size /= 1024;
+    ++index;
+  }
+
+  return std::to_string(size) + ' ' + units[index];
+}
+
+/**
+ * @brief Stringify a stream ID
+ *
+ * Writes `stream.value()` to a string buffer with the `std::hex` manipulator applied.
+ *
+ * @param stream The stream view whose underlying value is rendered
+ * @return The text produced for `stream.value()`
+ */
+inline std::string format_stream(rmm::cuda_stream_view stream)
+{
+  std::ostringstream rendered;
+  rendered << std::hex;
+  rendered << stream.value();
+  return rendered.str();
+}
+
+}  // namespace detail
+}  // namespace RMM_NAMESPACE
diff --git a/include/rmm/detail/logging_assert.hpp b/include/rmm/detail/logging_assert.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@
     if (!success) {                                                                               \
       RMM_LOG_CRITICAL(                                                                           \
         "[" __FILE__ ":" RMM_STRINGIFY(__LINE__) "] Assertion " RMM_STRINGIFY(_expr) " failed."); \
-      rmm::logger().flush();                                                                      \
+      rmm::detail::logger().flush();                                                              \
       /* NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) */                   \
       assert(success);                                                                            \
     }                                                                                             \

diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp
@@ -18,6 +18,7 @@
 #include <rmm/aligned.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/detail/logging_assert.hpp>
 #include <rmm/logger.hpp>
 #include <rmm/mr/device/detail/arena.hpp>
@@ -335,11 +336,8 @@ class arena_memory_resource final : public device_memory_resource {
   void dump_memory_log(size_t bytes)
   {
     logger_->info("**************************************************");
-#ifdef RMM_BACKWARDS_COMPATIBILITY
-    logger_->info("Ran out of memory trying to allocate {}.", rmm::detail::bytes{bytes});
-#else
-    logger_->info("Ran out of memory trying to allocate %s.", rmm::detail::bytes{bytes});
-#endif
+    logger_->info(rmm::detail::formatted_log("Ran out of memory trying to allocate %s.",
+                                             rmm::detail::format_bytes(bytes)));
     logger_->info("**************************************************");
     logger_->info("Global arena:");
     global_arena_.dump_memory_log(logger_);

diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp
@@ -21,13 +21,13 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/detail/logging_assert.hpp>
 #include <rmm/logger.hpp>
 #include <rmm/resource_ref.hpp>
 
 #include <cuda_runtime_api.h>
 
-#include <fmt/core.h>
 #ifdef RMM_BACKWARDS_COMPATIBILITY
 #include <spdlog/common.h>
 #endif
@@ -657,33 +657,38 @@ class global_arena final {
   {
     std::lock_guard lock(mtx_);
 
-    logger->info("  Arena size: %s", rmm::detail::bytes{upstream_block_.size()});
-    logger->info("  # superblocks: %d", superblocks_.size());
+    logger->info(rmm::detail::formatted_log("  Arena size: %s",
+                                            rmm::detail::format_bytes(upstream_block_.size())));
+    logger->info(rmm::detail::formatted_log("  # superblocks: %zu", superblocks_.size()));
     if (!superblocks_.empty()) {
-      logger->debug("  Total size of superblocks: %s",
-                    rmm::detail::bytes{total_memory_size(superblocks_)});
+      logger->debug(
+        rmm::detail::formatted_log("  Total size of superblocks: %s",
+                                   rmm::detail::format_bytes(total_memory_size(superblocks_))));
       auto const total_free    = total_free_size(superblocks_);
       auto const max_free      = max_free_size(superblocks_);
       auto const fragmentation = (1 - max_free / static_cast<double>(total_free)) * 100;
-      logger->info("  Total free memory: %s", rmm::detail::bytes{total_free});
-      logger->info("  Largest block of free memory: %s", rmm::detail::bytes{max_free});
-      logger->info("  Fragmentation: %0.2f%%", fragmentation);
+      logger->info(rmm::detail::formatted_log("  Total free memory: %s",
+                                              rmm::detail::format_bytes(total_free)));
+      logger->info(rmm::detail::formatted_log("  Largest block of free memory: %s",
+                                              rmm::detail::format_bytes(max_free)));
+      logger->info(rmm::detail::formatted_log("  Fragmentation: %0.2f", fragmentation));
 
-      auto index = 0;
+      auto index = decltype(superblocks_.size()){0};
       char* prev_end{};
       for (auto const& sblk : superblocks_) {
         if (prev_end == nullptr) { prev_end = sblk.pointer(); }
-        logger->debug(
-          "    Superblock %d: start=%p, end=%p, size=%s, empty=%d, # free blocks=%d, max free=%s, "
+        logger->debug(rmm::detail::formatted_log(
+          "    Superblock %zu: start=%p, end=%p, size=%s, empty=%s, # free blocks=%zu, max "
+          "free=%s, "
           "gap=%s",
           index,
-          fmt::ptr(sblk.pointer()),
-          fmt::ptr(sblk.end()),
-          rmm::detail::bytes{sblk.size()},
-          sblk.empty(),
+          sblk.pointer(),
+          sblk.end(),
+          rmm::detail::format_bytes(sblk.size()),
+          sblk.empty() ? "T" : "F",
           sblk.free_blocks(),
-          rmm::detail::bytes{sblk.max_free_size()},
-          rmm::detail::bytes{static_cast<size_t>(sblk.pointer() - prev_end)});
+          rmm::detail::format_bytes(sblk.max_free_size()),
+          rmm::detail::format_bytes(static_cast<size_t>(sblk.pointer() - prev_end))));
         prev_end = sblk.end();
         index++;
       }

diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp
@@ -20,8 +20,6 @@
 #include <rmm/detail/export.hpp>
 #include <rmm/mr/device/detail/free_list.hpp>
 
-#include <fmt/core.h>
-
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -131,10 +129,7 @@ struct block : public block_base {
   /**
    * @brief Print this block. For debugging.
    */
-  inline void print() const
-  {
-    std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl;
-  }
+  inline void print() const
+  {
+    // Cast to `void const*` so the address is printed, as fmt::ptr did. If pointer() yields a
+    // character pointer, operator<< would otherwise read the pointed-to memory as a C string.
+    std::cout << static_cast<void const*>(pointer()) << " " << size() << " B" << std::endl;
+  }
 #endif
 
  private:
@@ -146,7 +141,7 @@ struct block : public block_base {
 /// Print block on an ostream
 inline std::ostream& operator<<(std::ostream& out, const block& blk)
 {
-  out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size());
+  // Cast to `void const*` so the address is printed, as fmt::ptr did; a character pointer would
+  // otherwise be read as a C string. End with "\n" (as before) rather than std::endl so that
+  // flushing stays under the caller's control.
+  out << static_cast<void const*>(blk.pointer()) << " " << blk.size() << " B\n";
   return out;
 }
 #endif