From 56f3a464053a66f801a39c65cde47dbab46ce513 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 13 Nov 2024 12:40:18 -0600
Subject: [PATCH 1/3] enforce wheel size limits, README formatting in CI
 (#1726)

Contributes to https://github.com/rapidsai/build-planning/issues/110

Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI.

* checks on wheel size (compressed),
  - *to be sure they're under PyPI limits*
  - *and to prompt discussion on PRs that significantly increase wheel sizes*
* checks on README formatting
  - *to ensure they'll render properly as the PyPI project homepages*
  - *e.g. like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/*

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/rmm/pull/1726
---
 ci/build_wheel_cpp.sh        |  3 +++
 ci/build_wheel_python.sh     |  2 ++
 ci/validate_wheel.sh         | 18 ++++++++++++++++++
 python/librmm/pyproject.toml |  8 ++++++++
 python/rmm/docs/conf.py      |  5 ++++-
 python/rmm/pyproject.toml    |  8 ++++++++
 6 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100755 ci/validate_wheel.sh
diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh
index 12e099bdb..1ec979372 100755
--- a/ci/build_wheel_cpp.sh
+++ b/ci/build_wheel_cpp.sh
@@ -22,4 +22,7 @@ sccache --show-adv-stats
 
 python -m pip install wheel
 python -m wheel tags --platform any dist/* --remove
+
+../../ci/validate_wheel.sh dist
+
 RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp dist
diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh
index b497b76d3..4e4d3bf61 100755
--- a/ci/build_wheel_python.sh
+++ b/ci/build_wheel_python.sh
@@ -32,6 +32,8 @@ sccache --show-adv-stats
 mkdir -p final_dist
 python -m auditwheel repair -w final_dist dist/*
 
+../../ci/validate_wheel.sh final_dist
+
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
 
 # switch back to the root of the repo and check symbol visibility
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
new file mode 100755
index 000000000..60a80fce6
--- /dev/null
+++ b/ci/validate_wheel.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+wheel_dir_relative_path=$1
+
+rapids-logger "validate packages with 'pydistcheck'"
+
+pydistcheck \
+    --inspect \
+    "$(echo ${wheel_dir_relative_path}/*.whl)"
+
+rapids-logger "validate packages with 'twine'"
+
+twine check \
+    --strict \
+    "$(echo ${wheel_dir_relative_path}/*.whl)"
diff --git a/python/librmm/pyproject.toml b/python/librmm/pyproject.toml
index 0f0b4e397..bae2ef36b 100644
--- a/python/librmm/pyproject.toml
+++ b/python/librmm/pyproject.toml
@@ -67,3 +67,11 @@ wheel.py-api = "py3"
 provider = "scikit_build_core.metadata.regex"
 input = "librmm/VERSION"
 regex = "(?P<value>.*)"
+
+[tool.pydistcheck]
+select = [
+    "distro-too-large-compressed",
+]
+
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
diff --git a/python/rmm/docs/conf.py b/python/rmm/docs/conf.py
index 0b2c21d5a..2aad3a82c 100644
--- a/python/rmm/docs/conf.py
+++ b/python/rmm/docs/conf.py
@@ -197,7 +197,10 @@
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3", None),
     "numba": ("https://numba.readthedocs.io/en/stable", None),
-    "cuda-python": ("https://nvidia.github.io/cuda-python/", None),
+    "cuda-python": (
+        "https://nvidia.github.io/cuda-python/cuda-bindings/",
+        None,
+    ),
 }
 
 # Config numpydoc
diff --git a/python/rmm/pyproject.toml b/python/rmm/pyproject.toml
index 22ed94660..aaaa15482 100644
--- a/python/rmm/pyproject.toml
+++ b/python/rmm/pyproject.toml
@@ -134,6 +134,14 @@ requires = [
     "ninja",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
+[tool.pydistcheck]
+select = [
+    "distro-too-large-compressed",
+]
+
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
+
 [tool.pytest.ini_options]
 # treat warnings as errors
 filterwarnings = [

From 220003e57532d1276fbecdba9dd82ed04efa1db4 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Thu, 14 Nov 2024 12:37:07 +1100
Subject: [PATCH 2/3] Treat deprecation warnings as errors and fix deprecation
 warnings in replay benchmark (#1728)

Fixes #1727
Contributes to https://github.com/rapidsai/build-planning/issues/26

 - Removes `-Wno-error=deprecated-declarations`
 - Replaces deprecated usage of `rmm::logger()` in reply benchmark with supported `RMM_LOG_INFO` macros.

Note the latter duplicates a change in #1724 which allows the two PRs to be merged independently.

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Rong Ou (https://github.com/rongou)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/rmm/pull/1728
---
 benchmarks/CMakeLists.txt    | 7 +++----
 benchmarks/replay/replay.cpp | 6 +++---
 tests/CMakeLists.txt         | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 9dfb2c538..0487a2dfa 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -42,9 +42,8 @@ function(ConfigureBench BENCH_NAME)
     target_compile_definitions(${BENCH_NAME} PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM)
   endif()
 
-  target_compile_options(
-    ${BENCH_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror
-                         -Wno-error=deprecated-declarations -Wno-unknown-pragmas>)
+  target_compile_options(${BENCH_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror
+                                              -Wno-unknown-pragmas>)
   if(DISABLE_DEPRECATION_WARNING)
     target_compile_options(
       ${BENCH_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-deprecated-declarations>)
diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index 5afed036a..d80841321 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -172,7 +172,7 @@ struct replay_benchmark {
   void SetUp(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-      rmm::logger().log(spdlog::level::info, "------ Start of Benchmark -----");
+      RMM_LOG_INFO("------ Start of Benchmark -----");
       mr_ = factory_(simulated_size_);
     }
   }
@@ -181,7 +181,7 @@ struct replay_benchmark {
   void TearDown(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-      rmm::logger().log(spdlog::level::info, "------ End of Benchmark -----");
+      RMM_LOG_INFO("------ End of Benchmark -----");
       // clean up any leaked allocations
       std::size_t total_leaked{0};
       std::size_t num_leaked{0};
@@ -402,7 +402,7 @@ int main(int argc, char** argv)
     auto const num_threads = per_thread_events.size();
 
     // Uncomment to enable / change default log level
-    // rmm::logger().set_level(spdlog::level::trace);
+    // rmm::detail::logger().set_level(spdlog::level::trace);
 
     if (args.count("resource") > 0) {
       std::string mr_name = args["resource"].as<std::string>();
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 0258c59c5..a482c8cc1 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -41,8 +41,7 @@ function(ConfigureTestInternal TEST_NAME)
                CUDA_STANDARD_REQUIRED ON)
   target_compile_definitions(${TEST_NAME}
                              PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}")
-  target_compile_options(${TEST_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror
-                                             -Wno-error=deprecated-declarations>)
+  target_compile_options(${TEST_NAME} PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wall -Werror>)
 
   if(DISABLE_DEPRECATION_WARNING)
     target_compile_options(

From 52d61c528fa46a1f579ce294757ed7fe6d0b2970 Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Thu, 14 Nov 2024 13:41:18 +1100
Subject: [PATCH 3/3] Remove all explicit usage of fmtlib (#1724)

Fixes #1717
Also fixes #1710 in 5330063

I have replaced fmt-style format string placeholders (`"... {} ..."`) with printf-style placeholders by adding a function `rmm::detail::formatted_log()`, which I modified from @vyasr 's #1722.

~The only remaining mention of fmt is in CMakeLists.txt. Do we still need to explicitly fetch fmt?~
Removed.

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/rmm/pull/1724
---
 CMakeLists.txt                                |   6 +-
 benchmarks/replay/replay.cpp                  |   1 +
 benchmarks/utilities/log_parser.hpp           |   4 +-
 cmake/thirdparty/get_fmt.cmake                |  22 ----
 include/rmm/detail/format.hpp                 | 101 ++++++++++++++++++
 include/rmm/detail/logging_assert.hpp         |   4 +-
 include/rmm/logger.hpp                        |  55 +++-------
 .../rmm/mr/device/arena_memory_resource.hpp   |   4 +-
 include/rmm/mr/device/detail/arena.hpp        |  41 +++----
 .../mr/device/detail/coalescing_free_list.hpp |   9 +-
 .../detail/stream_ordered_memory_resource.hpp |  29 +++--
 .../mr/device/logging_resource_adaptor.hpp    |  12 ++-
 .../rmm/mr/device/pool_memory_resource.hpp    |   9 +-
 .../mr/device/tracking_resource_adaptor.hpp   |  16 ++-
 tests/logger_tests.cpp                        |   1 -
 tests/mr/device/arena_mr_tests.cpp            |   4 +-
 tests/mr/device/callback_mr_tests.cpp         |   7 +-
 17 files changed, 189 insertions(+), 136 deletions(-)
 delete mode 100644 cmake/thirdparty/get_fmt.cmake
 create mode 100644 include/rmm/detail/format.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26fcf1fd0..44d7fbb79 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,8 +41,8 @@ rapids_cmake_build_type(Release)
 option(RMM_NVTX "Build RMM with NVTX support" OFF)
 option(BUILD_TESTS "Configure CMake to build tests" ON)
 option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
-# This is mostly so that dependent libraries, such as fmt, are configured in shared mode for
-# downstream dependents of RMM that get their common dependencies transitively.
+# This is mostly so that dependent libraries are configured in shared mode for downstream dependents
+# of RMM that get their common dependencies transitively.
 option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON)
 set(RMM_LOGGING_LEVEL
     "INFO"
@@ -73,7 +73,6 @@ rapids_find_package(
 # add third party dependencies using CPM
 rapids_cpm_init()
 
-include(cmake/thirdparty/get_fmt.cmake)
 include(cmake/thirdparty/get_spdlog.cmake)
 include(cmake/thirdparty/get_cccl.cmake)
 include(cmake/thirdparty/get_nvtx.cmake)
@@ -96,7 +95,6 @@ else()
 endif()
 
 target_link_libraries(rmm INTERFACE CCCL::CCCL)
-target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
 target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
 target_link_libraries(rmm INTERFACE dl)
 target_link_libraries(rmm INTERFACE nvtx3::nvtx3-cpp)
diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index d80841321..7f45b7691 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -16,6 +16,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
+#include <rmm/logger.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/binning_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp
index 2283ace93..4dfa5bae4 100644
--- a/benchmarks/utilities/log_parser.hpp
+++ b/benchmarks/utilities/log_parser.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -151,7 +151,7 @@ inline std::vector<event> parse_csv(std::string const& filename)
 
   auto parse_pointer = [](std::string const& str, uintptr_t& ptr) {
     auto const base{16};
-    ptr = std::stoll(str, nullptr, base);
+    ptr = (str == "(nil)") ? 0 : std::stoll(str, nullptr, base);
   };
 
   std::vector<uintptr_t> pointers = csv.GetColumn<uintptr_t>("Pointer", parse_pointer);
diff --git a/cmake/thirdparty/get_fmt.cmake b/cmake/thirdparty/get_fmt.cmake
deleted file mode 100644
index 5787fb73f..000000000
--- a/cmake/thirdparty/get_fmt.cmake
+++ /dev/null
@@ -1,22 +0,0 @@
-# =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
-# in compliance with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
-# or implied. See the License for the specific language governing permissions and limitations under
-# the License.
-# =============================================================================
-
-# Use CPM to find or clone fmt
-function(find_and_configure_fmt)
-
-  include(${rapids-cmake-dir}/cpm/fmt.cmake)
-  rapids_cpm_fmt(INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports)
-endfunction()
-
-find_and_configure_fmt()
diff --git a/include/rmm/detail/format.hpp b/include/rmm/detail/format.hpp
new file mode 100644
index 000000000..21acac032
--- /dev/null
+++ b/include/rmm/detail/format.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <array>
+#include <cstdio>
+#include <ios>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace RMM_NAMESPACE {
+namespace detail {
+
+/**
+ * @brief Format a message string with printf-style formatting
+ *
+ * This function performs printf-style formatting to avoid the need for fmt
+ * or spdlog's own templated APIs (which would require exposing spdlog
+ * symbols publicly) and returns the formatted message as a `std::string`.
+ *
+ * @param format The format string
+ * @param args The format arguments
+ * @return The formatted message
+ * @throw rmm::logic_error if an error occurs during formatting
+ */
+template <typename... Args>
+std::string formatted_log(std::string const& format, Args&&... args)
+{
+  auto convert_to_c_string = [](auto&& arg) -> decltype(auto) {
+    using ArgType = std::decay_t<decltype(arg)>;
+    if constexpr (std::is_same_v<ArgType, std::string>) {
+      return arg.c_str();
+    } else {
+      return std::forward<decltype(arg)>(arg);
+    }
+  };
+
+  // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg)
+  auto retsize =
+    std::snprintf(nullptr, 0, format.c_str(), convert_to_c_string(std::forward<Args>(args))...);
+  RMM_EXPECTS(retsize >= 0, "Error during formatting.");
+  if (retsize == 0) { return {}; }
+  auto size = static_cast<std::size_t>(retsize) + 1;  // for null terminator
+  // NOLINTNEXTLINE(modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays)
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), convert_to_c_string(std::forward<Args>(args))...);
+  // NOLINTEND(cppcoreguidelines-pro-type-vararg)
+  return {buf.get(), buf.get() + size - 1};  // drop '\0'
+}
+
+// specialization for no arguments
+template <>
+inline std::string formatted_log(std::string const& format)
+{
+  return format;
+}
+
+// Stringify a size in bytes to a human-readable value
+inline std::string format_bytes(std::size_t value)
+{
+  static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
+
+  int index = 0;
+  auto size = static_cast<double>(value);
+  while (size > 1024) {
+    size /= 1024;
+    index++;
+  }
+
+  return std::to_string(size) + ' ' + units.at(index);
+}
+
+// Stringify a stream ID
+inline std::string format_stream(rmm::cuda_stream_view stream)
+{
+  std::stringstream sstr{};
+  sstr << std::hex << stream.value();
+  return sstr.str();
+}
+
+}  // namespace detail
+}  // namespace RMM_NAMESPACE
diff --git a/include/rmm/detail/logging_assert.hpp b/include/rmm/detail/logging_assert.hpp
index 7eb667211..4d702ee2b 100644
--- a/include/rmm/detail/logging_assert.hpp
+++ b/include/rmm/detail/logging_assert.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@
     if (!success) {                                                                               \
       RMM_LOG_CRITICAL(                                                                           \
         "[" __FILE__ ":" RMM_STRINGIFY(__LINE__) "] Assertion " RMM_STRINGIFY(_expr) " failed."); \
-      rmm::logger().flush();                                                                      \
+      rmm::detail::logger().flush();                                                              \
       /* NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) */                   \
       assert(success);                                                                            \
     }                                                                                             \
diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp
index eba3f122b..2cfd921b1 100644
--- a/include/rmm/logger.hpp
+++ b/include/rmm/logger.hpp
@@ -16,15 +16,13 @@
 
 #pragma once
 
+#include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 
-#include <fmt/format.h>
-#include <fmt/ostream.h>
 #include <spdlog/sinks/basic_file_sink.h>
 #include <spdlog/spdlog.h>
 
-#include <array>
-#include <iostream>
 #include <string>
 
 namespace RMM_NAMESPACE {
@@ -70,32 +68,6 @@ struct logger_wrapper {
   }
 };
 
-/**
- * @brief Represent a size in number of bytes.
- */
-struct bytes {
-  std::size_t value;  ///< The size in bytes
-
-  /**
-   * @brief Construct a new bytes object
-   *
-   * @param os The output stream
-   * @param value The size in bytes
-   */
-  friend std::ostream& operator<<(std::ostream& os, bytes const& value)
-  {
-    static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
-
-    int index = 0;
-    auto size = static_cast<double>(value.value);
-    while (size > 1024) {
-      size /= 1024;
-      index++;
-    }
-    return os << size << ' ' << units.at(index);
-  }
-};
-
 inline spdlog::logger& logger()
 {
   static detail::logger_wrapper wrapped{};
@@ -125,20 +97,21 @@ logger()
 // The default is INFO, but it should be used sparingly, so that by default a log file is only
 // output if there is important information, warnings, errors, and critical failures
 // Log messages that require computation should only be used at level TRACE and DEBUG
-#define RMM_LOG_TRACE(...)    SPDLOG_LOGGER_TRACE(&rmm::detail::logger(), __VA_ARGS__)
-#define RMM_LOG_DEBUG(...)    SPDLOG_LOGGER_DEBUG(&rmm::detail::logger(), __VA_ARGS__)
-#define RMM_LOG_INFO(...)     SPDLOG_LOGGER_INFO(&rmm::detail::logger(), __VA_ARGS__)
-#define RMM_LOG_WARN(...)     SPDLOG_LOGGER_WARN(&rmm::detail::logger(), __VA_ARGS__)
-#define RMM_LOG_ERROR(...)    SPDLOG_LOGGER_ERROR(&rmm::detail::logger(), __VA_ARGS__)
-#define RMM_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&rmm::detail::logger(), __VA_ARGS__)
+#define RMM_LOG_TRACE(...) \
+  SPDLOG_LOGGER_TRACE(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_DEBUG(...) \
+  SPDLOG_LOGGER_DEBUG(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_INFO(...) \
+  SPDLOG_LOGGER_INFO(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_WARN(...) \
+  SPDLOG_LOGGER_WARN(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_ERROR(...) \
+  SPDLOG_LOGGER_ERROR(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_CRITICAL(...) \
+  SPDLOG_LOGGER_CRITICAL(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
 
 //! @endcond
 
 }  // namespace RMM_NAMESPACE
 
-// Doxygen doesn't like this because we're overloading something from fmt
-//! @cond Doxygen_Suppress
-template <>
-struct fmt::formatter<rmm::detail::bytes> : fmt::ostream_formatter {};
-
 //! @endcond
diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp
index 9b380ffb9..d3a4bb09d 100644
--- a/include/rmm/mr/device/arena_memory_resource.hpp
+++ b/include/rmm/mr/device/arena_memory_resource.hpp
@@ -18,6 +18,7 @@
 #include <rmm/aligned.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/detail/logging_assert.hpp>
 #include <rmm/logger.hpp>
 #include <rmm/mr/device/detail/arena.hpp>
@@ -335,7 +336,8 @@ class arena_memory_resource final : public device_memory_resource {
   void dump_memory_log(size_t bytes)
   {
     logger_->info("**************************************************");
-    logger_->info("Ran out of memory trying to allocate {}.", rmm::detail::bytes{bytes});
+    logger_->info(rmm::detail::formatted_log("Ran out of memory trying to allocate %s.",
+                                             rmm::detail::format_bytes(bytes)));
     logger_->info("**************************************************");
     logger_->info("Global arena:");
     global_arena_.dump_memory_log(logger_);
diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp
index da64ca85b..419c4fcf4 100644
--- a/include/rmm/mr/device/detail/arena.hpp
+++ b/include/rmm/mr/device/detail/arena.hpp
@@ -21,13 +21,13 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/detail/logging_assert.hpp>
 #include <rmm/logger.hpp>
 #include <rmm/resource_ref.hpp>
 
 #include <cuda_runtime_api.h>
 
-#include <fmt/core.h>
 #include <spdlog/common.h>
 
 #include <algorithm>
@@ -651,33 +651,38 @@ class global_arena final {
   {
     std::lock_guard lock(mtx_);
 
-    logger->info("  Arena size: {}", rmm::detail::bytes{upstream_block_.size()});
-    logger->info("  # superblocks: {}", superblocks_.size());
+    logger->info(rmm::detail::formatted_log("  Arena size: %s",
+                                            rmm::detail::format_bytes(upstream_block_.size())));
+    logger->info(rmm::detail::formatted_log("  # superblocks: %zu", superblocks_.size()));
     if (!superblocks_.empty()) {
-      logger->debug("  Total size of superblocks: {}",
-                    rmm::detail::bytes{total_memory_size(superblocks_)});
+      logger->debug(
+        rmm::detail::formatted_log("  Total size of superblocks: %s",
+                                   rmm::detail::format_bytes(total_memory_size(superblocks_))));
       auto const total_free    = total_free_size(superblocks_);
       auto const max_free      = max_free_size(superblocks_);
       auto const fragmentation = (1 - max_free / static_cast<double>(total_free)) * 100;
-      logger->info("  Total free memory: {}", rmm::detail::bytes{total_free});
-      logger->info("  Largest block of free memory: {}", rmm::detail::bytes{max_free});
-      logger->info("  Fragmentation: {:.2f}%", fragmentation);
+      logger->info(rmm::detail::formatted_log("  Total free memory: %s",
+                                              rmm::detail::format_bytes(total_free)));
+      logger->info(rmm::detail::formatted_log("  Largest block of free memory: %s",
+                                              rmm::detail::format_bytes(max_free)));
+      logger->info(rmm::detail::formatted_log("  Fragmentation: %0.2f", fragmentation));
 
-      auto index = 0;
+      auto index = decltype(superblocks_.size()){0};
       char* prev_end{};
       for (auto const& sblk : superblocks_) {
         if (prev_end == nullptr) { prev_end = sblk.pointer(); }
-        logger->debug(
-          "    Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}, "
-          "gap={}",
+        logger->debug(rmm::detail::formatted_log(
+          "    Superblock %zu: start=%p, end=%p, size=%s, empty=%s, # free blocks=%zu, max "
+          "free=%s, "
+          "gap=%s",
           index,
-          fmt::ptr(sblk.pointer()),
-          fmt::ptr(sblk.end()),
-          rmm::detail::bytes{sblk.size()},
-          sblk.empty(),
+          sblk.pointer(),
+          sblk.end(),
+          rmm::detail::format_bytes(sblk.size()),
+          sblk.empty() ? "T" : "F",
           sblk.free_blocks(),
-          rmm::detail::bytes{sblk.max_free_size()},
-          rmm::detail::bytes{static_cast<size_t>(sblk.pointer() - prev_end)});
+          rmm::detail::format_bytes(sblk.max_free_size()),
+          rmm::detail::format_bytes(static_cast<size_t>(sblk.pointer() - prev_end))));
         prev_end = sblk.end();
         index++;
       }
diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp
index 8d5cbf9ed..8b056e6d9 100644
--- a/include/rmm/mr/device/detail/coalescing_free_list.hpp
+++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp
@@ -20,8 +20,6 @@
 #include <rmm/detail/export.hpp>
 #include <rmm/mr/device/detail/free_list.hpp>
 
-#include <fmt/core.h>
-
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -131,10 +129,7 @@ struct block : public block_base {
   /**
    * @brief Print this block. For debugging.
    */
-  inline void print() const
-  {
-    std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl;
-  }
+  inline void print() const { std::cout << pointer() << " " << size() << " B" << std::endl; }
 #endif
 
  private:
@@ -146,7 +141,7 @@ struct block : public block_base {
 /// Print block on an ostream
 inline std::ostream& operator<<(std::ostream& out, const block& blk)
 {
-  out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size());
+  out << blk.pointer() << " " << blk.size() << " B" << std::endl;
   return out;
 }
 #endif
diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
index 9cf674d6e..f177504f2 100644
--- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
+++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
@@ -19,13 +19,12 @@
 #include <rmm/cuda_device.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/logger.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
 #include <cuda_runtime_api.h>
 
-#include <fmt/core.h>
-
 #include <cstddef>
 #include <map>
 #include <mutex>
@@ -201,7 +200,7 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
    */
   void* do_allocate(std::size_t size, cuda_stream_view stream) override
   {
-    RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), size);
+    RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(stream), size);
 
     if (size <= 0) { return nullptr; }
 
@@ -215,10 +214,10 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
                 rmm::out_of_memory);
     auto const block = this->underlying().get_block(size, stream_event);
 
-    RMM_LOG_TRACE("[A][stream {:p}][{}B][{:p}]",
-                  fmt::ptr(stream_event.stream),
+    RMM_LOG_TRACE("[A][stream %s][%zuB][%p]",
+                  rmm::detail::format_stream(stream_event.stream),
                   size,
-                  fmt::ptr(block.pointer()));
+                  block.pointer());
 
     log_summary_trace();
 
@@ -234,7 +233,7 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
    */
   void do_deallocate(void* ptr, std::size_t size, cuda_stream_view stream) override
   {
-    RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), size, ptr);
+    RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(stream), size, ptr);
 
     if (size <= 0 || ptr == nullptr) { return; }
 
@@ -384,10 +383,10 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
       if (merge_first) {
         merge_lists(stream_event, blocks, other_event, std::move(other_blocks));
 
-        RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]",
-                      fmt::ptr(stream_event.stream),
+        RMM_LOG_DEBUG("[A][Stream %s][%zuB][Merged stream %s]",
+                      rmm::detail::format_stream(stream_event.stream),
                       size,
-                      fmt::ptr(iter->first.stream));
+                      rmm::detail::format_stream(iter->first.stream));
 
         stream_free_blocks_.erase(iter);
 
@@ -414,11 +413,11 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
         block_type const block = find_block(iter);
 
         if (block.is_valid()) {
-          RMM_LOG_DEBUG((merge_first) ? "[A][Stream {:p}][{}B][Found after merging stream {:p}]"
-                                      : "[A][Stream {:p}][{}B][Taken from stream {:p}]",
-                        fmt::ptr(stream_event.stream),
+          RMM_LOG_DEBUG((merge_first) ? "[A][Stream %s][%zuB][Found after merging stream %s]"
+                                      : "[A][Stream %s][%zuB][Taken from stream %s]",
+                        rmm::detail::format_stream(stream_event.stream),
                         size,
-                        fmt::ptr(iter->first.stream));
+                        rmm::detail::format_stream(iter->first.stream));
           return block;
         }
       }
@@ -471,7 +470,7 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
                     max_block    = std::max(summary.first, max_block);
                     free_mem += summary.second;
                   });
-    RMM_LOG_TRACE("[Summary][Free lists: {}][Blocks: {}][Max Block: {}][Total Free: {}]",
+    RMM_LOG_TRACE("[Summary][Free lists: %zu][Blocks: %zu][Max Block: %zu][Total Free: %zu]",
                   stream_free_blocks_.size(),
                   num_blocks,
                   max_block,
diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp
index 595ab2e4e..578543852 100644
--- a/include/rmm/mr/device/logging_resource_adaptor.hpp
+++ b/include/rmm/mr/device/logging_resource_adaptor.hpp
@@ -18,16 +18,17 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 #include <rmm/resource_ref.hpp>
 
-#include <fmt/core.h>
 #include <spdlog/common.h>
 #include <spdlog/sinks/basic_file_sink.h>
 #include <spdlog/sinks/ostream_sink.h>
 #include <spdlog/spdlog.h>
 
 #include <cstddef>
+#include <cstdio>
 #include <memory>
 #include <sstream>
 #include <string_view>
@@ -297,10 +298,12 @@ class logging_resource_adaptor final : public device_memory_resource {
   {
     try {
       auto const ptr = get_upstream_resource().allocate_async(bytes, stream);
-      logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value()));
+      logger_->info(rmm::detail::formatted_log(
+        "allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)));
       return ptr;
     } catch (...) {
-      logger_->info("allocate failure,{},{},{}", nullptr, bytes, fmt::ptr(stream.value()));
+      logger_->info(rmm::detail::formatted_log(
+        "allocate failure,%p,%zu,%s", nullptr, bytes, rmm::detail::format_stream(stream)));
       throw;
     }
   }
@@ -321,7 +324,8 @@ class logging_resource_adaptor final : public device_memory_resource {
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
   {
-    logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value()));
+    logger_->info(
+      rmm::detail::formatted_log("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)));
     get_upstream_resource().deallocate_async(ptr, bytes, stream);
   }
 
diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp
index f63de21ff..037147de3 100644
--- a/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/include/rmm/mr/device/pool_memory_resource.hpp
@@ -19,6 +19,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
+#include <rmm/detail/format.hpp>
 #include <rmm/detail/logging_assert.hpp>
 #include <rmm/detail/thrust_namespace.h>
 #include <rmm/logger.hpp>
@@ -34,8 +35,6 @@
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/optional.h>
 
-#include <fmt/core.h>
-
 #include <algorithm>
 #include <cstddef>
 #include <iostream>
@@ -271,8 +270,8 @@ class pool_memory_resource final
       }
       try_size = std::max(min_size, try_size / 2);
     }
-    RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]",
-                  fmt::ptr(stream.value()),
+    RMM_LOG_ERROR("[A][Stream %s][Upstream %zuB][FAILURE maximum pool size exceeded]",
+                  rmm::detail::format_stream(stream),
                   min_size);
     RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory);
   }
@@ -351,7 +350,7 @@ class pool_memory_resource final
    */
   std::optional<block_type> block_from_upstream(std::size_t size, cuda_stream_view stream)
   {
-    RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size);
+    RMM_LOG_DEBUG("[A][Stream %s][Upstream %zuB]", rmm::detail::format_stream(stream), size);
 
     if (size == 0) { return {}; }
 
diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp
index 6a5916e5c..8131eef4d 100644
--- a/include/rmm/mr/device/tracking_resource_adaptor.hpp
+++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -23,8 +23,6 @@
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/resource_ref.hpp>
 
-#include <fmt/core.h>
-
 #include <cstddef>
 #include <map>
 #include <mutex>
@@ -188,7 +186,7 @@ class tracking_resource_adaptor final : public device_memory_resource {
   void log_outstanding_allocations() const
   {
 #if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
-    RMM_LOG_DEBUG("Outstanding Allocations: {}", get_outstanding_allocations_str());
+    RMM_LOG_DEBUG("Outstanding Allocations: %s", get_outstanding_allocations_str());
 #endif  // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
   }
 
@@ -236,12 +234,12 @@ class tracking_resource_adaptor final : public device_memory_resource {
 
       // Ensure the allocation is found and the number of bytes match
       if (found == allocations_.end()) {
-        // Don't throw but log an error. Throwing in a descructor (or any noexcept) will call
+        // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
         // std::terminate
         RMM_LOG_ERROR(
-          "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: "
-          "{}",
-          fmt::ptr(ptr),
+          "Deallocating a pointer that was not tracked. Ptr: %p [%zuB], Current Num. Allocations: "
+          "%zu",
+          ptr,
           bytes,
           this->allocations_.size());
       } else {
@@ -250,10 +248,10 @@ class tracking_resource_adaptor final : public device_memory_resource {
         auto allocated_bytes = found->second.allocation_size;
 
         if (allocated_bytes != bytes) {
-          // Don't throw but log an error. Throwing in a descructor (or any noexcept) will call
+          // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
           // std::terminate
           RMM_LOG_ERROR(
-            "Alloc bytes ({}) and Dealloc bytes ({}) do not match", allocated_bytes, bytes);
+            "Alloc bytes (%zu) and Dealloc bytes (%zu) do not match", allocated_bytes, bytes);
 
           bytes = allocated_bytes;
         }
diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp
index 643281d91..8a5d37be2 100644
--- a/tests/logger_tests.cpp
+++ b/tests/logger_tests.cpp
@@ -112,7 +112,6 @@ void expect_log_events(std::string const& filename,
                // EXPECT_EQ(expected.thread_id, actual.thread_id);
                // EXPECT_EQ(expected.stream, actual.stream);
                EXPECT_EQ(expected.act, actual.act);
-               // device_memory_resource automatically pads an allocation to a multiple of 8 bytes
                EXPECT_EQ(expected.size, actual.size);
                EXPECT_EQ(expected.pointer, actual.pointer);
                return true;
diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp
index 95cc9c9c1..67f183a23 100644
--- a/tests/mr/device/arena_mr_tests.cpp
+++ b/tests/mr/device/arena_mr_tests.cpp
@@ -574,10 +574,10 @@ TEST_F(ArenaTest, DumpLogOnFailure)  // NOLINT
     std::size_t num_threads{4};
     threads.reserve(num_threads);
     for (std::size_t i = 0; i < num_threads; ++i) {
-      threads.emplace_back(std::thread([&] {
+      threads.emplace_back([&] {
         void* ptr = mr.allocate(32_KiB);
         mr.deallocate(ptr, 32_KiB);
-      }));
+      });
     }
 
     for (auto& thread : threads) {
diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp
index a56efa60c..a7f6ab7be 100644
--- a/tests/mr/device/callback_mr_tests.cpp
+++ b/tests/mr/device/callback_mr_tests.cpp
@@ -23,11 +23,11 @@
 #include <rmm/mr/device/callback_memory_resource.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
-#include <fmt/core.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
 #include <cstddef>
+#include <string>
 
 namespace rmm::test {
 namespace {
@@ -78,8 +78,9 @@ TEST(CallbackTest, LoggingTest)
   auto* ptr       = mr.allocate(size);
   mr.deallocate(ptr, size);
 
-  std::string output = testing::internal::GetCapturedStdout();
-  std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", size, size);
+  auto output = testing::internal::GetCapturedStdout();
+  auto expect = std::string("Allocating ") + std::to_string(size) + " bytes\nDeallocating " +
+                std::to_string(size) + " bytes\n";
   ASSERT_EQ(expect, output);
 }