diff --git a/.devcontainer/cuda11.1-gcc6/devcontainer.json b/.devcontainer/cuda11.1-gcc6/devcontainer.json deleted file mode 100644 index 6311f6a882b..00000000000 --- a/.devcontainer/cuda11.1-gcc6/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-gcc6-cuda11.1", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.1-gcc6", - "CCCL_CUDA_VERSION": "11.1", - "CCCL_HOST_COMPILER": "gcc", - "CCCL_HOST_COMPILER_VERSION": "6", - "CCCL_BUILD_INFIX": "cuda11.1-gcc6", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda11.1-gcc6" -} diff --git a/.devcontainer/cuda11.1-gcc9/devcontainer.json b/.devcontainer/cuda11.1-gcc9/devcontainer.json deleted file mode 100644 index 9d711be5f66..00000000000 --- a/.devcontainer/cuda11.1-gcc9/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-gcc9-cuda11.1", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.1-gcc9", - "CCCL_CUDA_VERSION": "11.1", - "CCCL_HOST_COMPILER": "gcc", - "CCCL_HOST_COMPILER_VERSION": "9", - "CCCL_BUILD_INFIX": "cuda11.1-gcc9", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": 
"/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda11.1-gcc9" -} diff --git a/.devcontainer/cuda11.1-llvm9/devcontainer.json b/.devcontainer/cuda11.1-llvm9/devcontainer.json deleted file mode 100644 index e39eb910443..00000000000 --- a/.devcontainer/cuda11.1-llvm9/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm9-cuda11.1", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.1-llvm9", - "CCCL_CUDA_VERSION": "11.1", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "9", - "CCCL_BUILD_INFIX": "cuda11.1-llvm9", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda11.1-llvm9" -} diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json deleted file mode 100644 index 
87098679264..00000000000 --- a/.devcontainer/cuda11.8-gcc11/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-gcc11-cuda11.8", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.8-gcc11", - "CCCL_CUDA_VERSION": "11.8", - "CCCL_HOST_COMPILER": "gcc", - "CCCL_HOST_COMPILER_VERSION": "11", - "CCCL_BUILD_INFIX": "cuda11.8-gcc11", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda11.8-gcc11" -} diff --git a/.devcontainer/cuda11.1-gcc7/devcontainer.json b/.devcontainer/cuda12.0-gcc7/devcontainer.json similarity index 90% rename from .devcontainer/cuda11.1-gcc7/devcontainer.json rename to .devcontainer/cuda12.0-gcc7/devcontainer.json index e7d2a6572f8..96a32136eb1 100644 --- a/.devcontainer/cuda11.1-gcc7/devcontainer.json +++ b/.devcontainer/cuda12.0-gcc7/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-gcc7-cuda11.1", + "image": "rapidsai/devcontainers:25.02-cpp-gcc7-cuda12.0", "hostRequirements": { "gpu": "optional" }, @@ -15,11 +15,11 @@ "SCCACHE_BUCKET": "rapids-sccache-devs", "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.1-gcc7", - "CCCL_CUDA_VERSION": "11.1", + "DEVCONTAINER_NAME": "cuda12.0-gcc7", + "CCCL_CUDA_VERSION": "12.0", "CCCL_HOST_COMPILER": "gcc", "CCCL_HOST_COMPILER_VERSION": "7", - "CCCL_BUILD_INFIX": "cuda11.1-gcc7", + "CCCL_BUILD_INFIX": "cuda12.0-gcc7", "CCCL_CUDA_EXTENDED": "false" }, "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", @@ -50,5 +50,5 @@ } } }, - "name": "cuda11.1-gcc7" + "name": "cuda12.0-gcc7" } diff --git a/.devcontainer/cuda11.1-gcc8/devcontainer.json b/.devcontainer/cuda12.0-gcc8/devcontainer.json 
similarity index 90% rename from .devcontainer/cuda11.1-gcc8/devcontainer.json rename to .devcontainer/cuda12.0-gcc8/devcontainer.json index f590606adef..9cfe4709e07 100644 --- a/.devcontainer/cuda11.1-gcc8/devcontainer.json +++ b/.devcontainer/cuda12.0-gcc8/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-gcc8-cuda11.1", + "image": "rapidsai/devcontainers:25.02-cpp-gcc8-cuda12.0", "hostRequirements": { "gpu": "optional" }, @@ -15,11 +15,11 @@ "SCCACHE_BUCKET": "rapids-sccache-devs", "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda11.1-gcc8", - "CCCL_CUDA_VERSION": "11.1", + "DEVCONTAINER_NAME": "cuda12.0-gcc8", + "CCCL_CUDA_VERSION": "12.0", "CCCL_HOST_COMPILER": "gcc", "CCCL_HOST_COMPILER_VERSION": "8", - "CCCL_BUILD_INFIX": "cuda11.1-gcc8", + "CCCL_BUILD_INFIX": "cuda12.0-gcc8", "CCCL_CUDA_EXTENDED": "false" }, "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", @@ -50,5 +50,5 @@ } } }, - "name": "cuda11.1-gcc8" + "name": "cuda12.0-gcc8" } diff --git a/.devcontainer/cuda12.0-llvm10/devcontainer.json b/.devcontainer/cuda12.0-llvm10/devcontainer.json deleted file mode 100644 index 6f75525f808..00000000000 --- a/.devcontainer/cuda12.0-llvm10/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm10-cuda12.0", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.0-llvm10", - "CCCL_CUDA_VERSION": "12.0", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "10", - "CCCL_BUILD_INFIX": "cuda12.0-llvm10", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.0-llvm10" -} diff --git a/.devcontainer/cuda12.0-llvm11/devcontainer.json 
b/.devcontainer/cuda12.0-llvm11/devcontainer.json deleted file mode 100644 index fd21f30fbd3..00000000000 --- a/.devcontainer/cuda12.0-llvm11/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm11-cuda12.0", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.0-llvm11", - "CCCL_CUDA_VERSION": "12.0", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "11", - "CCCL_BUILD_INFIX": "cuda12.0-llvm11", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.0-llvm11" -} diff --git a/.devcontainer/cuda12.0-llvm12/devcontainer.json b/.devcontainer/cuda12.0-llvm12/devcontainer.json deleted file mode 100644 index b402063c837..00000000000 --- a/.devcontainer/cuda12.0-llvm12/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm12-cuda12.0", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.0-llvm12", - "CCCL_CUDA_VERSION": "12.0", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "12", - "CCCL_BUILD_INFIX": "cuda12.0-llvm12", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": 
"/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.0-llvm12" -} diff --git a/.devcontainer/cuda12.0-llvm13/devcontainer.json b/.devcontainer/cuda12.0-llvm13/devcontainer.json deleted file mode 100644 index 40187a60e6c..00000000000 --- a/.devcontainer/cuda12.0-llvm13/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm13-cuda12.0", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.0-llvm13", - "CCCL_CUDA_VERSION": "12.0", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "13", - "CCCL_BUILD_INFIX": "cuda12.0-llvm13", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.0-llvm13" -} diff --git a/.devcontainer/cuda12.0-llvm9/devcontainer.json b/.devcontainer/cuda12.0-llvm9/devcontainer.json deleted file mode 100644 index 
e72c6da2fdd..00000000000 --- a/.devcontainer/cuda12.0-llvm9/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm9-cuda12.0", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.0-llvm9", - "CCCL_CUDA_VERSION": "12.0", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "9", - "CCCL_BUILD_INFIX": "cuda12.0-llvm9", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.0-llvm9" -} diff --git a/.devcontainer/cuda12.6-llvm10/devcontainer.json b/.devcontainer/cuda12.6-llvm10/devcontainer.json deleted file mode 100644 index ef06f7cf9a2..00000000000 --- a/.devcontainer/cuda12.6-llvm10/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm10-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-llvm10", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "10", - "CCCL_BUILD_INFIX": "cuda12.6-llvm10", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": 
"source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-llvm10" -} diff --git a/.devcontainer/cuda12.6-llvm11/devcontainer.json b/.devcontainer/cuda12.6-llvm11/devcontainer.json deleted file mode 100644 index 38c13841ee6..00000000000 --- a/.devcontainer/cuda12.6-llvm11/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm11-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-llvm11", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "11", - "CCCL_BUILD_INFIX": "cuda12.6-llvm11", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-llvm11" -} diff --git a/.devcontainer/cuda12.6-llvm12/devcontainer.json b/.devcontainer/cuda12.6-llvm12/devcontainer.json deleted file mode 100644 index 8898d216573..00000000000 --- 
a/.devcontainer/cuda12.6-llvm12/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm12-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-llvm12", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "12", - "CCCL_BUILD_INFIX": "cuda12.6-llvm12", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-llvm12" -} diff --git a/.devcontainer/cuda12.6-llvm13/devcontainer.json b/.devcontainer/cuda12.6-llvm13/devcontainer.json deleted file mode 100644 index 8d713720c51..00000000000 --- a/.devcontainer/cuda12.6-llvm13/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm13-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-llvm13", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "13", - "CCCL_BUILD_INFIX": "cuda12.6-llvm13", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": 
"source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-llvm13" -} diff --git a/.devcontainer/cuda12.6-llvm9/devcontainer.json b/.devcontainer/cuda12.6-llvm9/devcontainer.json deleted file mode 100644 index be41e2506c5..00000000000 --- a/.devcontainer/cuda12.6-llvm9/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-llvm9-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-llvm9", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "9", - "CCCL_BUILD_INFIX": "cuda12.6-llvm9", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-llvm9" -} diff --git a/.devcontainer/cuda12.6-oneapi2023.2.0/devcontainer.json b/.devcontainer/cuda12.6-oneapi2023.2.0/devcontainer.json deleted file mode 100644 index 7c54383deeb..00000000000 --- 
a/.devcontainer/cuda12.6-oneapi2023.2.0/devcontainer.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:25.02-cpp-oneapi2023.2.0-cuda12.6", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}; mkdir -m 0755 -p ${localWorkspaceFolder}/build;", - "if [[ -n ${WSLENV+set} ]]; then docker volume create cccl-build; else docker volume create --driver local --opt type=none --opt device=${localWorkspaceFolder}/build --opt o=bind cccl-build fi;" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.6-oneapi2023.2.0", - "CCCL_CUDA_VERSION": "12.6", - "CCCL_HOST_COMPILER": "oneapi", - "CCCL_HOST_COMPILER_VERSION": "2023.2.0", - "CCCL_BUILD_INFIX": "cuda12.6-oneapi2023.2.0", - "CCCL_CUDA_EXTENDED": "false" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=cccl-build,target=/home/coder/cccl/build" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - "files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.6-oneapi2023.2.0" -} diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 725009e6256..74de63e7a94 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -37,6 +37,11 @@ body: - Thrust - CUB - libcu++ + - CUDA Experimental (cudax) + - cuda.cooperative (Python) + - cuda.parallel (Python) + - General CCCL + - Infrastructure - Not sure validations: required: true diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py index 15d91a7ab60..62b90f1472d 100755 --- a/.github/actions/workflow-build/build-workflow.py +++ b/.github/actions/workflow-build/build-workflow.py @@ -62,8 +62,8 @@ import re import struct import sys -import yaml +import yaml matrix_yaml = None diff --git a/.github/actions/workflow-run-job-windows/action.yml b/.github/actions/workflow-run-job-windows/action.yml index 805beff3446..1b5289a5a7d 100644 --- a/.github/actions/workflow-run-job-windows/action.yml +++ b/.github/actions/workflow-run-job-windows/action.yml @@ -50,6 +50,7 @@ runs: docker run \ --mount type=bind,source="${{steps.paths.outputs.HOST_REPO}}",target="${{steps.paths.outputs.MOUNT_REPO}}" \ --workdir "${{steps.paths.outputs.MOUNT_REPO}}" \ + --isolation=process \ ${{ inputs.image }} \ powershell -c " 
[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}'); diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 27e4a3ec4ea..d317e931e78 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v18.1.8 + rev: v19.1.6 hooks: - id: clang-format types_or: [file] @@ -39,13 +39,10 @@ repos: # TODO/REMINDER: add the Ruff vscode extension to the devcontainers # Ruff, the Python auto-correcting linter/formatter written in Rust - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.3 + rev: v0.8.6 hooks: - - id: ruff - args: ["--fix", "--show-fixes"] - exclude: "^docs/tools/" - - id: ruff-format - exclude: "^docs/tools/" + - id: ruff # linter + - id: ruff-format # formatter - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: @@ -60,7 +57,7 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.13.0' + rev: 'v1.14.1' hooks: - id: mypy additional_dependencies: [types-cachetools, numpy] diff --git a/CMakePresets.json b/CMakePresets.json index 2519ca09adf..bd10a95200b 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -22,7 +22,6 @@ "CCCL_ENABLE_TESTING": false, "CCCL_ENABLE_EXAMPLES": false, "CCCL_ENABLE_C": false, - "CCCL_SUPPRESS_ICC_DEPRECATION_WARNING": true, "CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING": true, "libcudacxx_ENABLE_INSTALL_RULES": true, "CUB_ENABLE_INSTALL_RULES": true, @@ -70,8 +69,6 @@ "CUB_ENABLE_TESTING": true, "CUB_ENABLE_EXAMPLES": true, "CUB_SEPARATE_CATCH2": true, - "CUB_ENABLE_DIALECT_CPP11": true, - "CUB_ENABLE_DIALECT_CPP14": true, "CUB_ENABLE_DIALECT_CPP17": true, "CUB_ENABLE_DIALECT_CPP20": true, "THRUST_ENABLE_MULTICONFIG": true, @@ -233,30 +230,10 @@ "CUB_ENABLE_TESTING": true, "CUB_ENABLE_EXAMPLES": true, "CUB_SEPARATE_CATCH2": true, - "CUB_ENABLE_DIALECT_CPP11": false, - "CUB_ENABLE_DIALECT_CPP14": false, "CUB_ENABLE_DIALECT_CPP17": false, "CUB_ENABLE_DIALECT_CPP20": false } }, - { - "name": "cub-cpp11", - "displayName": "CUB: C++11", - "inherits": "cub-base", - "cacheVariables": { - "CCCL_IGNORE_DEPRECATED_CPP_DIALECT": true, - "CUB_ENABLE_DIALECT_CPP11": true - } - }, - { - "name": "cub-cpp14", - "displayName": "CUB: C++14", - "inherits": "cub-base", - "cacheVariables": { - "CCCL_IGNORE_DEPRECATED_CPP_DIALECT": true, - "CUB_ENABLE_DIALECT_CPP14": true - } - }, { "name": "cub-cpp17", "displayName": "CUB: C++17", @@ -290,24 +267,6 @@ "THRUST_MULTICONFIG_ENABLE_DIALECT_CPP20": false } }, - { - "name": "thrust-cpp11", - "displayName": "Thrust: C++11", - "inherits": "thrust-base", - "cacheVariables": { - "CCCL_IGNORE_DEPRECATED_CPP_DIALECT": true, - "THRUST_MULTICONFIG_ENABLE_DIALECT_CPP11": true - } - }, - { - "name": "thrust-cpp14", - "displayName": "Thrust: C++14", - "inherits": "thrust-base", - "cacheVariables": { - "CCCL_IGNORE_DEPRECATED_CPP_DIALECT": true, - "THRUST_MULTICONFIG_ENABLE_DIALECT_CPP14": true - } - }, { "name": "thrust-cpp17", "displayName": "Thrust: C++17", @@ -521,14 +480,6 @@ "libcudacxx-base" ] }, - { - "name": "cub-cpp11", - "configurePreset": "cub-cpp11" - }, - { - "name": "cub-cpp14", - "configurePreset": "cub-cpp14" - }, { "name": "cub-cpp17", "configurePreset": "cub-cpp17" @@ -537,14 +488,6 @@ "name": "cub-cpp20", "configurePreset": "cub-cpp20" }, - { - "name": "thrust-cpp11", - "configurePreset": "thrust-cpp11" - }, - { - "name": "thrust-cpp14", - "configurePreset": "thrust-cpp14" - }, 
{ "name": "thrust-cpp17", "configurePreset": "thrust-cpp17" @@ -737,16 +680,6 @@ } } }, - { - "name": "cub-nolid-cpp11", - "configurePreset": "cub-cpp11", - "inherits": "cub-nolid-base" - }, - { - "name": "cub-nolid-cpp14", - "configurePreset": "cub-cpp14", - "inherits": "cub-nolid-base" - }, { "name": "cub-nolid-cpp17", "configurePreset": "cub-cpp17", @@ -757,16 +690,6 @@ "configurePreset": "cub-cpp20", "inherits": "cub-nolid-base" }, - { - "name": "cub-lid0-cpp11", - "configurePreset": "cub-cpp11", - "inherits": "cub-lid0-base" - }, - { - "name": "cub-lid0-cpp14", - "configurePreset": "cub-cpp14", - "inherits": "cub-lid0-base" - }, { "name": "cub-lid0-cpp17", "configurePreset": "cub-cpp17", @@ -777,16 +700,6 @@ "configurePreset": "cub-cpp20", "inherits": "cub-lid0-base" }, - { - "name": "cub-lid1-cpp11", - "configurePreset": "cub-cpp11", - "inherits": "cub-lid1-base" - }, - { - "name": "cub-lid1-cpp14", - "configurePreset": "cub-cpp14", - "inherits": "cub-lid1-base" - }, { "name": "cub-lid1-cpp17", "configurePreset": "cub-cpp17", @@ -797,16 +710,6 @@ "configurePreset": "cub-cpp20", "inherits": "cub-lid1-base" }, - { - "name": "cub-lid2-cpp11", - "configurePreset": "cub-cpp11", - "inherits": "cub-lid2-base" - }, - { - "name": "cub-lid2-cpp14", - "configurePreset": "cub-cpp14", - "inherits": "cub-lid2-base" - }, { "name": "cub-lid2-cpp17", "configurePreset": "cub-cpp17", @@ -817,16 +720,6 @@ "configurePreset": "cub-cpp20", "inherits": "cub-lid2-base" }, - { - "name": "cub-cpp11", - "configurePreset": "cub-cpp11", - "inherits": "cub-base" - }, - { - "name": "cub-cpp14", - "configurePreset": "cub-cpp14", - "inherits": "cub-base" - }, { "name": "cub-cpp17", "configurePreset": "cub-cpp17", @@ -867,16 +760,6 @@ } } }, - { - "name": "thrust-gpu-cpp11", - "configurePreset": "thrust-cpp11", - "inherits": "thrust-gpu-base" - }, - { - "name": "thrust-gpu-cpp14", - "configurePreset": "thrust-cpp14", - "inherits": "thrust-gpu-base" - }, { "name": "thrust-gpu-cpp17", "configurePreset": "thrust-cpp17", @@ -887,16 +770,6 @@ "configurePreset": "thrust-cpp20", "inherits": "thrust-gpu-base" }, - { - "name": "thrust-cpu-cpp11", - "configurePreset": "thrust-cpp11", - "inherits": "thrust-cpu-base" - }, - { - "name": "thrust-cpu-cpp14", - "configurePreset": "thrust-cpp14", - "inherits": "thrust-cpu-base" - }, { "name": "thrust-cpu-cpp17", "configurePreset": "thrust-cpp17", @@ -907,16 +780,6 @@ "configurePreset": "thrust-cpp20", "inherits": "thrust-cpu-base" }, - { - "name": "thrust-cpp11", - "configurePreset": "thrust-cpp11", - "inherits": "thrust-base" - }, - { - "name": "thrust-cpp14", - "configurePreset": "thrust-cpp14", - "inherits": "thrust-base" - }, { "name": "thrust-cpp17", "configurePreset": "thrust-cpp17", diff --git a/README.md b/README.md index b9795ea2f06..358adadc87b 100644 --- a/README.md +++ b/README.md @@ -219,18 +219,16 @@ CCCL users are encouraged to capitalize on the latest enhancements and ["live at For a seamless experience, you can upgrade CCCL independently of the entire CUDA Toolkit. This is possible because CCCL maintains backward compatibility with the latest patch release of every minor CTK release from both the current and previous major version series. In some exceptional cases, the minimum supported minor version of the CUDA Toolkit release may need to be newer than the oldest release within its major version series. -For instance, CCCL requires a minimum supported version of 11.1 from the 11.x series due to an unavoidable compiler issue present in CTK 11.0. 
When a new major CTK is released, we drop support for the oldest supported major version. | CCCL Version | Supports CUDA Toolkit Version | |--------------|------------------------------------------------| | 2.x | 11.1 - 11.8, 12.x (only latest patch releases) | -| 3.x (Future) | 12.x, 13.x (only latest patch releases) | +| 3.x | 12.x, 13.x (only latest patch releases) | [Well-behaved code](#compatibility-guidelines) using the latest CCCL should compile and run successfully with any supported CTK version. Exceptions may occur for new features that depend on new CTK features, so those features would not work on older versions of the CTK. -For example, C++20 support was not added to `nvcc` until CUDA 12.0, so CCCL features that depend on C++20 would not work with CTK 11.x. Users can integrate a newer version of CCCL into an older CTK, but not the other way around. This means an older version of CCCL is not compatible with a newer CTK. @@ -260,6 +258,8 @@ Unless otherwise specified, CCCL supports the same host compilers as the latest - [Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#host-compiler-support-policy) - [Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#system-requirements) +On Linux, GCC 7 or newer is required. + When using older CUDA Toolkits, we also only support the host compilers of the latest CUDA Toolkit, but at least the most recent host compiler of any supported older CUDA Toolkit. @@ -269,8 +269,8 @@ But we will not invest significant time in triaging or fixing issues for older c In the spirit of "You only support what you test", see our [CI Overview](https://github.com/NVIDIA/cccl/blob/main/ci-overview.md) for more information on exactly what we test. ### C++ Dialects -- C++11 (Deprecated in Thrust/CUB, to be removed in next major version) -- C++14 (Deprecated in Thrust/CUB, to be removed in next major version) +- C++11 (only libcu++) +- C++14 (only libcu++) - C++17 - C++20 @@ -285,7 +285,7 @@ Note that some features may only support certain architectures/Compute Capabilit CCCL's testing strategy strikes a balance between testing as many configurations as possible and maintaining reasonable CI times. For CUDA Toolkit versions, testing is done against both the oldest and the newest supported versions. -For instance, if the latest version of the CUDA Toolkit is 12.3, tests are conducted against 11.1 and 12.3. +For instance, if the latest version of the CUDA Toolkit is 12.6, tests are conducted against 12.0 and 12.6. For each CUDA version, builds are completed against all supported host compilers with all supported C++ dialects. The testing strategy and matrix are constantly evolving.
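[Editorial note, not part of the patch] The compatibility table above implies that code meant to "live at head" should guard against being compiled with an older CCCL picked up from the CTK instead of the independently upgraded copy. A minimal sketch of such a guard — assuming the <cuda/version> header and the CCCL_MAJOR_VERSION macro shipped since CCCL 2.x remain available in 3.x:

    // Fail the build early if an older CCCL is found on the include path.
    // CCCL_MAJOR_VERSION is assumed to come from <cuda/version> as in CCCL 2.x.
    #include <cuda/version>

    static_assert(CCCL_MAJOR_VERSION >= 3,
                  "This translation unit requires CCCL 3.x (CUDA 12.x/13.x support).");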
diff --git a/benchmarks/scripts/analyze.py b/benchmarks/scripts/analyze.py index 39223f20dac..f3eceb258ad 100755 --- a/benchmarks/scripts/analyze.py +++ b/benchmarks/scripts/analyze.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 +import argparse +import functools +import itertools +import json +import math import os import re -import json + import cccl -import math -import argparse -import itertools -import functools +import matplotlib.pyplot as plt import numpy as np import pandas as pd -import matplotlib.pyplot as plt from scipy.stats import mannwhitneyu from scipy.stats.mstats import hdquantiles @@ -330,8 +331,8 @@ def coverage(args): def parallel_coordinates_plot(df, title): # Parallel coordinates plot adaptation of https://stackoverflow.com/a/69411450 import matplotlib.cm as cm - from matplotlib.path import Path import matplotlib.patches as patches + from matplotlib.path import Path # Variables (the first variable must be categoric): my_vars = df.columns.tolist() diff --git a/benchmarks/scripts/cccl/bench/__init__.py b/benchmarks/scripts/cccl/bench/__init__.py index a01f59b7b5c..6f3755648db 100644 --- a/benchmarks/scripts/cccl/bench/__init__.py +++ b/benchmarks/scripts/cccl/bench/__init__.py @@ -1,6 +1,6 @@ -from .config import * # noqa: F403 -from .storage import * # noqa: F403 from .bench import Bench # noqa: F401 from .cmake import CMake # noqa: F401 +from .config import * # noqa: F403 from .score import * # noqa: F403 from .search import * # noqa: F403 +from .storage import * # noqa: F403 diff --git a/benchmarks/scripts/cccl/bench/bench.py b/benchmarks/scripts/cccl/bench/bench.py index e3c2de7cddb..a4e8c34f7cc 100644 --- a/benchmarks/scripts/cccl/bench/bench.py +++ b/benchmarks/scripts/cccl/bench/bench.py @@ -1,17 +1,18 @@ -import os +import itertools import json -import time -import fpzip +import os import signal -import itertools import subprocess +import time + +import fpzip import numpy as np from .cmake import CMake from .config import BasePoint, Config -from .storage import Storage, get_bench_table_name -from .score import compute_axes_ids, compute_weight_matrices, get_workload_weight from .logger import Logger +from .score import compute_axes_ids, compute_weight_matrices, get_workload_weight +from .storage import Storage, get_bench_table_name def first_val(my_dict): diff --git a/benchmarks/scripts/cccl/bench/cmake.py b/benchmarks/scripts/cccl/bench/cmake.py index db72f979709..65f3e786ef1 100644 --- a/benchmarks/scripts/cccl/bench/cmake.py +++ b/benchmarks/scripts/cccl/bench/cmake.py @@ -1,12 +1,12 @@ import os -import time import signal import subprocess +import time from .build import Build from .config import Config -from .storage import Storage from .logger import Logger +from .storage import Storage def create_builds_table(conn): diff --git a/benchmarks/scripts/cccl/bench/config.py b/benchmarks/scripts/cccl/bench/config.py index 6c3792f8a3e..0d1a724a422 100644 --- a/benchmarks/scripts/cccl/bench/config.py +++ b/benchmarks/scripts/cccl/bench/config.py @@ -1,6 +1,6 @@ import os -import sys import random +import sys def randomized_cartesian_product(list_of_lists): diff --git a/benchmarks/scripts/cccl/bench/score.py b/benchmarks/scripts/cccl/bench/score.py index 7102db940c8..5a2ab099213 100644 --- a/benchmarks/scripts/cccl/bench/score.py +++ b/benchmarks/scripts/cccl/bench/score.py @@ -1,4 +1,5 @@ import math + import numpy as np diff --git a/benchmarks/scripts/cccl/bench/search.py b/benchmarks/scripts/cccl/bench/search.py index 9573dd73d5e..7419bcd40a7 100644 --- 
a/benchmarks/scripts/cccl/bench/search.py +++ b/benchmarks/scripts/cccl/bench/search.py @@ -1,11 +1,12 @@ -import re import argparse +import re + import numpy as np -from .bench import Bench, BaseBench +from .bench import BaseBench, Bench +from .cmake import CMake from .config import Config from .storage import Storage -from .cmake import CMake def list_benches(algnames): diff --git a/benchmarks/scripts/cccl/bench/storage.py b/benchmarks/scripts/cccl/bench/storage.py index d3cafca625c..f4008c88efe 100644 --- a/benchmarks/scripts/cccl/bench/storage.py +++ b/benchmarks/scripts/cccl/bench/storage.py @@ -1,10 +1,10 @@ import os -import fpzip import sqlite3 + +import fpzip import numpy as np import pandas as pd - db_name = "cccl_meta_bench.db" diff --git a/benchmarks/scripts/compare.py b/benchmarks/scripts/compare.py index 443eb5f9e1d..64428b37400 100755 --- a/benchmarks/scripts/compare.py +++ b/benchmarks/scripts/compare.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 +import argparse import os + import cccl -import argparse import numpy as np import pandas as pd - from colorama import Fore diff --git a/benchmarks/scripts/run.py b/benchmarks/scripts/run.py index e8cdd9adb45..6bdd2fad789 100755 --- a/benchmarks/scripts/run.py +++ b/benchmarks/scripts/run.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 +import math import os import sys -import math + import cccl.bench diff --git a/benchmarks/scripts/search.py b/benchmarks/scripts/search.py index 8d5d2d5a65b..9d5ba0af0bb 100755 --- a/benchmarks/scripts/search.py +++ b/benchmarks/scripts/search.py @@ -2,7 +2,6 @@ import cccl.bench as bench - # TODO: # - driver version # - host compiler + version diff --git a/benchmarks/scripts/sol.py b/benchmarks/scripts/sol.py index 7cc26c30d21..e93175f07a6 100755 --- a/benchmarks/scripts/sol.py +++ b/benchmarks/scripts/sol.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 +import argparse import os + import cccl -import argparse +import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns -import matplotlib.pyplot as plt def is_finite(x): diff --git a/benchmarks/scripts/verify.py b/benchmarks/scripts/verify.py index a1c4c39623f..7a98243016e 100755 --- a/benchmarks/scripts/verify.py +++ b/benchmarks/scripts/verify.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 -import sys import argparse +import sys + import cccl.bench diff --git a/cccl-version.json b/cccl-version.json index fc6b155463e..d274eaaa5f3 100644 --- a/cccl-version.json +++ b/cccl-version.json @@ -1,6 +1,6 @@ { - "full": "2.8.0", - "major": 2, - "minor": 8, + "full": "3.0.0", + "major": 3, + "minor": 0, "patch": 0 } diff --git a/ci/matrix.yaml b/ci/matrix.yaml index fd7f2f079c4..881f553f65d 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -5,18 +5,19 @@ workflows: # # Example: # override: - # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'llvm16']} + # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'clang16']} # override: pull_request: - # Old CTK - - {jobs: ['build'], std: 'minmax', ctk: '11.1', cxx: ['gcc6', 'gcc9', 'clang9', 'msvc2017']} + # Old CTK/compiler + - {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7', 'gcc9', 'clang14', 'msvc2019']} # Current CTK build-only - - {jobs: ['build'], std: [11, 14], cxx: ['gcc7', 'clang9']} + - {jobs: ['build'], std: [11, 14], cxx: ['gcc7', 'clang14'], project: 'libcudacxx'} + - {jobs: ['build'], std: [17], cxx: ['gcc7', 'clang14']} - {jobs: ['build'], std: 'max', cxx: ['gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} - - {jobs: 
['build'], std: 'max', cxx: ['clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']} - - {jobs: ['build'], std: 'max', cxx: ['intel', 'msvc2019']} + - {jobs: ['build'], std: 'max', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} + - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - {jobs: ['build'], std: [17, 20], cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc']} @@ -41,10 +42,8 @@ workflows: # verify-codegen: - {jobs: ['verify_codegen'], project: 'libcudacxx'} # cudax has different CTK reqs: - - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 17, cxx: ['gcc9', 'clang9']} - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang10', 'clang11', 'clang12', 'clang13']} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], project: 'cudax', ctk: ['12.5'], std: [17, 20], cxx: ['nvhpc']} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']} @@ -55,7 +54,6 @@ workflows: # Python and c/parallel jobs: - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'} # cccl-infra: - - {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']} - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']} - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']} @@ -64,13 +62,13 @@ workflows: - {jobs: ['limited'], project: 'cub', std: 17} - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'} - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'} - # Old CTK - - {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']} - - {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'} + # Old CTK/compiler + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} + - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} # Current CTK build-only - {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']} - - {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']} + - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} + - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} # Test current CTK - {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']} # Modded builds: @@ -83,7 +81,6 @@ workflows: - {jobs: ['build'], project: 'libcudacxx', std: 'all', cudacxx: 'clang', cxx: 'clang', sm: '90a'} # cudax - {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc9', 'gcc10', 'gcc11']} - - {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13']} - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], project: 'cudax', ctk: [ '12.5'], std: 'all', cxx: ['nvhpc']} - {jobs: ['build'], project: 'cudax', ctk: ['12.0', ], std: 'all', cxx: ['msvc14.36']} @@ 
-96,12 +93,12 @@ workflows: - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']} # # These are waiting on the NVKS nodes: -# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]} -# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]} +# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc7', std: [11]} +# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang14', std: [17]} # - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]} # - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]} # - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'} -# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]} +# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang14', std: [11]} # # H100 runners are currently flakey, only build since those use CPU-only runners: # - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]} # - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]} @@ -115,10 +112,7 @@ workflows: # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows. exclude: # GPU runners are not available on Windows. - - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2017', 'msvc2019', 'msvc14.36', 'msvc2022']} - # Ubuntu 18.04 is EOL and we only use it to get access to CTK 11.1 containers for CUDA testing. - # Disable non-CUDA tests on this platform. - - {jobs: ['test_cpu'], ctk: '11.1'} + - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2019', 'msvc14.36', 'msvc2022']} ############################################################################################# @@ -131,8 +125,6 @@ devcontainer_version: '25.02' all_stds: [11, 14, 17, 20] ctk_versions: - 11.1: { stds: [11, 14, 17, ] } - 11.8: { stds: [11, 14, 17, ] } 12.0: { stds: [11, 14, 17, 20] } 12.5: { stds: [11, 14, 17, 20] } 12.6: { stds: [11, 14, 17, 20], aka: 'curr' } @@ -151,7 +143,6 @@ host_compilers: container_tag: 'gcc' exe: 'g++' versions: - 6: { stds: [11, 14, ] } 7: { stds: [11, 14, 17, ] } 8: { stds: [11, 14, 17, ] } 9: { stds: [11, 14, 17, ] } @@ -164,11 +155,6 @@ host_compilers: container_tag: 'llvm' exe: 'clang++' versions: - 9: { stds: [11, 14, 17, ] } - 10: { stds: [11, 14, 17, ] } - 11: { stds: [11, 14, 17, 20] } - 12: { stds: [11, 14, 17, 20] } - 13: { stds: [11, 14, 17, 20] } 14: { stds: [11, 14, 17, 20] } 15: { stds: [11, 14, 17, 20] } 16: { stds: [11, 14, 17, 20] } @@ -179,16 +165,9 @@ host_compilers: container_tag: 'cl' exe: cl versions: - 14.16: { stds: [ 14, ], aka: '2017' } 14.29: { stds: [ 14, 17, ], aka: '2019' } 14.36: { stds: [ 14, 17, 20] } 14.39: { stds: [ 14, 17, 20], aka: '2022' } - intel: - name: 'Intel' - container_tag: 'oneapi' - exe: icpc - versions: - 2023.2.0: { stds: [11, 14, 17, ] } nvhpc: name: 'NVHPC' container_tag: 'nvhpc' @@ -261,11 +240,11 @@ projects: stds: [11, 14, 17, 20] cub: name: 'CUB' - stds: [11, 14, 17, 20] + stds: [17, 20] job_map: { test: ['test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'] } thrust: name: 'Thrust' - stds: [11, 14, 17, 20] + stds: [17, 20] job_map: { test: ['test_cpu', 'test_gpu'] } cudax: stds: [17, 20] diff --git a/cmake/CCCLBuildCompilerTargets.cmake b/cmake/CCCLBuildCompilerTargets.cmake index 906e287e439..98644b69779 100644 --- 
a/cmake/CCCLBuildCompilerTargets.cmake +++ b/cmake/CCCLBuildCompilerTargets.cmake @@ -23,7 +23,6 @@ set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded) option(CCCL_ENABLE_EXCEPTIONS "Enable exceptions within CCCL libraries." ON) option(CCCL_ENABLE_RTTI "Enable RTTI within CCCL libraries." ON) option(CCCL_ENABLE_WERROR "Treat warnings as errors for CCCL targets." ON) -option(CCCL_SUPPRESS_ICC_DEPRECATION_WARNING "Suppress Intel Compiler deprecation warnings" OFF) option(CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING "Suppress Visual Studio 2017 deprecation warnings" OFF) function(cccl_build_compiler_interface interface_target cuda_compile_options cxx_compile_options compile_defs) @@ -69,10 +68,6 @@ function(cccl_build_compiler_targets) list(APPEND cxx_compile_definitions "CCCL_DISABLE_RTTI") endif() - if (CCCL_SUPPRESS_ICC_DEPRECATION_WARNING) - list(APPEND cxx_compile_definitions "CCCL_SUPPRESS_ICC_DEPRECATION_WARNING") - endif() - if (CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING) list(APPEND cxx_compile_definitions "CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING") endif() @@ -160,16 +155,6 @@ function(cccl_build_compiler_targets) endif() endif() - if ("Intel" STREQUAL "${CMAKE_CXX_COMPILER_ID}") - # Do not flush denormal floats to zero - append_option_if_available("-no-ftz" cxx_compile_options) - # Disable warning that inlining is inhibited by compiler thresholds. - append_option_if_available("-diag-disable=11074" cxx_compile_options) - append_option_if_available("-diag-disable=11076" cxx_compile_options) - # Disable warning about deprecated classic compiler - append_option_if_available("-diag-disable=10441" cxx_compile_options) - endif() - cccl_build_compiler_interface(cccl.compiler_interface "${cuda_compile_options}" "${cxx_compile_options}" diff --git a/cub/benchmarks/nvbench_helper/nvbench_helper/nvbench_helper.cu b/cub/benchmarks/nvbench_helper/nvbench_helper/nvbench_helper.cu index cf41124b2df..87beac9adee 100644 --- a/cub/benchmarks/nvbench_helper/nvbench_helper/nvbench_helper.cu +++ b/cub/benchmarks/nvbench_helper/nvbench_helper/nvbench_helper.cu @@ -752,7 +752,9 @@ void do_not_optimize(const void* ptr) seed_t, cuda::std::span, std::size_t, std::size_t) INSTANTIATE(int32_t); +INSTANTIATE(uint32_t); INSTANTIATE(int64_t); +INSTANTIATE(uint64_t); #undef INSTANTIATE diff --git a/cub/cmake/CubBuildTargetList.cmake b/cub/cmake/CubBuildTargetList.cmake index 27dedd68210..7c6f59e8856 100644 --- a/cub/cmake/CubBuildTargetList.cmake +++ b/cub/cmake/CubBuildTargetList.cmake @@ -150,16 +150,6 @@ function(cub_build_target_list) cmake_minimum_required(VERSION 3.18.3) endif() - # Supported versions of MSVC do not distinguish between C++11 and C++14. - # Warn the user that they may be generating a ton of redundant targets. - if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}" AND - CUB_ENABLE_DIALECT_CPP11) - message(WARNING - "Supported versions of MSVC (2017+) do not distinguish between C++11 " - "and C++14. The requested C++11 targets will be built with C++14." 
- ) - endif() - # Generic config flags: macro(add_flag_option flag docstring default) set(opt "CCCL_${flag}") diff --git a/cub/cub/agent/agent_histogram.cuh b/cub/cub/agent/agent_histogram.cuh index 21a487828ca..e454dc837b1 100644 --- a/cub/cub/agent/agent_histogram.cuh +++ b/cub/cub/agent/agent_histogram.cuh @@ -629,7 +629,7 @@ struct AgentHistogram // Set valid flags MarkValid( - is_valid, valid_samples, Int2Type{}); + is_valid, valid_samples, Int2Type < AgentHistogramPolicyT::LOAD_ALGORITHM == BLOCK_LOAD_STRIPED > {}); // Accumulate samples if (prefer_smem) diff --git a/cub/cub/agent/agent_reduce.cuh b/cub/cub/agent/agent_reduce.cuh index 2e0d94b219c..d5e3514f369 100644 --- a/cub/cub/agent/agent_reduce.cuh +++ b/cub/cub/agent/agent_reduce.cuh @@ -382,8 +382,8 @@ struct AgentReduce even_share.template BlockInit(block_offset, block_end); return (IsAligned(d_in + block_offset, Int2Type())) - ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ()) - : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ()); + ? ConsumeRange(even_share, Int2Type()) + : ConsumeRange(even_share, Int2Type()); } /** @@ -396,8 +396,8 @@ struct AgentReduce even_share.template BlockInit(); return (IsAligned(d_in, Int2Type())) - ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ()) - : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ()); + ? ConsumeRange(even_share, Int2Type()) + : ConsumeRange(even_share, Int2Type()); } private: diff --git a/cub/cub/agent/agent_three_way_partition.cuh b/cub/cub/agent/agent_three_way_partition.cuh index 3a07944d4e2..eec24057163 100644 --- a/cub/cub/agent/agent_three_way_partition.cuh +++ b/cub/cub/agent/agent_three_way_partition.cuh @@ -175,7 +175,8 @@ template + typename OffsetT, + typename StreamingContextT> struct AgentThreeWayPartition { //--------------------------------------------------------------------- @@ -251,6 +252,9 @@ struct AgentThreeWayPartition SelectSecondPartOp select_second_part_op; OffsetT num_items; ///< Total number of input items + // Note: This is a const reference because we have seen double-digit percentage perf regressions otherwise + const StreamingContextT& streaming_context; ///< Context for the current partition + //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- @@ -264,7 +268,8 @@ struct AgentThreeWayPartition UnselectedOutputIteratorT d_unselected_out, SelectFirstPartOp select_first_part_op, SelectSecondPartOp select_second_part_op, - OffsetT num_items) + OffsetT num_items, + const StreamingContextT& streaming_context) : temp_storage(temp_storage.Alias()) , d_in(d_in) , d_first_part_out(d_first_part_out) @@ -273,6 +278,7 @@ struct AgentThreeWayPartition , select_first_part_op(select_first_part_op) , select_second_part_op(select_second_part_op) , num_items(num_items) + , streaming_context(streaming_context) {} //--------------------------------------------------------------------- @@ -350,6 +356,11 @@ struct AgentThreeWayPartition CTA_SYNC(); // Gather items from shared memory and scatter to global + auto first_base = + d_first_part_out + (streaming_context.num_previously_selected_first() + num_first_selections_prefix); + auto second_base = + d_second_part_out + (streaming_context.num_previously_selected_second() + num_second_selections_prefix); + auto unselected_base = d_unselected_out + (streaming_context.num_previously_rejected() + num_rejected_prefix); for (int 
ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; @@ -360,16 +371,16 @@ struct AgentThreeWayPartition if (item_idx < first_item_end) { - d_first_part_out[num_first_selections_prefix + item_idx] = item; + first_base[item_idx] = item; } else if (item_idx < second_item_end) { - d_second_part_out[num_second_selections_prefix + item_idx - first_item_end] = item; + second_base[item_idx - first_item_end] = item; } else { - int rejection_idx = item_idx - second_item_end; - d_unselected_out[num_rejected_prefix + rejection_idx] = item; + int rejection_idx = item_idx - second_item_end; + unselected_base[rejection_idx] = item; } } } @@ -400,11 +411,12 @@ struct AgentThreeWayPartition // Load items if (IS_LAST_TILE) { - BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); + BlockLoadT(temp_storage.load_items) + .Load(d_in + streaming_context.input_offset() + tile_offset, items, num_tile_items); } else { - BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); + BlockLoadT(temp_storage.load_items).Load(d_in + streaming_context.input_offset() + tile_offset, items); } // Initialize selection_flags @@ -464,11 +476,12 @@ struct AgentThreeWayPartition // Load items if (IS_LAST_TILE) { - BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); + BlockLoadT(temp_storage.load_items) + .Load(d_in + streaming_context.input_offset() + tile_offset, items, num_tile_items); } else { - BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); + BlockLoadT(temp_storage.load_items).Load(d_in + streaming_context.input_offset() + tile_offset, items); } // Initialize selection_flags @@ -551,7 +564,7 @@ struct AgentThreeWayPartition { // Blocks are launched in increasing order, so just assign one tile per block // Current tile index - const int tile_idx = static_cast((blockIdx.x * gridDim.y) + blockIdx.y); + const int tile_idx = blockIdx.x; // Global offset for the current tile const OffsetT tile_offset = tile_idx * TILE_ITEMS; @@ -572,9 +585,9 @@ struct AgentThreeWayPartition if (threadIdx.x == 0) { - // Output the total number of items selection_flags - d_num_selected_out[0] = AccumPackHelperT::first(accum); - d_num_selected_out[1] = AccumPackHelperT::second(accum); + // Update the number of selected items with this partition's selections + streaming_context.update_num_selected( + d_num_selected_out, AccumPackHelperT::first(accum), AccumPackHelperT::second(accum), num_items); } } } diff --git a/cub/cub/block/block_radix_rank.cuh b/cub/cub/block/block_radix_rank.cuh index 490abb86bda..92605b5168d 100644 --- a/cub/cub/block/block_radix_rank.cuh +++ b/cub/cub/block/block_radix_rank.cuh @@ -606,8 +606,7 @@ private: { volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS]; DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT]; - } - aliasable; + } aliasable; }; #endif // !_CCCL_DOXYGEN_INVOKED diff --git a/cub/cub/config.cuh b/cub/cub/config.cuh index 16e7edd4905..d05078cdd2d 100644 --- a/cub/cub/config.cuh +++ b/cub/cub/config.cuh @@ -44,7 +44,6 @@ #endif // no system header #include // IWYU pragma: export -#include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export #include // IWYU pragma: export diff --git a/cub/cub/detail/detect_cuda_runtime.cuh b/cub/cub/detail/detect_cuda_runtime.cuh index d83b2c1179a..7666f9b2d23 100644 --- a/cub/cub/detail/detect_cuda_runtime.cuh +++ b/cub/cub/detail/detect_cuda_runtime.cuh @@ -73,40 +73,15 @@ 
*/ # define CUB_RUNTIME_FUNCTION -/** - * \def CUB_RUNTIME_ENABLED - * - * Whether or not the active compiler pass is allowed to invoke device kernels - * or methods from the CUDA runtime API. - * - * This macro should not be used in CUB, as it depends on `__CUDA_ARCH__` - * and is not compatible with `NV_IF_TARGET`. It is provided for legacy - * purposes only. - * - * Replace any usages with `CUB_RDC_ENABLED` and `NV_IF_TARGET`. - */ -# define CUB_RUNTIME_ENABLED - #else // Non-doxygen pass: # ifndef CUB_RUNTIME_FUNCTION - # if defined(__CUDACC_RDC__) && !defined(CUB_DISABLE_CDP) - # define CUB_RDC_ENABLED # define CUB_RUNTIME_FUNCTION _CCCL_HOST_DEVICE - # else // RDC disabled: - # define CUB_RUNTIME_FUNCTION _CCCL_HOST - # endif // RDC enabled - -# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__) -// Legacy only -- do not use in new code. -# define CUB_RUNTIME_ENABLED -# endif - # endif // CUB_RUNTIME_FUNCTION predefined # ifdef CUB_RDC_ENABLED diff --git a/cub/cub/detail/fast_modulo_division.cuh b/cub/cub/detail/fast_modulo_division.cuh index 2ae65400e71..4a5f2048e32 100644 --- a/cub/cub/detail/fast_modulo_division.cuh +++ b/cub/cub/detail/fast_modulo_division.cuh @@ -109,8 +109,6 @@ multiply_extract_higher_bits(T value, R multiplier) { static_assert(supported_integral::value, "unsupported type"); static_assert(supported_integral::value, "unsupported type"); - _CCCL_DIAG_PUSH - _CCCL_DIAG_SUPPRESS_ICC(186) // pointless comparison of unsigned integer with zero _CCCL_IF_CONSTEXPR (_CCCL_TRAIT(::cuda::std::is_signed, T)) { _CCCL_ASSERT(value >= 0, "value must be non-negative"); @@ -119,7 +117,6 @@ multiply_extract_higher_bits(T value, R multiplier) { _CCCL_ASSERT(multiplier >= 0, "multiplier must be non-negative"); } - _CCCL_DIAG_POP static constexpr int NumBits = sizeof(DivisorType) * CHAR_BIT; using unsigned_t = unsigned_implicit_prom_t; using larger_t = larger_unsigned_type_t; diff --git a/cub/cub/detail/strong_load.cuh b/cub/cub/detail/strong_load.cuh index 61693d808e2..b6ba4bb5fc8 100644 --- a/cub/cub/detail/strong_load.cuh +++ b/cub/cub/detail/strong_load.cuh @@ -59,14 +59,14 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint4 load_relaxed(uint4 const* ptr) uint4 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x), + "=r"(retval.y), + "=r"(retval.z), + "=r"(retval.w) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x), + "=r"(retval.y), + "=r"(retval.z), + "=r"(retval.w) : "l"(ptr) : "memory");)); return retval; } @@ -75,14 +75,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_relaxed(ulonglong2 const* ulonglong2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");)); return retval; } @@ -91,14 +85,14 @@ static 
_CCCL_DEVICE _CCCL_FORCEINLINE ushort4 load_relaxed(ushort4 const* ptr) ushort4 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x), + "=h"(retval.y), + "=h"(retval.z), + "=h"(retval.w) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x), + "=h"(retval.y), + "=h"(retval.z), + "=h"(retval.w) : "l"(ptr) : "memory");)); return retval; } @@ -107,46 +101,26 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_relaxed(uint2 const* ptr) uint2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");)); return retval; } static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned long long load_relaxed(unsigned long long const* ptr) { unsigned long long retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u64 %0, [%1];" - : "=l"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u64 %0, [%1];" - : "=l"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");)); return retval; } static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int const* ptr) { unsigned int retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");)); return retval; } @@ -154,16 +128,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int con static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned short load_relaxed(unsigned short const* ptr) { unsigned short retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u16 %0, [%1];" - : "=h"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u16 %0, [%1];" - : "=h"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");)); return retval; } @@ -172,24 +139,16 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned char load_relaxed(unsigned char c unsigned short retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile( - "{" - " .reg .u8 datum;" - " ld.relaxed.gpu.u8 datum, [%1];" - " cvt.u16.u8 %0, datum;" - "}" - : "=h"(retval) - : "l"(ptr) - : "memory");), - (asm volatile( - "{" - " .reg .u8 datum;" - " ld.cg.u8 datum, [%1];" - " 
cvt.u16.u8 %0, datum;" - "}" - : "=h"(retval) - : "l"(ptr) - : "memory");)); + (asm volatile("{" + " .reg .u8 datum;" + " ld.relaxed.gpu.u8 datum, [%1];" + " cvt.u16.u8 %0, datum;" + "}" : "=h"(retval) : "l"(ptr) : "memory");), + (asm volatile("{" + " .reg .u8 datum;" + " ld.cg.u8 datum, [%1];" + " cvt.u16.u8 %0, datum;" + "}" : "=h"(retval) : "l"(ptr) : "memory");)); return (unsigned char) retval; } @@ -198,14 +157,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_acquire(ulonglong2 const* ulonglong2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory"); + (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory"); __threadfence();)); return retval; } @@ -215,14 +168,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr) uint2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory"); + (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory"); __threadfence();)); return retval; } @@ -230,17 +177,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr) static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_acquire(unsigned int const* ptr) { unsigned int retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory"); - __threadfence();)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.acquire.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory"); __threadfence();)); return retval; } diff --git a/cub/cub/detail/strong_store.cuh b/cub/cub/detail/strong_store.cuh index 9b8091738db..cc0e8f60e71 100644 --- a/cub/cub/detail/strong_store.cuh +++ b/cub/cub/detail/strong_store.cuh @@ -56,98 +56,61 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint4* ptr, uint4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");), - (asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");)); + (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "r"(val.x), + "r"(val.y), + "r"(val.z), + "r"(val.w) : "memory");), + (asm volatile( + "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ulonglong2* ptr, ulonglong2 val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");), - (asm volatile("st.cg.v2.u64 [%0], {%1, %2};" - : - : 
"l"(ptr), "l"(val.x), "l"(val.y) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");), + (asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ushort4* ptr, ushort4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");), - (asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");)); + (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "h"(val.x), + "h"(val.y), + "h"(val.z), + "h"(val.w) : "memory");), + (asm volatile( + "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint2* ptr, uint2 val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");), - (asm volatile("st.cg.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");), + (asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned long long* ptr, unsigned long long val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");), - (asm volatile("st.cg.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");), + (asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned int* ptr, unsigned int val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");), - (asm volatile("st.cg.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");), + (asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned short* ptr, unsigned short val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");), - (asm volatile("st.cg.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");), + (asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, unsigned char val) @@ -158,123 +121,77 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, uns " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.relaxed.gpu.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");), + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");), (asm volatile("{" " .reg .u8 datum;" " cvt.u8.u16 
datum, %1;" " st.cg.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");)); + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint4* ptr, uint4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");)); + (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "r"(val.x), + "r"(val.y), + "r"(val.z), + "r"(val.w) : "memory");), + (__threadfence(); asm volatile( + "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ulonglong2* ptr, ulonglong2 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");)); + (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");), + (__threadfence(); asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ushort4* ptr, ushort4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");)); + (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "h"(val.x), + "h"(val.y), + "h"(val.z), + "h"(val.w) : "memory");), + (__threadfence(); asm volatile( + "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint2* ptr, uint2 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");)); + (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");), + (__threadfence(); asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned long long* ptr, unsigned long long val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");), - (__threadfence(); - asm volatile("st.cg.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned int* ptr, unsigned int val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");), - (__threadfence(); - asm 
volatile("st.cg.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned short* ptr, unsigned short val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");), - (__threadfence(); - asm volatile("st.cg.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned char val) @@ -285,19 +202,15 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned c " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.release.gpu.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");), + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");), (__threadfence(); asm volatile( "{" " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.cg.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");)); + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");)); } } // namespace detail diff --git a/cub/cub/device/device_partition.cuh b/cub/cub/device/device_partition.cuh index c68f6cf4d61..1b9eef947fa 100644 --- a/cub/cub/device/device_partition.cuh +++ b/cub/cub/device/device_partition.cuh @@ -445,7 +445,8 @@ private: typename UnselectedOutputIteratorT, typename NumSelectedIteratorT, typename SelectFirstPartOp, - typename SelectSecondPartOp> + typename SelectSecondPartOp, + typename NumItemsT> CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t IfNoNVTX( void* d_temp_storage, std::size_t& temp_storage_bytes, @@ -454,12 +455,13 @@ private: SecondOutputIteratorT d_second_part_out, UnselectedOutputIteratorT d_unselected_out, NumSelectedIteratorT d_num_selected_out, - int num_items, + NumItemsT num_items, SelectFirstPartOp select_first_part_op, SelectSecondPartOp select_second_part_op, cudaStream_t stream = 0) { - using OffsetT = int; + using ChooseOffsetT = detail::choose_signed_offset; + using OffsetT = typename ChooseOffsetT::type; using DispatchThreeWayPartitionIfT = DispatchThreeWayPartitionIf< InputIteratorT, FirstOutputIteratorT, @@ -470,6 +472,14 @@ private: SelectSecondPartOp, OffsetT>; + // Signed integer type for global offsets + // Check if the number of items exceeds the range covered by the selected signed offset type + cudaError_t error = ChooseOffsetT::is_exceeding_offset_type(num_items); + if (error) + { + return error; + } + return DispatchThreeWayPartitionIfT::Dispatch( d_temp_storage, temp_storage_bytes, @@ -625,6 +635,9 @@ public: //! @tparam SelectSecondPartOp //! **[inferred]** Selection functor type having member `bool operator()(const T &a)` //! + //! @tparam NumItemsT + //! **[inferred]** Type of num_items + //! //! @param[in] d_temp_storage //! Device-accessible allocation of temporary storage. When `nullptr`, the //! required allocation size is written to `temp_storage_bytes` and no work is done. 
@@ -670,7 +683,8 @@ public: typename UnselectedOutputIteratorT, typename NumSelectedIteratorT, typename SelectFirstPartOp, - typename SelectSecondPartOp> + typename SelectSecondPartOp, + typename NumItemsT> CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t If(void* d_temp_storage, std::size_t& temp_storage_bytes, @@ -679,7 +693,7 @@ public: SecondOutputIteratorT d_second_part_out, UnselectedOutputIteratorT d_unselected_out, NumSelectedIteratorT d_num_selected_out, - int num_items, + NumItemsT num_items, SelectFirstPartOp select_first_part_op, SelectSecondPartOp select_second_part_op, cudaStream_t stream = 0) @@ -706,7 +720,8 @@ public: typename UnselectedOutputIteratorT, typename NumSelectedIteratorT, typename SelectFirstPartOp, - typename SelectSecondPartOp> + typename SelectSecondPartOp, + typename NumItemsT> CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t If(void* d_temp_storage, std::size_t& temp_storage_bytes, @@ -715,7 +730,7 @@ public: SecondOutputIteratorT d_second_part_out, UnselectedOutputIteratorT d_unselected_out, NumSelectedIteratorT d_num_selected_out, - int num_items, + NumItemsT num_items, SelectFirstPartOp select_first_part_op, SelectSecondPartOp select_second_part_op, cudaStream_t stream, @@ -729,7 +744,8 @@ public: UnselectedOutputIteratorT, NumSelectedIteratorT, SelectFirstPartOp, - SelectSecondPartOp>( + SelectSecondPartOp, + NumItemsT>( d_temp_storage, temp_storage_bytes, d_in, diff --git a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh index 90295f2c06f..fc259499b85 100644 --- a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh +++ b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh @@ -46,17 +46,97 @@ #include -#include #include #include CUB_NAMESPACE_BEGIN +namespace detail +{ + +namespace three_way_partition +{ +// Offset type used to instantiate the stream three-way-partition-kernel and agent to index the items within one +// partition +using per_partition_offset_t = ::cuda::std::int32_t; + +template +class streaming_context_t +{ +private: + bool first_partition = true; + bool last_partition = false; + TotalNumItemsT total_previous_num_items{}; + + // We use a double-buffer for keeping track of the number of previously selected items + TotalNumItemsT* d_num_selected_in = nullptr; + TotalNumItemsT* d_num_selected_out = nullptr; + +public: + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE + streaming_context_t(TotalNumItemsT* d_num_selected_in, TotalNumItemsT* d_num_selected_out, bool is_last_partition) + : last_partition(is_last_partition) + , d_num_selected_in(d_num_selected_in) + , d_num_selected_out(d_num_selected_out) + {} + + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE void advance(TotalNumItemsT num_items, bool next_partition_is_the_last) + { + ::cuda::std::swap(d_num_selected_in, d_num_selected_out); + first_partition = false; + last_partition = next_partition_is_the_last; + total_previous_num_items += num_items; + }; + + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE TotalNumItemsT input_offset() const + { + return first_partition ? TotalNumItemsT{0} : total_previous_num_items; + }; + + _CCCL_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_selected_first() const + { + return first_partition ? TotalNumItemsT{0} : d_num_selected_in[0]; + }; + + _CCCL_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_selected_second() const + { + return first_partition ? 
TotalNumItemsT{0} : d_num_selected_in[1]; + }; + + _CCCL_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_rejected() const + { + return first_partition ? TotalNumItemsT{0} : d_num_selected_in[2]; + ; + }; + + template + _CCCL_DEVICE _CCCL_FORCEINLINE void update_num_selected( + NumSelectedIteratorT user_num_selected_out_it, + TotalNumItemsT num_selected_first, + TotalNumItemsT num_selected_second, + TotalNumItemsT num_items_in_partition) const + { + if (last_partition) + { + user_num_selected_out_it[0] = num_previously_selected_first() + num_selected_first; + user_num_selected_out_it[1] = num_previously_selected_second() + num_selected_second; + } + else + { + d_num_selected_out[0] = num_previously_selected_first() + num_selected_first; + d_num_selected_out[1] = num_previously_selected_second() + num_selected_second; + d_num_selected_out[2] = + num_previously_rejected() + (num_items_in_partition - num_selected_second - num_selected_first); + } + } +}; +} // namespace three_way_partition +} // namespace detail + /****************************************************************************** * Kernel entry points *****************************************************************************/ - template + typename OffsetT, + typename StreamingContextT> __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ThreeWayPartitionPolicy::BLOCK_THREADS)) CUB_DETAIL_KERNEL_ATTRIBUTES void DeviceThreeWayPartitionKernel( InputIteratorT d_in, @@ -78,7 +159,8 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ThreeWayPartitionPolicy::BLO SelectFirstPartOp select_first_part_op, SelectSecondPartOp select_second_part_op, OffsetT num_items, - int num_tiles) + int num_tiles, + _CCCL_GRID_CONSTANT const StreamingContextT streaming_context) { using AgentThreeWayPartitionPolicyT = typename ChainedPolicyT::ActivePolicy::ThreeWayPartitionPolicy; @@ -91,7 +173,8 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ThreeWayPartitionPolicy::BLO UnselectedOutputIteratorT, SelectFirstPartOp, SelectSecondPartOp, - OffsetT>; + OffsetT, + StreamingContextT>; // Shared memory for AgentThreeWayPartition __shared__ typename AgentThreeWayPartitionT::TempStorage temp_storage; @@ -105,7 +188,8 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ThreeWayPartitionPolicy::BLO d_unselected_out, select_first_part_op, select_second_part_op, - num_items) + num_items, + streaming_context) .ConsumeRange(num_tiles, tile_status, d_num_selected_out); } @@ -160,14 +244,23 @@ template , OffsetT>> + typename PolicyHub = detail::three_way_partition:: + policy_hub, detail::three_way_partition::per_partition_offset_t>> struct DispatchThreeWayPartitionIf { /***************************************************************************** * Types and constants ****************************************************************************/ - using AccumPackHelperT = detail::three_way_partition::accumulator_pack_t; + // Offset type used to instantiate the three-way partition-kernel and agent to index the items within one partition + using per_partition_offset_t = detail::three_way_partition::per_partition_offset_t; + + // Type used to provide streaming information about each partition's context + static constexpr per_partition_offset_t partition_size = ::cuda::std::numeric_limits::max(); + + using streaming_context_t = detail::three_way_partition::streaming_context_t; + + using AccumPackHelperT = detail::three_way_partition::accumulator_pack_t; using AccumPackT = typename AccumPackHelperT::pack_t; using ScanTileStateT = cub::ScanTileState; @@ 
-222,64 +315,77 @@ struct DispatchThreeWayPartitionIf constexpr int block_threads = ActivePolicyT::ThreeWayPartitionPolicy::BLOCK_THREADS; constexpr int items_per_thread = ActivePolicyT::ThreeWayPartitionPolicy::ITEMS_PER_THREAD; + constexpr int tile_size = block_threads * items_per_thread; - do - { - // Get device ordinal - int device_ordinal; - error = CubDebug(cudaGetDevice(&device_ordinal)); - if (cudaSuccess != error) - { - break; - } + // The maximum number of items for which we will ever invoke the kernel (i.e. largest partition size) + auto const max_partition_size = + static_cast(::cuda::std::min(static_cast(num_items), static_cast(partition_size))); - // Number of input tiles - int tile_size = block_threads * items_per_thread; - int num_tiles = static_cast(::cuda::ceil_div(num_items, tile_size)); + // The number of partitions required to "iterate" over the total input + auto const num_partitions = + (max_partition_size == 0) ? OffsetT{1} : ::cuda::ceil_div(num_items, max_partition_size); - // Specify temporary storage allocation requirements - size_t allocation_sizes[1]; // bytes needed for tile status descriptors + // The maximum number of tiles for which we will ever invoke the kernel + auto const max_num_tiles_per_invocation = static_cast(::cuda::ceil_div(max_partition_size, tile_size)); - error = CubDebug(ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0])); - if (cudaSuccess != error) - { - break; - } + // For streaming invocations, we need two sets (for double-buffering) of three counters each + constexpr ::cuda::std::size_t num_counters_per_pass = 3; + constexpr ::cuda::std::size_t num_streaming_counters = 2 * num_counters_per_pass; + ::cuda::std::size_t streaming_selection_storage_bytes = + (num_partitions > 1) ? num_streaming_counters * sizeof(OffsetT) : ::cuda::std::size_t{0}; - // Compute allocation pointers into the single storage blob (or compute - // the necessary size of the blob) - void* allocations[1] = {}; + // Specify temporary storage allocation requirements + size_t allocation_sizes[2] = {0ULL, streaming_selection_storage_bytes}; - error = CubDebug(cub::AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes)); - if (cudaSuccess != error) - { - break; - } + error = + CubDebug(ScanTileStateT::AllocationSize(static_cast(max_num_tiles_per_invocation), allocation_sizes[0])); + if (cudaSuccess != error) + { + return error; + } - if (d_temp_storage == nullptr) - { - // Return if the caller is simply requesting the size of the storage - // allocation - break; - } + // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) + void* allocations[2] = {}; - // Return if empty problem - if (num_items == 0) - { - break; - } + error = CubDebug(cub::AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes)); + if (cudaSuccess != error) + { + return error; + } + + if (d_temp_storage == nullptr) + { + // Return if the caller is simply requesting the size of the storage + // allocation + return cudaSuccess; + } + + // Initialize the streaming context with the temporary storage for double-buffering the previously selected items + // and the total number (across all partitions) of items + OffsetT* tmp_num_selected_out = static_cast(allocations[1]); + streaming_context_t streaming_context{ + tmp_num_selected_out, (tmp_num_selected_out + num_counters_per_pass), (num_partitions <= 1)}; + + // Iterate over the partitions until all input is processed + for (OffsetT 
partition_idx = 0; partition_idx < num_partitions; partition_idx++) + { + OffsetT current_partition_offset = partition_idx * max_partition_size; + OffsetT current_num_items = + (partition_idx + 1 == num_partitions) ? (num_items - current_partition_offset) : max_partition_size; // Construct the tile status interface - ScanTileStateT tile_status; + const auto current_num_tiles = static_cast(::cuda::ceil_div(current_num_items, tile_size)); - error = CubDebug(tile_status.Init(num_tiles, allocations[0], allocation_sizes[0])); + // Construct the tile status interface + ScanTileStateT tile_status; + error = CubDebug(tile_status.Init(current_num_tiles, allocations[0], allocation_sizes[0])); if (cudaSuccess != error) { - break; + return error; } // Log three_way_partition_init_kernel configuration - int init_grid_size = CUB_MAX(1, ::cuda::ceil_div(num_tiles, INIT_KERNEL_THREADS)); + int init_grid_size = CUB_MAX(1, ::cuda::ceil_div(current_num_tiles, INIT_KERNEL_THREADS)); #ifdef CUB_DETAIL_DEBUG_ENABLE_LOG _CubLog("Invoking three_way_partition_init_kernel<<<%d, %d, 0, %lld>>>()\n", @@ -290,36 +396,29 @@ struct DispatchThreeWayPartitionIf // Invoke three_way_partition_init_kernel to initialize tile descriptors THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(init_grid_size, INIT_KERNEL_THREADS, 0, stream) - .doit(three_way_partition_init_kernel, tile_status, num_tiles, d_num_selected_out); + .doit(three_way_partition_init_kernel, tile_status, current_num_tiles, d_num_selected_out); // Check for failure to launch error = CubDebug(cudaPeekAtLastError()); if (cudaSuccess != error) { - break; + return error; } // Sync the stream if specified to flush runtime errors error = CubDebug(detail::DebugSyncStream(stream)); if (cudaSuccess != error) { - break; + return error; } - // Get max x-dimension of grid - int max_dim_x; - error = CubDebug(cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal)); - if (cudaSuccess != error) + // No more items to process (note, we do not want to return early for num_items==0, because we need to make sure + // that `three_way_partition_init_kernel` has written '0' to d_num_selected_out) + if (current_num_items == 0) { - break; + return cudaSuccess; } - // Get grid size for scanning tiles - dim3 scan_grid_size; - scan_grid_size.z = 1; - scan_grid_size.y = ::cuda::ceil_div(num_tiles, max_dim_x); - scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); - // Log select_if_kernel configuration #ifdef CUB_DETAIL_DEBUG_ENABLE_LOG { @@ -330,14 +429,12 @@ struct DispatchThreeWayPartitionIf block_threads)); if (cudaSuccess != error) { - break; + return error; } - _CubLog("Invoking three_way_partition_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d " + _CubLog("Invoking three_way_partition_kernel<<<%d, %d, 0, %lld>>>(), %d " "items per thread, %d SM occupancy\n", - scan_grid_size.x, - scan_grid_size.y, - scan_grid_size.z, + current_num_tiles, block_threads, reinterpret_cast(stream), items_per_thread, @@ -346,7 +443,7 @@ struct DispatchThreeWayPartitionIf #endif // CUB_DETAIL_DEBUG_ENABLE_LOG // Invoke select_if_kernel - THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(scan_grid_size, block_threads, 0, stream) + THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(current_num_tiles, block_threads, 0, stream) .doit(three_way_partition_kernel, d_in, d_first_part_out, @@ -356,23 +453,27 @@ struct DispatchThreeWayPartitionIf tile_status, select_first_part_op, select_second_part_op, - num_items, - num_tiles); + static_cast(current_num_items), + current_num_tiles, + 
streaming_context); // Check for failure to launch error = CubDebug(cudaPeekAtLastError()); if (cudaSuccess != error) { - break; + return error; } // Sync the stream if specified to flush runtime errors error = CubDebug(detail::DebugSyncStream(stream)); if (cudaSuccess != error) { - break; + return error; } - } while (0); + + // Prepare streaming context for next partition (swap double buffers, advance number of processed items, etc.) + streaming_context.advance(current_num_items, (partition_idx + OffsetT{2} == num_partitions)); + } return error; } @@ -393,7 +494,8 @@ struct DispatchThreeWayPartitionIf ScanTileStateT, SelectFirstPartOp, SelectSecondPartOp, - OffsetT>); + per_partition_offset_t, + streaming_context_t>); } /** diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh index 386a6276dfa..fa4fa80d0ef 100644 --- a/cub/cub/device/dispatch/dispatch_transform.cuh +++ b/cub/cub/device/dispatch/dispatch_transform.cuh @@ -169,11 +169,10 @@ _CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply_impl(F&& f, Tuple&& t, ::cuda::st } template -_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t) - -> decltype(poor_apply_impl( - ::cuda::std::forward(f), - ::cuda::std::forward(t), - ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t>::value>{})) +_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t) -> decltype(poor_apply_impl( + ::cuda::std::forward(f), + ::cuda::std::forward(t), + ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t>::value>{})) { return poor_apply_impl( ::cuda::std::forward(f), @@ -473,8 +472,9 @@ using needs_aligned_ptr_t = #ifdef _CUB_HAS_TRANSFORM_UBLKCP template ::value, int> = 0> -_CCCL_DEVICE _CCCL_FORCEINLINE auto select_kernel_arg( - ::cuda::std::integral_constant, kernel_arg&& arg) -> aligned_base_ptr>&& +_CCCL_DEVICE _CCCL_FORCEINLINE auto +select_kernel_arg(::cuda::std::integral_constant, kernel_arg&& arg) + -> aligned_base_ptr>&& { return ::cuda::std::move(arg.aligned_ptr); } @@ -660,10 +660,9 @@ struct dispatch_t - CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel() - -> PoorExpected< - ::cuda::std:: - tuple> + CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel() -> PoorExpected< + ::cuda::std:: + tuple> { using policy_t = typename ActivePolicy::algo_policy; constexpr int block_dim = policy_t::block_threads; diff --git a/cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh b/cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh index 992398c5cfb..02bfb443fc1 100644 --- a/cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh @@ -613,6 +613,7 @@ struct policy_hub static constexpr int max_input_bytes = static_cast(::cuda::std::max(sizeof(KeyT), sizeof(AccumT))); static constexpr int combined_input_bytes = sizeof(KeyT) + sizeof(AccumT); + template struct DefaultPolicy { static constexpr int nominal_4B_items_per_thread = 6; @@ -627,13 +628,13 @@ struct policy_hub AgentReduceByKeyPolicy<128, items_per_thread, BLOCK_LOAD_DIRECT, - LOAD_LDG, + LoadModifier, BLOCK_SCAN_WARP_SCANS, default_reduce_by_key_delay_constructor_t>; }; struct Policy350 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<350, Policy350, Policy350> {}; @@ -648,7 +649,7 @@ struct policy_hub typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> typename 
DefaultPolicy::ReduceByKeyPolicyT; + static auto select_agent_policy(long) -> typename DefaultPolicy::ReduceByKeyPolicyT; struct Policy800 : ChainedPolicy<800, Policy800, Policy350> { @@ -657,7 +658,7 @@ struct policy_hub }; struct Policy860 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<860, Policy860, Policy800> {}; diff --git a/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh b/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh index 783da6820d5..33771f6882f 100644 --- a/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh @@ -239,6 +239,7 @@ struct policy_hub static constexpr int max_input_bytes = static_cast(::cuda::std::max(sizeof(KeyT), sizeof(LengthT))); static constexpr int combined_input_bytes = sizeof(KeyT) + sizeof(LengthT); + template struct DefaultPolicy { static constexpr int nominal_4B_items_per_thread = 6; @@ -252,14 +253,14 @@ struct policy_hub AgentReduceByKeyPolicy<128, items, BLOCK_LOAD_DIRECT, - LOAD_LDG, + LoadModifier, BLOCK_SCAN_WARP_SCANS, default_reduce_by_key_delay_constructor_t>; }; // SM35 struct Policy350 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<350, Policy350, Policy350> {}; @@ -273,7 +274,7 @@ struct policy_hub BLOCK_SCAN_WARP_SCANS, typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> typename DefaultPolicy::ReduceByKeyPolicyT; + static auto select_agent_policy(long) -> typename DefaultPolicy::ReduceByKeyPolicyT; // SM80 struct Policy800 : ChainedPolicy<800, Policy800, Policy350> @@ -283,7 +284,7 @@ struct policy_hub // SM86 struct Policy860 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<860, Policy860, Policy800> {}; @@ -433,7 +434,7 @@ struct sm90_tuning struct policy_hub { - template + template struct DefaultPolicy { static constexpr int nominal_4B_items_per_thread = 15; @@ -444,7 +445,7 @@ struct policy_hub AgentRlePolicy<96, ITEMS_PER_THREAD, BlockLoad, - LOAD_LDG, + LoadModifier, true, BLOCK_SCAN_WARP_SCANS, default_reduce_by_key_delay_constructor_t>; @@ -452,7 +453,7 @@ struct policy_hub // SM35 struct Policy350 - : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` + : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` , ChainedPolicy<350, Policy350, Policy350> {}; @@ -467,7 +468,8 @@ struct policy_hub BLOCK_SCAN_WARP_SCANS, typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> typename DefaultPolicy::RleSweepPolicyT; + static auto select_agent_policy(long) -> + typename DefaultPolicy::RleSweepPolicyT; // SM80 struct Policy800 : ChainedPolicy<800, Policy800, Policy350> @@ -477,7 +479,7 @@ struct policy_hub // SM86 struct Policy860 - : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` + : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` , ChainedPolicy<860, Policy860, Policy800> {}; diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh index ac5dbfc5868..2efa551d4c6 100644 --- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh @@ -108,14 +108,6 @@ constexpr accum_size classify_accum_size() : accum_size::unknown; } -template -struct tuning -{ - static constexpr int threads = Threads; - static constexpr int items = Items; - using delay_constructor = fixed_delay_constructor_t; -}; - template (), @@ -205,17 +197,31 @@ template ()> struct sm90_tuning; +template +struct 
sm90_tuning_vals +{ + static constexpr int threads = Threads; + static constexpr int items = Items; + using delay_constructor = fixed_delay_constructor_t; + // same logic as default policy: + static constexpr bool large_values = sizeof(AccumT) > 128; + static constexpr BlockLoadAlgorithm load_algorithm = + large_values ? BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED : BLOCK_LOAD_WARP_TRANSPOSE; + static constexpr BlockStoreAlgorithm store_algorithm = + large_values ? BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED : BLOCK_STORE_WARP_TRANSPOSE; +}; + // clang-format off -template struct sm90_tuning : tuning<192, 22, 168, 1140> {}; -template struct sm90_tuning : tuning<512, 12, 376, 1125> {}; -template struct sm90_tuning : tuning<128, 24, 648, 1245> {}; -template struct sm90_tuning : tuning<224, 24, 632, 1290> {}; +template struct sm90_tuning : sm90_tuning_vals {}; +template struct sm90_tuning : sm90_tuning_vals {}; +template struct sm90_tuning : sm90_tuning_vals {}; +template struct sm90_tuning : sm90_tuning_vals {}; -template <> struct sm90_tuning : tuning<128, 24, 688, 1140> {}; -template <> struct sm90_tuning : tuning<224, 24, 576, 1215> {}; +template <> struct sm90_tuning : sm90_tuning_vals {}; +template <> struct sm90_tuning : sm90_tuning_vals {}; #if CUB_IS_INT128_ENABLED -template <> struct sm90_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> : tuning<576, 21, 860, 630> {}; +template <> struct sm90_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> : sm90_tuning_vals<__int128_t, 576, 21, 860, 630> {}; template <> struct sm90_tuning<__uint128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> : sm90_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> diff --git a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh index c6894ccbc86..3645e4b9ed7 100644 --- a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh @@ -255,9 +255,8 @@ struct policy_hub typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> - typename DefaultPolicy< - default_delay_constructor_t::pack_t>>::ThreeWayPartitionPolicy; + static auto select_agent_policy(long) -> typename DefaultPolicy< + default_delay_constructor_t::pack_t>>::ThreeWayPartitionPolicy; struct Policy800 : ChainedPolicy<800, Policy800, Policy350> { diff --git a/cub/cub/grid/grid_barrier.cuh b/cub/cub/grid/grid_barrier.cuh index 74ff85d6153..f2ae69fc091 100644 --- a/cub/cub/grid/grid_barrier.cuh +++ b/cub/cub/grid/grid_barrier.cuh @@ -50,8 +50,10 @@ CUB_NAMESPACE_BEGIN /** * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid + * + * deprecated [Since 2.9.0] */ -class GridBarrier +class CCCL_DEPRECATED_BECAUSE("Use the APIs from cooperative groups instead") GridBarrier { protected: using SyncFlag = unsigned int; @@ -131,8 +133,11 @@ public: * * Uses RAII for lifetime, i.e., device resources are reclaimed when * the destructor is called. 
+ * + * deprecated [Since 2.9.0] */ -class GridBarrierLifetime : public GridBarrier +_CCCL_SUPPRESS_DEPRECATED_PUSH +class CCCL_DEPRECATED_BECAUSE("Use the APIs from cooperative groups instead") GridBarrierLifetime : public GridBarrier { protected: // Number of bytes backed by d_sync @@ -211,5 +216,6 @@ public: return retval; } }; +_CCCL_SUPPRESS_DEPRECATED_POP CUB_NAMESPACE_END diff --git a/cub/cub/host/mutex.cuh b/cub/cub/host/mutex.cuh deleted file mode 100644 index efffa159ff1..00000000000 --- a/cub/cub/host/mutex.cuh +++ /dev/null @@ -1,70 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * Simple portable mutex - */ - -#pragma once - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#include - -CUB_NAMESPACE_BEGIN - -/** - * Wraps std::mutex - * deprecated [Since CUB 2.1.0] The `cub::Mutex` is deprecated and will be removed - * in a future release. Use `std::mutex` instead. 
- */ -struct CCCL_DEPRECATED Mutex -{ - std::mutex mtx; - - void Lock() - { - mtx.lock(); - } - - void Unlock() - { - mtx.unlock(); - } -}; - -CUB_NAMESPACE_END diff --git a/cub/cub/thread/thread_operators.cuh b/cub/cub/thread/thread_operators.cuh index 7af32df392c..feef89776a9 100644 --- a/cub/cub/thread/thread_operators.cuh +++ b/cub/cub/thread/thread_operators.cuh @@ -391,8 +391,8 @@ struct CCCL_DEPRECATED BinaryFlip {} template <typename T, typename U> - _CCCL_DEVICE auto - operator()(T&& t, U&& u) -> decltype(binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t))) + _CCCL_DEVICE auto operator()(T&& t, U&& u) + -> decltype(binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t))) { return binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t)); } diff --git a/cub/cub/thread/thread_reduce.cuh b/cub/cub/thread/thread_reduce.cuh index 5727b395b04..d3850051ca7 100644 --- a/cub/cub/thread/thread_reduce.cuh +++ b/cub/cub/thread/thread_reduce.cuh @@ -543,8 +543,8 @@ ThreadReduceTernaryTree(const Input& input, ReductionOp reduction_op) // never reached. Protect instantiation of ThreadReduceSimd with arbitrary types and operators _CCCL_TEMPLATE(typename Input, typename ReductionOp) _CCCL_REQUIRES((!cub::internal::enable_generic_simd_reduction<Input, ReductionOp>())) -_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto -ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t<decltype(input[0])> +_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp) + -> ::cuda::std::remove_cvref_t<decltype(input[0])> { assert(false); return input[0]; @@ -552,8 +552,8 @@ ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t _CCCL_TEMPLATE(typename Input, typename ReductionOp) _CCCL_REQUIRES((cub::internal::enable_generic_simd_reduction<Input, ReductionOp>())) -_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto -ThreadReduceSimd(const Input& input, ReductionOp reduction_op) -> ::cuda::std::remove_cvref_t<decltype(input[0])> +_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp reduction_op) + -> ::cuda::std::remove_cvref_t<decltype(input[0])> { using cub::detail::unsafe_bitcast; using T = ::cuda::std::remove_cvref_t<decltype(input[0])>; @@ -627,7 +627,8 @@ _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE AccumT ThreadReduce(const Input& ::cuda::minimum<>, ::cuda::minimum<ValueT>, cub::internal::SimdMin<ValueT>, - cub::internal::SimdMax<ValueT>>()) + cub::internal::SimdMax<ValueT>>() + || sizeof(ValueT) >= 8) { return cub::internal::ThreadReduceSequential<AccumT>(input, reduction_op); } diff --git a/cub/cub/util_arch.cuh b/cub/cub/util_arch.cuh index 1d6d7289b78..b1da6a03b5d 100644 --- a/cub/cub/util_arch.cuh +++ b/cub/cub/util_arch.cuh @@ -54,9 +54,6 @@ CUB_NAMESPACE_BEGIN #ifndef _CCCL_DOXYGEN_INVOKED // Do not document -// \deprecated [Since 2.1.0] -# define CUB_USE_COOPERATIVE_GROUPS /// In device code, CUB_PTX_ARCH expands to the PTX version for which we are /// compiling. In host code, CUB_PTX_ARCH's value is implementation defined. # ifndef CUB_PTX_ARCH # if # endif # endif -// These definitions were intended for internal use only and are now obsolete. -// If you relied on them, consider porting your code to use the functionality -// in libcu++'s <nv/target> header. -// For a temporary workaround, define CUB_PROVIDE_LEGACY_ARCH_MACROS to make -// them available again. These should be considered deprecated and will be -// fully removed in a future version.
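The removed block above points developers at libcu++'s <nv/target> header as the replacement for the legacy CUB_IS_DEVICE_CODE / CUB_IS_HOST_CODE macro family. A minimal sketch of the replacement pattern follows; the function name and bodies are illustrative only (not from this patch), while NV_IF_ELSE_TARGET and NV_IS_DEVICE are the documented <nv/target> facilities:

#include <nv/target>
#include <cstdio>

__host__ __device__ void where_am_i()
{
  // One branch is selected at compile time for each compilation pass,
  // instead of branching on the removed CUB_IS_DEVICE_CODE macro.
  NV_IF_ELSE_TARGET(NV_IS_DEVICE,
                    (printf("device pass\n");),
                    (printf("host pass\n");))
}

Unlike the old macros, this works uniformly under NVCC, NVC++, and CUDA-capable Clang, which is why the CUB_PROVIDE_LEGACY_ARCH_MACROS escape hatch was removed outright.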
-# ifdef CUB_PROVIDE_LEGACY_ARCH_MACROS -# ifndef CUB_IS_DEVICE_CODE -# if defined(_NVHPC_CUDA) -# define CUB_IS_DEVICE_CODE __builtin_is_device_code() -# define CUB_IS_HOST_CODE (!__builtin_is_device_code()) -# define CUB_INCLUDE_DEVICE_CODE 1 -# define CUB_INCLUDE_HOST_CODE 1 -# elif CUB_PTX_ARCH > 0 -# define CUB_IS_DEVICE_CODE 1 -# define CUB_IS_HOST_CODE 0 -# define CUB_INCLUDE_DEVICE_CODE 1 -# define CUB_INCLUDE_HOST_CODE 0 -# else -# define CUB_IS_DEVICE_CODE 0 -# define CUB_IS_HOST_CODE 1 -# define CUB_INCLUDE_DEVICE_CODE 0 -# define CUB_INCLUDE_HOST_CODE 1 -# endif -# endif -# endif // CUB_PROVIDE_LEGACY_ARCH_MACROS - /// Maximum number of devices supported. # ifndef CUB_MAX_DEVICES # define CUB_MAX_DEVICES (128) diff --git a/cub/cub/util_compiler.cuh b/cub/cub/util_compiler.cuh index b34a889fd21..8279c6e1fbd 100644 --- a/cub/cub/util_compiler.cuh +++ b/cub/cub/util_compiler.cuh @@ -42,69 +42,3 @@ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) # pragma system_header #endif // no system header - -// enumerate host compilers we know about -//! deprecated [Since 2.7] -#define CUB_HOST_COMPILER_UNKNOWN 0 -//! deprecated [Since 2.7] -#define CUB_HOST_COMPILER_MSVC 1 -//! deprecated [Since 2.7] -#define CUB_HOST_COMPILER_GCC 2 -//! deprecated [Since 2.7] -#define CUB_HOST_COMPILER_CLANG 3 - -// enumerate device compilers we know about -//! deprecated [Since 2.7] -#define CUB_DEVICE_COMPILER_UNKNOWN 0 -//! deprecated [Since 2.7] -#define CUB_DEVICE_COMPILER_MSVC 1 -//! deprecated [Since 2.7] -#define CUB_DEVICE_COMPILER_GCC 2 -//! deprecated [Since 2.7] -#define CUB_DEVICE_COMPILER_NVCC 3 -//! deprecated [Since 2.7] -#define CUB_DEVICE_COMPILER_CLANG 4 - -// figure out which host compiler we're using -#if _CCCL_COMPILER(MSVC) -//! deprecated [Since 2.7] -# define CUB_HOST_COMPILER CUB_HOST_COMPILER_MSVC -//! deprecated [Since 2.7] -# define CUB_MSVC_VERSION _MSC_VER -//! deprecated [Since 2.7] -# define CUB_MSVC_VERSION_FULL _MSC_FULL_VER -#elif _CCCL_COMPILER(CLANG) -//! deprecated [Since 2.7] -# define CUB_HOST_COMPILER CUB_HOST_COMPILER_CLANG -//! deprecated [Since 2.7] -# define CUB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) -#elif _CCCL_COMPILER(GCC) -//! deprecated [Since 2.7] -# define CUB_HOST_COMPILER CUB_HOST_COMPILER_GCC -//! deprecated [Since 2.7] -# define CUB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#endif - -// figure out which device compiler we're using -#if _CCCL_CUDA_COMPILER(NVCC) || _CCCL_CUDA_COMPILER(NVHPC) -//! deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_NVCC -#elif _CCCL_COMPILER(MSVC) -//! deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_MSVC -#elif _CCCL_COMPILER(GCC) -//! deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_GCC -#elif _CCCL_COMPILER(CLANG) -// CUDA-capable clang should behave similar to NVCC. -# if _CCCL_CUDA_COMPILER(NVCC) -//! deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_NVCC -# else -//! deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_CLANG -# endif -#else -//! 
deprecated [Since 2.7] -# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_UNKNOWN -#endif diff --git a/cub/cub/util_cpp_dialect.cuh b/cub/cub/util_cpp_dialect.cuh index 6a85b971884..a6eee36539c 100644 --- a/cub/cub/util_cpp_dialect.cuh +++ b/cub/cub/util_cpp_dialect.cuh @@ -40,23 +40,13 @@ # pragma system_header #endif // no system header -#include // IWYU pragma: export - #ifndef _CCCL_DOXYGEN_INVOKED // Do not document // Deprecation warnings may be silenced by defining the following macros. These // may be combined. -// - CCCL_IGNORE_DEPRECATED_CPP_DIALECT: -// Ignore all deprecated C++ dialects and outdated compilers. -// - CCCL_IGNORE_DEPRECATED_CPP_11: -// Ignore deprecation warnings when compiling with C++11. C++03 and outdated -// compilers will still issue warnings. -// - CCCL_IGNORE_DEPRECATED_CPP_14: -// Ignore deprecation warnings when compiling with C++14. C++03 and outdated -// compilers will still issue warnings. // - CCCL_IGNORE_DEPRECATED_COMPILER // Ignore deprecation warnings when using deprecated compilers. Compiling -// with C++03, C++11 and C++14 will still issue warnings. +// with deprecated C++ dialects will still issue warnings. # define CUB_CPP_DIALECT _CCCL_STD_VER @@ -67,6 +57,7 @@ # define CUB_COMP_DEPR_IMPL(msg) _CCCL_PRAGMA(GCC warning #msg) # endif +// Compiler checks: // clang-format off # define CUB_COMPILER_DEPRECATION(REQ) \ CUB_COMP_DEPR_IMPL(CUB requires at least REQ. Define CCCL_IGNORE_DEPRECATED_COMPILER to suppress this message.) @@ -74,14 +65,12 @@ # define CUB_COMPILER_DEPRECATION_SOFT(REQ, CUR) \ CUB_COMP_DEPR_IMPL( \ CUB requires at least REQ. CUR is deprecated but still supported. CUR support will be removed in a \ - future release. Define CCCL_IGNORE_DEPRECATED_CPP_DIALECT to suppress this message.) + future release. Define CCCL_IGNORE_DEPRECATED_COMPILER to suppress this message.) // clang-format on # ifndef CCCL_IGNORE_DEPRECATED_COMPILER - -// Compiler checks: -# if _CCCL_COMPILER(GCC, <, 5) -CUB_COMPILER_DEPRECATION(GCC 5.0); +# if _CCCL_COMPILER(GCC, <, 7) +CUB_COMPILER_DEPRECATION(GCC 7.0); # elif _CCCL_COMPILER(CLANG, <, 7) CUB_COMPILER_DEPRECATION(Clang 7.0); # elif _CCCL_COMPILER(MSVC, <, 19, 10) @@ -91,24 +80,18 @@ CUB_COMPILER_DEPRECATION(MSVC 2019(19.20 / 16.0 / 14.20)); // >=2017, <2019. Soft deprecation message: CUB_COMPILER_DEPRECATION_SOFT(MSVC 2019(19.20 / 16.0 / 14.20), MSVC 2017); # endif - # endif // CCCL_IGNORE_DEPRECATED_COMPILER -# if _CCCL_STD_VER < 2011 -// = 2017 - # undef CUB_COMPILER_DEPRECATION_SOFT # undef CUB_COMPILER_DEPRECATION + +// C++17 dialect check: +# ifndef CCCL_IGNORE_DEPRECATED_CPP_DIALECT +# if _CCCL_STD_VER < 2017 +CUB_COMP_DEPR_IMPL(CUB requires at least C++ 17. Define CCCL_IGNORE_DEPRECATED_CPP_DIALECT to suppress this message.) 
+# endif // _CCCL_STD_VER >= 2017 +# endif + # undef CUB_COMP_DEPR_IMPL -# undef CUB_COMP_DEPR_IMPL0 -# undef CUB_COMP_DEPR_IMPL1 #endif // !_CCCL_DOXYGEN_INVOKED diff --git a/cub/cub/util_macro.cuh b/cub/cub/util_macro.cuh index ae42e5fe875..c58d90682e1 100644 --- a/cub/cub/util_macro.cuh +++ b/cub/cub/util_macro.cuh @@ -112,12 +112,6 @@ _CCCL_DIAG_SUPPRESS_CLANG("-Wattributes") # if !_CCCL_CUDA_COMPILER(NVHPC) _CCCL_DIAG_SUPPRESS_NVHPC(attribute_requires_external_linkage) # endif // !_CCCL_CUDA_COMPILER(NVHPC) -# if _CCCL_COMPILER(ICC) -# pragma nv_diag_suppress 1407 // the "__visibility__" attribute can only appear on functions and - // variables with external linkage' -# pragma warning(disable : 1890) // the "__visibility__" attribute can only appear on functions and - // variables with external linkage' -# endif // _CCCL_COMPILER(ICC) #endif // !CUB_DISABLE_KERNEL_VISIBILITY_WARNING_SUPPRESSION #ifndef CUB_DEFINE_KERNEL_GETTER diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh index c6be2a5209f..4d1db99a821 100644 --- a/cub/cub/util_type.cuh +++ b/cub/cub/util_type.cuh @@ -82,9 +82,9 @@ CUB_NAMESPACE_BEGIN # endif // !defined(__CUDACC_RTC_INT128__) # else // !defined(__CUDACC_RTC__) # if _CCCL_CUDACC_AT_LEAST(11, 5) -# if _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) || _CCCL_COMPILER(ICC) || _CCCL_COMPILER(NVHPC) +# if _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) || _CCCL_COMPILER(NVHPC) # define CUB_IS_INT128_ENABLED 1 -# endif // GCC || CLANG || ICC || NVHPC +# endif // GCC || CLANG || NVHPC # endif // _CCCL_CUDACC_AT_LEAST(11, 5) # endif // !defined(__CUDACC_RTC__) #endif // !defined(CUB_IS_INT128_ENABLED) diff --git a/cub/cub/version.cuh b/cub/cub/version.cuh index 2d5232939c8..19024741926 100644 --- a/cub/cub/version.cuh +++ b/cub/cub/version.cuh @@ -58,7 +58,7 @@ * CUB_VERSION / 100 % 1000 is the minor version. * CUB_VERSION / 100000 is the major version. */ -#define CUB_VERSION 200800 // macro expansion with ## requires this to be a single value +#define CUB_VERSION 300000 // macro expansion with ## requires this to be a single value /*! 
\def CUB_MAJOR_VERSION * \brief The preprocessor macro \p CUB_MAJOR_VERSION encodes the diff --git a/cub/test/CMakeLists.txt b/cub/test/CMakeLists.txt index 17201c4704f..c86d24754de 100644 --- a/cub/test/CMakeLists.txt +++ b/cub/test/CMakeLists.txt @@ -227,10 +227,8 @@ function(cub_add_test target_name_var test_name test_src cub_target launcher_id) endif() # CUB_SEPARATE_CATCH2 if ("${test_target}" MATCHES "nvrtc") - target_compile_definitions(${test_target} PRIVATE NVRTC_CUB_PATH="-I${CMAKE_SOURCE_DIR}/cub") - target_compile_definitions(${test_target} PRIVATE NVRTC_THRUST_PATH="-I${CMAKE_SOURCE_DIR}/thrust") - target_compile_definitions(${test_target} PRIVATE NVRTC_LIBCUDACXX_PATH="-I${CMAKE_SOURCE_DIR}/libcudacxx/include") - target_compile_definitions(${test_target} PRIVATE NVRTC_CTK_PATH="-I${CUDAToolkit_INCLUDE_DIRS}") + configure_file("cmake/nvrtc_args.h.in" ${CMAKE_CURRENT_BINARY_DIR}/nvrtc_args.h) + target_include_directories(${test_target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) endif() if ("${test_target}" MATCHES "test.iterator") diff --git a/cub/test/catch2_segmented_sort_helper.cuh b/cub/test/catch2_segmented_sort_helper.cuh index 3e93b3f0d62..0852921bebf 100644 --- a/cub/test/catch2_segmented_sort_helper.cuh +++ b/cub/test/catch2_segmented_sort_helper.cuh @@ -1388,11 +1388,11 @@ struct generate_edge_case_offsets_dispatch static constexpr int a_bunch_of = 42; static constexpr int a_lot_of = 420; - int small_segment_max_segment_size; - int items_per_small_segment; - int medium_segment_max_segment_size; - int single_thread_segment_size; - int large_cached_segment_max_segment_size; + int small_segment_max_segment_size{}; + int items_per_small_segment{}; + int medium_segment_max_segment_size{}; + int single_thread_segment_size{}; + int large_cached_segment_max_segment_size{}; template CUB_RUNTIME_FUNCTION cudaError_t Invoke() diff --git a/cub/test/catch2_test_device_for_each_in_extents.cu b/cub/test/catch2_test_device_for_each_in_extents.cu index 6f11810101c..3e5a6c6689a 100644 --- a/cub/test/catch2_test_device_for_each_in_extents.cu +++ b/cub/test/catch2_test_device_for_each_in_extents.cu @@ -55,7 +55,7 @@ static auto fill_linear_impl(c2h::host_vector& vector, const ExtentType&, siz _CCCL_TRAILING_REQUIRES(void)((Rank == ExtentType::rank())) { vector[pos++] = {indices...}; - return void(); // Intel and nvc++ require a return statement + return void(); // nvc++ requires a return statement } template @@ -67,7 +67,7 @@ static auto fill_linear_impl(c2h::host_vector& vector, const ExtentType& ext, { fill_linear_impl(vector, ext, pos, indices..., i); } - return void(); // Intel and nvc++ require a return statement + return void(); // nvc++ requires a return statement } template @@ -135,8 +135,8 @@ using dimensions = cuda::std::index_sequence<3, 2, 5, 4>>; template -auto build_static_extents(IndexType, - cuda::std::index_sequence) -> cuda::std::extents +auto build_static_extents(IndexType, cuda::std::index_sequence) + -> cuda::std::extents { return {}; } diff --git a/cub/test/catch2_test_device_three_way_partition.cu b/cub/test/catch2_test_device_three_way_partition.cu index 8c6524adf7c..3b5f96c8d60 100644 --- a/cub/test/catch2_test_device_three_way_partition.cu +++ b/cub/test/catch2_test_device_three_way_partition.cu @@ -30,6 +30,10 @@ #include +#include +#include +#include +#include #include #include #include @@ -38,6 +42,8 @@ #include +#include "catch2_large_problem_helper.cuh" +#include "catch2_test_device_select_common.cuh" #include "catch2_test_launch_helper.h" #include 
"cub/util_type.cuh" #include @@ -48,20 +54,8 @@ DECLARE_LAUNCH_WRAPPER(cub::DevicePartition::If, partition); using types = c2h::type_list; -template -struct less_than_t -{ - T compare; - - explicit __host__ less_than_t(T compare) - : compare(compare) - {} - - __device__ bool operator()(const T& a) const - { - return a < compare; - } -}; +// List of offset types to be used for testing large number of items +using offset_types = c2h::type_list; template struct equal_to_t @@ -103,6 +97,29 @@ struct count_to_pair_t } }; +template +struct mod_equal_to +{ + T mod; + T val; + __host__ __device__ bool operator()(T x) const + { + return x % mod == val; + } +}; + +template +struct multiply_and_add +{ + T mul; + T add; + + __host__ __device__ T operator()(T x) const + { + return x * mul + add; + } +}; + C2H_TEST("Device three-way partition can handle empty problems", "[partition][device]", types) { using type = typename c2h::get<0, TestType>; @@ -113,12 +130,15 @@ C2H_TEST("Device three-way partition can handle empty problems", "[partition][de type* d_first_part_out{}; type* d_second_part_out{}; type* d_unselected_out{}; - type* d_num_selected_out{}; + c2h::device_vector num_selected_out{42, 42}; + type* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); less_than_t le(type{0}); greater_or_equal_t ge(type{1}); partition(in, d_first_part_out, d_second_part_out, d_unselected_out, d_num_selected_out, num_items, le, ge); + REQUIRE(num_selected_out[0] == 0); + REQUIRE(num_selected_out[1] == 0); } template @@ -440,3 +460,60 @@ C2H_TEST("Device three-way partition handles single output", "[partition][device second_part_val); REQUIRE(actual_num_items_in_second_part == num_items_in_second_part); } + +C2H_TEST("Device three-way partition works for very large number of items", "[device][partition]", offset_types) +try +{ + using offset_t = typename c2h::get<0, TestType>; + + auto num_items_max_ull = + std::min(static_cast(::cuda::std::numeric_limits::max()), + ::cuda::std::numeric_limits::max() + static_cast(2000000ULL)); + offset_t num_items_max = static_cast(num_items_max_ull); + offset_t num_items_min = + num_items_max_ull > 10000 ? 
static_cast(num_items_max_ull - 10000ULL) : offset_t{0}; + offset_t num_items = GENERATE_COPY( + values( + {num_items_max, static_cast(num_items_max - 1), static_cast(1), static_cast(3)}), + take(2, random(num_items_min, num_items_max))); + + auto in = thrust::make_counting_iterator(offset_t{0}); + + auto first_selector = mod_equal_to{3, 0}; + auto second_selector = mod_equal_to{3, 1}; + + offset_t expected_first = num_items / offset_t{3} + (num_items % offset_t{3} >= 1); + offset_t expected_second = num_items / offset_t{3} + (num_items % offset_t{3} >= 2); + offset_t expected_third = num_items / offset_t{3}; + + auto expected_first_it = thrust::make_transform_iterator(in, multiply_and_add{3, 0}); + auto expected_second_it = thrust::make_transform_iterator(in, multiply_and_add{3, 1}); + auto expected_third_it = thrust::make_transform_iterator(in, multiply_and_add{3, 2}); + + // Prepare tabulate output iterators to verify results in a memory-efficient way + auto check_first_partition_helper = detail::large_problem_test_helper(expected_first); + auto check_first_it = check_first_partition_helper.get_flagging_output_iterator(expected_first_it); + auto check_second_partition_helper = detail::large_problem_test_helper(expected_second); + auto check_second_it = check_second_partition_helper.get_flagging_output_iterator(expected_second_it); + auto check_third_partition_helper = detail::large_problem_test_helper(expected_third); + auto check_third_it = check_third_partition_helper.get_flagging_output_iterator(expected_third_it); + + // Needs to be device accessible + c2h::device_vector num_selected_out{0, 0}; + offset_t* d_num_selected_out = thrust::raw_pointer_cast(num_selected_out.data()); + + // Run test + partition( + in, check_first_it, check_second_it, check_third_it, d_num_selected_out, num_items, first_selector, second_selector); + + // Ensure that we created the correct output + REQUIRE(num_selected_out[0] == expected_first); + REQUIRE(num_selected_out[1] == expected_second); + check_first_partition_helper.check_all_results_correct(); + check_second_partition_helper.check_all_results_correct(); + check_third_partition_helper.check_all_results_correct(); +} +catch (std::bad_alloc&) +{ + // Exceeding memory is not a failure. 
+} diff --git a/cub/test/catch2_test_device_transform.cu b/cub/test/catch2_test_device_transform.cu index 06f2b7c31a7..95c4794b8cf 100644 --- a/cub/test/catch2_test_device_transform.cu +++ b/cub/test/catch2_test_device_transform.cu @@ -166,8 +166,8 @@ struct alignas(Alignment) overaligned_addable_t return a.value == b.value; } - _CCCL_HOST_DEVICE friend auto - operator+(const overaligned_addable_t& a, const overaligned_addable_t& b) -> overaligned_addable_t + _CCCL_HOST_DEVICE friend auto operator+(const overaligned_addable_t& a, const overaligned_addable_t& b) + -> overaligned_addable_t { check(a); check(b); diff --git a/cub/test/catch2_test_nvrtc.cu b/cub/test/catch2_test_nvrtc.cu index 01f39027ce0..71187ecc83a 100644 --- a/cub/test/catch2_test_nvrtc.cu +++ b/cub/test/catch2_test_nvrtc.cu @@ -31,6 +31,7 @@ #include #include +#include TEST_CASE("Test nvrtc", "[test][nvrtc]") { diff --git a/cub/test/cmake/check_source_files.cmake b/cub/test/cmake/check_source_files.cmake index 1554a2256e1..1fba8476f67 100644 --- a/cub/test/cmake/check_source_files.cmake +++ b/cub/test/cmake/check_source_files.cmake @@ -83,24 +83,6 @@ if (NOT valid_count EQUAL 5) "Matched ${valid_count} times, expected 5.") endif() -################################################################################ -# Legacy macro checks. -# Check all files in CUB to make sure that they aren't using the legacy -# CUB_RUNTIME_ENABLED and __THRUST_HAS_CUDART__ macros. -# -# These macros depend on __CUDA_ARCH__ and are not compatible with NV_IF_TARGET. -# They are provided for legacy purposes and should be replaced with -# [THRUST|CUB]_RDC_ENABLED and NV_IF_TARGET in Thrust/CUB code. -# -# -set(legacy_macro_header_exclusions - # This header defines a legacy CUDART macro: - cub/detail/detect_cuda_runtime.cuh -) - -set(cub_legacy_macro_regex "CUB_RUNTIME_ENABLED") -set(thrust_legacy_macro_regex "__THRUST_HAS_CUDART__") - ################################################################################ # Read source files: foreach(src ${cub_srcs}) @@ -156,21 +138,6 @@ foreach(src ${cub_srcs}) set(found_errors 1) endif() endif() - - if (NOT ${src} IN_LIST legacy_macro_header_exclusions) - count_substrings("${src_contents}" "${thrust_legacy_macro_regex}" thrust_count) - count_substrings("${src_contents}" "${cub_legacy_macro_regex}" cub_count) - - if (NOT thrust_count EQUAL 0) - message("'${src}' uses __THRUST_HAS_CUDART__. Replace with THRUST_RDC_ENABLED and NV_IF_TARGET.") - set(found_errors 1) - endif() - - if (NOT cub_count EQUAL 0) - message("'${src}' uses CUB_RUNTIME_ENABLED. 
Replace with CUB_RDC_ENABLED and NV_IF_TARGET.") - set(found_errors 1) - endif() - endif() endforeach() if (NOT found_errors EQUAL 0) diff --git a/cub/test/cmake/nvrtc_args.h.in b/cub/test/cmake/nvrtc_args.h.in new file mode 100644 index 00000000000..215804ad0f0 --- /dev/null +++ b/cub/test/cmake/nvrtc_args.h.in @@ -0,0 +1,6 @@ +#pragma once + +const char* NVRTC_CUB_PATH = "-I@CMAKE_SOURCE_DIR@/cub"; +const char* NVRTC_THRUST_PATH = "-I@CMAKE_SOURCE_DIR@/thrust"; +const char* NVRTC_LIBCUDACXX_PATH = "-I@CMAKE_SOURCE_DIR@/libcudacxx/include"; +const char* NVRTC_CTK_PATH = "-I@CUDAToolkit_INCLUDE_DIRS@"; diff --git a/cub/test/test_block_radix_rank.cu b/cub/test/test_block_radix_rank.cu index 8c1df1a80c7..c53c6b179e3 100644 --- a/cub/test/test_block_radix_rank.cu +++ b/cub/test/test_block_radix_rank.cu @@ -310,7 +310,7 @@ void Test() Test(); Test(); - Test(cub::Int2Type<(BlockThreads % 32) == 0>{}); + Test(cub::Int2Type < (BlockThreads % 32) == 0 > {}); } int main(int argc, char** argv) diff --git a/cub/test/test_grid_barrier.cu b/cub/test/test_grid_barrier.cu index 2f5ecfa3ebb..e763b48d1e2 100644 --- a/cub/test/test_grid_barrier.cu +++ b/cub/test/test_grid_barrier.cu @@ -47,7 +47,9 @@ using namespace cub; /** * Kernel that iterates through the specified number of software global barriers */ -__global__ void Kernel(GridBarrier global_barrier, int iterations) +_CCCL_SUPPRESS_DEPRECATED_PUSH +__global__ void Kernel(GridBarrier global_barrier, int iterations) // + _CCCL_SUPPRESS_DEPRECATED_POP { for (int i = 0; i < iterations; i++) { @@ -126,7 +128,9 @@ int main(int argc, char** argv) fflush(stdout); // Init global barrier + _CCCL_SUPPRESS_DEPRECATED_PUSH GridBarrierLifetime global_barrier; + _CCCL_SUPPRESS_DEPRECATED_POP global_barrier.Setup(grid_size); // Time kernel diff --git a/cub/test/test_warning_suppression.cuh b/cub/test/test_warning_suppression.cuh index 46c6080fed7..448230343f3 100644 --- a/cub/test/test_warning_suppression.cuh +++ b/cub/test/test_warning_suppression.cuh @@ -27,7 +27,6 @@ #pragma once -#include #include // C4127: conditional expression is constant diff --git a/cudax/examples/stf/void_data_interface.cu b/cudax/examples/stf/void_data_interface.cu index 72ac76e6fe1..9e7b5096cd7 100644 --- a/cudax/examples/stf/void_data_interface.cu +++ b/cudax/examples/stf/void_data_interface.cu @@ -23,14 +23,23 @@ int main() { context ctx; - auto ltask_res = ctx.logical_data(shape_of()); - ctx.task(ltask_res.write())->*[](cudaStream_t, auto) { + auto token = ctx.logical_data(shape_of()); + ctx.task(token.write())->*[](cudaStream_t, auto) { }; void_interface sync; - auto ltask2_res = ctx.logical_data(sync); - ctx.task(ltask2_res.write(), ltask_res.read())->*[](cudaStream_t, auto, auto) { + auto token2 = ctx.logical_data(sync); + + auto token3 = ctx.logical_token(); + ctx.task(token2.write(), token.read())->*[](cudaStream_t, auto, auto) { + + }; + + // Do not pass useless arguments by removing void_interface arguments + // Note that the rw() access is possible even if there was no prior write() + // or actual underlying data. 
+ ctx.task(token3.rw(), token.read())->*[](cudaStream_t) { }; diff --git a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh index 459beddee22..ae8ad239d46 100644 --- a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh +++ b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh @@ -60,8 +60,8 @@ struct receiver_defaults } template - _CUDAX_TRIVIAL_API static auto - set_stopped(__ignore, _Rcvr& __rcvr) noexcept -> __async::completion_signatures<__async::set_stopped_t()> + _CUDAX_TRIVIAL_API static auto set_stopped(__ignore, _Rcvr& __rcvr) noexcept + -> __async::completion_signatures<__async::set_stopped_t()> { __async::set_stopped(static_cast<_Rcvr&&>(__rcvr)); return {}; @@ -198,15 +198,15 @@ _CUDAX_TRIVIAL_API auto __make_opstate(_Sndr __sndr, _Rcvr __rcvr) } template -_CUDAX_TRIVIAL_API auto -__get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept -> decltype(__data.get_attrs(__sndrs...)) +_CUDAX_TRIVIAL_API auto __get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept + -> decltype(__data.get_attrs(__sndrs...)) { return __data.get_attrs(__sndrs...); } template -_CUDAX_TRIVIAL_API auto -__get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept -> decltype(__async::get_env(__sndrs...)) +_CUDAX_TRIVIAL_API auto __get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept + -> decltype(__async::get_env(__sndrs...)) { return __async::get_env(__sndrs...); } diff --git a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh index 25d5ef04d76..868c911b1da 100644 --- a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh +++ b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh @@ -76,48 +76,36 @@ template class _Vy, template class _ using __transform_sig_t = decltype(__transform_sig<_Sig, _Vy, _Ey, _Sy>()); template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern _DIAGNOSTIC<_Sigs> __transform_completion_signatures_v; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern __fn_t<_ERROR<_What...>>* __transform_completion_signatures_v<_ERROR<_What...>, _Vy, _Ey, _Sy, _Variant, _More...>; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern __fn_t<_Variant<__transform_sig_t<_Sigs, _Vy, _Ey, _Sy>..., _More...>>* __transform_completion_signatures_v, _Vy, _Ey, _Sy, _Variant, _More...>; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> using __transform_completion_signatures = decltype(__transform_completion_signatures_v<_Sigs, _Vy, _Ey, _Sy, _Variant, _More...>()); @@ -129,12 +117,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... 
_More> using __call = __transform_completion_signatures< _Sigs, @@ -149,12 +134,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __call = __transform_completion_signatures< _Sigs, @@ -169,12 +151,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __call = __transform_completion_signatures< _Sigs, @@ -187,12 +166,9 @@ struct __gather_sigs_fn template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __gather_completion_signatures = typename __gather_sigs_fn<_WantedTag>::template __call<_Sigs, _Then, _Else, _Variant, _More...>; @@ -404,13 +380,12 @@ template auto completion(_Tag, _Args&&...) -> __csig::__sigs<_Tag(_Args...)>&; template -auto completions_of(_Sndr&&, - _Rcvr = {}) -> decltype(__csig::__to_sigs(__declval&>())); +auto completions_of(_Sndr&&, _Rcvr = {}) + -> decltype(__csig::__to_sigs(__declval&>())); template -auto eptr_completion_if() - -> _CUDA_VSTD:: - conditional_t<_PotentiallyThrowing, __csig::__sigs, __csig::__sigs<>>&; +auto eptr_completion_if() -> _CUDA_VSTD:: + conditional_t<_PotentiallyThrowing, __csig::__sigs, __csig::__sigs<>>&; } // namespace meta } // namespace cuda::experimental::__async diff --git a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh index 9a0c142e21c..8da87a443a3 100644 --- a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh +++ b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh @@ -267,8 +267,8 @@ struct continue_on_t::__sndr_t }; template -_CUDAX_API auto -continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept -> continue_on_t::__sndr_t<_Sndr, _Sch> +_CUDAX_API auto continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept + -> continue_on_t::__sndr_t<_Sndr, _Sch> { return __sndr_t<_Sndr, _Sch>{{}, __sch, static_cast<_Sndr&&>(__sndr)}; } diff --git a/cudax/include/cuda/experimental/__async/sender/cpos.cuh b/cudax/include/cuda/experimental/__async/sender/cpos.cuh index 7f1fb383a71..dab62e7ac10 100644 --- a/cudax/include/cuda/experimental/__async/sender/cpos.cuh +++ b/cudax/include/cuda/experimental/__async/sender/cpos.cuh @@ -110,8 +110,8 @@ _CCCL_GLOBAL_CONSTANT struct set_error_t _CCCL_GLOBAL_CONSTANT struct set_stopped_t { template - _CUDAX_TRIVIAL_API auto - operator()(_Rcvr&& __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped()) + _CUDAX_TRIVIAL_API auto operator()(_Rcvr&& __rcvr) const noexcept + -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped()) { static_assert(_CUDA_VSTD::is_same_v(__rcvr).set_stopped()), void>); static_assert(noexcept(static_cast<_Rcvr&&>(__rcvr).set_stopped())); @@ -119,8 +119,8 @@ _CCCL_GLOBAL_CONSTANT struct set_stopped_t } template - _CUDAX_TRIVIAL_API auto - operator()(_Rcvr* __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped()) + _CUDAX_TRIVIAL_API auto operator()(_Rcvr* __rcvr) const noexcept + -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped()) { static_assert(_CUDA_VSTD::is_same_v(*__rcvr).set_stopped()), void>); static_assert(noexcept(static_cast<_Rcvr&&>(*__rcvr).set_stopped())); 
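The set_stopped_t rewrites just above keep the receiver expression inside a trailing decltype, which both removes the overload from consideration when the receiver has no set_stopped() member and deduces the exact return type from the call. A standalone sketch of that idiom, using hypothetical names (try_stop, my_receiver) that do not appear in this patch:

struct my_receiver
{
  void set_stopped() noexcept {}
};

// Well-formed only if r.set_stopped() compiles; the decltype also
// doubles as the deduced return type of the wrapper.
template <class Rcvr>
auto try_stop(Rcvr&& r) noexcept -> decltype(static_cast<Rcvr&&>(r).set_stopped())
{
  return static_cast<Rcvr&&>(r).set_stopped();
}

int main()
{
  my_receiver r;
  try_stop(r); // OK; try_stop(42) would simply fail overload resolution
}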
diff --git a/cudax/include/cuda/experimental/__async/sender/let_value.cuh b/cudax/include/cuda/experimental/__async/sender/let_value.cuh index 7d06e071fe0..6742a1c1d6c 100644 --- a/cudax/include/cuda/experimental/__async/sender/let_value.cuh +++ b/cudax/include/cuda/experimental/__async/sender/let_value.cuh @@ -243,8 +243,9 @@ private: _Sndr __sndr_; template - _CUDAX_API auto connect(_Rcvr __rcvr) && noexcept( - __nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>) -> __opstate_t<_Rcvr, _Sndr, _Fn> + _CUDAX_API auto + connect(_Rcvr __rcvr) && noexcept(__nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>) + -> __opstate_t<_Rcvr, _Sndr, _Fn> { return __opstate_t<_Rcvr, _Sndr, _Fn>( static_cast<_Sndr&&>(__sndr_), static_cast<_Fn&&>(__fn_), static_cast<_Rcvr&&>(__rcvr)); diff --git a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh index 35e6d4d164a..693816dbb45 100644 --- a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh +++ b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh @@ -369,8 +369,8 @@ _CUDAX_API inline void inplace_stop_source::__unlock(uint8_t __old_state) const (void) __state_.store(__old_state, _CUDA_VSTD::memory_order_release); } -_CUDAX_API inline auto -inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept -> bool +_CUDAX_API inline auto inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept + -> bool { __stok::__spin_wait __spin; auto __old_state = __state_.load(_CUDA_VSTD::memory_order_relaxed); diff --git a/cudax/include/cuda/experimental/__async/sender/tuple.cuh b/cudax/include/cuda/experimental/__async/sender/tuple.cuh index 98a1d0997f1..0229ed8b9c7 100644 --- a/cudax/include/cuda/experimental/__async/sender/tuple.cuh +++ b/cudax/include/cuda/experimental/__async/sender/tuple.cuh @@ -65,8 +65,8 @@ struct __tupl<_CUDA_VSTD::index_sequence<_Idx...>, _Ts...> : __box<_Idx, _Ts>... template _CUDAX_TRIVIAL_API static auto __for_each(_Fn&& __fn, _Self&& __self, _Us&&... 
__us) // - noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> - && ...)) -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)> + noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)) + -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)> { return ( static_cast<_Fn&&>(__fn)(static_cast<_Us&&>(__us)..., static_cast<_Self&&>(__self).__box<_Idx, _Ts>::__value_), diff --git a/cudax/include/cuda/experimental/__detail/config.cuh b/cudax/include/cuda/experimental/__detail/config.cuh index 632e689f564..2ac25eb0a3b 100644 --- a/cudax/include/cuda/experimental/__detail/config.cuh +++ b/cudax/include/cuda/experimental/__detail/config.cuh @@ -35,15 +35,9 @@ // two attributes: // - `_CUDAX_API` declares the function host/device and hides the symbol from the ABI // - `_CUDAX_TRIVIAL_API` does the same while also forcing inlining and hiding the function from debuggers -#if _CCCL_COMPILER(ICC) // ICC has issues with visibility attributes on symbols with internal linkage -# define _CUDAX_API _CCCL_HOST_DEVICE -# define _CUDAX_HOST_API _CCCL_HOST -# define _CUDAX_DEVICE_API _CCCL_DEVICE -#else // ^^^ _CCCL_COMPILER(ICC) ^^^ / vvv !_CCCL_COMPILER(ICC) vvv -# define _CUDAX_API _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION -# define _CUDAX_HOST_API _CCCL_HOST _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION -# define _CUDAX_DEVICE_API _CCCL_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION -#endif // !_CCCL_COMPILER(ICC) +#define _CUDAX_API _CCCL_HOST_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION +#define _CUDAX_HOST_API _CCCL_HOST _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION +#define _CUDAX_DEVICE_API _CCCL_DEVICE _CCCL_VISIBILITY_HIDDEN _CCCL_EXCLUDE_FROM_EXPLICIT_INSTANTIATION // _CUDAX_TRIVIAL_API force-inlines a function, marks its visibility as hidden, and causes debuggers to skip it. // This is useful for trivial internal functions that do dispatching or other plumbing work. 
It is particularly diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh index 8a42bab40ca..0e1dceff19b 100644 --- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh @@ -80,8 +80,8 @@ struct __with_property template struct __iproperty : interface<__iproperty> { - _CUDAX_HOST_API friend auto - get_property([[maybe_unused]] const __iproperty& __obj, _Property) -> __property_result_t<_Property> + _CUDAX_HOST_API friend auto get_property([[maybe_unused]] const __iproperty& __obj, _Property) + -> __property_result_t<_Property> { if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) { @@ -268,8 +268,8 @@ template struct __with_try_get_property { template - _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto - try_get_property(const _Derived& __self, _Property) noexcept -> __try_property_result_t<_Property> + _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto try_get_property(const _Derived& __self, _Property) noexcept + -> __try_property_result_t<_Property> { auto __prop = __cudax::dynamic_any_cast*>(&__self); if constexpr (_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) diff --git a/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh b/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh index aefaa699f21..8a1a8a41168 100644 --- a/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh +++ b/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/cudax/include/cuda/experimental/__stf/graph/graph_task.cuh b/cudax/include/cuda/experimental/__stf/graph/graph_task.cuh index a435c85126e..c64d52437c4 100644 --- a/cudax/include/cuda/experimental/__stf/graph/graph_task.cuh +++ b/cudax/include/cuda/experimental/__stf/graph/graph_task.cuh @@ -31,6 +31,7 @@ #include // graph_task<> has-a backend_ctx_untyped #include #include +#include namespace cuda::experimental::stf { @@ -508,8 +509,12 @@ public: dot.template add_vertex(*this); } + constexpr bool fun_invocable_stream_deps = ::std::is_invocable_v; + constexpr bool fun_invocable_stream_non_void_deps = + reserved::is_invocable_with_filtered::value; + // Default for the first argument is a `cudaStream_t`. 
- if constexpr (::std::is_invocable_v) + if constexpr (fun_invocable_stream_deps || fun_invocable_stream_non_void_deps) { // // CAPTURE the lambda @@ -522,7 +527,16 @@ public: cuda_safe_call(cudaStreamBeginCapture(capture_stream, cudaStreamCaptureModeThreadLocal)); // Launch the user provided function - ::std::apply(f, tuple_prepend(mv(capture_stream), typed_deps())); + if constexpr (fun_invocable_stream_deps) + { + ::std::apply(f, tuple_prepend(mv(capture_stream), typed_deps())); + } + else if constexpr (fun_invocable_stream_non_void_deps) + { + // Remove void arguments + ::std::apply(::std::forward(f), + tuple_prepend(mv(capture_stream), reserved::remove_void_interface_types(typed_deps()))); + } cuda_safe_call(cudaStreamEndCapture(capture_stream, &childGraph)); @@ -534,7 +548,12 @@ public: } else { - static_assert(::std::is_invocable_v, "Incorrect lambda function signature."); + constexpr bool fun_invocable_graph_deps = ::std::is_invocable_v; + constexpr bool fun_invocable_graph_non_void_deps = + reserved::is_invocable_with_filtered::value; + + static_assert(fun_invocable_graph_deps || fun_invocable_graph_non_void_deps, + "Incorrect lambda function signature."); // // Give the lambda a child graph // diff --git a/cudax/include/cuda/experimental/__stf/graph/interfaces/void_interface.cuh b/cudax/include/cuda/experimental/__stf/graph/interfaces/void_interface.cuh new file mode 100644 index 00000000000..12f36d06c16 --- /dev/null +++ b/cudax/include/cuda/experimental/__stf/graph/interfaces/void_interface.cuh @@ -0,0 +1,114 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDASTF in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +/** + * @file + * + * @brief This implements a void data interface over the graph_ctx backend + */ + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +namespace cuda::experimental::stf +{ + +template +struct graphed_interface_of; + +/** + * @brief Data interface to manipulate the void interface in the CUDA graph backend + */ +class void_graph_interface : public graph_data_interface +{ +public: + /// @brief Alias for the base class + using base = graph_data_interface; + /// @brief Alias for the shape type + using base::shape_t; + + void_graph_interface(void_interface s) + : base(mv(s)) + {} + void_graph_interface(shape_of s) + : base(mv(s)) + {} + + void data_allocate( + backend_ctx_untyped&, + block_allocator_untyped&, + const data_place&, + instance_id_t, + ::std::ptrdiff_t& s, + void**, + event_list&) override + { + s = 0; + } + + void data_deallocate( + backend_ctx_untyped&, block_allocator_untyped&, const data_place&, instance_id_t, void*, event_list&) final + {} + + cudaGraphNode_t graph_data_copy( + cudaMemcpyKind, + instance_id_t, + instance_id_t, + cudaGraph_t graph, + const cudaGraphNode_t* input_nodes, + size_t input_cnt) override + { + cudaGraphNode_t dummy; + cuda_safe_call(cudaGraphAddEmptyNode(&dummy, graph, input_nodes, input_cnt)); + return dummy; + } + + bool pin_host_memory(instance_id_t) override + { + // no-op + return false; + } + + void unpin_host_memory(instance_id_t) override {} + + /* This helps detecting when we are manipulating a void data interface, so + * that we can optimize useless stages such as allocations or copies */ + bool is_void_interface() const override final + { + return true; + } +}; + +/** + * @brief Define how the CUDA stream backend must manipulate this void interface + * + * Note that we specialize cuda::experimental::stf::shape_of to avoid ambiguous specialization + * + * @extends graphed_interface_of + */ +template <> +struct graphed_interface_of +{ + using type = void_graph_interface; +}; + +} // end namespace cuda::experimental::stf diff --git a/cudax/include/cuda/experimental/__stf/internal/backend_ctx.cuh b/cudax/include/cuda/experimental/__stf/internal/backend_ctx.cuh index 119c4e52d40..15d05f1f894 100644 --- a/cudax/include/cuda/experimental/__stf/internal/backend_ctx.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/backend_ctx.cuh @@ -39,6 +39,7 @@ #include // backend_ctx uses shape_of #include // backend_ctx_untyped::impl has-a ctx_stack #include +#include #include // XXX there is currently a dependency on this header for places.h @@ -195,7 +196,22 @@ public: { delete w; }; - ::std::apply(::std::forward(w->first), mv(w->second)); + + constexpr bool fun_invocable_task_deps = reserved::is_tuple_invocable_v; + constexpr bool fun_invocable_task_non_void_deps = + reserved::is_tuple_invocable_with_filtered::value; + + static_assert(fun_invocable_task_deps || fun_invocable_task_non_void_deps, + "Incorrect lambda function signature in host_launch."); + + if constexpr (fun_invocable_task_deps) + { + ::std::apply(::std::forward(w->first), mv(w->second)); + } + else if constexpr (fun_invocable_task_non_void_deps) + { + ::std::apply(::std::forward(w->first), 
reserved::remove_void_interface_types(mv(w->second))); + } }; if constexpr (::std::is_same_v) @@ -1067,6 +1083,16 @@ public: return logical_data(make_slice(p, n), mv(dplace)); } + auto logical_token() + { + // We do not use a shape because we want the first rw() access to succeed + // without an initial write() + // + // Note that we do not disable write back as the write-back mechanism is + // handling void_interface specifically to ignore it anyway. + return logical_data(void_interface{}); + } + template <typename T> frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, diff --git a/cudax/include/cuda/experimental/__stf/internal/data_interface.cuh b/cudax/include/cuda/experimental/__stf/internal/data_interface.cuh index 1caf710496b..c2d91313451 100644 --- a/cudax/include/cuda/experimental/__stf/internal/data_interface.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/data_interface.cuh @@ -296,6 +296,15 @@ public: return tp.find_data_instance_id(d); } + /** + * @brief Indicates whether this is a void data interface, which makes it possible to + * skip some operations such as allocating or moving data + */ + virtual bool is_void_interface() const + { + return false; + } + private: /** * @brief Get the common implementation of the data interface. diff --git a/cudax/include/cuda/experimental/__stf/internal/launch.cuh b/cudax/include/cuda/experimental/__stf/internal/launch.cuh index 2b50d480fff..61b01525093 100644 --- a/cudax/include/cuda/experimental/__stf/internal/launch.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/launch.cuh @@ -451,21 +451,17 @@ public: } size_t p_rank = 0; - if constexpr (::std::is_same_v) + for (auto p : e_place) { - for (auto p : e_place) + if constexpr (::std::is_same_v) { reserved::launch_impl(interpreted_policy, p, f, args, t.get_stream(p_rank), p_rank); - p_rank++; } - } - else - { - for (auto p : e_place) + else { reserved::graph_launch_impl(t, interpreted_policy, p, f, args, p_rank); - p_rank++; } + p_rank++; } } diff --git a/cudax/include/cuda/experimental/__stf/internal/logical_data.cuh b/cudax/include/cuda/experimental/__stf/internal/logical_data.cuh index 5858a809f46..ee01f53a90b 100644 --- a/cudax/include/cuda/experimental/__stf/internal/logical_data.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/logical_data.cuh @@ -467,6 +467,12 @@ public: return dinterface != nullptr; } + bool is_void_interface() const + { + _CCCL_ASSERT(has_interface(), "uninitialized logical data"); + return dinterface->is_void_interface(); + } + bool has_ref() const { assert(refcnt.load() >= 0); @@ -1255,6 +1261,15 @@ public: return pimpl->dinterface != nullptr; } + /** + * @brief Returns true if the data is a void data interface + */ + bool is_void_interface() const + { + assert(pimpl); + return pimpl->is_void_interface(); + } + // This function applies the reduction operator over 2 instances, the one // identified by "in_instance_id" is not modified, the one identified as // "inout_instance_id" is where the result is put.
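The logical_token() helper added above wraps a logical_data over void_interface: a pure dependency object with no backing storage, whose first rw() access succeeds without a prior write() and whose write-back is ignored. A minimal usage sketch in the spirit of the void_data_interface.cu example earlier in this diff (the lambda bodies are placeholders; this assumes an STF context as set up in that example):

using namespace cuda::experimental::stf;

context ctx;
auto token = ctx.logical_token();
// Producer acquires the token in rw mode; no allocation or copy is performed.
ctx.task(token.rw())->*[](cudaStream_t) { /* enqueue producer work */ };
// The read() dependency orders the consumer after the producer.
ctx.task(token.read())->*[](cudaStream_t) { /* enqueue consumer work */ };
ctx.finalize();

Note that both lambdas take only the cudaStream_t: with the argument filtering introduced elsewhere in this diff, void_interface dependencies are elided from the lambda's parameter list.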
@@ -1727,7 +1742,7 @@ inline void reserved::logical_data_untyped_impl::erase() /* If there is a reference instance id, it needs to be updated with a * valid copy if that is not the case yet */ - if (enable_write_back) + if (enable_write_back && !is_void_interface()) { instance_id_t ref_id = reference_instance_id; assert(ref_id != instance_id_t::invalid); @@ -2032,7 +2047,7 @@ inline void fetch_data( { event_list stf_prereq = reserved::enforce_stf_deps_before(ctx, d, instance_id, t, mode, eplace); - if (d.has_interface()) + if (d.has_interface() && !d.is_void_interface()) { // Allocate data if needed (and possibly reclaim memory to do so) reserved::dep_allocate(ctx, d, mode, dplace, eplace, instance_id, stf_prereq); diff --git a/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh b/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh index 21e5ac7fb0f..06f1f7f689b 100644 --- a/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh @@ -916,7 +916,7 @@ public: const size_t n = shape.size(); // Tuple , size_t , fun, shape> - using args_t = ::std::tuple; + using args_t = ::std::tuple; // Create a tuple with all instances (eg. tuple, slice>) deps_tup_t instances = ::std::apply( @@ -940,7 +940,7 @@ public: auto& data = ::std::get<0>(*p); const size_t n = ::std::get<1>(*p); - Fun&& f = mv(::std::get<2>(*p)); + Fun& f = ::std::get<2>(*p); const sub_shape_t& shape = ::std::get<3>(*p); // deps_ops_t are pairs of data instance type, and a reduction operator, diff --git a/cudax/include/cuda/experimental/__stf/internal/void_interface.cuh b/cudax/include/cuda/experimental/__stf/internal/void_interface.cuh index 5a557ad1a29..e238bff7641 100644 --- a/cudax/include/cuda/experimental/__stf/internal/void_interface.cuh +++ b/cudax/include/cuda/experimental/__stf/internal/void_interface.cuh @@ -11,7 +11,7 @@ /** * @file * - * @brief This implements a void data interface useful to implement STF + * @brief This defines a void data interface useful to implement STF * dependencies without actual data (e.g. 
to enforce task dependencies) */ @@ -27,8 +27,6 @@ # pragma system_header #endif // no system header -#include -#include #include namespace cuda::experimental::stf { template <typename T> class shape_of; -template <typename T> -struct streamed_interface_of; - -template <typename T> -struct graphed_interface_of; - class void_interface {}; @@ -71,136 +63,123 @@ public: }; /** - * @brief Data interface to manipulate the void interface in the CUDA stream backend + * @brief A hash of the void_interface */ -class void_stream_interface : public stream_data_interface_simple<void_interface> +template <> +struct hash<void_interface> { -public: - using base = stream_data_interface_simple<void_interface>; - using base::shape_t; + ::std::size_t operator()(void_interface const&) const noexcept + { + return 42; + } +}; - void_stream_interface(void_interface m) - : base(::std::move(m)) - {} - void_stream_interface(typename base::shape_t s) - : base(s) - {} +namespace reserved { - /// Copy the content of an instance to another instance : this is a no-op - void stream_data_copy(const data_place&, instance_id_t, const data_place&, instance_id_t, cudaStream_t) override {} +template <typename... Ts> +struct remove_void_interface +{ + using type = ::std::tuple<>; +}; - /// Pretend we allocate an instance on a specific data place : we do not do any allocation here - void stream_data_allocate( - backend_ctx_untyped&, const data_place&, instance_id_t, ::std::ptrdiff_t& s, void**, cudaStream_t) override - { - // By filling a non negative number, we notify that the allocation was successful - s = 0; - } +template <typename T, typename... Ts> +struct remove_void_interface<T, Ts...> +{ +private: + using tail = typename remove_void_interface<Ts...>::type; - /// Pretend we deallocate an instance (no-op) - void stream_data_deallocate(backend_ctx_untyped&, const data_place&, instance_id_t, void*, cudaStream_t) override {} + // If T is void_interface, skip it, otherwise prepend it to tail + using filtered = + std::conditional_t<::std::is_same_v<T, void_interface>, + tail, + decltype(::std::tuple_cat(::std::declval<::std::tuple<T>>(), ::std::declval<tail>()))>; - bool pin_host_memory(instance_id_t) override - { - // no-op - return false; - } +public: + using type = filtered; +}; - void unpin_host_memory(instance_id_t) override {} +template <typename... Ts> +using remove_void_interface_t = typename remove_void_interface<Ts...>::type; + +template <typename T> +struct remove_void_interface_from_tuple +{ + // By default, if T is not a std::tuple, do nothing special + using type = T; }; -/** - * @brief Define how the CUDA stream backend must manipulate this void interface - * - * Note that we specialize cuda::experimental::stf::shape_of to avoid ambiguous specialization - * - * @extends streamed_interface_of - */ -template <> -struct streamed_interface_of<void_interface> +template <typename... Ts> +struct remove_void_interface_from_tuple<::std::tuple<Ts...>> { - using type = void_stream_interface; + using type = remove_void_interface_t<Ts...>; }; +template <typename T> +using remove_void_interface_from_tuple_t = typename remove_void_interface_from_tuple<T>::type; + /** - * @brief Data interface to manipulate the void interface in the CUDA graph backend + * @brief Check if a function can be invoked while eliding arguments with a void_interface type.
*/ -class void_graph_interface : public graph_data_interface +template +struct is_invocable_with_filtered { -public: - /// @brief Alias for the base class - using base = graph_data_interface; - /// @brief Alias for the shape type - using base::shape_t; - - void_graph_interface(void_interface s) - : base(mv(s)) - {} - void_graph_interface(shape_of s) - : base(mv(s)) - {} - - void data_allocate( - backend_ctx_untyped&, - block_allocator_untyped&, - const data_place&, - instance_id_t, - ::std::ptrdiff_t& s, - void**, - event_list&) override +private: + template + static auto test(int) -> ::std::bool_constant<::std::is_invocable_v> { - s = 0; + return {}; } - void data_deallocate( - backend_ctx_untyped&, block_allocator_untyped&, const data_place&, instance_id_t, void*, event_list&) final - {} - - cudaGraphNode_t graph_data_copy( - cudaMemcpyKind, - instance_id_t, - instance_id_t, - cudaGraph_t graph, - const cudaGraphNode_t* input_nodes, - size_t input_cnt) override + template + static auto test(...) -> ::std::false_type { - cudaGraphNode_t dummy; - cuda_safe_call(cudaGraphAddEmptyNode(&dummy, graph, input_nodes, input_cnt)); - return dummy; + return {}; } - bool pin_host_memory(instance_id_t) override + template <::std::size_t... Idx> + static auto check(::std::index_sequence) { - // no-op - return false; + using filtered = remove_void_interface_t; + return test...>(0); } - void unpin_host_memory(instance_id_t) override {} +public: + static constexpr bool value = + decltype(check(::std::make_index_sequence<::std::tuple_size_v>>{}))::value; }; /** - * @brief Define how the CUDA stream backend must manipulate this void interface - * - * Note that we specialize cuda::experimental::stf::shape_of to avoid ambiguous specialization - * - * @extends graphed_interface_of + * @brief Check if a function can be invoked using std::apply while eliding tuple arguments with a void_interface type. */ -template <> -struct graphed_interface_of -{ - using type = void_graph_interface; -}; +template +struct is_tuple_invocable_with_filtered : is_tuple_invocable> +{}; /** - * @brief A hash of the matrix + * @brief Strip tuple entries with a "void_interface" type */ -template <> -struct hash +template +auto remove_void_interface_types(const ::std::tuple& tpl) { - ::std::size_t operator()(void_interface const&) const noexcept - { - return 42; - } -}; + return ::std::apply( + [](auto&&... args) { + auto filter_one = [](auto&& arg) { + using T = ::std::decay_t; + if constexpr (::std::is_same_v) + { + return ::std::tuple<>{}; + } + else + { + return ::std::tuple(::std::forward(arg)); + } + }; + return ::std::tuple_cat(filter_one(::std::forward(args))...); + }, + tpl); +} + +} // end namespace reserved } // end namespace cuda::experimental::stf diff --git a/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh b/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh new file mode 100644 index 00000000000..78ca63d5aab --- /dev/null +++ b/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDASTF in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. 
diff --git a/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh b/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh
new file mode 100644
index 00000000000..78ca63d5aab
--- /dev/null
+++ b/cudax/include/cuda/experimental/__stf/stream/interfaces/void_interface.cuh
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDASTF in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+/**
+ * @file
+ *
+ * @brief This implements the void data interface in the stream_ctx backend
+ */
+
+#pragma once
+
+#include <cuda/__cccl_config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/experimental/__stf/internal/void_interface.cuh>
+#include <cuda/experimental/__stf/stream/stream_data_interface.cuh>
+
+namespace cuda::experimental::stf
+{
+
+template <typename T>
+struct streamed_interface_of;
+
+/**
+ * @brief Data interface to manipulate the void interface in the CUDA stream backend
+ */
+class void_stream_interface : public stream_data_interface_simple<void_interface>
+{
+public:
+  using base = stream_data_interface_simple<void_interface>;
+  using base::shape_t;
+
+  void_stream_interface(void_interface m)
+      : base(::std::move(m))
+  {}
+  void_stream_interface(typename base::shape_t s)
+      : base(s)
+  {}
+
+  /// Copy the content of an instance to another instance : this is a no-op
+  void stream_data_copy(const data_place&, instance_id_t, const data_place&, instance_id_t, cudaStream_t) override {}
+
+  /// Pretend we allocate an instance on a specific data place : we do not do any allocation here
+  void stream_data_allocate(
+    backend_ctx_untyped&, const data_place&, instance_id_t, ::std::ptrdiff_t& s, void**, cudaStream_t) override
+  {
+    // By filling a non negative number, we notify that the allocation was successful
+    s = 0;
+  }
+
+  /// Pretend we deallocate an instance (no-op)
+  void stream_data_deallocate(backend_ctx_untyped&, const data_place&, instance_id_t, void*, cudaStream_t) override {}
+
+  bool pin_host_memory(instance_id_t) override
+  {
+    // no-op
+    return false;
+  }
+
+  void unpin_host_memory(instance_id_t) override {}
+
+  /* This helps detecting when we are manipulating a void data interface, so
+   * that we can optimize useless stages such as allocations or copies */
+  bool is_void_interface() const override final
+  {
+    return true;
+  }
+};
+
+/**
+ * @brief Define how the CUDA stream backend must manipulate this void interface
+ *
+ * Note that we specialize cuda::experimental::stf::shape_of to avoid ambiguous specialization
+ *
+ * @extends streamed_interface_of
+ */
+template <>
+struct streamed_interface_of<void_interface>
+{
+  using type = void_stream_interface;
+};
+
+} // end namespace cuda::experimental::stf
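Taken together with the stream_task.cuh change below, this no-op interface is what a user observes through the public API. A hedged usage sketch (the umbrella header name and the exact public API spelling are assumptions, not shown in this diff):

    #include <cuda/experimental/stf.cuh> // assumed umbrella header

    using namespace cuda::experimental::stf;

    int main()
    {
      stream_ctx ctx;

      // A logical data with a void_interface shape carries no storage; it
      // only expresses ordering between tasks.
      auto token = ctx.logical_data(shape_of<void_interface>());

      // With void_stream_interface, no allocation or copy is issued for
      // `token`, and (per the stream_task.cuh hunk below) the lambda may
      // omit the void_interface argument entirely.
      ctx.task(token.write())->*[](cudaStream_t) {
        // enforce task ordering only
      };

      ctx.finalize();
      return 0;
    }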
diff --git a/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh b/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
index 7416fb922ab..3a729bdb2c2 100644
--- a/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
+++ b/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
@@ -34,6 +34,7 @@
 #include
 #include // for unit test!
 #include // For implicit logical_data_untyped constructors
+#include <cuda/experimental/__stf/stream/interfaces/void_interface.cuh>
 #include
 
 namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/__stf/stream/stream_task.cuh b/cudax/include/cuda/experimental/__stf/stream/stream_task.cuh
index 540d3b0424d..348136778b7 100644
--- a/cudax/include/cuda/experimental/__stf/stream/stream_task.cuh
+++ b/cudax/include/cuda/experimental/__stf/stream/stream_task.cuh
@@ -29,6 +29,7 @@
 #include
 #include
+#include <cuda/experimental/__stf/stream/interfaces/void_interface.cuh>
 #include
 #include
 
@@ -593,12 +594,31 @@ public:
       auto t = tuple_prepend(get_stream(), typed_deps());
       return ::std::apply(::std::forward<Fun>(fun), t);
     }
+    else if constexpr (reserved::is_invocable_with_filtered<Fun, cudaStream_t, Data...>::value)
+    {
+      // Use the filtered tuple
+      auto t = tuple_prepend(get_stream(), reserved::remove_void_interface_types(typed_deps()));
+      return ::std::apply(::std::forward<Fun>(fun), t);
+    }
     else
     {
+      constexpr bool fun_invocable_task_deps = ::std::is_invocable_v<Fun, stream_task&, Data...>;
+      constexpr bool fun_invocable_task_non_void_deps =
+        reserved::is_invocable_with_filtered<Fun, stream_task&, Data...>::value;
+
       // Invoke passing `*this` as the first argument, followed by the slices
-      static_assert(::std::is_invocable_v<Fun, stream_task&, Data...>, "Incorrect lambda function signature.");
-      auto t = tuple_prepend(*this, typed_deps());
-      return ::std::apply(::std::forward<Fun>(fun), t);
+      static_assert(fun_invocable_task_deps || fun_invocable_task_non_void_deps,
+                    "Incorrect lambda function signature.");
+
+      if constexpr (fun_invocable_task_deps)
+      {
+        return ::std::apply(::std::forward<Fun>(fun), tuple_prepend(*this, typed_deps()));
+      }
+      else if constexpr (fun_invocable_task_non_void_deps)
+      {
+        return ::std::apply(::std::forward<Fun>(fun),
+                            tuple_prepend(*this, reserved::remove_void_interface_types(typed_deps())));
+      }
     }
   }
diff --git a/cudax/include/cuda/experimental/__stf/utility/traits.cuh b/cudax/include/cuda/experimental/__stf/utility/traits.cuh
index 8308e56d702..a30596f7bde 100644
--- a/cudax/include/cuda/experimental/__stf/utility/traits.cuh
+++ b/cudax/include/cuda/experimental/__stf/utility/traits.cuh
@@ -562,6 +562,22 @@ auto shuffled_array_tuple(ArgTypes... args)
 namespace reserved
 {
 
+/**
+ * @brief Trait class to check if a function can be invoked with ::std::apply using a tuple type
+ */
+template <typename Fun, typename Tuple>
+struct is_tuple_invocable : ::std::false_type
+{};
+
+// Partial specialization that unpacks the tuple
+template <typename Fun, typename... Args>
+struct is_tuple_invocable<Fun, ::std::tuple<Args...>> : ::std::is_invocable<Fun, Args...>
+{};
+
+// Convenient alias template
+template <typename Fun, typename Tuple>
+inline constexpr bool is_tuple_invocable_v = is_tuple_invocable<Fun, Tuple>::value;
+
 /**
  * @brief A compile-time boolean that checks if a type supports streaming with std::ostream <<.
  *
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
index 5b64dbc531d..bd481b3dea2 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
@@ -50,8 +50,8 @@ _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super>&
 }
 
 template
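For completeness, the is_tuple_invocable trait added in traits.cuh above is the building block the filtered-invocation checks compose with. A minimal illustration (not part of the patch; assumes the header is on the include path):

    #include <tuple>

    #include <cuda/experimental/__stf/utility/traits.cuh>

    namespace stf_reserved = cuda::experimental::stf::reserved;

    // A callable taking (int, double) is apply-invocable from a
    // ::std::tuple<int, double>, but not from a ::std::tuple<int>.
    auto fn = [](int, double) {};
    static_assert(stf_reserved::is_tuple_invocable_v<decltype(fn), ::std::tuple<int, double>>);
    static_assert(!stf_reserved::is_tuple_invocable_v<decltype(fn), ::std::tuple<int>>);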