From 60761e0946faaef9b7acb096d0db0c840dc52dc4 Mon Sep 17 00:00:00 2001
From: Allison Piper <alliepiper16@gmail.com>
Date: Wed, 10 Apr 2024 13:45:53 -0400
Subject: [PATCH] Enable extra NVBench features in Windows build. (#169)

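* Enable extra NVBench features in Windows build.

The Windows CI jobs no longer pass -DNVBench_ENABLE_CUPTI=OFF and
-DNVBench_ENABLE_NVML=OFF. These features were delayed as they required
changes to the devcontainers.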

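* Revamp nvml.dll logic: the nvml.dll search is no longer REQUIRED; fall back
  to CUDA::nvml, and fail with an actionable error if neither is found.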
---
 ci/matrix.yaml          | 72 ++++++++++++++++++-------------------
 cmake/NVBenchNVML.cmake | 80 ++++++++++++++++++++++-------------------
 2 files changed, 79 insertions(+), 73 deletions(-)

diff --git a/ci/matrix.yaml b/ci/matrix.yaml
index 8308d635..563dac05 100644
--- a/ci/matrix.yaml
+++ b/ci/matrix.yaml
@@ -47,40 +47,40 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' }
 # Configurations that will run for every PR
 pull_request:
   nvcc:
-    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7   }
-    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8   }
-    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9   }
-    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9  }
-    - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11  }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7   }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8   }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9   }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10  }
-    - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11  }
-    - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12  }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9  }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 }
-    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7   }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8   }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9   }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10  }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11  }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12  }
+    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7     }
+    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8     }
+    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9     }
+    - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9    }
+    - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11    }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7     }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8     }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9     }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10    }
+    - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11    }
+    - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12    }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9    }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10   }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11   }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12   }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13   }
+    - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7     }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8     }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9     }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10    }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11    }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12    }
     # Fails to compile simple input on CTK12.4. Try to add later.
-    # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13  }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9  }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17 }
-    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18,   extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"}
-    - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"}
-    - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"}
+    # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13    }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9    }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17   }
+    - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18,  extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"}
+    - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019 }
+    - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022 }
diff --git a/cmake/NVBenchNVML.cmake b/cmake/NVBenchNVML.cmake
index f2aadbbe..4b005f3c 100644
--- a/cmake/NVBenchNVML.cmake
+++ b/cmake/NVBenchNVML.cmake
@@ -1,37 +1,43 @@
-# Since this file is installed, we need to make sure that the CUDAToolkit has
-# been found by consumers:
-if (NOT TARGET CUDA::toolkit)
-  find_package(CUDAToolkit REQUIRED)
-endif()
-
-if (WIN32)
-  # The CUDA:: targets currently don't provide dll locations through the
-  # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries
-  # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls`
-  # CMake function from copying the dlls to the build / install directories.
-  # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845
-  # and the other CMake issues it links to.
-  #
-  # We create a nvbench-specific target that configures the nvml interface as
-  # described here:
-  # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538
-  #
-  # Use find_file instead of find_library, which would search for a .lib file.
-  # This is also nice because find_file searches recursively (find_library
-  # does not) and some versions of CTK nest nvml.dll several directories deep
-  # under C:\Windows\System32.
-  find_file(NVBench_NVML_DLL nvml.dll REQUIRED
-    DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32."
-    PATHS "C:/Windows/System32"
-  )
-  mark_as_advanced(NVBench_NVML_DLL)
-  add_library(nvbench::nvml SHARED IMPORTED)
-  target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit)
-  set_target_properties(nvbench::nvml PROPERTIES
-    IMPORTED_LOCATION "${NVBench_NVML_DLL}"
-    IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}"
-  )
-else()
-  # Linux is much easier...
-  add_library(nvbench::nvml ALIAS CUDA::nvml)
-endif()
+# Since this file is installed, we need to make sure that the CUDAToolkit has
+# been found by consumers:
+if (NOT TARGET CUDA::toolkit)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
+if (WIN32)
+  # The CUDA:: targets currently don't provide dll locations through the
+  # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries
+  # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls`
+  # CMake function from copying the dlls to the build / install directories.
+  # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845
+  # and the other CMake issues it links to.
+  #
+  # We create a nvbench-specific target that configures the nvml interface as
+  # described here:
+  # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538
+  #
+  # Use find_file instead of find_library, which would search for a .lib file.
+  # find_file does not recurse into subdirectories, so when nvml.dll is nested
+  # deeper under C:\Windows\System32 (some CTK versions), set NVBench_NVML_DLL
+  # manually. The search is optional; a miss is handled by the logic below.
+  find_file(NVBench_NVML_DLL nvml.dll
+    DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32."
+    PATHS "C:/Windows/System32"
+  )
+  mark_as_advanced(NVBench_NVML_DLL)
+endif()
+
+if (NVBench_NVML_DLL) # A dll path was found above or supplied by the user.
+  add_library(nvbench::nvml SHARED IMPORTED)
+  target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit)
+  set_target_properties(nvbench::nvml PROPERTIES
+    IMPORTED_LOCATION "${NVBench_NVML_DLL}"
+    IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}"
+  )
+elseif(TARGET CUDA::nvml) # No dll path; reuse the existing CUDA::nvml target.
+  add_library(nvbench::nvml ALIAS CUDA::nvml)
+else() # Neither is available: stop and tell the user how to proceed.
+  message(FATAL_ERROR "Could not find nvml.dll or CUDA::nvml target. "
+          "Set -DNVBench_ENABLE_NVML=OFF to disable NVML support "
+          "or set -DNVBench_NVML_DLL to the full path to nvml.dll on Windows.")
+endif()