From 1b607775a1809a2034e65d28fdebd4c6e39e330f Mon Sep 17 00:00:00 2001 From: Janusz Lisiecki <39967756+JanuszL@users.noreply.github.com> Date: Fri, 9 Feb 2024 09:19:17 +0100 Subject: [PATCH] Add missing calls to nvmlShutdown (#5311) - in some places, DALI initializes nvml but fails to call nvmlShutdown to shut it down gracefully Signed-off-by: Janusz Lisiecki --- dali/core/mm/malloc_resource.cc | 4 +++- .../nvjpeg/nvjpeg_decoder_decoupled_api_test.cc | 12 ++++++------ dali/operators/reader/gds_mem_test.cu | 4 +++- .../reader/loader/video/video_loader_decoder_gpu.cc | 1 + dali/operators/reader/video_reader_op_test.cc | 5 +---- dali/operators/sequence/optical_flow/optical_flow.h | 6 +++++- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/dali/core/mm/malloc_resource.cc b/dali/core/mm/malloc_resource.cc index 6380cccab2b..d297bfcc1eb 100644 --- a/dali/core/mm/malloc_resource.cc +++ b/dali/core/mm/malloc_resource.cc @@ -95,7 +95,9 @@ cuda_malloc_async_memory_resource::cuda_malloc_async_memory_resource(int device_ #if NVML_ENABLED static const float driverVersion = []() { nvml::Init(); - return nvml::GetDriverVersion(); + auto ret = nvml::GetDriverVersion(); + nvml::Shutdown(); + return ret; }(); if (driverVersion < 470.60) { cudaMemPool_t memPool; diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api_test.cc b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api_test.cc index 70ab8c6a8c8..4938d834abf 100644 --- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api_test.cc +++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api_test.cc @@ -182,13 +182,10 @@ TYPED_TEST(nvjpegDecodeDecoupledAPITest, TestSingleTiffDecode4T) { this->TiffTestDecode(4); } -#if NVJPEG_VER_MAJOR >= 11 +#if NVJPEG_VER_MAJOR >= 11 && NVML_ENABLED void PrintDeviceInfo() { unsigned int device_count; - if (!nvmlIsInitialized()) { - nvml::Init(); - return; - } + nvml::Init(); CUDA_CALL(nvmlDeviceGetCount_v2(&device_count)); for (unsigned int 
device_idx = 0; device_idx < device_count; device_idx++) { auto info = nvml::GetDeviceInfo(device_idx); @@ -198,6 +195,7 @@ void PrintDeviceInfo() { << " cc_m " << info.cap_minor << std::endl; } + nvml::Shutdown(); } /** @@ -206,7 +204,9 @@ void PrintDeviceInfo() { bool ShouldUseHwDecoder() { // HW decoder is disabled for drivers < 455.x, see // dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h for details + nvml::Init(); static float driver_version = nvml::GetDriverVersion(); + nvml::Shutdown(); static bool device_supports_hw_decoder = nvml::isHWDecoderSupported(); return device_supports_hw_decoder && driver_version >= 455; } @@ -510,7 +510,7 @@ class HwDecoderRandomCropUtilizationTest : public ::testing::Test { TEST_F(HwDecoderRandomCropUtilizationTest, UtilizationTest) { this->pipeline_.Run(); } -#endif +#endif // NVJPEG_VER_MAJOR >= 11 && NVML_ENABLED class Nvjpeg2kTest : public ::testing::Test { public: diff --git a/dali/operators/reader/gds_mem_test.cu b/dali/operators/reader/gds_mem_test.cu index 5b64b936558..67053b32140 100644 --- a/dali/operators/reader/gds_mem_test.cu +++ b/dali/operators/reader/gds_mem_test.cu @@ -62,7 +62,9 @@ void SkipIfIncompatible(TestBody &&body) { #if NVML_ENABLED static const int driverVersion = []() { nvml::Init(); - return nvml::GetCudaDriverVersion(); + auto ret = nvml::GetCudaDriverVersion(); + nvml::Shutdown(); + return ret; }(); #if defined(__aarch64__) if (driverVersion < 12020) { diff --git a/dali/operators/reader/loader/video/video_loader_decoder_gpu.cc b/dali/operators/reader/loader/video/video_loader_decoder_gpu.cc index 80fba740f9e..ba57079efc3 100644 --- a/dali/operators/reader/loader/video/video_loader_decoder_gpu.cc +++ b/dali/operators/reader/loader/video/video_loader_decoder_gpu.cc @@ -42,6 +42,7 @@ void VideoLoaderDecoderGpu::InitCudaStream() { { nvml::Init(); static float driver_version = nvml::GetDriverVersion(); + nvml::Shutdown(); if (driver_version > 460 && driver_version < 470.21) { 
DALI_WARN_ONCE("Warning: Decoding on a default stream. Performance may be affected."); return; diff --git a/dali/operators/reader/video_reader_op_test.cc b/dali/operators/reader/video_reader_op_test.cc index 8ad5728ad39..857189ceb4a 100644 --- a/dali/operators/reader/video_reader_op_test.cc +++ b/dali/operators/reader/video_reader_op_test.cc @@ -125,6 +125,7 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) { #if NVML_ENABLED nvml::Init(); driverVersion = nvml::GetDriverVersion(); + nvml::Shutdown(); #endif @@ -188,10 +189,6 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) { FAIL() << "Unexpected label"; } } - -#if NVML_ENABLED - nvml::Shutdown(); -#endif } TEST_F(VideoReaderTest, PackedBFrames) { diff --git a/dali/operators/sequence/optical_flow/optical_flow.h b/dali/operators/sequence/optical_flow/optical_flow.h index 7c34662fff1..34453feb053 100644 --- a/dali/operators/sequence/optical_flow/optical_flow.h +++ b/dali/operators/sequence/optical_flow/optical_flow.h @@ -78,7 +78,11 @@ class OpticalFlow : public StatelessOperator { #endif } - ~OpticalFlow(); + ~OpticalFlow() { +#if NVML_ENABLED + nvml::Shutdown(); +#endif + } DISABLE_COPY_MOVE_ASSIGN(OpticalFlow); protected: