Skip to content

Commit

Permalink
Warning fixes:
Browse files Browse the repository at this point in the history
* Corrected some CUDA version include guards in `graph/instance.hpp` which were enabling an unused parameter in CUDA 11.0 - 11.3.x
* Explicitly cast a `size_t` to `unsigned int` in `module.hpp`, to avoid narrowing warnings
* In the asyncAPI example, now using iostream printing rather than a `printf()` with an inexact format specifier (`size_t` vs `unsigned long`)
* Now using float literals, rather than double literals, to set float variables or fill float buffers, in the bandwidthTest and jacobiCudaGraphs examples
* streamOrderedAllocation example: Explicit cast from `size_t` to `int` to avoid a warning about narrowing

graph_unused
  • Loading branch information
eyalroz committed Sep 23, 2024
1 parent 0932d68 commit 310d477
Show file tree
Hide file tree
Showing 7 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion examples/modified_cuda_samples/asyncAPI/asyncAPI.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ bool correct_output(cuda::span<const int> data, const int x)
for (size_t i = 0; i < data.size(); i++)
if (data[i] != x)
{
printf("Error! data[%lu] = %d, ref = %d\n", i, data[i], x);
std::cout << "Error! data" << i << " = " << data[i] << " ref = " << x << '\n';
return false;
}
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ int main()
cuda::memory::copy(h_aPinned, h_aPageable, bytes);
// Note: the following two instructions can be replaced with CUDA API wrappers
// calls - cuda::memory::host::zero(), but that won't improve anything
std::fill_n(h_bPageable, nElements, 0.0);
std::fill_n(h_bPinned, nElements, 0.0);
std::fill_n(h_bPageable, nElements, 0.0f);
std::fill_n(h_bPinned, nElements, 0.0f);

std::cout << "\nDevice: " << cuda::device::current::get().name() << "\n";
std::cout << "\nTransfer size (MB): " << (bytes / Mi) << "\n";
Expand Down
2 changes: 1 addition & 1 deletion examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ int main(int argc, char **argv)

createLinearSystem(A, b);

float convergence_threshold = 1.0e-2;
float convergence_threshold = 1.0e-2f;
int max_num_iterations = 4 * N_ROWS * N_ROWS;

// create timer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ int basicStreamOrderedAllocation(
auto d_c = span<float>(stream.enqueue.allocate(c.size() * sizeof(float)));
stream.enqueue.copy(d_a, a);
stream.enqueue.copy(d_b, b);
stream.enqueue.kernel_launch(vectorAddGPU, launch_config, d_a.data(), d_b.data(), d_c.data(), c.size());
stream.enqueue.kernel_launch(vectorAddGPU, launch_config, d_a.data(), d_b.data(), d_c.data(), (int) c.size());
stream.enqueue.free(d_a);
stream.enqueue.free(d_b);
stream.enqueue.copy(c, d_c);
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/graph/instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ void set_node_parameters(

inline instance_t instantiate(
const template_t& template_
#if CUDA_VERSION >= 11000
#if CUDA_VERSION >= 11040
, bool free_previous_allocations_before_relaunch = false
#endif
#if CUDA_VERSION >= 12000
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ inline void destroy(handle_t handle, context::handle_t context_handle, device::i
inline unique_span<kernel::handle_t> get_kernel_handles(handle_t module_handle, size_t num_kernels)
{
auto result = make_unique_span<kernel::handle_t>(num_kernels);
auto status = cuModuleEnumerateFunctions(result.data(), num_kernels, module_handle);
auto status = cuModuleEnumerateFunctions(result.data(), (unsigned int) num_kernels, module_handle);
throw_if_error_lazy(status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle));
return result;
}
Expand Down
4 changes: 2 additions & 2 deletions src/cuda/api/multi_wrapper_impls/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ inline void instance_t::upload(const stream_t& stream) const
#endif // CUDA_VERSION >= 11010

inline instance_t template_t::instantiate(
#if CUDA_VERSION >= 11000
#if CUDA_VERSION >= 11040
bool free_previous_allocations_before_relaunch
#endif
#if CUDA_VERSION >= 11700
Expand All @@ -112,7 +112,7 @@ inline instance_t template_t::instantiate(
{
return graph::instantiate(
*this
#if CUDA_VERSION >= 11000
#if CUDA_VERSION >= 11040
, free_previous_allocations_before_relaunch
#endif
#if CUDA_VERSION >= 11700
Expand Down

0 comments on commit 310d477

Please sign in to comment.