Fixes #547, fixes #548, fixes #549, fixes #550: Launch configuration validation code revamp

* Validation code is now consolidated in `launch_configuration.hpp` and the multi-wrapper implementations file
* Now performing more validation at more opportunities before kernel launch
* Now ensuring the launch config dimensions don't result in a zero volume
* Now using `get_attribute()` for the device-level maximum shared memory per block, rather than getting all properties
* Now performing a comparison in the proper direction within `detail_::validate_block_dimension_compatibility()`
eyalroz committed Oct 7, 2023
1 parent ef07b10 commit e51f7a7
Showing 7 changed files with 217 additions and 98 deletions.
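As a rough illustration of the fourth bullet above -- querying a single device attribute rather than retrieving all device properties -- here is a minimal sketch. It is not part of the commit; it assumes the library's umbrella header `<cuda/api.hpp>`, and uses `cuda::device::get()` and `device_t::get_attribute()` only because both appear elsewhere in this diff.

#include <cuda/api.hpp>
#include <iostream>

int main()
{
    auto device = cuda::device::get(0);
    // Query only the attribute of interest, instead of fetching the full properties structure:
    auto max_shmem_per_block = device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK);
    std::cout << "Maximum shared memory per block: " << max_shmem_per_block << " bytes\n";
}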
17 changes: 17 additions & 0 deletions examples/by_api_module/error_handling.cu
@@ -32,6 +32,23 @@ int main(int, char **)
die_("An error was outstanding, despite our not having committed any 'sticky' errors)");
}

cuda::device::current::set(cuda::device::get(0));
auto device = cuda::device::current::get();

bool got_expected_exception = false;
try {
cuda::launch_configuration_t lc = cuda::launch_config_builder()
.overall_size(2048)
.block_dimensions(15000) // Note: higher than the possible maximum for known CUDA devices
.build();
(void) lc;
} catch (::std::invalid_argument& ex) {
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Should have gotten an ::std::invalid_argument exception about a launch configuration, but - didn't");
}

std::cout << "SUCCESS\n";
return EXIT_SUCCESS;
}
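For contrast, a configuration that passes the new checks could be built with the same two builder methods used in this example. This is a hedged sketch, not part of the changed file; the grid dimension would presumably be resolved via the `div_rounding_up` helper shown further down in this diff.

auto lc = cuda::launch_config_builder()
    .overall_size(2048)      // 2048 threads in total
    .block_dimensions(256)   // 256 threads per block
    .build();                // grid dimension resolved to 2048 / 256 = 8 blocks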
6 changes: 6 additions & 0 deletions src/cuda/api/error.hpp
@@ -202,12 +202,18 @@ constexpr inline bool operator!=(const named_t& lhs, const status_t& rhs) noexce
/**
* @brief Determine whether the API call returning the specified status had succeeded
*/
///@{
constexpr bool is_success(status_t status) { return status == static_cast<status_t>(status::success); }
constexpr bool is_success(cudaError_t status) { return static_cast<status_t>(status) == static_cast<status_t>(status::success); }
///@}

/**
* @brief Determine whether the API call returning the specified status had failed
*/
///@{
constexpr bool is_failure(status_t status) { return not is_success(status); }
constexpr bool is_failure(cudaError_t status) { return is_failure(static_cast<status_t>(status)); }
///@}

/**
* Obtain a brief textual explanation for a specified kind of CUDA Runtime API status
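A small usage sketch of the added overloads (not taken from the commit; `cudaDeviceSynchronize()` is just an arbitrary Runtime API call returning a `cudaError_t`, and the sketch assumes these functions sit, like the rest of error.hpp, in the top-level `cuda` namespace):

cudaError_t raw_status = cudaDeviceSynchronize();
if (cuda::is_failure(raw_status)) {
    // The cudaError_t overload accepts the raw status directly,
    // without first converting it to the wrapper's status_t type.
}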
2 changes: 1 addition & 1 deletion src/cuda/api/kernel.hpp
@@ -97,7 +97,7 @@ inline attribute_value_t get_attribute_in_current_context(handle_t handle, attri
* way.
*
* @note The association of a `kernel_t` with an individual device or context is somewhat
* tenuous. That is, the same function could be used with any other compatible device;
* tenuous. That is, the same function could be used with any other validate_block_dimensions_compatibility device;
* However, many/most of the features, attributes and settings are context-specific
* or device-specific.
*
2 changes: 1 addition & 1 deletion src/cuda/api/kernel_launch.hpp
@@ -151,7 +151,7 @@ void enqueue_raw_kernel_launch_in_current_context(
static_assert(::std::is_function<decayed_kf_type>::value or is_function_ptr<decayed_kf_type>::value,
"Only a bona fide function can be launched as a CUDA kernel");
#ifndef NDEBUG
detail_::validate(launch_configuration);
validate(launch_configuration);
#endif
if (launch_configuration.block_cooperation == thread_blocks_may_not_cooperate) {
// regular plain vanilla launch
133 changes: 46 additions & 87 deletions src/cuda/api/launch_config_builder.hpp
@@ -44,6 +44,20 @@ inline dimensions_t div_rounding_up(overall_dimensions_t overall_dims, block_dim

} // namespace grid

namespace detail_ {

static void validate_all_dimension_compatibility(
grid::block_dimensions_t block,
grid::dimensions_t grid,
grid::overall_dimensions_t overall)
{
if (grid * block != overall) {
throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
}
}

} // namespace detail_

class launch_config_builder_t {
public:
void resolve_dimensions() {
@@ -194,132 +208,76 @@ class launch_config_builder_t {

launch_config_builder_t& operator=(launch_configuration_t config)
{
thread_block_cooperation = config.block_cooperation;
dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
#ifndef NDEBUG
block_dims_acceptable_to_kernel_or_device(config.dimensions.block);
detail_::validate(config);
if (kernel_) { detail_::validate_compatibility(*kernel_, config); }
if (device_) { detail_::validate_compatibility(device(), config); }
#endif
thread_block_cooperation = config.block_cooperation;
dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
dimensions(config.dimensions);
return *this;
}

#ifndef NDEBUG
static void compatible(
static void validate_compatibility(
const kernel_t* kernel_ptr,
memory::shared::size_t shared_mem_size)
{
if (kernel_ptr == nullptr) { return; }
if (shared_mem_size == 0) { return; }
auto max_shared = kernel_ptr->get_maximum_dynamic_shared_memory_per_block();
if (shared_mem_size > max_shared) {
throw ::std::invalid_argument("Requested dynamic shared memory size "
+ ::std::to_string(shared_mem_size) + " exceeds kernel's maximum allowed value of "
+ ::std::to_string(max_shared));
}
detail_::validate_compatibility(*kernel_ptr, shared_mem_size);
}

static void compatible(
static void validate_compatibility(
optional<device::id_t> maybe_device_id,
memory::shared::size_t shared_mem_size)
{
if (not maybe_device_id) { return; }
if (shared_mem_size == 0) { return; }
auto max_shared = device(maybe_device_id).properties().max_shared_memory_per_block();
if (shared_mem_size > max_shared) {
throw ::std::invalid_argument(
"Requested dynamic shared memory size " + ::std::to_string(shared_mem_size)
+ " exceeds the device maximum of " + ::std::to_string(max_shared));
}
detail_::validate_compatibility(device(maybe_device_id), shared_mem_size);
}

void validate_dynamic_shared_memory_size(memory::shared::size_t size)
{
compatible(kernel_, size);
compatible(device_, size);
validate_compatibility(kernel_, size);
validate_compatibility(device_, size);
}

// Note: This ignores the value of dimensions.grid and dimensions.overall
static void compatible(
static void validate_block_dimension_compatibility(
const kernel_t* kernel_ptr,
grid::block_dimensions_t block_dims)
{
if (kernel_ptr == nullptr) { return; }
auto max_block_size = kernel_ptr->maximum_threads_per_block();
auto volume = block_dims.volume();
if (volume > max_block_size) {
throw ::std::invalid_argument(
"specified block dimensions result in blocks of size " + ::std::to_string(volume)
+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+ " for " + kernel::detail_::identify(*kernel_ptr));
}
return detail_::validate_block_dimension_compatibility(*kernel_ptr, block_dims);
}

static void compatible(
static void validate_block_dimension_compatibility(
optional<device::id_t> maybe_device_id,
grid::block_dimensions_t block_dims)
{
if (not maybe_device_id) { return; }
auto dev = device(maybe_device_id);
auto max_block_size = dev.maximum_threads_per_block();
auto volume = block_dims.volume();
if (volume > max_block_size) {
throw ::std::invalid_argument(
"specified block dimensions result in blocks of size " + ::std::to_string(volume)
+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+ " for " + device::detail_::identify(dev.id()));
}
auto dim_maxima = grid::block_dimensions_t{
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)),
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)),
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z))
};
auto check =
[dev](grid::block_dimension_t dim, grid::block_dimension_t max, const char* axis) {
if (max > dim) {
throw ::std::invalid_argument(
::std::string("specified block ") + axis + "-axis dimension " + ::std::to_string(dim)
+ " exceeds the maximum supported " + axis + " dimension of " + ::std::to_string(max)
+ " for " + device::detail_::identify(dev.id()));
}
};
check(block_dims.x, dim_maxima.x, "X");
check(block_dims.y, dim_maxima.y, "Y");
check(block_dims.z, dim_maxima.z, "Z");
}

void block_dims_acceptable_to_kernel_or_device(grid::block_dimensions_t block_dims) const
{
compatible(kernel_, block_dims);
compatible(device_, block_dims);
}

static void dimensions_compatible(
grid::block_dimensions_t block,
grid::dimensions_t grid,
grid::overall_dimensions_t overall)
{
if (grid * block != overall) {
throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
}
detail_::validate_block_dimension_compatibility(device(maybe_device_id), block_dims);
}

void validate_block_dimensions(grid::block_dimensions_t block_dims) const
{
detail_::validate_block_dimensions(block_dims);
if (dimensions_.grid and dimensions_.overall) {
dimensions_compatible(block_dims, dimensions_.grid.value(), dimensions_.overall.value());
detail_::validate_all_dimension_compatibility(
block_dims, dimensions_.grid.value(), dimensions_.overall.value());
}
block_dims_acceptable_to_kernel_or_device(block_dims);
// TODO: Check divisibility
validate_block_dimension_compatibility(kernel_, block_dims);
validate_block_dimension_compatibility(device_, block_dims);
}

void validate_grid_dimensions(grid::dimensions_t grid_dims) const
{
detail_::validate_grid_dimensions(grid_dims);
if (dimensions_.block and dimensions_.overall) {
if (grid_dims * dimensions_.block.value() != dimensions_.overall.value()) {
throw ::std::invalid_argument(
"specified grid dimensions conflict with the already-specified "
"block and overall dimensions");
}
detail_::validate_all_dimension_compatibility(
dimensions_.block.value(), grid_dims, dimensions_.overall.value());
}
// TODO: Check divisibility
}

void validate_overall_dimensions(grid::overall_dimensions_t overall_dims) const
@@ -339,9 +297,9 @@ class launch_config_builder_t {
auto block_dims = dimensions_.block ?
dimensions_.block.value() :
get_composite_dimensions().block;
compatible(kernel_ptr, block_dims);
validate_block_dimension_compatibility(kernel_ptr, block_dims);
}
compatible(kernel_ptr, dynamic_shared_memory_size_);
validate_compatibility(kernel_ptr, dynamic_shared_memory_size_);
}

void validate_device(device::id_t device_id) const
@@ -350,17 +308,18 @@
auto block_dims = dimensions_.block ?
dimensions_.block.value() :
get_composite_dimensions().block;
compatible(device_id, block_dims);
validate_block_dimension_compatibility(device_id, block_dims);
}
compatible(device_id, dynamic_shared_memory_size_);
validate_compatibility(device_id, dynamic_shared_memory_size_);
}

void validate_composite_dimensions(grid::composite_dimensions_t composite_dims) const
{
compatible(kernel_, composite_dims.block);
compatible(device_, composite_dims.block);
validate_block_dimension_compatibility(kernel_, composite_dims.block);
validate_block_dimension_compatibility(device_, composite_dims.block);

// Is there anything to validate regarding the grid dims?
validate_block_dimension_compatibility(device_, composite_dims.grid);
}
#endif // ifndef NDEBUG

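The agreement rule enforced by the new `detail_::validate_all_dimension_compatibility()` helper -- overall dimensions must equal grid dimensions times block dimensions -- in a hypothetical 1-D example (illustration only, with plain integers rather than the library's dimension types):

unsigned block = 256, grid = 8;
unsigned overall = grid * block;   // 2048: block, grid and overall dimensions agree
// With overall == 2000 instead, grid * block != overall, and the helper would throw
// ::std::invalid_argument("specified block, grid and overall dimensions do not agree").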
60 changes: 51 additions & 9 deletions src/cuda/api/launch_configuration.hpp
@@ -19,6 +19,33 @@

namespace cuda {

class kernel_t;

namespace detail_ {

inline void validate_block_dimensions(grid::block_dimensions_t block_dims)
{
if (block_dims.volume() == 0) {
throw ::std::invalid_argument("Zero-volume grid-of-blocks dimensions provided");
}
}

inline void validate_grid_dimensions(grid::dimensions_t grid_dims)
{
if (grid_dims.volume() == 0) {
throw ::std::invalid_argument("Zero-volume block dimensions provided");
}
}

// Note: The reason for the verbose name is the identity of the block and grid dimension types
void validate_block_dimension_compatibility(const device_t &device, grid::block_dimensions_t block_dims);
void validate_block_dimension_compatibility(const kernel_t &kernel, grid::block_dimensions_t block_dims);

void validate_compatibility(const kernel_t &kernel, memory::shared::size_t shared_mem_size);
void validate_compatibility(const device_t &device, memory::shared::size_t shared_mem_size);

} // namespace detail_

struct launch_configuration_t {
grid::composite_dimensions_t dimensions {0 , 0 };

@@ -122,17 +149,32 @@ constexpr bool operator!=(const launch_configuration_t lhs, const launch_configu

namespace detail_ {

// Note: This will not check anything related to the device or the kernel
// with which the launch configuration is to be used
inline void validate(launch_configuration_t launch_config) noexcept(false)
{
if (launch_config.dimensions.grid.volume() == 0) {
throw ::std::invalid_argument("Launch config specifies a zero-volume grid-of-blocks");
}
if (launch_config.dimensions.block.volume() == 0) {
throw ::std::invalid_argument("Launch config specifies a zero-volume block dimensions");
}
// TODO: Consider adding device-specific validations here, like checking for
// block size limits, shared mem size limits etc - by taking an optional device
// as a parameter
validate_block_dimensions(launch_config.dimensions.block);
validate_grid_dimensions(launch_config.dimensions.grid);
}

inline void validate_compatibility(
const device_t& device,
launch_configuration_t launch_config) noexcept(false)
{
validate(launch_config);
validate_block_dimension_compatibility(device, launch_config.dimensions.block);
// Uncomment if we actually get such checks
// validate_grid_dimension_compatibility(device, launch_config.dimensions.grid);
}

inline void validate_compatibility(
const kernel_t& kernel,
launch_configuration_t launch_config) noexcept(false)
{
validate(launch_config);
validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
// Uncomment if we actually get such checks
// validate_grid_dimension_compatibility(kernel, launch_config.dimensions.grid);
}

} // namespace detail_
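A sketch of how the new device-level validator might be called before a launch (hypothetical; `device` and `launch_config` are assumed to have been obtained as in the example file above, and the `detail_::` functions are those declared in this file):

#ifndef NDEBUG
// Device- and kernel-agnostic checks (zero-volume block or grid dimensions):
cuda::detail_::validate(launch_config);
// Device-specific checks, e.g. block dimensions against the device's limits:
cuda::detail_::validate_compatibility(device, launch_config);
#endif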
