Fixes #547, fixes #548, fixes #549, fixes #550: Launch configuration validation code revamp

* Validation code is now consolidated in `launch_configuration.hpp` and the multi-wrapper implementations file
* Now performing more validation at more opportunities before kernel launch
* Now ensuring the launch config dimensions don't result in a zero volume
* Now using `get_attribute()` for the device-level maximum shared memory per block, rather than getting all properties
* Now performing a comparison in the proper direction within `detail_::validate_block_dimension_compatibility()`
eyalroz committed Oct 7, 2023
1 parent ef07b10 commit e51f7a7
Showing 7 changed files with 217 additions and 98 deletions.
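As a rough illustration of the fourth bullet above -- querying a single device attribute rather than retrieving all device properties -- here is a minimal sketch. It is not part of the commit; it assumes the library's umbrella header `<cuda/api.hpp>`, and uses `cuda::device::get()` and `device_t::get_attribute()` only because both appear elsewhere in this diff.

#include <cuda/api.hpp>
#include <iostream>

int main()
{
    auto device = cuda::device::get(0);
    // Query only the attribute of interest, instead of fetching the full properties structure:
    auto max_shmem_per_block = device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK);
    std::cout << "Maximum shared memory per block: " << max_shmem_per_block << " bytes\n";
}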
17 changes: 17 additions & 0 deletions examples/by_api_module/error_handling.cu
@@ -32,6 +32,23 @@ int main(int, char **)
die_("An error was outstanding, despite our not having committed any 'sticky' errors)");
}

cuda::device::current::set(cuda::device::get(0));
auto device = cuda::device::current::get();

bool got_expected_exception = false;
try {
cuda::launch_configuration_t lc = cuda::launch_config_builder()
.overall_size(2048)
.block_dimensions(15000) // Note: higher than the possible maximum for known CUDA devices
.build();
(void) lc;
} catch (::std::invalid_argument& ex) {
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Should have gotten an ::std::invalid_argument exception about a launch configuration, but - didn't");
}

std::cout << "SUCCESS\n";
return EXIT_SUCCESS;
}
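For contrast, a configuration that passes the new checks could be built with the same two builder methods used in this example. This is a hedged sketch, not part of the changed file; the grid dimension would presumably be resolved via the `div_rounding_up` helper shown further down in this diff.

auto lc = cuda::launch_config_builder()
    .overall_size(2048)      // 2048 threads in total
    .block_dimensions(256)   // 256 threads per block
    .build();                // grid dimension resolved to 2048 / 256 = 8 blocks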
6 changes: 6 additions & 0 deletions src/cuda/api/error.hpp
@@ -202,12 +202,18 @@ constexpr inline bool operator!=(const named_t& lhs, const status_t& rhs) noexce
/**
* @brief Determine whether the API call returning the specified status had succeeded
*/
///@{
constexpr bool is_success(status_t status) { return status == static_cast<status_t>(status::success); }
constexpr bool is_success(cudaError_t status) { return static_cast<status_t>(status) == static_cast<status_t>(status::success); }
///@}

/**
* @brief Determine whether the API call returning the specified status had failed
*/
///@{
constexpr bool is_failure(status_t status) { return not is_success(status); }
constexpr bool is_failure(cudaError_t status) { return is_failure(static_cast<status_t>(status)); }
///@}

/**
* Obtain a brief textual explanation for a specified kind of CUDA Runtime API status
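A small usage sketch of the added overloads (not taken from the commit; `cudaDeviceSynchronize()` is just an arbitrary Runtime API call returning a `cudaError_t`, and the sketch assumes these functions sit, like the rest of error.hpp, in the top-level `cuda` namespace):

cudaError_t raw_status = cudaDeviceSynchronize();
if (cuda::is_failure(raw_status)) {
    // The cudaError_t overload accepts the raw status directly,
    // without first converting it to the wrapper's status_t type.
}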
2 changes: 1 addition & 1 deletion src/cuda/api/kernel.hpp
@@ -97,7 +97,7 @@ inline attribute_value_t get_attribute_in_current_context(handle_t handle, attri
* way.
*
* @note The association of a `kernel_t` with an individual device or context is somewhat
* tenuous. That is, the same function could be used with any other compatible device;
* tenuous. That is, the same function could be used with any other validate_block_dimensions_compatibility device;
* However, many/most of the features, attributes and settings are context-specific
* or device-specific.
*
2 changes: 1 addition & 1 deletion src/cuda/api/kernel_launch.hpp
@@ -151,7 +151,7 @@ void enqueue_raw_kernel_launch_in_current_context(
static_assert(::std::is_function<decayed_kf_type>::value or is_function_ptr<decayed_kf_type>::value,
"Only a bona fide function can be launched as a CUDA kernel");
#ifndef NDEBUG
detail_::validate(launch_configuration);
validate(launch_configuration);
#endif
if (launch_configuration.block_cooperation == thread_blocks_may_not_cooperate) {
// regular plain vanilla launch
133 changes: 46 additions & 87 deletions src/cuda/api/launch_config_builder.hpp
@@ -44,6 +44,20 @@ inline dimensions_t div_rounding_up(overall_dimensions_t overall_dims, block_dim

} // namespace grid

namespace detail_ {

static void validate_all_dimension_compatibility(
grid::block_dimensions_t block,
grid::dimensions_t grid,
grid::overall_dimensions_t overall)
{
if (grid * block != overall) {
throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
}
}

} // namespace detail_

class launch_config_builder_t {
public:
void resolve_dimensions() {
@@ -194,132 +208,76 @@ class launch_config_builder_t {

launch_config_builder_t& operator=(launch_configuration_t config)
{
thread_block_cooperation = config.block_cooperation;
dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
#ifndef NDEBUG
block_dims_acceptable_to_kernel_or_device(config.dimensions.block);
detail_::validate(config);
if (kernel_) { detail_::validate_compatibility(*kernel_, config); }
if (device_) { detail_::validate_compatibility(device(), config); }
#endif
thread_block_cooperation = config.block_cooperation;
dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
dimensions(config.dimensions);
return *this;
}

#ifndef NDEBUG
static void compatible(
static void validate_compatibility(
const kernel_t* kernel_ptr,
memory::shared::size_t shared_mem_size)
{
if (kernel_ptr == nullptr) { return; }
if (shared_mem_size == 0) { return; }
auto max_shared = kernel_ptr->get_maximum_dynamic_shared_memory_per_block();
if (shared_mem_size > max_shared) {
throw ::std::invalid_argument("Requested dynamic shared memory size "
+ ::std::to_string(shared_mem_size) + " exceeds kernel's maximum allowed value of "
+ ::std::to_string(max_shared));
}
detail_::validate_compatibility(*kernel_ptr, shared_mem_size);
}

static void compatible(
static void validate_compatibility(
optional<device::id_t> maybe_device_id,
memory::shared::size_t shared_mem_size)
{
if (not maybe_device_id) { return; }
if (shared_mem_size == 0) { return; }
auto max_shared = device(maybe_device_id).properties().max_shared_memory_per_block();
if (shared_mem_size > max_shared) {
throw ::std::invalid_argument(
"Requested dynamic shared memory size " + ::std::to_string(shared_mem_size)
+ " exceeds the device maximum of " + ::std::to_string(max_shared));
}
detail_::validate_compatibility(device(maybe_device_id), shared_mem_size);
}

void validate_dynamic_shared_memory_size(memory::shared::size_t size)
{
compatible(kernel_, size);
compatible(device_, size);
validate_compatibility(kernel_, size);
validate_compatibility(device_, size);
}

// Note: This ignores the value of dimensions.grid and dimensions.overall
static void compatible(
static void validate_block_dimension_compatibility(
const kernel_t* kernel_ptr,
grid::block_dimensions_t block_dims)
{
if (kernel_ptr == nullptr) { return; }
auto max_block_size = kernel_ptr->maximum_threads_per_block();
auto volume = block_dims.volume();
if (volume > max_block_size) {
throw ::std::invalid_argument(
"specified block dimensions result in blocks of size " + ::std::to_string(volume)
+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+ " for " + kernel::detail_::identify(*kernel_ptr));
}
return detail_::validate_block_dimension_compatibility(*kernel_ptr, block_dims);
}

static void compatible(
static void validate_block_dimension_compatibility(
optional<device::id_t> maybe_device_id,
grid::block_dimensions_t block_dims)
{
if (not maybe_device_id) { return; }
auto dev = device(maybe_device_id);
auto max_block_size = dev.maximum_threads_per_block();
auto volume = block_dims.volume();
if (volume > max_block_size) {
throw ::std::invalid_argument(
"specified block dimensions result in blocks of size " + ::std::to_string(volume)
+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+ " for " + device::detail_::identify(dev.id()));
}
auto dim_maxima = grid::block_dimensions_t{
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)),
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)),
static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z))
};
auto check =
[dev](grid::block_dimension_t dim, grid::block_dimension_t max, const char* axis) {
if (max > dim) {
throw ::std::invalid_argument(
::std::string("specified block ") + axis + "-axis dimension " + ::std::to_string(dim)
+ " exceeds the maximum supported " + axis + " dimension of " + ::std::to_string(max)
+ " for " + device::detail_::identify(dev.id()));
}
};
check(block_dims.x, dim_maxima.x, "X");
check(block_dims.y, dim_maxima.y, "Y");
check(block_dims.z, dim_maxima.z, "Z");
}

void block_dims_acceptable_to_kernel_or_device(grid::block_dimensions_t block_dims) const
{
compatible(kernel_, block_dims);
compatible(device_, block_dims);
}

static void dimensions_compatible(
grid::block_dimensions_t block,
grid::dimensions_t grid,
grid::overall_dimensions_t overall)
{
if (grid * block != overall) {
throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
}
detail_::validate_block_dimension_compatibility(device(maybe_device_id), block_dims);
}

void validate_block_dimensions(grid::block_dimensions_t block_dims) const
{
detail_::validate_block_dimensions(block_dims);
if (dimensions_.grid and dimensions_.overall) {
dimensions_compatible(block_dims, dimensions_.grid.value(), dimensions_.overall.value());
detail_::validate_all_dimension_compatibility(
block_dims, dimensions_.grid.value(), dimensions_.overall.value());
}
block_dims_acceptable_to_kernel_or_device(block_dims);
// TODO: Check divisibility
validate_block_dimension_compatibility(kernel_, block_dims);
validate_block_dimension_compatibility(device_, block_dims);
}

void validate_grid_dimensions(grid::dimensions_t grid_dims) const
{
detail_::validate_grid_dimensions(grid_dims);
if (dimensions_.block and dimensions_.overall) {
if (grid_dims * dimensions_.block.value() != dimensions_.overall.value()) {
throw ::std::invalid_argument(
"specified grid dimensions conflict with the already-specified "
"block and overall dimensions");
}
detail_::validate_all_dimension_compatibility(
dimensions_.block.value(), grid_dims, dimensions_.overall.value());
}
// TODO: Check divisibility
}

void validate_overall_dimensions(grid::overall_dimensions_t overall_dims) const
@@ -339,9 +297,9 @@ class launch_config_builder_t {
auto block_dims = dimensions_.block ?
dimensions_.block.value() :
get_composite_dimensions().block;
compatible(kernel_ptr, block_dims);
validate_block_dimension_compatibility(kernel_ptr, block_dims);
}
compatible(kernel_ptr, dynamic_shared_memory_size_);
validate_compatibility(kernel_ptr, dynamic_shared_memory_size_);
}

void validate_device(device::id_t device_id) const
@@ -350,17 +308,18 @@
auto block_dims = dimensions_.block ?
dimensions_.block.value() :
get_composite_dimensions().block;
compatible(device_id, block_dims);
validate_block_dimension_compatibility(device_id, block_dims);
}
compatible(device_id, dynamic_shared_memory_size_);
validate_compatibility(device_id, dynamic_shared_memory_size_);
}

void validate_composite_dimensions(grid::composite_dimensions_t composite_dims) const
{
compatible(kernel_, composite_dims.block);
compatible(device_, composite_dims.block);
validate_block_dimension_compatibility(kernel_, composite_dims.block);
validate_block_dimension_compatibility(device_, composite_dims.block);

// Is there anything to validate regarding the grid dims?
validate_block_dimension_compatibility(device_, composite_dims.grid);
}
#endif // ifndef NDEBUG

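The agreement rule enforced by the new `detail_::validate_all_dimension_compatibility()` helper -- overall dimensions must equal grid dimensions times block dimensions -- in a hypothetical 1-D example (illustration only, with plain integers rather than the library's dimension types):

unsigned block = 256, grid = 8;
unsigned overall = grid * block;   // 2048: block, grid and overall dimensions agree
// With overall == 2000 instead, grid * block != overall, and the helper would throw
// ::std::invalid_argument("specified block, grid and overall dimensions do not agree").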
60 changes: 51 additions & 9 deletions src/cuda/api/launch_configuration.hpp
@@ -19,6 +19,33 @@

namespace cuda {

class kernel_t;

namespace detail_ {

inline void validate_block_dimensions(grid::block_dimensions_t block_dims)
{
if (block_dims.volume() == 0) {
throw ::std::invalid_argument("Zero-volume grid-of-blocks dimensions provided");
}
}

inline void validate_grid_dimensions(grid::dimensions_t grid_dims)
{
if (grid_dims.volume() == 0) {
throw ::std::invalid_argument("Zero-volume block dimensions provided");
}
}

// Note: The reason for the verbose name is the identity of the block and grid dimension types
void validate_block_dimension_compatibility(const device_t &device, grid::block_dimensions_t block_dims);
void validate_block_dimension_compatibility(const kernel_t &kernel, grid::block_dimensions_t block_dims);

void validate_compatibility(const kernel_t &kernel, memory::shared::size_t shared_mem_size);
void validate_compatibility(const device_t &device, memory::shared::size_t shared_mem_size);

} // namespace detail_

struct launch_configuration_t {
grid::composite_dimensions_t dimensions {0 , 0 };

@@ -122,17 +149,32 @@ constexpr bool operator!=(const launch_configuration_t lhs, const launch_configu

namespace detail_ {

// Note: This will not check anything related to the device or the kernel
// with which the launch configuration is to be used
inline void validate(launch_configuration_t launch_config) noexcept(false)
{
if (launch_config.dimensions.grid.volume() == 0) {
throw ::std::invalid_argument("Launch config specifies a zero-volume grid-of-blocks");
}
if (launch_config.dimensions.block.volume() == 0) {
throw ::std::invalid_argument("Launch config specifies a zero-volume block dimensions");
}
// TODO: Consider adding device-specific validations here, like checking for
// block size limits, shared mem size limits etc - by taking an optional device
// as a parameter
validate_block_dimensions(launch_config.dimensions.block);
validate_grid_dimensions(launch_config.dimensions.grid);
}

inline void validate_compatibility(
const device_t& device,
launch_configuration_t launch_config) noexcept(false)
{
validate(launch_config);
validate_block_dimension_compatibility(device, launch_config.dimensions.block);
// Uncomment if we actually get such checks
// validate_grid_dimension_compatibility(device, launch_config.dimensions.grid);
}

inline void validate_compatibility(
const kernel_t& kernel,
launch_configuration_t launch_config) noexcept(false)
{
validate(launch_config);
validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
// Uncomment if we actually get such checks
// validate_grid_dimension_compatibility(kernel, launch_config.dimensions.grid);
}

} // namespace detail_
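A sketch of how the new device-level validator might be called before a launch (hypothetical; `device` and `launch_config` are assumed to have been obtained as in the example file above, and the `detail_::` functions are those declared in this file):

#ifndef NDEBUG
// Device- and kernel-agnostic checks (zero-volume block or grid dimensions):
cuda::detail_::validate(launch_config);
// Device-specific checks, e.g. block dimensions against the device's limits:
cuda::detail_::validate_compatibility(device, launch_config);
#endif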
