Skip to content

Commit

Permalink
Drop CUB APIs with a debug_synchronous parameter (#3330)
Browse files Browse the repository at this point in the history
Fixes: #3329
  • Loading branch information
bernhardmgruber authored Jan 10, 2025
1 parent 1ad31e0 commit 38e3d0d
Show file tree
Hide file tree
Showing 29 changed files with 0 additions and 3,107 deletions.
70 changes: 0 additions & 70 deletions cub/cub/device/device_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -266,24 +266,6 @@ public:
d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of SubtractLeftCopy that additionally accepts a trailing `debug_synchronous` flag.
// The flag itself is not forwarded: every other argument is handed, unchanged and in the same
// order, to the SubtractLeftCopy overload without that parameter, whose result is returned.
template <typename InputIteratorT, typename OutputIteratorT, typename DifferenceOpT, typename NumItemsT = std::uint32_t>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractLeftCopy(
  void* d_temp_storage,
  std::size_t& temp_storage_bytes,
  InputIteratorT d_input,
  OutputIteratorT d_output,
  NumItemsT num_items,
  DifferenceOpT difference_op,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status =
    SubtractLeftCopy(d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Subtracts the left element of each adjacent pair of elements residing within device-accessible memory.
//!
Expand Down Expand Up @@ -397,23 +379,6 @@ public:
d_temp_storage, temp_storage_bytes, d_input, d_input, num_items, difference_op, stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of SubtractLeft that additionally accepts a trailing `debug_synchronous` flag.
// The flag itself is not forwarded: the remaining arguments go, unchanged and in order, to the
// SubtractLeft overload without that parameter, and that overload's result is returned.
template <typename RandomAccessIteratorT, typename DifferenceOpT, typename NumItemsT = std::uint32_t>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractLeft(
  void* d_temp_storage,
  std::size_t& temp_storage_bytes,
  RandomAccessIteratorT d_input,
  NumItemsT num_items,
  DifferenceOpT difference_op,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status =
    SubtractLeft(d_temp_storage, temp_storage_bytes, d_input, num_items, difference_op, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Subtracts the right element of each adjacent pair of elements residing within device-accessible memory.
//!
Expand Down Expand Up @@ -544,24 +509,6 @@ public:
d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of SubtractRightCopy that additionally accepts a trailing `debug_synchronous` flag.
// The flag itself is not forwarded: every other argument is handed, unchanged and in the same
// order, to the SubtractRightCopy overload without that parameter, whose result is returned.
template <typename InputIteratorT, typename OutputIteratorT, typename DifferenceOpT, typename NumItemsT = std::uint32_t>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractRightCopy(
  void* d_temp_storage,
  std::size_t& temp_storage_bytes,
  InputIteratorT d_input,
  OutputIteratorT d_output,
  NumItemsT num_items,
  DifferenceOpT difference_op,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status =
    SubtractRightCopy(d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Subtracts the right element of each adjacent pair of elements residing within device-accessible memory.
//!
Expand Down Expand Up @@ -663,23 +610,6 @@ public:
return AdjacentDifference<may_alias, read_left>(
d_temp_storage, temp_storage_bytes, d_input, d_input, num_items, difference_op, stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of SubtractRight that additionally accepts a trailing `debug_synchronous` flag.
// The flag itself is not forwarded: the remaining arguments go, unchanged and in order, to the
// SubtractRight overload without that parameter, and that overload's result is returned.
//
// NumItemsT carries the same `std::uint32_t` default as the sibling SubtractLeft,
// SubtractLeftCopy and SubtractRightCopy overloads so the four template headers stay parallel.
template <typename RandomAccessIteratorT, typename DifferenceOpT, typename NumItemsT = std::uint32_t>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractRight(
  void* d_temp_storage,
  std::size_t& temp_storage_bytes,
  RandomAccessIteratorT d_input,
  NumItemsT num_items,
  DifferenceOpT difference_op,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  return SubtractRight(d_temp_storage, temp_storage_bytes, d_input, num_items, difference_op, stream);
}
#endif // _CCCL_DOXYGEN_INVOKED
};

CUB_NAMESPACE_END
246 changes: 0 additions & 246 deletions cub/cub/device/device_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -205,35 +205,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of HistogramEven (flat sample sequence) that additionally accepts a trailing
// `debug_synchronous` flag. The flag itself is not forwarded: all other arguments are passed,
// unchanged and in order, to the HistogramEven overload without that parameter.
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram,
  int num_levels,
  LevelT lower_level,
  LevelT upper_level,
  OffsetT num_samples,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = HistogramEven(
    d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes an intensity histogram from a sequence of data samples using equal-width bins.
//!
Expand Down Expand Up @@ -385,39 +356,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of HistogramEven (row-strided form taking num_row_samples, num_rows and
// row_stride_bytes) that additionally accepts a trailing `debug_synchronous` flag. The flag
// itself is not forwarded: all other arguments are passed, unchanged and in order, to the
// HistogramEven overload without that parameter.
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram,
  int num_levels,
  LevelT lower_level,
  LevelT upper_level,
  OffsetT num_row_samples,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = HistogramEven(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    lower_level,
    upper_level,
    num_row_samples,
    num_rows,
    row_stride_bytes,
    stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
//! equal-width bins.
Expand Down Expand Up @@ -587,40 +525,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of MultiHistogramEven (flat pixel sequence) that additionally accepts a trailing
// `debug_synchronous` flag. The flag itself is not forwarded: all other arguments are passed,
// unchanged and in order, to the MultiHistogramEven overload without that parameter.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT lower_level[NUM_ACTIVE_CHANNELS],
  const LevelT upper_level[NUM_ACTIVE_CHANNELS],
  OffsetT num_pixels,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = MultiHistogramEven(
    d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes per-channel intensity histograms from a sequence of
//! multi-channel "pixel" data samples using equal-width bins.
Expand Down Expand Up @@ -835,44 +739,6 @@ struct DeviceHistogram
is_byte_sample);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of MultiHistogramEven (row-strided form taking num_row_pixels, num_rows and
// row_stride_bytes) that additionally accepts a trailing `debug_synchronous` flag. The flag
// itself is not forwarded: all other arguments are passed, unchanged and in order, to the
// MultiHistogramEven overload without that parameter.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT lower_level[NUM_ACTIVE_CHANNELS],
  const LevelT upper_level[NUM_ACTIVE_CHANNELS],
  OffsetT num_row_pixels,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = MultiHistogramEven(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    lower_level,
    upper_level,
    num_row_pixels,
    num_rows,
    row_stride_bytes,
    stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @} end member group
//! @name Custom bin ranges
//! @{
Expand Down Expand Up @@ -998,26 +864,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of HistogramRange (flat sample sequence) that additionally accepts a trailing
// `debug_synchronous` flag. The flag itself is not forwarded: all other arguments are passed,
// unchanged and in order, to the HistogramRange overload without that parameter.
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram,
  int num_levels,
  const LevelT* d_levels,
  OffsetT num_samples,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = HistogramRange(
    d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_samples, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
//!
Expand Down Expand Up @@ -1156,37 +1002,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of HistogramRange (row-strided form taking num_row_samples, num_rows and
// row_stride_bytes) that additionally accepts a trailing `debug_synchronous` flag. The flag
// itself is not forwarded: all other arguments are passed, unchanged and in order, to the
// HistogramRange overload without that parameter.
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram,
  int num_levels,
  const LevelT* d_levels,
  OffsetT num_row_samples,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = HistogramRange(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    d_levels,
    num_row_samples,
    num_rows,
    row_stride_bytes,
    stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples
//! using the specified bin boundary levels.
Expand Down Expand Up @@ -1345,31 +1160,6 @@ struct DeviceHistogram
stream);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of MultiHistogramRange (flat pixel sequence) that additionally accepts a trailing
// `debug_synchronous` flag. The flag itself is not forwarded: all other arguments are passed,
// unchanged and in order, to the MultiHistogramRange overload without that parameter.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
  OffsetT num_pixels,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = MultiHistogramRange(
    d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_pixels, stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @rst
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
//! the specified bin boundary levels.
Expand Down Expand Up @@ -1573,42 +1363,6 @@ struct DeviceHistogram
is_byte_sample);
}

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
// Overload of MultiHistogramRange (row-strided form taking num_row_pixels, num_rows and
// row_stride_bytes) that additionally accepts a trailing `debug_synchronous` flag. The flag
// itself is not forwarded: all other arguments are passed, unchanged and in order, to the
// MultiHistogramRange overload without that parameter.
template <int NUM_CHANNELS,
          int NUM_ACTIVE_CHANNELS,
          typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
  void* d_temp_storage,
  size_t& temp_storage_bytes,
  SampleIteratorT d_samples,
  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
  const int num_levels[NUM_ACTIVE_CHANNELS],
  const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
  OffsetT num_row_pixels,
  OffsetT num_rows,
  size_t row_stride_bytes,
  cudaStream_t stream,
  bool debug_synchronous)
{
  // Macro defined outside this excerpt; presumably reports use of the unsupported flag
  // (confirm against its definition).
  CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

  // Delegate to the overload that has no debug_synchronous parameter.
  const cudaError_t status = MultiHistogramRange(
    d_temp_storage,
    temp_storage_bytes,
    d_samples,
    d_histogram,
    num_levels,
    d_levels,
    num_row_pixels,
    num_rows,
    row_stride_bytes,
    stream);
  return status;
}
#endif // _CCCL_DOXYGEN_INVOKED

//! @} end member group
};

Expand Down
Loading

0 comments on commit 38e3d0d

Please sign in to comment.