diff --git a/cub/cub/device/device_adjacent_difference.cuh b/cub/cub/device/device_adjacent_difference.cuh index a63ff9111e1..1af5f01f033 100644 --- a/cub/cub/device/device_adjacent_difference.cuh +++ b/cub/cub/device/device_adjacent_difference.cuh @@ -266,24 +266,6 @@ public: d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractLeftCopy( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_input, - OutputIteratorT d_output, - NumItemsT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SubtractLeftCopy(d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Subtracts the left element of each adjacent pair of elements residing within device-accessible memory. //! @@ -397,23 +379,6 @@ public: d_temp_storage, temp_storage_bytes, d_input, d_input, num_items, difference_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractLeft( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - RandomAccessIteratorT d_input, - NumItemsT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SubtractLeft(d_temp_storage, temp_storage_bytes, d_input, num_items, difference_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Subtracts the right element of each adjacent pair of elements residing within device-accessible memory. //! @@ -544,24 +509,6 @@ public: d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractRightCopy( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_input, - OutputIteratorT d_output, - NumItemsT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SubtractRightCopy(d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Subtracts the right element of each adjacent pair of elements residing within device-accessible memory. //! @@ -663,23 +610,6 @@ public: return AdjacentDifference( d_temp_storage, temp_storage_bytes, d_input, d_input, num_items, difference_op, stream); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED static CUB_RUNTIME_FUNCTION cudaError_t SubtractRight( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - RandomAccessIteratorT d_input, - NumItemsT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SubtractRight(d_temp_storage, temp_storage_bytes, d_input, num_items, difference_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_histogram.cuh b/cub/cub/device/device_histogram.cuh index cd3b922028a..b8a92334047 100644 --- a/cub/cub/device/device_histogram.cuh +++ b/cub/cub/device/device_histogram.cuh @@ -205,35 +205,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_samples, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return HistogramEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - lower_level, - upper_level, - num_samples, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes an intensity histogram from a sequence of data samples using equal-width bins. //! @@ -385,39 +356,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram, - int num_levels, - LevelT lower_level, - LevelT upper_level, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return HistogramEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - lower_level, - upper_level, - num_row_samples, - num_rows, - row_stride_bytes, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using //! equal-width bins. @@ -587,40 +525,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram[NUM_ACTIVE_CHANNELS], - const int num_levels[NUM_ACTIVE_CHANNELS], - const LevelT lower_level[NUM_ACTIVE_CHANNELS], - const LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return MultiHistogramEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - lower_level, - upper_level, - num_pixels, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes per-channel intensity histograms from a sequence of //! multi-channel "pixel" data samples using equal-width bins. @@ -835,44 +739,6 @@ struct DeviceHistogram is_byte_sample); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram[NUM_ACTIVE_CHANNELS], - const int num_levels[NUM_ACTIVE_CHANNELS], - const LevelT lower_level[NUM_ACTIVE_CHANNELS], - const LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return MultiHistogramEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - lower_level, - upper_level, - num_row_pixels, - num_rows, - row_stride_bytes, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group //! @name Custom bin ranges //! @{ @@ -998,26 +864,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram, - int num_levels, - const LevelT* d_levels, - OffsetT num_samples, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return HistogramRange( - d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_samples, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. //! @@ -1156,37 +1002,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram, - int num_levels, - const LevelT* d_levels, - OffsetT num_row_samples, - OffsetT num_rows, - size_t row_stride_bytes, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return HistogramRange( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - d_levels, - num_row_samples, - num_rows, - row_stride_bytes, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples //! using the specified bin boundary levels. @@ -1345,31 +1160,6 @@ struct DeviceHistogram stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram[NUM_ACTIVE_CHANNELS], - const int num_levels[NUM_ACTIVE_CHANNELS], - const LevelT* const d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_pixels, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return MultiHistogramRange( - d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_pixels, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using //! the specified bin boundary levels. @@ -1573,42 +1363,6 @@ struct DeviceHistogram is_byte_sample); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_histogram[NUM_ACTIVE_CHANNELS], - const int num_levels[NUM_ACTIVE_CHANNELS], - const LevelT* const d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - size_t row_stride_bytes, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return MultiHistogramRange( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_histogram, - num_levels, - d_levels, - num_row_pixels, - num_rows, - row_stride_bytes, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //@} end member group }; diff --git a/cub/cub/device/device_merge_sort.cuh b/cub/cub/device/device_merge_sort.cuh index d42f6033a7e..9fd714746d4 100644 --- a/cub/cub/device/device_merge_sort.cuh +++ b/cub/cub/device/device_merge_sort.cuh @@ -245,25 +245,6 @@ public: return SortPairsNoNVTX(d_temp_storage, temp_storage_bytes, d_keys, d_items, num_items, compare_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyIteratorT d_keys, - ValueIteratorT d_items, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, temp_storage_bytes, d_keys, d_items, num_items, compare_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * @brief Sorts items using a merge sorting method. * @@ -410,40 +391,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsCopy( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyInputIteratorT d_input_keys, - ValueInputIteratorT d_input_items, - KeyIteratorT d_output_keys, - ValueIteratorT d_output_items, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsCopy( - d_temp_storage, - temp_storage_bytes, - d_input_keys, - d_input_items, - d_output_keys, - d_output_items, - num_items, - compare_op, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -569,24 +516,6 @@ public: return SortKeysNoNVTX(d_temp_storage, temp_storage_bytes, d_keys, num_items, compare_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyIteratorT d_keys, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, temp_storage_bytes, d_keys, num_items, compare_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -728,25 +657,6 @@ public: d_temp_storage, temp_storage_bytes, d_input_keys, d_output_keys, num_items, compare_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysCopy( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyInputIteratorT d_input_keys, - KeyIteratorT d_output_keys, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysCopy( - d_temp_storage, temp_storage_bytes, d_input_keys, d_output_keys, num_items, compare_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * @brief Sorts items using a merge sorting method. * @@ -856,25 +766,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_items, num_items, compare_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyIteratorT d_keys, - ValueIteratorT d_items, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortPairs( - d_temp_storage, temp_storage_bytes, d_keys, d_items, num_items, compare_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * @brief Sorts items using a merge sorting method. * @@ -975,24 +866,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, num_items, compare_op, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyIteratorT d_keys, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortKeys( - d_temp_storage, temp_storage_bytes, d_keys, num_items, compare_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * @brief Sorts items using a merge sorting method. * diff --git a/cub/cub/device/device_partition.cuh b/cub/cub/device/device_partition.cuh index 1b9eef947fa..768d8413e6f 100644 --- a/cub/cub/device/device_partition.cuh +++ b/cub/cub/device/device_partition.cuh @@ -222,30 +222,6 @@ struct DevicePartition stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Flagged( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - FlagIterator d_flags, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Flagged( - d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Uses the ``select_op`` functor to split the corresponding items from ``d_in`` into //! a partitioned sequence ``d_out``. The total number of items copied into the first partition is written @@ -404,30 +380,6 @@ struct DevicePartition stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t - If(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - NumItemsT num_items, - SelectOp select_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return If( - d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t - If(void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_in, - FirstOutputIteratorT d_first_part_out, - SecondOutputIteratorT d_second_part_out, - UnselectedOutputIteratorT d_unselected_out, - NumSelectedIteratorT d_num_selected_out, - NumItemsT num_items, - SelectFirstPartOp select_first_part_op, - SelectSecondPartOp select_second_part_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return If( - d_temp_storage, - temp_storage_bytes, - d_in, - d_first_part_out, - d_second_part_out, - d_unselected_out, - d_num_selected_out, - num_items, - select_first_part_op, - select_second_part_op, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_radix_sort.cuh b/cub/cub/device/device_radix_sort.cuh index 32156b75e34..25798297682 100644 --- a/cub/cub/device/device_radix_sort.cuh +++ b/cub/cub/device/device_radix_sort.cuh @@ -362,37 +362,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - begin_bit, - end_bit, - stream); - } -#endif - //! @rst //! Sorts key-value pairs into ascending order using :math:`\approx 2N` auxiliary storage. //! @@ -817,26 +786,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, stream); - } -#endif - //! @rst //! Sorts key-value pairs into ascending order using :math:`\approx N` auxiliary storage. //! @@ -1251,37 +1200,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - begin_bit, - end_bit, - stream); - } -#endif - //! @rst //! Sorts key-value pairs into descending order using :math:`\approx 2N` auxiliary storage. //! @@ -1705,26 +1623,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, stream); - } -#endif - //! @rst //! Sorts key-value pairs into descending order using :math:`\approx N` auxiliary storage. //! @@ -2411,26 +2309,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items, begin_bit, end_bit, stream); - } -#endif - //! @brief Sorts keys into ascending order. (`~N` auxiliary storage required). //! //! @par @@ -2551,24 +2429,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items, begin_bit, end_bit, stream); - } -#endif - //! @rst //! Sorts keys into ascending order using :math:`\approx N` auxiliary storage. //! @@ -2944,26 +2804,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items, begin_bit, end_bit, stream); - } -#endif - //! @rst //! Sorts keys into descending order using :math:`\approx 2N` auxiliary storage. //! @@ -3344,25 +3184,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - NumItemsT num_items, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, temp_storage_bytes, d_keys, num_items, begin_bit, end_bit, stream); - } -#endif - //! @rst //! Sorts keys into descending order using :math:`\approx N` auxiliary storage. //! diff --git a/cub/cub/device/device_reduce.cuh b/cub/cub/device/device_reduce.cuh index 0841662261d..a5c3de4a313 100644 --- a/cub/cub/device/device_reduce.cuh +++ b/cub/cub/device/device_reduce.cuh @@ -227,26 +227,6 @@ struct DeviceReduce d_temp_storage, temp_storage_bytes, d_in, d_out, static_cast(num_items), reduction_op, init, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t Reduce( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - ReductionOpT reduction_op, - T init, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Reduce( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide sum using the addition (``+``) operator. //! @@ -352,23 +332,6 @@ struct DeviceReduce stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Sum(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide minimum using the less-than (``<``) operator. //! @@ -478,23 +441,6 @@ struct DeviceReduce stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Min(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Finds the first device-wide minimum using the less-than (``<``) operator and also returns the index of that item. //! @@ -754,23 +700,6 @@ struct DeviceReduce d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMin(), initial_value, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ::cuda::std::int64_t num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide maximum using the greater-than (``>``) operator. //! @@ -878,23 +807,6 @@ struct DeviceReduce stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Max(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Max(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Finds the first device-wide maximum using the greater-than (``>``) operator and also returns the index of that //! item. @@ -1158,23 +1070,6 @@ struct DeviceReduce d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMax(), initial_value, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ::cuda::std::int64_t num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Fuses transform and reduce operations //! @@ -1498,47 +1393,6 @@ struct DeviceReduce static_cast(num_items), stream); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t ReduceByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - UniqueOutputIteratorT d_unique_out, - ValuesInputIteratorT d_values_in, - AggregatesOutputIteratorT d_aggregates_out, - NumRunsOutputIteratorT d_num_runs_out, - ReductionOpT reduction_op, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ReduceByKey( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_unique_out, - d_values_in, - d_aggregates_out, - d_num_runs_out, - reduction_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_run_length_encode.cuh b/cub/cub/device/device_run_length_encode.cuh index 73512b2296f..751cdd46424 100644 --- a/cub/cub/device/device_run_length_encode.cuh +++ b/cub/cub/device/device_run_length_encode.cuh @@ -230,29 +230,6 @@ struct DeviceRunLengthEncode stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Encode( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - UniqueOutputIteratorT d_unique_out, - LengthsOutputIteratorT d_counts_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Encode( - d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Enumerates the starting offsets and lengths of all non-trivial runs //! (of ``length > 1``) of same-valued keys in the sequence ``d_in``. @@ -384,30 +361,6 @@ struct DeviceRunLengthEncode num_items, stream); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t - NonTrivialRuns( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OffsetsOutputIteratorT d_offsets_out, - LengthsOutputIteratorT d_lengths_out, - NumRunsOutputIteratorT d_num_runs_out, - int num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return NonTrivialRuns( - d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_scan.cuh b/cub/cub/device/device_scan.cuh index 1c06c83af66..0c1638bd955 100644 --- a/cub/cub/device/device_scan.cuh +++ b/cub/cub/device/device_scan.cuh @@ -207,24 +207,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveSum( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveSum( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix sum in-place. //! The value of ``0`` is applied as the initial value, and is assigned to ``*d_data``. @@ -301,22 +283,6 @@ struct DeviceScan return ExclusiveSum(d_temp_storage, temp_storage_bytes, d_data, d_data, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveSum( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveSum(d_temp_storage, temp_storage_bytes, d_data, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix scan using the specified //! binary ``scan_op`` functor. The ``init_value`` value is applied as @@ -449,26 +415,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - InitValueT init_value, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveScan( - d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix scan using the specified //! binary ``scan_op`` functor. The ``init_value`` value is applied as @@ -578,25 +524,6 @@ struct DeviceScan return ExclusiveScan(d_temp_storage, temp_storage_bytes, d_data, d_data, scan_op, init_value, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - ScanOpT scan_op, - InitValueT init_value, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveScan( - d_temp_storage, temp_storage_bytes, d_data, scan_op, init_value, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix scan using the specified //! binary ``scan_op`` functor. The ``init_value`` value is provided as a future value. @@ -738,31 +665,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - FutureValue init_value, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveScan( - d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix scan using the specified binary ``scan_op`` functor. //! The ``init_value`` value is provided as a future value. @@ -879,29 +781,6 @@ struct DeviceScan return ExclusiveScan(d_temp_storage, temp_storage_bytes, d_data, d_data, scan_op, init_value, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - ScanOpT scan_op, - FutureValue init_value, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveScan( - d_temp_storage, temp_storage_bytes, d_data, scan_op, init_value, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group //! @name Inclusive scans //! @{ @@ -1002,24 +881,6 @@ struct DeviceScan d_temp_storage, temp_storage_bytes, d_in, d_out, ::cuda::std::plus<>{}, NullType{}, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveSum( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveSum( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide inclusive prefix sum in-place. //! @@ -1095,22 +956,6 @@ struct DeviceScan return InclusiveSum(d_temp_storage, temp_storage_bytes, d_data, d_data, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveSum( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveSum(d_temp_storage, temp_storage_bytes, d_data, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide inclusive prefix scan using the specified binary ``scan_op`` functor. //! @@ -1332,25 +1177,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveScan( - d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide inclusive prefix scan using the specified binary ``scan_op`` functor. //! @@ -1450,23 +1276,6 @@ struct DeviceScan return InclusiveScan(d_temp_storage, temp_storage_bytes, d_data, d_data, scan_op, num_items, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - ScanOpT scan_op, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveScan(d_temp_storage, temp_storage_bytes, d_data, scan_op, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix sum-by-key with key equality //! defined by ``equality_op``. The value of ``0`` is applied as the initial @@ -1607,30 +1416,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template , - typename NumItemsT = std::uint32_t> - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveSumByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - NumItemsT num_items, - EqualityOpT equality_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveSumByKey( - d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide exclusive prefix scan-by-key using the //! specified binary ``scan_op`` functor. The key equality is defined by @@ -1813,48 +1598,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template , - typename NumItemsT = std::uint32_t> - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScanByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - ScanOpT scan_op, - InitValueT init_value, - NumItemsT num_items, - EqualityOpT equality_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ExclusiveScanByKey( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_values_in, - d_values_out, - scan_op, - init_value, - num_items, - equality_op, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide inclusive prefix sum-by-key with key equality defined by ``equality_op``. //! @@ -1989,30 +1732,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template , - typename NumItemsT = std::uint32_t> - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveSumByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - NumItemsT num_items, - EqualityOpT equality_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveSumByKey( - d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide inclusive prefix scan-by-key using the //! specified binary ``scan_op`` functor. The key equality is defined by ``equality_op``. @@ -2179,32 +1898,6 @@ struct DeviceScan stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template , - typename NumItemsT = std::uint32_t> - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t InclusiveScanByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - ScanOpT scan_op, - NumItemsT num_items, - EqualityOpT equality_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return InclusiveScanByKey( - d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, scan_op, num_items, equality_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group }; diff --git a/cub/cub/device/device_segmented_radix_sort.cuh b/cub/cub/device/device_segmented_radix_sort.cuh index 6bde88ed9da..ae47119bfa3 100644 --- a/cub/cub/device/device_segmented_radix_sort.cuh +++ b/cub/cub/device/device_segmented_radix_sort.cuh @@ -264,43 +264,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into ascending order. (``~N`` auxiliary storage required) //! @@ -475,39 +438,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into descending order. (``~2N`` auxiliary storage required). //! @@ -682,43 +612,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into descending order. (``~N`` auxiliary storage required). //! @@ -897,39 +790,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group //! @name Keys-only //! @{ @@ -1091,39 +951,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into ascending order. (``~N`` auxiliary storage required). //! @@ -1290,37 +1117,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, - temp_storage_bytes, - d_keys, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into descending order. (``~2N`` auxiliary storage required). //! @@ -1478,39 +1274,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into descending order. (``~N`` auxiliary storage required). //! @@ -1675,37 +1438,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, - temp_storage_bytes, - d_keys, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group }; diff --git a/cub/cub/device/device_segmented_reduce.cuh b/cub/cub/device/device_segmented_reduce.cuh index 591930ad01c..5eac51ee742 100644 --- a/cub/cub/device/device_segmented_reduce.cuh +++ b/cub/cub/device/device_segmented_reduce.cuh @@ -271,42 +271,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t Reduce( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - ReductionOpT reduction_op, - T initial_value, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Reduce( - d_temp_storage, - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_begin_offsets, - d_end_offsets, - reduction_op, - initial_value, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide segmented sum using the addition (``+``) operator. //! @@ -425,26 +389,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Sum(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Sum( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide segmented minimum using the less-than (``<``) operator. //! @@ -571,26 +515,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Min(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Min( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Finds the first device-wide minimum in each segment using the //! less-than (``<``) operator, also returning the in-segment index of that item. @@ -741,26 +665,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ArgMin( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Computes a device-wide segmented maximum using the greater-than (``>``) operator. //! @@ -876,26 +780,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t - Max(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Max( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Finds the first device-wide maximum in each segment using the //! greater-than (``>``) operator, also returning the in-segment index of that item @@ -1048,26 +932,6 @@ public: initial_value, stream); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return ArgMax( - d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_segmented_sort.cuh b/cub/cub/device/device_segmented_sort.cuh index 26b55f9988e..1fb5656b82f 100644 --- a/cub/cub/device/device_segmented_sort.cuh +++ b/cub/cub/device/device_segmented_sort.cuh @@ -305,35 +305,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -502,35 +473,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -701,26 +643,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeys( - d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -892,26 +814,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortKeysDescending( - d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into ascending order. Approximately //! ``num_items + 2 * num_segments`` auxiliary storage required. @@ -1048,35 +950,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortKeys( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into descending order. //! Approximately ``num_items + 2 * num_segments`` auxiliary storage required. @@ -1213,35 +1086,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeysDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortKeysDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into ascending order. //! Approximately ``2 * num_segments`` auxiliary storage required. @@ -1380,26 +1224,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeys( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortKeys( - d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of keys into descending order. //! Approximately ``2 * num_segments`` auxiliary storage required. @@ -1537,26 +1361,6 @@ public: d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeysDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortKeysDescending( - d_temp_storage, temp_storage_bytes, d_keys, num_items, num_segments, d_begin_offsets, d_end_offsets, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -1756,39 +1560,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -1984,39 +1755,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -2212,35 +1950,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: // Internal version without NVTX range template @@ -2435,35 +2144,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return SortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into ascending order. //! Approximately ``2 * num_items + 2 * num_segments`` auxiliary storage required. @@ -2622,39 +2302,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into descending order. //! Approximately ``2 * num_items + 2 * num_segments`` auxiliary storage required. @@ -2813,39 +2460,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortPairsDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - const KeyT* d_keys_in, - KeyT* d_keys_out, - const ValueT* d_values_in, - ValueT* d_values_out, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_keys_out, - d_values_in, - d_values_out, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into ascending order. //! Approximately ``2 * num_segments`` auxiliary storage required. @@ -3010,35 +2624,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortPairs( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortPairs( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Sorts segments of key-value pairs into descending order. //! Approximately ``2 * num_segments`` auxiliary storage required. @@ -3202,35 +2787,6 @@ public: stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t StableSortPairsDescending( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return StableSortPairsDescending( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group }; diff --git a/cub/cub/device/device_select.cuh b/cub/cub/device/device_select.cuh index 72e47cbebfe..7d5099ca7e1 100644 --- a/cub/cub/device/device_select.cuh +++ b/cub/cub/device/device_select.cuh @@ -203,26 +203,6 @@ struct DeviceSelect stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Flagged( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - FlagIterator d_flags, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - ::cuda::std::int64_t num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Flagged( - d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Uses the ``d_flags`` sequence to selectively compact the items in `d_data``. //! The total number of items selected is written to ``d_num_selected_out``. @@ -339,25 +319,6 @@ struct DeviceSelect stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Flagged( - void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - FlagIterator d_flags, - NumSelectedIteratorT d_num_selected_out, - ::cuda::std::int64_t num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Flagged( - d_temp_storage, temp_storage_bytes, d_data, d_flags, d_num_selected_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Uses the ``select_op`` functor to selectively copy items from ``d_in`` into ``d_out``. //! The total number of items selected is written to ``d_num_selected_out``. @@ -497,26 +458,6 @@ struct DeviceSelect stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t - If(void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - ::cuda::std::int64_t num_items, - SelectOp select_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return If( - d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Uses the ``select_op`` functor to selectively compact items in ``d_data``. //! The total number of items selected is written to ``d_num_selected_out``. @@ -647,25 +588,6 @@ struct DeviceSelect stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t - If(void* d_temp_storage, - size_t& temp_storage_bytes, - IteratorT d_data, - NumSelectedIteratorT d_num_selected_out, - ::cuda::std::int64_t num_items, - SelectOp select_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return If( - d_temp_storage, temp_storage_bytes, d_data, d_num_selected_out, num_items, select_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Uses the ``select_op`` functor applied to ``d_flags`` to selectively copy the //! corresponding items from ``d_in`` into ``d_out``. @@ -1010,25 +932,6 @@ struct DeviceSelect stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Unique( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - NumSelectedIteratorT d_num_selected_out, - ::cuda::std::int64_t num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Unique( - d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @rst //! Given an input sequence ``d_keys_in`` and ``d_values_in`` with runs of key-value pairs with consecutive //! equal-valued keys, only the first key and its value from each run is selectively copied @@ -1328,45 +1231,6 @@ struct DeviceSelect ::cuda::std::equal_to<>{}, stream); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t UniqueByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeyInputIteratorT d_keys_in, - ValueInputIteratorT d_values_in, - KeyOutputIteratorT d_keys_out, - ValueOutputIteratorT d_values_out, - NumSelectedIteratorT d_num_selected_out, - NumItemsT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return UniqueByKey( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_values_in, - d_keys_out, - d_values_out, - d_num_selected_out, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/device_spmv.cuh b/cub/cub/device/device_spmv.cuh index 114454a791a..5a751181842 100644 --- a/cub/cub/device/device_spmv.cuh +++ b/cub/cub/device/device_spmv.cuh @@ -207,39 +207,6 @@ struct DeviceSpmv return DispatchSpmv::Dispatch(d_temp_storage, temp_storage_bytes, spmv_params, stream); } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION static cudaError_t CsrMV( - void* d_temp_storage, - size_t& temp_storage_bytes, - const ValueT* d_values, - const int* d_row_offsets, - const int* d_column_indices, - const ValueT* d_vector_x, - ValueT* d_vector_y, - int num_rows, - int num_cols, - int num_nonzeros, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return CsrMV( - d_temp_storage, - temp_storage_bytes, - d_values, - d_row_offsets, - d_column_indices, - d_vector_x, - d_vector_y, - num_rows, - num_cols, - num_nonzeros, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - //! @} end member group }; diff --git a/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh b/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh index 6908b7d0638..a8c733ef309 100644 --- a/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh +++ b/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh @@ -137,29 +137,6 @@ struct DispatchAdjacentDifference , stream(stream) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CCCL_DEPRECATED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchAdjacentDifference( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_input, - OutputIteratorT d_output, - OffsetT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_input(d_input) - , d_output(d_output) - , num_items(num_items) - , difference_op(difference_op) - , stream(stream) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - /// Invocation template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke() @@ -319,24 +296,6 @@ struct DispatchAdjacentDifference return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION static cudaError_t Dispatch( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_input, - OutputIteratorT d_output, - OffsetT num_items, - DifferenceOpT difference_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch(d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_histogram.cuh b/cub/cub/device/dispatch/dispatch_histogram.cuh index b3b52fc8391..900f758cdfb 100644 --- a/cub/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/cub/device/dispatch/dispatch_histogram.cuh @@ -1031,39 +1031,6 @@ public: return error; } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], - int num_output_levels[NUM_ACTIVE_CHANNELS], - const LevelT* const d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - OffsetT row_stride_samples, - cudaStream_t stream, - bool debug_synchronous, - Int2Type is_byte_sample) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return DispatchRange( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - d_levels, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - is_byte_sample); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * Dispatch routine for HistogramRange, specialized for 8-bit sample types * (computes 256-bin privatized histograms and then reduces to user-specified levels) @@ -1197,39 +1164,6 @@ public: return error; } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], - const int num_output_levels[NUM_ACTIVE_CHANNELS], - const LevelT* const d_levels[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - OffsetT row_stride_samples, - cudaStream_t stream, - bool debug_synchronous, - Int2Type is_byte_sample) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return DispatchRange( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - d_levels, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - is_byte_sample); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit * @@ -1415,41 +1349,6 @@ public: return error; } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t DispatchEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], - const int num_output_levels[NUM_ACTIVE_CHANNELS], - const LevelT lower_level[NUM_ACTIVE_CHANNELS], - const LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - OffsetT row_stride_samples, - cudaStream_t stream, - bool debug_synchronous, - Int2Type is_byte_sample) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return DispatchEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - lower_level, - upper_level, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - is_byte_sample); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * Dispatch routine for HistogramEven, specialized for 8-bit sample types * (computes 256-bin privatized histograms and then reduces to user-specified levels) @@ -1586,41 +1485,6 @@ public: return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t DispatchEven( - void* d_temp_storage, - size_t& temp_storage_bytes, - SampleIteratorT d_samples, - CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], - const int num_output_levels[NUM_ACTIVE_CHANNELS], - const LevelT lower_level[NUM_ACTIVE_CHANNELS], - const LevelT upper_level[NUM_ACTIVE_CHANNELS], - OffsetT num_row_pixels, - OffsetT num_rows, - OffsetT row_stride_samples, - cudaStream_t stream, - bool debug_synchronous, - Int2Type is_byte_sample) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return DispatchEven( - d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - lower_level, - upper_level, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - is_byte_sample); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_merge_sort.cuh b/cub/cub/device/dispatch/dispatch_merge_sort.cuh index 507b7776de6..1d455bdfbf1 100644 --- a/cub/cub/device/dispatch/dispatch_merge_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_merge_sort.cuh @@ -399,33 +399,6 @@ struct DispatchMergeSort , ptx_version(ptx_version) {} - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchMergeSort( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyInputIteratorT d_input_keys, - ValueInputIteratorT d_input_items, - KeyIteratorT d_output_keys, - ValueIteratorT d_output_items, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_input_keys(d_input_keys) - , d_input_items(d_input_items) - , d_output_keys(d_output_keys) - , d_output_items(d_output_items) - , num_items(num_items) - , compare_op(compare_op) - , stream(stream) - , ptx_version(ptx_version) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } - // Invocation template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke() @@ -682,33 +655,6 @@ struct DispatchMergeSort return error; } - - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - KeyInputIteratorT d_input_keys, - ValueInputIteratorT d_input_items, - KeyIteratorT d_output_keys, - ValueIteratorT d_output_items, - OffsetT num_items, - CompareOpT compare_op, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_input_keys, - d_input_items, - d_output_keys, - d_output_items, - num_items, - compare_op, - stream); - } }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh index ed971c1a739..0d4d9bf1ea9 100644 --- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh @@ -934,33 +934,6 @@ struct DispatchRadixSort , decomposer(decomposer) {} - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchRadixSort( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - OffsetT num_items, - int begin_bit, - int end_bit, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_keys(d_keys) - , d_values(d_values) - , num_items(num_items) - , begin_bit(begin_bit) - , end_bit(end_bit) - , stream(stream) - , ptx_version(ptx_version) - , is_overwrite_okay(is_overwrite_okay) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } - //------------------------------------------------------------------------------ // Small-problem (single tile) invocation //------------------------------------------------------------------------------ @@ -1872,25 +1845,6 @@ struct DispatchRadixSort return error; } - - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - OffsetT num_items, - int begin_bit, - int end_bit, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream); - } }; /****************************************************************************** @@ -2027,39 +1981,6 @@ struct DispatchSegmentedRadixSort , decomposer(decomposer) {} - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchSegmentedRadixSort( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - OffsetT num_items, - OffsetT num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_keys(d_keys) - , d_values(d_values) - , num_items(num_items) - , num_segments(num_segments) - , d_begin_offsets(d_begin_offsets) - , d_end_offsets(d_end_offsets) - , begin_bit(begin_bit) - , end_bit(end_bit) - , stream(stream) - , ptx_version(ptx_version) - , is_overwrite_okay(is_overwrite_okay) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } - //------------------------------------------------------------------------------ // Multi-segment invocation //------------------------------------------------------------------------------ @@ -2428,39 +2349,6 @@ struct DispatchSegmentedRadixSort return error; } - - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - int num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - int begin_bit, - int end_bit, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - begin_bit, - end_bit, - is_overwrite_okay, - stream); - } }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh index c9ea3fc1bd2..0cca1e1a982 100644 --- a/cub/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce.cuh @@ -345,33 +345,6 @@ struct DispatchReduce , launcher_factory(launcher_factory) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchReduce( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - OffsetT num_items, - ReductionOpT reduction_op, - InitT init, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_in(d_in) - , d_out(d_out) - , num_items(num_items) - , reduction_op(reduction_op) - , init(init) - , stream(stream) - , ptx_version(ptx_version) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - //--------------------------------------------------------------------------- // Small-problem (single tile) invocation //--------------------------------------------------------------------------- @@ -689,25 +662,6 @@ struct DispatchReduce return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - OffsetT num_items, - ReductionOpT reduction_op, - InitT init, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; /** @@ -884,37 +838,6 @@ struct DispatchSegmentedReduce , ptx_version(ptx_version) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchSegmentedReduce( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - ReductionOpT reduction_op, - InitT init, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_in(d_in) - , d_out(d_out) - , num_segments(num_segments) - , d_begin_offsets(d_begin_offsets) - , d_end_offsets(d_end_offsets) - , reduction_op(reduction_op) - , init(init) - , stream(stream) - , ptx_version(ptx_version) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - //--------------------------------------------------------------------------- // Chained policy invocation //--------------------------------------------------------------------------- @@ -1109,37 +1032,6 @@ struct DispatchSegmentedReduce return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - ReductionOpT reduction_op, - InitT init, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_in, - d_out, - num_segments, - d_begin_offsets, - d_end_offsets, - reduction_op, - init, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh b/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh index 8449a10ea62..804371588f3 100644 --- a/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh @@ -541,39 +541,6 @@ struct DispatchReduceByKey return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - UniqueOutputIteratorT d_unique_out, - ValuesInputIteratorT d_values_in, - AggregatesOutputIteratorT d_aggregates_out, - NumRunsOutputIteratorT d_num_runs_out, - EqualityOpT equality_op, - ReductionOpT reduction_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_unique_out, - d_values_in, - d_aggregates_out, - d_num_runs_out, - equality_op, - reduction_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_rle.cuh b/cub/cub/device/dispatch/dispatch_rle.cuh index 5c8f1e01d0f..b1542462a58 100644 --- a/cub/cub/device/dispatch/dispatch_rle.cuh +++ b/cub/cub/device/dispatch/dispatch_rle.cuh @@ -539,35 +539,6 @@ struct DeviceRleDispatch return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OffsetsOutputIteratorT d_offsets_out, - LengthsOutputIteratorT d_lengths_out, - NumRunsOutputIteratorT d_num_runs_out, - EqualityOpT equality_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_in, - d_offsets_out, - d_lengths_out, - d_num_runs_out, - equality_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_scan.cuh b/cub/cub/device/dispatch/dispatch_scan.cuh index 676b08df49e..0ba4cc1dcae 100644 --- a/cub/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/cub/device/dispatch/dispatch_scan.cuh @@ -329,33 +329,6 @@ struct DispatchScan , ptx_version(ptx_version) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchScan( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - OffsetT num_items, - ScanOpT scan_op, - InitValueT init_value, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_in(d_in) - , d_out(d_out) - , scan_op(scan_op) - , init_value(init_value) - , num_items(num_items) - , stream(stream) - , ptx_version(ptx_version) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - template CUB_RUNTIME_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE cudaError_t Invoke(InitKernel init_kernel, ScanKernel scan_kernel) { @@ -588,25 +561,6 @@ struct DispatchScan return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - OutputIteratorT d_out, - ScanOpT scan_op, - InitValueT init_value, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch(d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh index 6f2432874b5..c88656dff48 100644 --- a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh +++ b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh @@ -341,37 +341,6 @@ struct DispatchScanByKey , ptx_version(ptx_version) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchScanByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - EqualityOp equality_op, - ScanOpT scan_op, - InitValueT init_value, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous, - int ptx_version) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_keys_in(d_keys_in) - , d_values_in(d_values_in) - , d_values_out(d_values_out) - , equality_op(equality_op) - , scan_op(scan_op) - , init_value(init_value) - , num_items(num_items) - , stream(stream) - , ptx_version(ptx_version) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - template CUB_RUNTIME_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE cudaError_t Invoke(InitKernel init_kernel, ScanKernel scan_kernel) { @@ -616,37 +585,6 @@ struct DispatchScanByKey return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeysInputIteratorT d_keys_in, - ValuesInputIteratorT d_values_in, - ValuesOutputIteratorT d_values_out, - EqualityOp equality_op, - ScanOpT scan_op, - InitValueT init_value, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_values_in, - d_values_out, - equality_op, - scan_op, - init_value, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh index 9250ab87f61..9d011d414ba 100644 --- a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -820,35 +820,6 @@ struct DispatchSegmentedSort , stream(stream) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchSegmentedSort( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - OffsetT num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_keys(d_keys) - , d_values(d_values) - , num_items(num_items) - , num_segments(num_segments) - , d_begin_offsets(d_begin_offsets) - , d_end_offsets(d_end_offsets) - , is_overwrite_okay(is_overwrite_okay) - , stream(stream) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke() { @@ -1128,37 +1099,6 @@ struct DispatchSegmentedSort return error; } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - DoubleBuffer& d_keys, - DoubleBuffer& d_values, - OffsetT num_items, - int num_segments, - BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, - bool is_overwrite_okay, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - num_items, - num_segments, - d_begin_offsets, - d_end_offsets, - is_overwrite_okay, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED - private: CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE int GetNumPasses(int radix_bits) { diff --git a/cub/cub/device/dispatch/dispatch_select_if.cuh b/cub/cub/device/dispatch/dispatch_select_if.cuh index be8b09a5c9a..c41dfb389eb 100644 --- a/cub/cub/device/dispatch/dispatch_select_if.cuh +++ b/cub/cub/device/dispatch/dispatch_select_if.cuh @@ -840,37 +840,6 @@ struct DispatchSelectIf return CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch)); } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - InputIteratorT d_in, - FlagsInputIteratorT d_flags, - SelectedOutputIteratorT d_selected_out, - NumSelectedIteratorT d_num_selected_out, - SelectOpT select_op, - EqualityOpT equality_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_in, - d_flags, - d_selected_out, - d_num_selected_out, - select_op, - equality_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/cub/device/dispatch/dispatch_spmv_orig.cuh index 2fb435699db..6dc4f44aeca 100644 --- a/cub/cub/device/dispatch/dispatch_spmv_orig.cuh +++ b/cub/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -892,44 +892,6 @@ struct DispatchSpmv return error; } -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - template - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN - _CCCL_FORCEINLINE static cudaError_t - Dispatch(void* d_temp_storage, - size_t& temp_storage_bytes, - SpmvParamsT& spmv_params, - cudaStream_t stream, - bool debug_synchronous, - Spmv1ColKernelT spmv_1col_kernel, - SpmvSearchKernelT spmv_search_kernel, - SpmvKernelT spmv_kernel, - SegmentFixupKernelT segment_fixup_kernel, - SpmvEmptyMatrixKernelT spmv_empty_matrix_kernel, - KernelConfig spmv_config, - KernelConfig segment_fixup_config) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - spmv_params, - stream, - spmv_1col_kernel, - spmv_search_kernel, - spmv_kernel, - segment_fixup_kernel, - spmv_empty_matrix_kernel, - spmv_config, - segment_fixup_config); - } -#endif // _CCCL_DOXYGEN_INVOKED - /** * @brief Internal dispatch routine for computing a device-wide reduction * @@ -988,21 +950,6 @@ struct DispatchSpmv return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - SpmvParamsT& spmv_params, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch(d_temp_storage, temp_storage_bytes, spmv_params, stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh index fc259499b85..2d5566d76a3 100644 --- a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh +++ b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh @@ -551,39 +551,6 @@ struct DispatchThreeWayPartitionIf return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - std::size_t& temp_storage_bytes, - InputIteratorT d_in, - FirstOutputIteratorT d_first_part_out, - SecondOutputIteratorT d_second_part_out, - UnselectedOutputIteratorT d_unselected_out, - NumSelectedIteratorT d_num_selected_out, - SelectFirstPartOp select_first_part_op, - SelectSecondPartOp select_second_part_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_in, - d_first_part_out, - d_second_part_out, - d_unselected_out, - d_num_selected_out, - select_first_part_op, - select_second_part_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh index 46ac0a44b9b..e07084fe24a 100644 --- a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh +++ b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh @@ -322,35 +322,6 @@ struct DispatchUniqueByKey , stream(stream) {} -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE DispatchUniqueByKey( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeyInputIteratorT d_keys_in, - ValueInputIteratorT d_values_in, - KeyOutputIteratorT d_keys_out, - ValueOutputIteratorT d_values_out, - NumSelectedIteratorT d_num_selected_out, - EqualityOpT equality_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - : d_temp_storage(d_temp_storage) - , temp_storage_bytes(temp_storage_bytes) - , d_keys_in(d_keys_in) - , d_values_in(d_values_in) - , d_keys_out(d_keys_out) - , d_values_out(d_values_out) - , d_num_selected_out(d_num_selected_out) - , equality_op(equality_op) - , num_items(num_items) - , stream(stream) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - } -#endif // _CCCL_DOXYGEN_INVOKED - /****************************************************************************** * Dispatch entrypoints ******************************************************************************/ @@ -626,37 +597,6 @@ struct DispatchUniqueByKey return error; } - -#ifndef _CCCL_DOXYGEN_INVOKED // Do not document - CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED - CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch( - void* d_temp_storage, - size_t& temp_storage_bytes, - KeyInputIteratorT d_keys_in, - ValueInputIteratorT d_values_in, - KeyOutputIteratorT d_keys_out, - ValueOutputIteratorT d_values_out, - NumSelectedIteratorT d_num_selected_out, - EqualityOpT equality_op, - OffsetT num_items, - cudaStream_t stream, - bool debug_synchronous) - { - CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG - - return Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys_in, - d_values_in, - d_keys_out, - d_values_out, - d_num_selected_out, - equality_op, - num_items, - stream); - } -#endif // _CCCL_DOXYGEN_INVOKED }; CUB_NAMESPACE_END diff --git a/cub/cub/util_debug.cuh b/cub/cub/util_debug.cuh index 3971c6a99ca..275c915e8f2 100644 --- a/cub/cub/util_debug.cuh +++ b/cub/cub/util_debug.cuh @@ -309,18 +309,4 @@ inline _CCCL_HOST_DEVICE void va_printf(char const*, Args const&...) # endif #endif -#define CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED \ - CCCL_DEPRECATED_BECAUSE( \ - "CUB no longer accepts `debug_synchronous` parameter. " \ - "Define CUB_DEBUG_SYNC instead, or silence this message with " \ - "CCCL_IGNORE_DEPRECATED_API.") - -#define CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG \ - if (debug_synchronous) \ - { \ - _CubLog("%s\n", \ - "CUB no longer accepts `debug_synchronous` parameter. " \ - "Define CUB_DEBUG_SYNC instead."); \ - } - CUB_NAMESPACE_END diff --git a/docs/repo.toml b/docs/repo.toml index 7e0d3108bbb..e949beb6e7c 100644 --- a/docs/repo.toml +++ b/docs/repo.toml @@ -179,7 +179,6 @@ doxygen_predefined = [ "CCCL_DEPRECATED=", "CUB_STATIC_ASSERT(cond,msg)=", "CUB_RUNTIME_FUNCTION", - "CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED", "CUDASTF_HOST", "CUDASTF_DEVICE", "CUDASTF_HOST_DEVICE"