diff --git a/libcudacxx/include/cuda/std/__bit/countl.h b/libcudacxx/include/cuda/std/__bit/countl.h index 13c29835e7..699be1abfb 100644 --- a/libcudacxx/include/cuda/std/__bit/countl.h +++ b/libcudacxx/include/cuda/std/__bit/countl.h @@ -23,8 +23,8 @@ #include #include +#include #include -#include #include #include #include @@ -32,28 +32,34 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -template +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) + _CCCL_AND(sizeof(_Tp) >= sizeof(uint32_t) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t)))) _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countl_zero(_Tp __t) noexcept { - using _Sp = _If; - auto __clz_result = _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t)); - if (!__cccl_default_is_constant_evaluated()) + if (_CUDA_VSTD::is_constant_evaluated() && __t == 0) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return __clz_result;), (return __t ? __clz_result : numeric_limits<_Sp>::digits;)) + return numeric_limits<_Tp>::digits; } - return __t ? __clz_result : numeric_limits<_Sp>::digits; + using _Sp = _If; + auto __clz_result = _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t)); + NV_IF_ELSE_TARGET(NV_IS_DEVICE, + (return __clz_result;), // + (return __t ? __clz_result : numeric_limits<_Tp>::digits;)) } -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countl_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) < sizeof(uint32_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countl_zero(_Tp __t) noexcept { return _CUDA_VSTD::__countl_zero(static_cast(__t)) - (numeric_limits::digits - numeric_limits<_Tp>::digits); } -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __countl_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countl_zero(_Tp __t) noexcept { constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t); struct _Array @@ -71,18 +77,22 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) return numeric_limits<_Tp>::digits; } -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> -countl_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countl_zero(_Tp __t) noexcept { - return _CUDA_VSTD::__countl_zero(__t); + auto __ret = _CUDA_VSTD::__countl_zero(static_cast<_Tp>(__t)); + _CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits); + return __ret; } -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> -countl_one(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) noexcept { - return _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t)); + auto __ret = _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t)); + _CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits); + return __ret; } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/countr.h b/libcudacxx/include/cuda/std/__bit/countr.h index f6c1bdc4e9..54ccc6731b 100644 --- a/libcudacxx/include/cuda/std/__bit/countr.h +++ b/libcudacxx/include/cuda/std/__bit/countr.h @@ -22,9 +22,9 @@ #endif // no system header #include -#include +#include +#include #include -#include #include #include #include @@ -32,20 +32,24 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __countr_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countr_zero(_Tp __t) noexcept { - using _Sp = _If; - auto __ctz_result = _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t)); - if (!__cccl_default_is_constant_evaluated()) + if (_CUDA_VSTD::is_constant_evaluated() && __t == 0) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return __ctz_result;), (return __t ? __ctz_result : numeric_limits<_Tp>::digits;)) + return numeric_limits<_Tp>::digits; } - return __t ? __ctz_result : numeric_limits<_Tp>::digits; + using _Sp = _If; + auto __ctz_result = _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t)); + NV_IF_ELSE_TARGET(NV_IS_DEVICE, + (return __ctz_result;), // + (return __t ? __ctz_result : numeric_limits<_Tp>::digits;)) } -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __countr_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countr_zero(_Tp __t) noexcept { constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t); struct _Array @@ -63,18 +67,22 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) return numeric_limits<_Tp>::digits; } -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> -countr_zero(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept { - return _CUDA_VSTD::__countr_zero(__t); + auto __ret = _CUDA_VSTD::__countr_zero(__t); + _CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits); + return __ret; } -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> -countr_one(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept { - return _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t)); + auto __ret = _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t)); + _CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits); + return __ret; } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__bit/has_single_bit.h b/libcudacxx/include/cuda/std/__bit/has_single_bit.h index a32c1ae0e8..1a6e3805c6 100644 --- a/libcudacxx/include/cuda/std/__bit/has_single_bit.h +++ b/libcudacxx/include/cuda/std/__bit/has_single_bit.h @@ -22,14 +22,14 @@ #endif // no system header #include -#include +#include #include _LIBCUDACXX_BEGIN_NAMESPACE_STD -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, bool> -has_single_bit(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept { return _CUDA_VSTD::popcount(__t) == 1; } diff --git a/libcudacxx/include/cuda/std/__bit/popcount.h b/libcudacxx/include/cuda/std/__bit/popcount.h index c29b0bd8a0..636106bd9c 100644 --- a/libcudacxx/include/cuda/std/__bit/popcount.h +++ b/libcudacxx/include/cuda/std/__bit/popcount.h @@ -22,22 +22,24 @@ #endif // no system header #include -#include +#include #include #include #include _LIBCUDACXX_BEGIN_NAMESPACE_STD -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t __popcount(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __popcount(_Tp __t) noexcept { using _Sp = _If; return _CUDA_VSTD::__cccl_popc(static_cast<_Sp>(__t)); } -template -_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __popcount(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t))) +_LIBCUDACXX_HIDE_FROM_ABI constexpr int __popcount(_Tp __t) noexcept { constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t); int __count = 0; @@ -49,11 +51,13 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)) return __count; } -template -_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int> -popcount(_Tp __t) noexcept +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { - return _CUDA_VSTD::__popcount(__t); + auto __ret = _CUDA_VSTD::__popcount(__t); + _CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits); + return __ret; } _LIBCUDACXX_END_NAMESPACE_STD