Skip to content

Commit

Permalink
add concept-like macros and assumptions
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato committed Jan 9, 2025
1 parent 298d9a1 commit dabacb4
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 51 deletions.
48 changes: 29 additions & 19 deletions libcudacxx/include/cuda/std/__bit/countl.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,43 @@

#include <cuda/std/__bit/bit_cast.h>
#include <cuda/std/__bit/clz.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/conditional.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_constant_evaluated.h>
#include <cuda/std/__type_traits/is_unsigned_integer.h>
#include <cuda/std/cstdint>
#include <cuda/std/limits>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// __countl_zero overload for 32-bit and 64-bit unsigned integers.
// Casts __t to _Sp (uint32_t or uint64_t, chosen by sizeof(_Tp)), calls
// __cccl_clz on it, and returns numeric_limits<...>::digits for a zero input on
// the constant-evaluated and host paths.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so
// the pre-change and post-change lines are interleaved (two template headers,
// two consecutive `if` lines, two declarations of `_Sp` and `__clz_result`).
// It is not compilable as shown; confirm against the real header.
// NOTE(review): the enable_if_t return type overlaps with the size bounds
// already stated in _CCCL_REQUIRES — presumably one of the two is redundant.
template <class _Tp>
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp)
_CCCL_AND(sizeof(_Tp) >= sizeof(uint32_t) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t))))
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) == sizeof(uint32_t) || sizeof(_Tp) == sizeof(uint64_t), int>
__countl_zero(_Tp __t) noexcept
{
// Appears to be the pre-change body: computes the clz first, then picks the
// result depending on constant evaluation and target.
using _Sp = _If<sizeof(_Tp) == sizeof(uint32_t), uint32_t, uint64_t>;
auto __clz_result = _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t));
if (!__cccl_default_is_constant_evaluated())
// Appears to be the post-change guard: a zero input during constant evaluation
// returns the bit width without calling __cccl_clz.
if (_CUDA_VSTD::is_constant_evaluated() && __t == 0)
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return __clz_result;), (return __t ? __clz_result : numeric_limits<_Sp>::digits;))
return numeric_limits<_Tp>::digits;
}
return __t ? __clz_result : numeric_limits<_Sp>::digits;
// Appears to be the post-change body: the device branch returns the __cccl_clz
// result unmodified, the host branch maps a zero input to the bit width.
// NOTE(review): the device branch presumably relies on __cccl_clz returning the
// bit width for a zero argument — confirm in __bit/clz.h.
using _Sp = _If<sizeof(_Tp) == sizeof(uint32_t), uint32_t, uint64_t>;
auto __clz_result = _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t));
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
(return __clz_result;), //
(return __t ? __clz_result : numeric_limits<_Tp>::digits;))
}

// __countl_zero overload for unsigned integers narrower than uint32_t.
// Widens __t to uint32_t, counts leading zeros there, then subtracts the number
// of extra high-order bits the widening introduced
// (numeric_limits<uint32_t>::digits - numeric_limits<_Tp>::digits).
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t signature line and the _CCCL_TEMPLATE/_CCCL_REQUIRES signature
// lines appear to be the old and new form of the same declaration.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) < sizeof(uint32_t), int> __countl_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) < sizeof(uint32_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countl_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__countl_zero(static_cast<uint32_t>(__t))
- (numeric_limits<uint32_t>::digits - numeric_limits<_Tp>::digits);
}

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __countl_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countl_zero(_Tp __t) noexcept
{
constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t);
struct _Array
Expand All @@ -71,18 +77,22 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t))
return numeric_limits<_Tp>::digits;
}

// countl_zero: public entry point, constrained to types accepted by
// __cccl_is_unsigned_integer. Forwards to the __countl_zero overload set and
// returns its int result.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the single `return __countl_zero(__t);` line
// appear to be the removed form, the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration
// and the `__ret` lines the added form.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countl_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countl_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__countl_zero(__t);
// NOTE(review): __t is already declared as _Tp, so this static_cast<_Tp> looks
// redundant — confirm it is not needed for overload selection.
auto __ret = _CUDA_VSTD::__countl_zero(static_cast<_Tp>(__t));
// States that the result lies in [0, numeric_limits<_Tp>::digits].
_CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits);
return __ret;
}

// countl_one: public entry point, constrained to types accepted by
// __cccl_is_unsigned_integer. Computed as __countl_zero of the bitwise
// complement of __t.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the single-line `return` appear to be the removed
// form, the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration and the `__ret` lines the
// added form.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countl_one(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) noexcept
{
// The cast back to _Tp is applied to the result of ~__t before counting.
return _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t));
auto __ret = _CUDA_VSTD::__countl_zero(static_cast<_Tp>(~__t));
// States that the result lies in [0, numeric_limits<_Tp>::digits].
_CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits);
return __ret;
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
46 changes: 27 additions & 19 deletions libcudacxx/include/cuda/std/__bit/countr.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,34 @@
#endif // no system header

#include <cuda/std/__bit/bit_cast.h>
#include <cuda/std/__bit/ctz.h>
#include <cuda/std/__bit/clz.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/conditional.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_constant_evaluated.h>
#include <cuda/std/__type_traits/is_unsigned_integer.h>
#include <cuda/std/cstdint>
#include <cuda/std/limits>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// __countr_zero overload for unsigned integers no wider than uint64_t.
// Casts __t to _Sp (uint32_t when sizeof(_Tp) <= sizeof(uint32_t), otherwise
// uint64_t), calls __cccl_ctz on it, and returns numeric_limits<_Tp>::digits for
// a zero input on the constant-evaluated and host paths.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped, so
// the pre-change and post-change lines are interleaved (two signatures, two
// consecutive `if` lines, two declarations of `_Sp` and `__ctz_result`).
// It is not compilable as shown; confirm against the real header.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) <= sizeof(uint64_t), int> __countr_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countr_zero(_Tp __t) noexcept
{
// Appears to be the pre-change body.
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
auto __ctz_result = _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t));
if (!__cccl_default_is_constant_evaluated())
// Appears to be the post-change guard: a zero input during constant evaluation
// returns the bit width of _Tp without calling __cccl_ctz.
if (_CUDA_VSTD::is_constant_evaluated() && __t == 0)
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return __ctz_result;), (return __t ? __ctz_result : numeric_limits<_Tp>::digits;))
return numeric_limits<_Tp>::digits;
}
return __t ? __ctz_result : numeric_limits<_Tp>::digits;
// Appears to be the post-change body: the device branch returns the __cccl_ctz
// result unmodified, the host branch maps a zero input to the bit width of _Tp.
// NOTE(review): for _Tp narrower than uint32_t the device branch returns the
// ctz of the value widened to uint32_t; if __cccl_ctz(0u) yields 32 on device,
// a zero input would return more than numeric_limits<_Tp>::digits — confirm.
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
auto __ctz_result = _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t));
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
(return __ctz_result;), //
(return __t ? __ctz_result : numeric_limits<_Tp>::digits;))
}

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __countr_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __countr_zero(_Tp __t) noexcept
{
constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t);
struct _Array
Expand All @@ -63,18 +67,22 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t))
return numeric_limits<_Tp>::digits;
}

// countr_zero: public entry point, constrained to types accepted by
// __cccl_is_unsigned_integer. Forwards to the __countr_zero overload set and
// returns its int result.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the single-line `return` appear to be the removed
// form, the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration and the `__ret` lines the
// added form.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countr_zero(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept
{
return _CUDA_VSTD::__countr_zero(__t);
auto __ret = _CUDA_VSTD::__countr_zero(__t);
// States that the result lies in [0, numeric_limits<_Tp>::digits]; this only
// holds if every __countr_zero path stays within that range.
_CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits);
return __ret;
}

// countr_one: public entry point, constrained to types accepted by
// __cccl_is_unsigned_integer. Computed as __countr_zero of the bitwise
// complement of __t.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the single-line `return` appear to be the removed
// form, the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration and the `__ret` lines the
// added form.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
countr_one(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept
{
// The cast back to _Tp is applied to the result of ~__t before counting.
return _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t));
auto __ret = _CUDA_VSTD::__countr_zero(static_cast<_Tp>(~__t));
// States that the result lies in [0, numeric_limits<_Tp>::digits].
_CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits);
return __ret;
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down
8 changes: 4 additions & 4 deletions libcudacxx/include/cuda/std/__bit/has_single_bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
#endif // no system header

#include <cuda/std/__bit/popcount.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/is_unsigned_integer.h>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// has_single_bit: returns true when popcount(__t) equals 1, i.e. exactly one
// bit of __t is set. Constrained to types accepted by __cccl_is_unsigned_integer.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration
// appear to be the old and new form of the same signature.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, bool>
has_single_bit(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept
{
return _CUDA_VSTD::popcount(__t) == 1;
}
Expand Down
22 changes: 13 additions & 9 deletions libcudacxx/include/cuda/std/__bit/popcount.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,24 @@
#endif // no system header

#include <cuda/std/__bit/popc.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/is_unsigned_integer.h>
#include <cuda/std/cstdint>
#include <cuda/std/limits>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// __popcount overload for unsigned integers no wider than uint64_t.
// Casts __t to _Sp (uint32_t when sizeof(_Tp) <= sizeof(uint32_t), otherwise
// uint64_t) and returns the result of __cccl_popc on that value.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t signature line and the _CCCL_TEMPLATE/_CCCL_REQUIRES signature
// lines appear to be the old and new form of the same declaration.
template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<sizeof(_Tp) <= sizeof(uint64_t), int> __popcount(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) <= sizeof(uint64_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __popcount(_Tp __t) noexcept
{
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
return _CUDA_VSTD::__cccl_popc(static_cast<_Sp>(__t));
}

template <class _Tp>
_LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t)), int> __popcount(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp) _CCCL_AND(sizeof(_Tp) > sizeof(uint64_t)))
_LIBCUDACXX_HIDE_FROM_ABI constexpr int __popcount(_Tp __t) noexcept
{
constexpr int _Ratio = sizeof(_Tp) / sizeof(uint64_t);
int __count = 0;
Expand All @@ -49,11 +51,13 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<(sizeof(_Tp) > sizeof(uint64_t))
return __count;
}

// popcount: public entry point, constrained to types accepted by
// __cccl_is_unsigned_integer. Forwards to the __popcount overload set and
// returns its int result.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped; the
// enable_if_t declaration and the single-line `return` appear to be the removed
// form, the _CCCL_TEMPLATE/_CCCL_REQUIRES declaration and the `__ret` lines the
// added form.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr enable_if_t<__cccl_is_unsigned_integer<_Tp>::value, int>
popcount(_Tp __t) noexcept
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_unsigned_integer, _Tp))
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept
{
return _CUDA_VSTD::__popcount(__t);
auto __ret = _CUDA_VSTD::__popcount(__t);
// States that the result lies in [0, numeric_limits<_Tp>::digits].
_CCCL_BUILTIN_ASSUME(__ret >= 0 && __ret <= numeric_limits<_Tp>::digits);
return __ret;
}

_LIBCUDACXX_END_NAMESPACE_STD
Expand Down

0 comments on commit dabacb4

Please sign in to comment.