Skip to content

Commit

Permalink
__cccl_ctz/__cccl_clz returns the same value if the input is zero on …
Browse files Browse the repository at this point in the history
…all conditions
  • Loading branch information
fbusato committed Feb 4, 2025
1 parent c8e2dbb commit bc96c20
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 16 deletions.
7 changes: 5 additions & 2 deletions libcudacxx/include/cuda/std/__bit/clz.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,18 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_clz(_Tp __x) noexcept
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
(return sizeof(_Tp) == sizeof(uint32_t) ? __clz(static_cast<uint32_t>(__x)) //
: __clzll(static_cast<uint64_t>(__x));),
(return _CUDA_VSTD::__host_runtime_clz(__x);))
(return __x == 0 ? numeric_limits<_Tp>::digits : _CUDA_VSTD::__host_runtime_clz(__x);))
}

// __cccl_clz returns numeric_limits<_Tp>::digits if __x == 0 on both host and device
template <typename _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_clz(_Tp __x) noexcept
{
static_assert(is_same_v<_Tp, uint32_t> || is_same_v<_Tp, uint64_t>);
#if defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED)
return is_constant_evaluated() ? _CUDA_VSTD::__constexpr_clz(__x) : _CUDA_VSTD::__runtime_clz(__x);
return is_constant_evaluated()
? (__x == 0 ? numeric_limits<_Tp>::digits : _CUDA_VSTD::__constexpr_clz(__x))
: _CUDA_VSTD::__runtime_clz(__x);
#else
return _CUDA_VSTD::__constexpr_clz(__x);
#endif
Expand Down
9 changes: 3 additions & 6 deletions libcudacxx/include/cuda/std/__bit/countl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,9 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __countl_zero(_Tp __t) noexcept
{
return numeric_limits<_Tp>::digits;
}
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
auto __clz_result = _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t));
__clz_result -= numeric_limits<_Sp>::digits - numeric_limits<_Tp>::digits;
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
(return __clz_result;), // if __t == 0 __clz_result is already equal to numeric_limits<_Tp>::digits
(return __t == 0 ? numeric_limits<_Tp>::digits : __clz_result;))
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
constexpr auto __digits_diff = numeric_limits<_Sp>::digits - numeric_limits<_Tp>::digits;
return _CUDA_VSTD::__cccl_clz(static_cast<_Sp>(__t)) - __digits_diff;
}

_CCCL_TEMPLATE(class _Tp)
Expand Down
8 changes: 2 additions & 6 deletions libcudacxx/include/cuda/std/__bit/countr.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,8 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __countr_zero(_Tp __t) noexcept
{
return numeric_limits<_Tp>::digits;
}
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
auto __ctz_result = _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t));
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
// if __t == 0 __ctz_result is already equal to numeric_limits<_Tp>::digits
(return sizeof(_Tp) < sizeof(uint32_t) && __t == 0 ? numeric_limits<_Tp>::digits : __ctz_result;),
(return __t == 0 ? numeric_limits<_Tp>::digits : __ctz_result;))
using _Sp = _If<sizeof(_Tp) <= sizeof(uint32_t), uint32_t, uint64_t>;
return _CUDA_VSTD::__cccl_ctz(static_cast<_Sp>(__t));
}

_CCCL_TEMPLATE(class _Tp)
Expand Down
7 changes: 5 additions & 2 deletions libcudacxx/include/cuda/std/__bit/ctz.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,18 @@ _LIBCUDACXX_HIDE_FROM_ABI int __runtime_ctz(_Tp __x) noexcept
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
(return sizeof(_Tp) == sizeof(uint32_t) ? __clz(__brev(static_cast<uint32_t>(__x)))
: __clzll(__brevll(static_cast<uint64_t>(__x)));),
(return _CUDA_VSTD::__host_runtime_ctz(__x);))
(return __x == 0 ? numeric_limits<_Tp>::digits : _CUDA_VSTD::__host_runtime_ctz(__x);))
}

// __cccl_ctz returns numeric_limits<_Tp>::digits if __x == 0 on both host and device
template <typename _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr int __cccl_ctz(_Tp __x) noexcept
{
static_assert(is_same_v<_Tp, uint32_t> || is_same_v<_Tp, uint64_t>);
#if defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED)
return is_constant_evaluated() ? _CUDA_VSTD::__constexpr_ctz(__x) : _CUDA_VSTD::__runtime_ctz(__x);
return is_constant_evaluated()
? (__x == 0 ? numeric_limits<_Tp>::digits : _CUDA_VSTD::__constexpr_ctz(__x))
: _CUDA_VSTD::__runtime_ctz(__x);
#else
NV_IF_ELSE_TARGET(NV_IS_DEVICE, //
(return _CUDA_VSTD::__constexpr_ctz(__x);),
Expand Down

0 comments on commit bc96c20

Please sign in to comment.