diff --git a/c2h/include/c2h/bfloat16.cuh b/c2h/include/c2h/bfloat16.cuh index b7598562715..795b918b60f 100644 --- a/c2h/include/c2h/bfloat16.cuh +++ b/c2h/include/c2h/bfloat16.cuh @@ -36,6 +36,7 @@ #include +#include #include #include @@ -232,19 +233,36 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x) * Traits overloads ******************************************************************************/ +_LIBCUDACXX_BEGIN_NAMESPACE_STD template <> -struct CUB_NS_QUALIFIER::FpLimits +struct __is_extended_floating_point : true_type +{}; + +#ifndef _CCCL_NO_VARIABLE_TEMPLATES +template <> +_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v = true; +#endif // _CCCL_NO_VARIABLE_TEMPLATES + +template <> +class __numeric_limits_impl { - static __host__ __device__ __forceinline__ bfloat16_t Max() +public: + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t max() + { + return bfloat16_t(numeric_limits<__nv_bfloat16>::max()); + } + + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t min() { - return bfloat16_t::max(); + return bfloat16_t(numeric_limits<__nv_bfloat16>::min()); } - static __host__ __device__ __forceinline__ bfloat16_t Lowest() + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t lowest() { - return bfloat16_t::lowest(); + return bfloat16_t(numeric_limits<__nv_bfloat16>::lowest()); } }; +_LIBCUDACXX_END_NAMESPACE_STD template <> struct CUB_NS_QUALIFIER::NumericTraits diff --git a/c2h/include/c2h/half.cuh b/c2h/include/c2h/half.cuh index 3e59c0933f3..53255d58227 100644 --- a/c2h/include/c2h/half.cuh +++ b/c2h/include/c2h/half.cuh @@ -37,6 +37,7 @@ #include +#include #include #include @@ -327,19 +328,36 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x) * Traits overloads ******************************************************************************/ +_LIBCUDACXX_BEGIN_NAMESPACE_STD template <> -struct CUB_NS_QUALIFIER::FpLimits +struct __is_extended_floating_point : true_type +{}; + +#ifndef _CCCL_NO_VARIABLE_TEMPLATES +template <> +_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v = true; +#endif // _CCCL_NO_VARIABLE_TEMPLATES + +template <> +class __numeric_limits_impl { - static __host__ __device__ __forceinline__ half_t Max() +public: + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t max() + { + return half_t(numeric_limits<__half>::max()); + } + + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t min() { - return (half_t::max)(); + return half_t(numeric_limits<__half>::min()); } - static __host__ __device__ __forceinline__ half_t Lowest() + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t lowest() { - return half_t::lowest(); + return half_t(numeric_limits<__half>::lowest()); } }; +_LIBCUDACXX_END_NAMESPACE_STD template <> struct CUB_NS_QUALIFIER::NumericTraits diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh index 726445ffbc1..229d7ea9a38 100644 --- a/cub/cub/util_type.cuh +++ b/cub/cub/util_type.cuh @@ -888,116 +888,20 @@ struct BaseTraits } }; -template -struct FpLimits; - -template <> -struct FpLimits -{ - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Max() - { - return ::cuda::std::numeric_limits::max(); - } - - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Lowest() - { - return ::cuda::std::numeric_limits::lowest(); - } -}; - -template <> -struct FpLimits -{ - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Max() - { - return ::cuda::std::numeric_limits::max(); - } - - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Lowest() - { - return ::cuda::std::numeric_limits::lowest(); - } -}; - -# if defined(_CCCL_HAS_NVFP16) -template <> -struct FpLimits<__half> -{ - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Max() - { - unsigned short max_word = 0x7BFF; - return reinterpret_cast<__half&>(max_word); - } - - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Lowest() - { - unsigned short lowest_word = 0xFBFF; - return reinterpret_cast<__half&>(lowest_word); - } -}; -# endif // _CCCL_HAS_NVFP16 - -# if defined(_CCCL_HAS_NVBF16) -template <> -struct FpLimits<__nv_bfloat16> -{ - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Max() - { - unsigned short max_word = 0x7F7F; - return reinterpret_cast<__nv_bfloat16&>(max_word); - } - - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Lowest() - { - unsigned short lowest_word = 0xFF7F; - return reinterpret_cast<__nv_bfloat16&>(lowest_word); - } -}; -# endif // _CCCL_HAS_NVBF16 - -# if defined(__CUDA_FP8_TYPES_EXIST__) -template <> -struct FpLimits<__nv_fp8_e4m3> -{ - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Max() - { - unsigned char max_word = 0x7EU; - __nv_fp8_e4m3 ret_val; - memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e4m3)); - return ret_val; - } - - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Lowest() - { - unsigned char lowest_word = 0xFEU; - __nv_fp8_e4m3 ret_val; - memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e4m3)); - return ret_val; - } -}; - -template <> -struct FpLimits<__nv_fp8_e5m2> +template +struct CCCL_DEPRECATED_BECAUSE("Use cuda::std::numeric_limits instead") FpLimits { - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Max() + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max() { - unsigned char max_word = 0x7BU; - __nv_fp8_e5m2 ret_val; - memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e5m2)); - return ret_val; + return ::cuda::std::numeric_limits::max(); } - static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Lowest() + static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest() { - unsigned char lowest_word = 0xFBU; - __nv_fp8_e5m2 ret_val; - memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e5m2)); - return ret_val; + return ::cuda::std::numeric_limits::lowest(); } }; -# endif // __CUDA_FP8_TYPES_EXIST__ - /** * Basic type traits (fp primitive specialization) */ @@ -1027,12 +931,16 @@ struct BaseTraits static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max() { + _CCCL_SUPPRESS_DEPRECATED_PUSH return FpLimits::Max(); + _CCCL_SUPPRESS_DEPRECATED_POP } static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest() { + _CCCL_SUPPRESS_DEPRECATED_PUSH return FpLimits::Lowest(); + _CCCL_SUPPRESS_DEPRECATED_POP } }; diff --git a/cub/test/catch2_test_util_type.cu b/cub/test/catch2_test_util_type.cu index 9e06b57edc9..d09392b1fb8 100644 --- a/cub/test/catch2_test_util_type.cu +++ b/cub/test/catch2_test_util_type.cu @@ -32,6 +32,7 @@ #include #include +#include C2H_TEST("Tests non_void_value_t", "[util][type]") { @@ -80,3 +81,44 @@ C2H_TEST("Test CUB_DEFINE_DETECT_NESTED_TYPE", "[util][type]") STATIC_REQUIRE(cat_detect::value); STATIC_REQUIRE(!cat_detect::value); } + +using types = c2h::type_list< + char, + signed char, + unsigned char, + short, + unsigned short, + int, + unsigned int, + long, + unsigned long, + long long, + unsigned long long, +#if TEST_HALF_T() + __half, + half_t, +#endif // TEST_HALF_T() +#if TEST_BF_T() + __nv_bfloat16, + bfloat16_t, +#endif // TEST_BF_T() + float, + double +#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE + , + long double +#endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE + >; + +C2H_TEST("Test FpLimits agrees with numeric_limits", "[util][type]", types) +{ + using T = c2h::get<0, TestType>; + CAPTURE(c2h::type_name()); + _CCCL_SUPPRESS_DEPRECATED_PUSH + CHECK(cub::FpLimits::Max() == cuda::std::numeric_limits::max()); + CHECK(cub::FpLimits::Lowest() == cuda::std::numeric_limits::lowest()); + + CHECK(cub::FpLimits::Max() == cuda::std::numeric_limits::max()); + CHECK(cub::FpLimits::Lowest() == cuda::std::numeric_limits::lowest()); + _CCCL_SUPPRESS_DEPRECATED_POP +}