Skip to content

Commit

Permalink
Deprecate cub::FpLimits in favor of cuda::std::numeric_limits
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jan 31, 2025
1 parent 3e47ee8 commit cf55ae5
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 104 deletions.
25 changes: 25 additions & 0 deletions c2h/include/c2h/bfloat16.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

#include <cub/util_type.cuh>

#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <cstdint>
Expand Down Expand Up @@ -232,6 +233,29 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x)
* Traits overloads
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
// Explicit specialization of the cuda::std numeric_limits implementation class
// for bfloat16_t. Only max(), min() and lowest() are provided here; each one
// queries numeric_limits<__nv_bfloat16> and converts the result to bfloat16_t.
template <>
class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
{
public:
  // numeric_limits<__nv_bfloat16>::lowest(), converted to bfloat16_t.
  static __host__ __device__ __forceinline__ bfloat16_t lowest()
  {
    using raw_limits = numeric_limits<__nv_bfloat16>;
    return bfloat16_t(raw_limits::lowest());
  }

  // numeric_limits<__nv_bfloat16>::min(), converted to bfloat16_t.
  static __host__ __device__ __forceinline__ bfloat16_t min()
  {
    using raw_limits = numeric_limits<__nv_bfloat16>;
    return bfloat16_t(raw_limits::min());
  }

  // numeric_limits<__nv_bfloat16>::max(), converted to bfloat16_t.
  static __host__ __device__ __forceinline__ bfloat16_t max()
  {
    using raw_limits = numeric_limits<__nv_bfloat16>;
    return bfloat16_t(raw_limits::max());
  }
};
_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_SUPPRESS_DEPRECATED_PUSH
template <>
struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
{
Expand All @@ -245,6 +269,7 @@ struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
return bfloat16_t::lowest();
}
};
_CCCL_SUPPRESS_DEPRECATED_POP

template <>
struct CUB_NS_QUALIFIER::NumericTraits<bfloat16_t>
Expand Down
25 changes: 25 additions & 0 deletions c2h/include/c2h/half.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cub/util_type.cuh>

#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <cstdint>
Expand Down Expand Up @@ -327,6 +328,29 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x)
* Traits overloads
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
// Explicit specialization of the cuda::std numeric_limits implementation class
// for half_t. Only max(), min() and lowest() are provided here; each one
// queries numeric_limits<__half> and converts the result to half_t.
template <>
class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
{
public:
  // numeric_limits<__half>::lowest(), converted to half_t.
  static __host__ __device__ __forceinline__ half_t lowest()
  {
    using raw_limits = numeric_limits<__half>;
    return half_t(raw_limits::lowest());
  }

  // numeric_limits<__half>::min(), converted to half_t.
  static __host__ __device__ __forceinline__ half_t min()
  {
    using raw_limits = numeric_limits<__half>;
    return half_t(raw_limits::min());
  }

  // numeric_limits<__half>::max(), converted to half_t.
  static __host__ __device__ __forceinline__ half_t max()
  {
    using raw_limits = numeric_limits<__half>;
    return half_t(raw_limits::max());
  }
};
_LIBCUDACXX_END_NAMESPACE_STD

_CCCL_SUPPRESS_DEPRECATED_PUSH
template <>
struct CUB_NS_QUALIFIER::FpLimits<half_t>
{
Expand All @@ -340,6 +364,7 @@ struct CUB_NS_QUALIFIER::FpLimits<half_t>
return half_t::lowest();
}
};
_CCCL_SUPPRESS_DEPRECATED_POP

template <>
struct CUB_NS_QUALIFIER::NumericTraits<half_t>
Expand Down
112 changes: 8 additions & 104 deletions cub/cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -888,116 +888,20 @@ struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
}
};

// Declaration of the FpLimits class template. No definition is given at this
// point; the explicit specializations that follow supply static Max() and
// Lowest() members for individual floating-point types.
template <typename _T>
struct FpLimits;

// FpLimits specialization for float. Both members simply forward to
// ::cuda::std::numeric_limits<float>.
template <>
struct FpLimits<float>
{
  // Returns ::cuda::std::numeric_limits<float>::max().
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Max()
  {
    using limits = ::cuda::std::numeric_limits<float>;
    return limits::max();
  }

  // Returns ::cuda::std::numeric_limits<float>::lowest().
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Lowest()
  {
    using limits = ::cuda::std::numeric_limits<float>;
    return limits::lowest();
  }
};

// FpLimits specialization for double. Both members simply forward to
// ::cuda::std::numeric_limits<double>.
template <>
struct FpLimits<double>
{
  // Returns ::cuda::std::numeric_limits<double>::max().
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Max()
  {
    using limits = ::cuda::std::numeric_limits<double>;
    return limits::max();
  }

  // Returns ::cuda::std::numeric_limits<double>::lowest().
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Lowest()
  {
    using limits = ::cuda::std::numeric_limits<double>;
    return limits::lowest();
  }
};

# if defined(_CCCL_HAS_NVFP16)
// FpLimits specialization for __half. The limits are produced from fixed
// 16-bit patterns: 0x7BFF for Max() and 0xFBFF (same pattern with the sign bit
// set) for Lowest().
template <>
struct FpLimits<__half>
{
  // Returns the __half whose object representation is 0x7BFF.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Max()
  {
    static_assert(sizeof(__half) == sizeof(unsigned short), "__half must be exactly 16 bits wide");
    const unsigned short max_word = 0x7BFF;
    __half ret_val;
    // Copy the bits rather than reading the integer through a __half reference:
    // the reinterpret_cast form accesses an unsigned short object through an
    // unrelated type, which violates the aliasing rules. memcpy matches what
    // the fp8 specializations in this file already do.
    memcpy(&ret_val, &max_word, sizeof(__half));
    return ret_val;
  }

  // Returns the __half whose object representation is 0xFBFF.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Lowest()
  {
    static_assert(sizeof(__half) == sizeof(unsigned short), "__half must be exactly 16 bits wide");
    const unsigned short lowest_word = 0xFBFF;
    __half ret_val;
    // Bit copy instead of reinterpret_cast type punning (see Max()).
    memcpy(&ret_val, &lowest_word, sizeof(__half));
    return ret_val;
  }
};
# endif // _CCCL_HAS_NVFP16

# if defined(_CCCL_HAS_NVBF16)
// FpLimits specialization for __nv_bfloat16. The limits are produced from
// fixed 16-bit patterns: 0x7F7F for Max() and 0xFF7F (same pattern with the
// sign bit set) for Lowest().
template <>
struct FpLimits<__nv_bfloat16>
{
  // Returns the __nv_bfloat16 whose object representation is 0x7F7F.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Max()
  {
    static_assert(sizeof(__nv_bfloat16) == sizeof(unsigned short), "__nv_bfloat16 must be exactly 16 bits wide");
    const unsigned short max_word = 0x7F7F;
    __nv_bfloat16 ret_val;
    // Copy the bits rather than reading the integer through a __nv_bfloat16
    // reference: the reinterpret_cast form accesses an unsigned short object
    // through an unrelated type, which violates the aliasing rules. memcpy
    // matches what the fp8 specializations in this file already do.
    memcpy(&ret_val, &max_word, sizeof(__nv_bfloat16));
    return ret_val;
  }

  // Returns the __nv_bfloat16 whose object representation is 0xFF7F.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Lowest()
  {
    static_assert(sizeof(__nv_bfloat16) == sizeof(unsigned short), "__nv_bfloat16 must be exactly 16 bits wide");
    const unsigned short lowest_word = 0xFF7F;
    __nv_bfloat16 ret_val;
    // Bit copy instead of reinterpret_cast type punning (see Max()).
    memcpy(&ret_val, &lowest_word, sizeof(__nv_bfloat16));
    return ret_val;
  }
};
# endif // _CCCL_HAS_NVBF16

# if defined(__CUDA_FP8_TYPES_EXIST__)
// FpLimits specialization for __nv_fp8_e4m3. Each limit is built by copying a
// fixed one-byte pattern into a default-constructed value: 0x7E for Max() and
// 0xFE for Lowest().
template <>
struct FpLimits<__nv_fp8_e4m3>
{
  // Returns the __nv_fp8_e4m3 whose single byte is 0x7E.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Max()
  {
    __nv_fp8_e4m3 result;
    unsigned char bits = 0x7EU;
    memcpy(&result, &bits, sizeof(__nv_fp8_e4m3));
    return result;
  }

  // Returns the __nv_fp8_e4m3 whose single byte is 0xFE.
  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Lowest()
  {
    __nv_fp8_e4m3 result;
    unsigned char bits = 0xFEU;
    memcpy(&result, &bits, sizeof(__nv_fp8_e4m3));
    return result;
  }
};

template <>
struct FpLimits<__nv_fp8_e5m2>
template <typename T>
struct CCCL_DEPRECATED_BECAUSE("Use cuda::std::numeric_limits instead") FpLimits
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Max()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
{
unsigned char max_word = 0x7BU;
__nv_fp8_e5m2 ret_val;
memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e5m2));
return ret_val;
return ::cuda::std::numeric_limits<T>::max();
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Lowest()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
{
unsigned char lowest_word = 0xFBU;
__nv_fp8_e5m2 ret_val;
memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e5m2));
return ret_val;
return ::cuda::std::numeric_limits<T>::lowest();
}
};

# endif // __CUDA_FP8_TYPES_EXIST__

/**
* Basic type traits (fp primitive specialization)
*/
Expand Down Expand Up @@ -1027,12 +931,12 @@ struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
{
return FpLimits<T>::Max();
return ::cuda::std::numeric_limits<T>::max();
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
{
return FpLimits<T>::Lowest();
return ::cuda::std::numeric_limits<T>::lowest();
}
};

Expand Down

0 comments on commit cf55ae5

Please sign in to comment.