Deprecate cub::FpLimits in favor of cuda::std::numeric_limits

NVIDIA · Jan 31, 2025 · cf55ae5 · cf55ae5
1 parent 3e47ee8
commit cf55ae5
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 104 deletions.
diff --git a/c2h/include/c2h/bfloat16.cuh b/c2h/include/c2h/bfloat16.cuh
@@ -36,6 +36,7 @@
 
 #include <cub/util_type.cuh>
 
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
 #include <cstdint>
@@ -232,6 +233,29 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x)
  * Traits overloads
  ******************************************************************************/
 
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+template <>
+class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
+{
+public:
+  static __host__ __device__ __forceinline__ bfloat16_t max()
+  {
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::max());
+  }
+
+  static __host__ __device__ __forceinline__ bfloat16_t min()
+  {
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::min());
+  }
+
+  static __host__ __device__ __forceinline__ bfloat16_t lowest()
+  {
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::lowest());
+  }
+};
+_LIBCUDACXX_END_NAMESPACE_STD
+
+_CCCL_SUPPRESS_DEPRECATED_PUSH
 template <>
 struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
 {
@@ -245,6 +269,7 @@ struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
     return bfloat16_t::lowest();
   }
 };
+_CCCL_SUPPRESS_DEPRECATED_POP
 
 template <>
 struct CUB_NS_QUALIFIER::NumericTraits<bfloat16_t>

diff --git a/c2h/include/c2h/half.cuh b/c2h/include/c2h/half.cuh
@@ -37,6 +37,7 @@
 
 #include <cub/util_type.cuh>
 
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
 #include <cstdint>
@@ -327,6 +328,29 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x)
  * Traits overloads
  ******************************************************************************/
 
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+template <>
+class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
+{
+public:
+  static __host__ __device__ __forceinline__ half_t max()
+  {
+    return half_t(numeric_limits<__half>::max());
+  }
+
+  static __host__ __device__ __forceinline__ half_t min()
+  {
+    return half_t(numeric_limits<__half>::min());
+  }
+
+  static __host__ __device__ __forceinline__ half_t lowest()
+  {
+    return half_t(numeric_limits<__half>::lowest());
+  }
+};
+_LIBCUDACXX_END_NAMESPACE_STD
+
+_CCCL_SUPPRESS_DEPRECATED_PUSH
 template <>
 struct CUB_NS_QUALIFIER::FpLimits<half_t>
 {
@@ -340,6 +364,7 @@ struct CUB_NS_QUALIFIER::FpLimits<half_t>
     return half_t::lowest();
   }
 };
+_CCCL_SUPPRESS_DEPRECATED_POP
 
 template <>
 struct CUB_NS_QUALIFIER::NumericTraits<half_t>

diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh
@@ -888,116 +888,20 @@ struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
   }
 };
 
-template <typename _T>
-struct FpLimits;
-
-template <>
-struct FpLimits<float>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Max()
-  {
-    return ::cuda::std::numeric_limits<float>::max();
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Lowest()
-  {
-    return ::cuda::std::numeric_limits<float>::lowest();
-  }
-};
-
-template <>
-struct FpLimits<double>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Max()
-  {
-    return ::cuda::std::numeric_limits<double>::max();
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Lowest()
-  {
-    return ::cuda::std::numeric_limits<double>::lowest();
-  }
-};
-
-#  if defined(_CCCL_HAS_NVFP16)
-template <>
-struct FpLimits<__half>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Max()
-  {
-    unsigned short max_word = 0x7BFF;
-    return reinterpret_cast<__half&>(max_word);
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Lowest()
-  {
-    unsigned short lowest_word = 0xFBFF;
-    return reinterpret_cast<__half&>(lowest_word);
-  }
-};
-#  endif // _CCCL_HAS_NVFP16
-
-#  if defined(_CCCL_HAS_NVBF16)
-template <>
-struct FpLimits<__nv_bfloat16>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Max()
-  {
-    unsigned short max_word = 0x7F7F;
-    return reinterpret_cast<__nv_bfloat16&>(max_word);
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Lowest()
-  {
-    unsigned short lowest_word = 0xFF7F;
-    return reinterpret_cast<__nv_bfloat16&>(lowest_word);
-  }
-};
-#  endif // _CCCL_HAS_NVBF16
-
-#  if defined(__CUDA_FP8_TYPES_EXIST__)
-template <>
-struct FpLimits<__nv_fp8_e4m3>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Max()
-  {
-    unsigned char max_word = 0x7EU;
-    __nv_fp8_e4m3 ret_val;
-    memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e4m3));
-    return ret_val;
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Lowest()
-  {
-    unsigned char lowest_word = 0xFEU;
-    __nv_fp8_e4m3 ret_val;
-    memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e4m3));
-    return ret_val;
-  }
-};
-
-template <>
-struct FpLimits<__nv_fp8_e5m2>
+template <typename T>
+struct CCCL_DEPRECATED_BECAUSE("Use cuda::std::numeric_limits instead") FpLimits
 {
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Max()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
   {
-    unsigned char max_word = 0x7BU;
-    __nv_fp8_e5m2 ret_val;
-    memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e5m2));
-    return ret_val;
+    return ::cuda::std::numeric_limits<T>::max();
   }
 
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Lowest()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
   {
-    unsigned char lowest_word = 0xFBU;
-    __nv_fp8_e5m2 ret_val;
-    memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e5m2));
-    return ret_val;
+    return ::cuda::std::numeric_limits<T>::lowest();
   }
 };
 
-#  endif // __CUDA_FP8_TYPES_EXIST__
-
 /**
  * Basic type traits (fp primitive specialization)
  */
@@ -1027,12 +931,12 @@ struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>
 
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
   {
-    return FpLimits<T>::Max();
+    return ::cuda::std::numeric_limits<T>::max();
   }
 
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
   {
-    return FpLimits<T>::Lowest();
+    return ::cuda::std::numeric_limits<T>::lowest();
   }
 };