diff --git a/c2h/generators.cu b/c2h/generators.cu index 8044eabe6fe..e378bbc3119 100644 --- a/c2h/generators.cu +++ b/c2h/generators.cu @@ -478,15 +478,15 @@ template void init_key_segments(const c2h::device_vector& segment_offsets, float* out, std::size_t element_size); template void init_key_segments( const c2h::device_vector& segment_offsets, custom_type_state_t* out, std::size_t element_size); -#ifdef _CCCL_HAS_NVFP16 +#if TEST_HALF_T() template void init_key_segments(const c2h::device_vector& segment_offsets, half_t* out, std::size_t element_size); -#endif // _CCCL_HAS_NVFP16 +#endif // TEST_HALF_T() -#ifdef _CCCL_HAS_NVBF16 +#if TEST_BF_T() template void init_key_segments(const c2h::device_vector& segment_offsets, bfloat16_t* out, std::size_t element_size); -#endif // _CCCL_HAS_NVBF16 +#endif // TEST_BF_T() } // namespace detail template @@ -552,15 +552,15 @@ INSTANTIATE(double); INSTANTIATE(bool); INSTANTIATE(char); -#ifdef _CCCL_HAS_NVFP16 +#if TEST_HALF_T() INSTANTIATE(half_t); INSTANTIATE(__half); -#endif // _CCCL_HAS_NVFP16 +#endif // TEST_HALF_T() -#ifdef _CCCL_HAS_NVBF16 +#if TEST_BF_T() INSTANTIATE(bfloat16_t); INSTANTIATE(__nv_bfloat16); -#endif // _CCCL_HAS_NVBF16 +#endif // TEST_BF_T() #undef INSTANTIATE_RND #undef INSTANTIATE_MOD diff --git a/c2h/include/c2h/extended_types.h b/c2h/include/c2h/extended_types.h index d8e53acd71e..534de8806da 100644 --- a/c2h/include/c2h/extended_types.h +++ b/c2h/include/c2h/extended_types.h @@ -30,21 +30,29 @@ #include #ifndef TEST_HALF_T -# define TEST_HALF_T _CCCL_HAS_NVFP16 -#endif +# if defined(_CCCL_HAS_NVFP16) +# define TEST_HALF_T() 1 +# else // defined(_CCCL_HAS_NVFP16) +# define TEST_HALF_T() 0 +# endif // defined(_CCCL_HAS_NVFP16) +#endif // TEST_HALF_T #ifndef TEST_BF_T -# define TEST_BF_T _CCCL_HAS_NVBF16 -#endif - -#ifdef TEST_HALF_T +# if defined(_CCCL_HAS_NVBF16) +# define TEST_BF_T() 1 +# else // defined(_CCCL_HAS_NVBF16) +# define TEST_BF_T() 0 +# endif // defined(_CCCL_HAS_NVBF16) +#endif // TEST_BF_T + +#if TEST_HALF_T() # include # include -#endif +#endif // TEST_HALF_T() -#ifdef TEST_BF_T +#if TEST_BF_T() # include # include -#endif +#endif // TEST_BF_T() diff --git a/cub/test/catch2_segmented_sort_helper.cuh b/cub/test/catch2_segmented_sort_helper.cuh index f8a081a125a..b2a14038d49 100644 --- a/cub/test/catch2_segmented_sort_helper.cuh +++ b/cub/test/catch2_segmented_sort_helper.cuh @@ -265,21 +265,21 @@ struct unwrap_value_t_impl using type = T; }; -#if TEST_HALF_T +#if TEST_HALF_T() template <> struct unwrap_value_t_impl { using type = __half; }; -#endif +#endif // TEST_HALF_T() -#if TEST_BF_T +#if TEST_BF_T() template <> struct unwrap_value_t_impl { using type = __nv_bfloat16; }; -#endif +#endif // TEST_BF_T() template using unwrap_value_t = typename unwrap_value_t_impl::type; diff --git a/cub/test/catch2_test_device_histogram.cu b/cub/test/catch2_test_device_histogram.cu index e258fb4bcb6..c3a1716866a 100644 --- a/cub/test/catch2_test_device_histogram.cu +++ b/cub/test/catch2_test_device_histogram.cu @@ -69,7 +69,7 @@ auto cast_if_half_pointer(T* p) -> T* return p; } -#if TEST_HALF_T +#if TEST_HALF_T() auto cast_if_half_pointer(half_t* p) -> __half* { return reinterpret_cast<__half*>(p); @@ -79,7 +79,7 @@ auto cast_if_half_pointer(const half_t* p) -> const __half* { return reinterpret_cast(p); } -#endif +#endif // TEST_HALF_T() template using caller_vector = c2h:: @@ -412,9 +412,9 @@ using types = std::uint32_t, std::int64_t, std::uint64_t, -#if TEST_HALF_T +#if TEST_HALF_T() half_t, -#endif +#endif // TEST_HALF_T() float, double>; diff --git a/cub/test/catch2_test_device_radix_sort_keys.cu b/cub/test/catch2_test_device_radix_sort_keys.cu index d09003f7d74..f5323f413f1 100644 --- a/cub/test/catch2_test_device_radix_sort_keys.cu +++ b/cub/test/catch2_test_device_radix_sort_keys.cu @@ -48,6 +48,7 @@ #include "catch2_radix_sort_helper.cuh" #include "catch2_test_launch_helper.h" #include +#include // %PARAM% TEST_LAUNCH lid 0:1:2 @@ -70,12 +71,12 @@ using bit_window_key_types = c2h::type_list; // clang-format on using bit_window_key_types = c2h::type_list; diff --git a/cub/test/catch2_test_device_reduce.cu b/cub/test/catch2_test_device_reduce.cu index d6cfdb11521..2142259cdc6 100644 --- a/cub/test/catch2_test_device_reduce.cu +++ b/cub/test/catch2_test_device_reduce.cu @@ -77,14 +77,13 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T -, type_pair // testing half -#endif -#if TEST_BF_T -, type_pair // testing bf16 - +#if TEST_HALF_T() +, type_pair +#endif // TEST_HALF_T() +#if TEST_BF_T() +, type_pair +#endif // TEST_BF_T() >; -#endif // clang-format on #elif TEST_TYPES == 4 // DPX SIMD instructions diff --git a/cub/test/catch2_test_device_reduce.cuh b/cub/test/catch2_test_device_reduce.cuh index ed9a806d19e..81199ed4551 100644 --- a/cub/test/catch2_test_device_reduce.cuh +++ b/cub/test/catch2_test_device_reduce.cuh @@ -46,7 +46,7 @@ #include #include -#if TEST_HALF_T +#if TEST_HALF_T() // Half support is provided by SM53+. We currently test against a few older architectures. // The specializations below can be removed once we drop these architectures. @@ -107,7 +107,12 @@ __host__ __device__ __forceinline__ // return a; } -#endif // TEST_HALF_T + +CUB_NAMESPACE_END + +#endif // TEST_HALF_T() + +CUB_NAMESPACE_BEGIN /** * @brief Introduces the required NumericTraits for `c2h::custom_type_t`. @@ -173,21 +178,21 @@ struct ExtendedFloatSum return result; } -#if TEST_HALF_T +#if TEST_HALF_T() __host__ __device__ __half operator()(__half a, __half b) const { uint16_t result = this->operator()(half_t{a}, half_t(b)).raw(); return reinterpret_cast<__half&>(result); } -#endif +#endif // TEST_HALF_T() -#if TEST_BF_T +#if TEST_BF_T() __device__ __nv_bfloat16 operator()(__nv_bfloat16 a, __nv_bfloat16 b) const { uint16_t result = this->operator()(bfloat16_t{a}, bfloat16_t(b)).raw(); return reinterpret_cast<__nv_bfloat16&>(result); } -#endif +#endif // TEST_BF_T() }; template @@ -196,7 +201,7 @@ inline It unwrap_it(It it) return it; } -#if TEST_HALF_T +#if TEST_HALF_T() inline __half* unwrap_it(half_t* it) { return reinterpret_cast<__half*>(it); @@ -209,9 +214,9 @@ inline thrust::constant_iterator<__half, OffsetT> unwrap_it(thrust::constant_ite __half val = wrapped_val.operator __half(); return thrust::constant_iterator<__half, OffsetT>(val); } -#endif +#endif // TEST_HALF_T() -#if TEST_BF_T +#if TEST_BF_T() inline __nv_bfloat16* unwrap_it(bfloat16_t* it) { return reinterpret_cast<__nv_bfloat16*>(it); @@ -224,7 +229,7 @@ thrust::constant_iterator<__nv_bfloat16, OffsetT> inline unwrap_it(thrust::const __nv_bfloat16 val = wrapped_val.operator __nv_bfloat16(); return thrust::constant_iterator<__nv_bfloat16, OffsetT>(val); } -#endif +#endif // TEST_BF_T() template using unwrap_value_t = typename std::remove_reference()))>::type; diff --git a/cub/test/catch2_test_device_reduce_by_key.cu b/cub/test/catch2_test_device_reduce_by_key.cu index ee8726219f2..0709caacf64 100644 --- a/cub/test/catch2_test_device_reduce_by_key.cu +++ b/cub/test/catch2_test_device_reduce_by_key.cu @@ -58,12 +58,12 @@ using full_type_list = c2h::type_list, typ // clang-format off using full_type_list = c2h::type_list< type_triple -#if TEST_HALF_T +#if TEST_HALF_T() , type_triple // testing half -#endif -#if TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() , type_triple // testing bf16 -#endif +#endif // TEST_BF_T() >; // clang-format on #endif diff --git a/cub/test/catch2_test_device_scan.cu b/cub/test/catch2_test_device_scan.cu index d9cf517f55d..3dbb505f690 100644 --- a/cub/test/catch2_test_device_scan.cu +++ b/cub/test/catch2_test_device_scan.cu @@ -64,12 +64,12 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T +#if TEST_HALF_T() , type_pair // testing half -#endif -#if TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() , type_pair // testing bf16 -#endif +#endif // TEST_BF_T() >; // clang-format on #endif diff --git a/cub/test/catch2_test_device_scan_by_key.cu b/cub/test/catch2_test_device_scan_by_key.cu index 6942d22d09e..f4c090a7f58 100644 --- a/cub/test/catch2_test_device_scan_by_key.cu +++ b/cub/test/catch2_test_device_scan_by_key.cu @@ -69,12 +69,12 @@ using full_type_list = // clang-format off using full_type_list = c2h::type_list< type_quad -#if TEST_HALF_T +#if TEST_HALF_T() , type_quad // testing half -#endif -#if TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() , type_quad // testing bf16 -#endif +#endif // TEST_BF_T() >; // clang-format on #endif diff --git a/cub/test/catch2_test_device_segmented_radix_sort_keys.cu b/cub/test/catch2_test_device_segmented_radix_sort_keys.cu index fe02c670cbe..224caf9a43b 100644 --- a/cub/test/catch2_test_device_segmented_radix_sort_keys.cu +++ b/cub/test/catch2_test_device_segmented_radix_sort_keys.cu @@ -45,6 +45,7 @@ #include "catch2_radix_sort_helper.cuh" #include "catch2_test_launch_helper.h" #include +#include // TODO replace with DeviceSegmentedRadixSort::SortKeys interface once https://github.com/NVIDIA/cccl/issues/50 is // addressed Temporary wrapper that allows specializing the DeviceSegmentedRadixSort algorithm for different offset @@ -120,12 +121,12 @@ using bit_window_key_types = c2h::type_list; // clang-format on using bit_window_key_types = c2h::type_list; diff --git a/cub/test/catch2_test_device_segmented_reduce.cu b/cub/test/catch2_test_device_segmented_reduce.cu index c524a7ef753..3bbd10eb1fe 100644 --- a/cub/test/catch2_test_device_segmented_reduce.cu +++ b/cub/test/catch2_test_device_segmented_reduce.cu @@ -65,12 +65,12 @@ using full_type_list = c2h::type_list, type_pair>; // clang-format off using full_type_list = c2h::type_list< type_pair -#if TEST_HALF_T +#if TEST_HALF_T() , type_pair // testing half -#endif -#if TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() , type_pair // testing bf16 -#endif +#endif // TEST_BF_T() >; // clang-format on #endif diff --git a/cub/test/catch2_test_device_segmented_sort_keys.cu b/cub/test/catch2_test_device_segmented_sort_keys.cu index 3d392e8e8f6..eb6cbd5eeca 100644 --- a/cub/test/catch2_test_device_segmented_sort_keys.cu +++ b/cub/test/catch2_test_device_segmented_sort_keys.cu @@ -28,6 +28,7 @@ #include "insert_nested_NVTX_range_guard.h" // above header needs to be included first #include +#include #include "catch2_radix_sort_helper.cuh" #include "catch2_segmented_sort_helper.cuh" @@ -43,14 +44,14 @@ using key_types = c2h::type_list; C2H_TEST("DeviceSegmentedSortKeys: No segments", "[keys][segmented][sort][device]") diff --git a/cub/test/catch2_test_device_segmented_sort_pairs.cu b/cub/test/catch2_test_device_segmented_sort_pairs.cu index f24d30dbed1..347a9d7e380 100644 --- a/cub/test/catch2_test_device_segmented_sort_pairs.cu +++ b/cub/test/catch2_test_device_segmented_sort_pairs.cu @@ -40,14 +40,14 @@ using pair_types = c2h::type_list, c2h::type_list, c2h::type_list -#if TEST_HALF_T +#if TEST_HALF_T() , c2h::type_list -#endif -#if TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() , c2h::type_list -#endif +#endif // TEST_BF_T() >; C2H_TEST("DeviceSegmentedSortPairs: No segments", "[pairs][segmented][sort][device]") diff --git a/cub/test/test_util.h b/cub/test/test_util.h index 9a5fefcc69c..6794c0a57ef 100644 --- a/cub/test/test_util.h +++ b/cub/test/test_util.h @@ -434,7 +434,7 @@ inline bool IsNaN(double4 val) return (IsNaN(val.y) || IsNaN(val.x) || IsNaN(val.w) || IsNaN(val.z)); } -#ifdef TEST_HALF_T +#if TEST_HALF_T() template <> inline bool IsNaN(half_t val) { @@ -443,9 +443,9 @@ inline bool IsNaN(half_t val) // commented bit is always true, leaving for documentation: return (((bits >= 0x7C01) && (bits <= 0x7FFF)) || ((bits >= 0xFC01) /*&& (bits <= 0xFFFFFFFF)*/)); } -#endif +#endif // TEST_HALF_T() -#ifdef TEST_BF_T +#if TEST_BF_T() template <> inline bool IsNaN(bfloat16_t val) { @@ -454,7 +454,7 @@ inline bool IsNaN(bfloat16_t val) // commented bit is always true, leaving for documentation: return (((bits >= 0x7F81) && (bits <= 0x7FFF)) || ((bits >= 0xFF81) /*&& (bits <= 0xFFFFFFFF)*/)); } -#endif +#endif // TEST_BF_T() /** * Generates random keys. diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce.cu b/cub/test/thread_reduce/catch2_test_thread_reduce.cu index ba7342db9a5..7cd9928f37b 100644 --- a/cub/test/thread_reduce/catch2_test_thread_reduce.cu +++ b/cub/test/thread_reduce/catch2_test_thread_reduce.cu @@ -247,12 +247,12 @@ struct cub_operator_to_identity> **********************************************************************************************************************/ using narrow_precision_type_list = c2h::type_list< -#ifdef TEST_HALF_T +#if TEST_HALF_T() __half, -#endif -#ifdef TEST_BF_T +#endif // TEST_HALF_T() +#if TEST_BF_T() __nv_bfloat16 -#endif +#endif // TEST_BF_T() >; using fp_type_list = @@ -432,7 +432,7 @@ C2H_TEST("ThreadReduce Floating-Point Type Tests", "[reduce][thread]", fp_type_l } } -#if defined(TEST_HALF_T) || defined(TEST_BF_T) +#if TEST_HALF_T() || TEST_BF_T() C2H_TEST("ThreadReduce Narrow PrecisionType Tests", "[reduce][thread][narrow]", @@ -457,7 +457,7 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests", } } -#endif // defined(TEST_HALF_T) || defined(TEST_BF_T) +#endif // TEST_HALF_T() || TEST_BF_T() #if defined(CCCL_CHECK_SASS)