diff --git a/libcudacxx/include/cuda/__ptx/ptx_helper_functions.h b/libcudacxx/include/cuda/__ptx/ptx_helper_functions.h index b536a87fb63..9d9c7b7c8d4 100644 --- a/libcudacxx/include/cuda/__ptx/ptx_helper_functions.h +++ b/libcudacxx/include/cuda/__ptx/ptx_helper_functions.h @@ -27,6 +27,8 @@ #include #include +#if _CCCL_HAS_CUDA_COMPILER + _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX /************************************************************* @@ -101,9 +103,9 @@ inline _CCCL_DEVICE _Tp* __from_ptr_gmem(_CUDA_VSTD::size_t __ptr) template inline _CCCL_DEVICE _CUDA_VSTD::uint32_t __as_b32(_Tp __val) { -#if _CCCL_STD_VER >= 2017 +# if _CCCL_STD_VER >= 2017 static_assert(sizeof(_Tp) == 4, ""); -#endif // _CCCL_STD_VER >= 2017 +# endif // _CCCL_STD_VER >= 2017 // Consider using std::bitcast return *reinterpret_cast<_CUDA_VSTD::uint32_t*>(&__val); } @@ -111,13 +113,15 @@ inline _CCCL_DEVICE _CUDA_VSTD::uint32_t __as_b32(_Tp __val) template inline _CCCL_DEVICE _CUDA_VSTD::uint64_t __as_b64(_Tp __val) { -#if _CCCL_STD_VER >= 2017 +# if _CCCL_STD_VER >= 2017 static_assert(sizeof(_Tp) == 8, ""); -#endif // _CCCL_STD_VER >= 2017 +# endif // _CCCL_STD_VER >= 2017 // Consider using std::bitcast return *reinterpret_cast<_CUDA_VSTD::uint64_t*>(&__val); } _LIBCUDACXX_END_NAMESPACE_CUDA_PTX +#endif // _CCCL_HAS_CUDA_COMPILER + #endif // _CUDA_PTX_HELPER_FUNCTIONS_H_ diff --git a/libcudacxx/include/cuda/discard_memory b/libcudacxx/include/cuda/discard_memory index 6da2ea209c4..5177b7ee407 100644 --- a/libcudacxx/include/cuda/discard_memory +++ b/libcudacxx/include/cuda/discard_memory @@ -21,11 +21,12 @@ # pragma system_header #endif // no system header +#include #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, size_t __nbytes) noexcept +inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, _CUDA_VSTD::size_t __nbytes) noexcept { // The discard PTX instruction is only available with PTX ISA 7.4 and later #if 
__cccl_ptx_isa < 740ULL diff --git a/libcudacxx/include/cuda/pipeline b/libcudacxx/include/cuda/pipeline index d034c931644..c9ee75ae111 100644 --- a/libcudacxx/include/cuda/pipeline +++ b/libcudacxx/include/cuda/pipeline @@ -141,6 +141,8 @@ # pragma system_header #endif // no system header +#include +#include #include #include #include diff --git a/libcudacxx/include/cuda/std/__exception/cuda_error.h b/libcudacxx/include/cuda/std/__exception/cuda_error.h index 40af7d6c3e6..fdc32cf0571 100644 --- a/libcudacxx/include/cuda/std/__exception/cuda_error.h +++ b/libcudacxx/include/cuda/std/__exception/cuda_error.h @@ -22,10 +22,6 @@ # pragma system_header #endif // no system header -#if _CCCL_CUDA_COMPILER(CLANG) -# include -#endif // _CCCL_CUDA_COMPILER(CLANG) - #include #if !_CCCL_COMPILER(NVRTC) @@ -40,8 +36,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA /** * @brief Exception thrown when a CUDA error is encountered. */ -#if _CCCL_HAS_CUDA_COMPILER -# ifndef _CCCL_NO_EXCEPTIONS +#ifndef _CCCL_NO_EXCEPTIONS class cuda_error : public ::std::runtime_error { private: @@ -50,37 +45,36 @@ class cuda_error : public ::std::runtime_error char __buffer[256]; }; - static char* __format_cuda_error(::cudaError_t __status, const char* __msg, char* __msg_buffer) noexcept + static char* __format_cuda_error(const int __status, const char* __msg, char* __msg_buffer) noexcept { ::snprintf(__msg_buffer, 256, "cudaError %d: %s", __status, __msg); return __msg_buffer; } public: - cuda_error(::cudaError_t __status, const char* __msg, __msg_storage __msg_buffer = {0}) noexcept + cuda_error(const int __status, const char* __msg, __msg_storage __msg_buffer = {0}) noexcept : ::std::runtime_error(__format_cuda_error(__status, __msg, __msg_buffer.__buffer)) {} }; -_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(::cudaError_t __status, const char* __msg) +_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(const int __status, const char* __msg) { NV_IF_ELSE_TARGET(NV_IS_HOST, 
(throw ::cuda::cuda_error(__status, __msg);), ((void) __status; (void) __msg; _CUDA_VSTD_NOVERSION::terminate();)) } -# else // ^^^ !_CCCL_NO_EXCEPTIONS ^^^ / vvv _CCCL_NO_EXCEPTIONS vvv +#else // ^^^ !_CCCL_NO_EXCEPTIONS ^^^ / vvv _CCCL_NO_EXCEPTIONS vvv class cuda_error { public: - _LIBCUDACXX_HIDE_FROM_ABI cuda_error(::cudaError_t, const char*) noexcept {} + _LIBCUDACXX_HIDE_FROM_ABI cuda_error(const int, const char*) noexcept {} }; -_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(::cudaError_t, const char*) +_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(const int, const char*) { _CUDA_VSTD_NOVERSION::terminate(); } -# endif // _CCCL_NO_EXCEPTIONS -#endif // _CCCL_CUDA_COMPILER +#endif // _CCCL_NO_EXCEPTIONS _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/detail/__access_property b/libcudacxx/include/cuda/std/detail/__access_property index 9ab8eac51d8..8d85b166938 100644 --- a/libcudacxx/include/cuda/std/detail/__access_property +++ b/libcudacxx/include/cuda/std/detail/__access_property @@ -129,6 +129,8 @@ * (v. August 20, 2021) */ +#include + _LIBCUDACXX_BEGIN_NAMESPACE_CUDA namespace __detail_ap @@ -136,12 +138,12 @@ namespace __detail_ap _CCCL_HOST_DEVICE constexpr uint32_t __ap_floor_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : 1 + __ap_floor_log2(__x >> 1); + return ((__x == 1) || (__x == 0)) ? 0 : 1 + __ap_floor_log2(__x >> 1); } _CCCL_HOST_DEVICE constexpr uint32_t __ap_ceil_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1; + return ((__x == 1) || (__x == 0)) ? 
0 : __ap_floor_log2(__x - 1) + 1; } _CCCL_HOST_DEVICE constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept @@ -429,7 +431,7 @@ _CCCL_HOST_DEVICE constexpr std::uint64_t __block( cudaAccessProperty __hit_prop, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) { - return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes) + return (__total_bytes <= (size_t{0xFFFFFFFF}) && __total_bytes != 0 && __hit_bytes <= __total_bytes) ? __sm_80::__block_descriptor_builder( reinterpret_cast(__ptr), __hit_bytes, diff --git a/libcudacxx/include/cuda/std/detail/__annotated_ptr b/libcudacxx/include/cuda/std/detail/__annotated_ptr index 1991fdab2e2..7a477245cc4 100644 --- a/libcudacxx/include/cuda/std/detail/__annotated_ptr +++ b/libcudacxx/include/cuda/std/detail/__annotated_ptr @@ -137,15 +137,16 @@ namespace __detail_ap template _CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop) { +#if _CCCL_HAS_CUDA_COMPILER if (std::is_same<_Property, access_property::shared>::value == true) { bool __b = __isShared(__ptr); _CCCL_ASSERT(__b, ""); -#if defined(_CCCL_BUILTIN_ASSUME) +# if defined(_CCCL_BUILTIN_ASSUME) _CCCL_BUILTIN_ASSUME(__b); -#else // ^^^ _CCCL_BUILTIN_ASSUME ^^^ / vvv !_CCCL_BUILTIN_ASSUME vvv +# else // ^^^ _CCCL_BUILTIN_ASSUME ^^^ / vvv !_CCCL_BUILTIN_ASSUME vvv (void) __b; -#endif // !_CCCL_BUILTIN_ASSUME +# endif // !_CCCL_BUILTIN_ASSUME } else if (std::is_same<_Property, access_property::global>::value == true || std::is_same<_Property, access_property::normal>::value == true @@ -155,12 +156,13 @@ _CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop) { bool __b = __isGlobal(__ptr); _CCCL_ASSERT(__b, ""); -#if defined(_CCCL_BUILTIN_ASSUME) +# if defined(_CCCL_BUILTIN_ASSUME) _CCCL_BUILTIN_ASSUME(__b); -#else // ^^^ !_CCCL_BUILTIN_ASSUME ^^^ / vvv _CCCL_BUILTIN_ASSUME vvv +# else // ^^^ _CCCL_BUILTIN_ASSUME ^^^ / vvv !_CCCL_BUILTIN_ASSUME vvv (void) __b; -#endif //
!_CCCL_BUILTIN_ASSUME +# endif // !_CCCL_BUILTIN_ASSUME } +#endif // _CCCL_HAS_CUDA_COMPILER return __ptr; } @@ -174,6 +176,7 @@ _CCCL_DEVICE void* __associate_descriptor(void* __ptr, __Prop __prop) template <> inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, std::uint64_t __prop) { + (void) __prop; NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80, (return __nv_associate_access_property(__ptr, __prop);), (return __ptr;)) } diff --git a/libcudacxx/include/cuda/stream_ref b/libcudacxx/include/cuda/stream_ref index a8b044909eb..857a35f6da4 100644 --- a/libcudacxx/include/cuda/stream_ref +++ b/libcudacxx/include/cuda/stream_ref @@ -38,9 +38,6 @@ private: } // cuda */ -#include -// cuda_runtime_api needs to come first - #include #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) @@ -51,6 +48,8 @@ private: # pragma system_header #endif // no system header +#include + #include #include #include diff --git a/libcudacxx/test/public_headers_host_only/CMakeLists.txt b/libcudacxx/test/public_headers_host_only/CMakeLists.txt index 1bc51d17e10..3cc44d510c0 100644 --- a/libcudacxx/test/public_headers_host_only/CMakeLists.txt +++ b/libcudacxx/test/public_headers_host_only/CMakeLists.txt @@ -2,12 +2,18 @@ # without anything else but also pretents to be a std header add_custom_target(libcudacxx.test.public_headers_host_only) +if ("NVHPC" STREQUAL "${CMAKE_CXX_COMPILER_ID}") + find_package(NVHPC) +else() + find_package(CUDAToolkit) +endif() + # Grep all public headers file(GLOB public_headers_host_only LIST_DIRECTORIES false RELATIVE "${libcudacxx_SOURCE_DIR}/include/" CONFIGURE_DEPENDS - "${libcudacxx_SOURCE_DIR}/include/cuda/std/*" + "${libcudacxx_SOURCE_DIR}/include/cuda/*" ) # mdspan is currently not supported on msvc outside of C++20 @@ -36,6 +42,13 @@ function(libcudacxx_add_std_header_test header) target_compile_definitions(headertest_std_${header_name} PRIVATE CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING) endif() + # We want to ensure that we can build headers within cuda/ with a host
compiler but we need cuda_runtime_api.h + if ("NVHPC" STREQUAL "${CMAKE_CXX_COMPILER_ID}") + target_link_libraries(headertest_std_${header_name} NVHPC::CUDART) + else() + target_link_libraries(headertest_std_${header_name} CUDA::cudart) + endif() + add_dependencies(libcudacxx.test.public_headers_host_only headertest_std_${header_name}) endfunction()