From d83d166cbf598f74a0f911ec18050aab433bd9a4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 14 Nov 2024 18:37:33 +0100 Subject: [PATCH 1/6] GPU: Fix debug message --- .../Base/opencl-common/GPUReconstructionOCL.cxx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx index 0e1c94eced7e3..de32f03340c03 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cxx @@ -115,16 +115,17 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime() clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, nullptr); clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Available Platform %d: (%s %s) %s %s", i_platform, platform_profile, platform_version, platform_vendor, platform_name); - } + const char* platformUsageInfo = ""; if (!found && CheckPlatform(i_platform)) { found = true; mInternals->platform = mInternals->platforms[i_platform]; if (mProcessingSettings.debugLevel >= 2) { - GPUInfo(" Using this platform"); + platformUsageInfo = " !!! Using this platform !!!"; } } + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Available Platform %d: (%s %s) %s %s%s", i_platform, platform_profile, platform_version, platform_vendor, platform_name, platformUsageInfo); + } } } From 65dd0a4232afa234c4a07aff28a3bb257f56b61a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 15 Nov 2024 23:46:53 +0100 Subject: [PATCH 2/6] GPU: Clarify file name --- .../Base/GPUReconstructionIncludesDeviceAll.template.h | 4 ---- ...nels.template.h => GPUReconstructionKernelList.template.h} | 0 GPU/GPUTracking/CMakeLists.txt | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) rename GPU/GPUTracking/Base/{GPUReconstructionKernels.template.h => GPUReconstructionKernelList.template.h} (100%) diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h index 8a23a6792dcd1..4822332a1839c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h @@ -25,10 +25,6 @@ namespace gpu } // namespace GPUCA_NAMESPACE using namespace GPUCA_NAMESPACE::gpu; -#if !defined(GPUCA_OPENCL1) && (!defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE)) -#define GPUCA_KRNL_NOOCL1 -#endif - // clang-format off $>,APPEND,">,PREPEND,#include ">, > diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.template.h b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h similarity index 100% rename from GPU/GPUTracking/Base/GPUReconstructionKernels.template.h rename to GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index f97e966287d41..937346fe478c3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -265,7 +265,7 @@ endif() file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionKernelList.h - INPUT Base/GPUReconstructionKernels.template.h + INPUT Base/GPUReconstructionKernelList.template.h ) file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionKernelIncludes.h From d94bdd098f8974e83873a919b5eb77c759b80386 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 16 Nov 2024 00:13:52 +0100 Subject: [PATCH 3/6] GPU OpenCL: subgroup functions not defined for int8 --- GPU/Common/GPUCommonAlgorithm.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index f86bd42fe82f6..e5a963b4c2020 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -338,8 +338,29 @@ GPUdi() void GPUCommonAlgorithm::swap(T& a, T& b) // Nothing to do, work_group functions available #pragma OPENCL EXTENSION cl_khr_subgroups : enable -#define warp_scan_inclusive_add(v) sub_group_scan_inclusive_add(v) -#define warp_broadcast(v, i) sub_group_broadcast(v, i) +template +GPUdi() T work_group_scan_inclusive_add_FUNC(T v) +{ + return sub_group_scan_inclusive_add(v); +} +template <> // FIXME: It seems OpenCL does not support 8 and 16 bit subgroup operations +GPUdi() uint8_t work_group_scan_inclusive_add_FUNC(uint8_t v) +{ + return sub_group_scan_inclusive_add((uint32_t)v); +} +template +GPUdi() T work_group_broadcast_FUNC(T v, int32_t i) +{ + return sub_group_broadcast(v, i); +} +template <> +GPUdi() uint8_t work_group_broadcast_FUNC(uint8_t v, int32_t i) +{ + return sub_group_broadcast((uint32_t)v, i); +} + +#define warp_scan_inclusive_add(v) work_group_scan_inclusive_add_FUNC(v) +#define warp_broadcast(v, i) work_group_broadcast_FUNC(v, i) #elif (defined(__CUDACC__) || defined(__HIPCC__)) // CUDA and HIP work the same way using cub, need just different header From c8f1b1b1ee32203a12c3f6ad67565996c9b8cd05 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 16 Nov 2024 00:43:14 +0100 Subject: [PATCH 4/6] GPU OpenCL: Fix RTC source generation --- GPU/GPUTracking/Base/opencl2/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt b/GPU/GPUTracking/Base/opencl2/CMakeLists.txt index 0a4168b130766..73062ad82f728 100644 --- a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl2/CMakeLists.txt @@ -53,7 +53,7 @@ if(OPENCL2_ENABLED_SPIRV) # BUILD OpenCL2 intermediate code for SPIR-V target MAIN_DEPENDENCY ${CL_SRC} IMPLICIT_DEPENDS CXX ${CL_SRC} COMMAND_EXPAND_LISTS - COMMENT "Compiling OpenCL2 CL source file ${CL_SRC} to SPIRV") + COMMENT "Compiling OpenCL2 CL source file ${CL_SRC} to SPIRV ${CL_BIN}.spirv") create_binary_resource(${CL_BIN}.spirv ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) set(SRCS ${SRCS} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.spirv.o) @@ -64,12 +64,14 @@ if(OPENCL2_ENABLED) # BUILD OpenCL2 source code for runtime compilation target add_custom_command( OUTPUT ${CL_BIN}.src COMMAND ${LLVM_CLANG} - ${OCL_DEFINECL} -cl-no-stdinc + ${OCL_FLAGS} + ${OCL_DEFINECL} + -cl-no-stdinc -E ${CL_SRC} > ${CL_BIN}.src MAIN_DEPENDENCY ${CL_SRC} IMPLICIT_DEPENDS CXX ${CL_SRC} COMMAND_EXPAND_LISTS - COMMENT "Preparing OpenCL2 CL source file for run time compilation") + COMMENT "Preparing OpenCL2 CL source file for run time compilation ${CL_BIN}.src") create_binary_resource(${CL_BIN}.src ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) set(SRCS ${SRCS} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.src.o) From 4836c7a614b89aa3eba435616134a91b59e69215 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 16 Nov 2024 01:11:49 +0100 Subject: [PATCH 5/6] GPU OpenCL: OpenCL >=2 should use generic address space for pointer kernel arguments, otherwise clang fails to derive the address space if used in a variadic template --- GPU/GPUTracking/Definitions/GPUDef.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index abc6bb04291d3..38784b1ded80e 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -25,7 +25,11 @@ // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) #ifdef __OPENCL__ #define GPUPtr1(a, b) uint64_t b - #define GPUPtr2(a, b) ((__global a) (a) b) + #ifdef __OPENCLCPP__ + #define GPUPtr2(a, b) ((__generic a) (a) b) + #else + #define GPUPtr2(a, b) ((__global a) (a) b) + #endif #else #define GPUPtr1(a, b) a b #define GPUPtr2(a, b) b From a6ef46360908b61e8c0547e3a64424563bec9789 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 16 Nov 2024 01:41:51 +0100 Subject: [PATCH 6/6] GPU OpenCL: Workaround for some clang name mangling issues --- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 4 ++-- GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 25be5b3647d57..8220b743dde0e 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -78,7 +78,7 @@ struct refitTrackTypes { } // anonymous namespace template <> -GPUd() void GPUTrackingRefit::initProp(GPUTPCGMPropagator& prop) +GPUd() void GPUTrackingRefit::initProp(GPUTPCGMPropagator& prop) // FIXME: GPUgeneric() needed to make the clang spirv output link correctly { prop.SetMaterialTPC(); prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI); @@ -91,7 +91,7 @@ GPUd() void GPUTrackingRefit::initProp(GPUTPCGMPropagator& p } template <> -GPUd() void GPUTrackingRefit::initProp(const Propagator*& prop) +GPUd() void GPUTrackingRefit::initProp(const Propagator*& prop) // FIXME: GPUgeneric() needed to make the clang spirv output link correctly { prop = mPpropagator; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index ba17b88436845..05e75232297a3 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -585,8 +585,8 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() MEM_GLO #endif // GPUCA_GPUCODE #if !defined(__OPENCL1__) -template <> -GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) +template <> // FIXME: GPUgeneric() needed to make the clang spirv output link correctly +GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { GPUTPCThreadMemory rMem; rMem.mISH = iTracklet;