diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index e21f78af6b..95fc57319d 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -218,6 +218,8 @@ CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; +CONSTFIX char CreateProgramWithILName[] = "clCreateProgramWithILKHR"; +CONSTFIX char GetKernelSubGroupInfoName[] = "clGetKernelSubGroupInfoKHR"; #undef CONSTFIX @@ -316,6 +318,13 @@ cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, cl_uint num_configs, const cl_command_buffer_update_type_khr *config_types, const void **configs); +using clCreateProgramWithILKHR_fn = CL_API_ENTRY +cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + +using clGetKernelSubGroupInfoKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t, + const void *, size_t, void *, size_t *); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index b9cd7e38fc..8ef5ba4967 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -61,7 +61,8 @@ ur_result_t cl_adapter::checkDeviceExtensions( // doesn't report them. if (isIntelFPGAEmuDevice(Dev) && (Ext == "cl_intel_device_attribute_query" || - Ext == "cl_intel_required_subgroup_size")) { + Ext == "cl_intel_required_subgroup_size" || + Ext == "cl_khr_subgroups")) { Supported = true; continue; } diff --git a/source/adapters/opencl/extension_functions.def b/source/adapters/opencl/extension_functions.def index 98359465ed..3f5e3ea917 100644 --- a/source/adapters/opencl/extension_functions.def +++ b/source/adapters/opencl/extension_functions.def @@ -24,3 +24,5 @@ CL_EXTENSION_FUNC(clCommandFillBufferKHR) CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR) CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR) CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR) +CL_EXTENSION_FUNC(clCreateProgramWithILKHR) +CL_EXTENSION_FUNC(clGetKernelSubGroupInfoKHR) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 617b6a9b2c..f60c8a2715 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// #include "common.hpp" +#include "device.hpp" #include #include @@ -189,11 +190,39 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelSubGroupInfoToCL(propName), - InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + // We need to allow for the possibility that this device runs an older CL and + // supports the original khr subgroup extension. + cl_ext::clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo = nullptr; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + if (DevVer < oclv::V2_1) { + bool SubgroupExtSupported = false; + + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_subgroups"}, + SubgroupExtSupported)); + if (!SubgroupExtSupported) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + cl_context Context = nullptr; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(Context), + &Context, nullptr)); + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + Context, cl_ext::ExtFuncPtrCache->clGetKernelSubGroupInfoKHRCache, + cl_ext::GetKernelSubGroupInfoName, &GetKernelSubGroupInfo)); + } else { + GetKernelSubGroupInfo = clGetKernelSubGroupInfo; + } + + cl_int Ret = GetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 20aaa8fd3a..1682b8c0d0 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -99,16 +99,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - using ApiFuncT = - cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - ApiFuncT FuncPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform, "clCreateProgramWithILKHR")); + cl_ext::clCreateProgramWithILKHR_fn CreateProgramWithIL = nullptr; - assert(FuncPtr != nullptr); + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + cl_adapter::cast(hContext), + cl_ext::ExtFuncPtrCache->clCreateProgramWithILKHRCache, + cl_ext::CreateProgramWithILName, &CreateProgramWithIL)); - *phProgram = cl_adapter::cast( - FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + *phProgram = cl_adapter::cast(CreateProgramWithIL( + cl_adapter::cast(hContext), pIL, length, &Err)); } // INVALID_VALUE is only returned in three circumstances according to the cl