Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use extension version of clGetKernelSubGroupInfo when necessary. #2258

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR";
CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR";
// Names of the KHR extension entry points for IL program creation and
// sub-group info queries. They are passed to getExtFuncFromContext as the
// function name to look up.
CONSTFIX char CreateProgramWithILName[] = "clCreateProgramWithILKHR";
CONSTFIX char GetKernelSubGroupInfoName[] = "clGetKernelSubGroupInfoKHR";

#undef CONSTFIX

Expand Down Expand Up @@ -316,6 +318,13 @@ cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, cl_uint num_configs,
const cl_command_buffer_update_type_khr *config_types,
const void **configs);

// Function-pointer type for the clCreateProgramWithILKHR extension entry
// point. Arguments, in order: the context, a pointer to the IL, its length,
// and an out-parameter receiving the error code; returns the created
// cl_program.
using clCreateProgramWithILKHR_fn = CL_API_ENTRY
cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);

// Function-pointer type for the clGetKernelSubGroupInfoKHR extension entry
// point. Arguments, in order: kernel, device, the sub-group info query, input
// value size, input value, output value size, output value, and an optional
// out-parameter for the returned size. The core clGetKernelSubGroupInfo
// function is assignable to this type, so callers can pick either at runtime.
using clGetKernelSubGroupInfoKHR_fn = CL_API_ENTRY
cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t,
const void *, size_t, void *, size_t *);

template <typename T> struct FuncPtrCache {
std::map<cl_context, T> Map;
std::mutex Mutex;
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/opencl/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ ur_result_t cl_adapter::checkDeviceExtensions(
// doesn't report them.
if (isIntelFPGAEmuDevice(Dev) &&
(Ext == "cl_intel_device_attribute_query" ||
Ext == "cl_intel_required_subgroup_size")) {
Ext == "cl_intel_required_subgroup_size" ||
Ext == "cl_khr_subgroups")) {
Supported = true;
continue;
}
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/opencl/extension_functions.def
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ CL_EXTENSION_FUNC(clCommandFillBufferKHR)
CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR)
CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR)
CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR)
CL_EXTENSION_FUNC(clCreateProgramWithILKHR)
CL_EXTENSION_FUNC(clGetKernelSubGroupInfoKHR)
39 changes: 34 additions & 5 deletions source/adapters/opencl/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//
//===----------------------------------------------------------------------===//
#include "common.hpp"
#include "device.hpp"

#include <algorithm>
#include <cstddef>
Expand Down Expand Up @@ -189,11 +190,39 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
InputValueSize = MaxDims * sizeof(size_t);
}

cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
cl_adapter::cast<cl_device_id>(hDevice),
mapURKernelSubGroupInfoToCL(propName),
InputValueSize, InputValue.get(),
sizeof(size_t), &RetVal, pPropSizeRet);
// We need to allow for the possibility that this device runs an older CL and
// supports the original khr subgroup extension.
cl_ext::clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo = nullptr;

oclv::OpenCLVersion DevVer;
CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(
cl_adapter::cast<cl_device_id>(hDevice), DevVer));

if (DevVer < oclv::V2_1) {
bool SubgroupExtSupported = false;

UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions(
cl_adapter::cast<cl_device_id>(hDevice), {"cl_khr_subgroups"},
SubgroupExtSupported));
if (!SubgroupExtSupported) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
cl_context Context = nullptr;
CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast<cl_kernel>(hKernel),
CL_KERNEL_CONTEXT, sizeof(Context),
&Context, nullptr));
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext(
Context, cl_ext::ExtFuncPtrCache->clGetKernelSubGroupInfoKHRCache,
cl_ext::GetKernelSubGroupInfoName, &GetKernelSubGroupInfo));
} else {
GetKernelSubGroupInfo = clGetKernelSubGroupInfo;
}

cl_int Ret = GetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
cl_adapter::cast<cl_device_id>(hDevice),
mapURKernelSubGroupInfoToCL(propName),
InputValueSize, InputValue.get(),
sizeof(size_t), &RetVal, pPropSizeRet);

if (Ret == CL_INVALID_OPERATION) {
// clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does
Expand Down
15 changes: 7 additions & 8 deletions source/adapters/opencl/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
}
}

using ApiFuncT =
cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
ApiFuncT FuncPtr =
reinterpret_cast<ApiFuncT>(clGetExtensionFunctionAddressForPlatform(
CurPlatform, "clCreateProgramWithILKHR"));
cl_ext::clCreateProgramWithILKHR_fn CreateProgramWithIL = nullptr;

assert(FuncPtr != nullptr);
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext(
cl_adapter::cast<cl_context>(hContext),
cl_ext::ExtFuncPtrCache->clCreateProgramWithILKHRCache,
cl_ext::CreateProgramWithILName, &CreateProgramWithIL));

*phProgram = cl_adapter::cast<ur_program_handle_t>(
FuncPtr(cl_adapter::cast<cl_context>(hContext), pIL, length, &Err));
*phProgram = cl_adapter::cast<ur_program_handle_t>(CreateProgramWithIL(
cl_adapter::cast<cl_context>(hContext), pIL, length, &Err));
}

// INVALID_VALUE is only returned in three circumstances according to the cl
Expand Down
Loading