Skip to content

Commit

Permalink
[EXP][Command-buffer] OpenCL kernel command update
Browse files Browse the repository at this point in the history
Implement the API for updating the kernel commands in a command-buffer
defined by oneapi-src#1089 for
the OpenCL adapter.

This depends on support for the
[cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch)
extension.

Tested on Intel GPU OpenCL implementations with the
[command-buffer emulation
layer](https://github.com/bashbaug/SimpleOpenCLSamples/tree/main/layers/10_cmdbufemu).

```bash
$ OPENCL_LAYERS=<path/to/SimpleOpenCLSamples/build/layers/10_cmdbufemu/libCmdBufEmu.so> ./bin/test-exp_command_buffer --platform="Intel(R) OpenCL Graphics"
```
  • Loading branch information
EwanC committed Feb 15, 2024
1 parent f11823e commit 6abac95
Show file tree
Hide file tree
Showing 11 changed files with 478 additions and 82 deletions.
347 changes: 279 additions & 68 deletions source/adapters/opencl/command_buffer.cpp

Large diffs are not rendered by default.

83 changes: 81 additions & 2 deletions source/adapters/opencl/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,93 @@
#include <CL/cl_ext.h>
#include <ur/ur.hpp>

/// Handle to a kernel command.
struct ur_exp_command_buffer_command_handle_t_ {
/// Command-buffer this command belongs to.
ur_exp_command_buffer_handle_t hCommandBuffer;
/// OpenCL command-handle.
cl_mutable_command_khr CLMutableCommand;
/// Work-dimension the command was originally created with.
cl_uint WorkDim;
/// Internal & External reference counts.
/// We need to maintain these because in OpenCL a command-handle isn't
/// reference counting, but is tied to the lifetime of the parent
/// command-buffer. This is not the case in UR where a command-handle is
/// reference counted.
std::atomic_uint32_t RefCountInternal;
std::atomic_uint32_t RefCountExternal;

ur_exp_command_buffer_command_handle_t_(
ur_exp_command_buffer_handle_t hCommandBuffer,
cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim)
: hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand),
WorkDim(WorkDim), RefCountInternal(0), RefCountExternal(0) {}

uint32_t incrementInternalReferenceCount() noexcept {
return ++RefCountInternal;
}
uint32_t decrementInternalReferenceCount() noexcept {
return --RefCountInternal;
}

uint32_t incrementExternalReferenceCount() noexcept {
return ++RefCountExternal;
}
uint32_t decrementExternalReferenceCount() noexcept {
return --RefCountExternal;
}
uint32_t getExternalReferenceCount() const noexcept {
return RefCountExternal;
}
};

/// Handle to a command-buffer object.
struct ur_exp_command_buffer_handle_t_ {
/// UR queue belonging to the command-buffer, required for OpenCL creation.
ur_queue_handle_t hInternalQueue;
/// Context the command-buffer is created for.
ur_context_handle_t hContext;
/// OpenCL command-buffer object.
cl_command_buffer_khr CLCommandBuffer;
/// Set to true if the kernel commands in the command-buffer can be updated,
/// false otherwise.
bool IsUpdatable;
/// Set to true if the command-buffer has been finalized, false otherwise.
bool IsFinalized;
/// List of commands in the command-buffer.
std::vector<ur_exp_command_buffer_command_handle_t> CommandHandles;
/// Internal & External reference counts of the command-buffer. We do this
/// manually rather than forward to the OpenCL retain/release APIs because
/// we also need to track the lifetimes of command handle objects, which
/// extend the lifetime of a UR command-buffer even if its reference
/// count is zero.
std::atomic_uint32_t RefCountInternal;
std::atomic_uint32_t RefCountExternal;

// NOTE(review): the constructor below appears to carry both the pre-change
// and post-change lines of a diff hunk (the CLCommandBuffer parameter and its
// initializer each appear twice) — confirm against the real header.
ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue,
ur_context_handle_t hContext,
cl_command_buffer_khr CLCommandBuffer)
cl_command_buffer_khr CLCommandBuffer,
bool IsUpdatable)
: hInternalQueue(hQueue), hContext(hContext),
CLCommandBuffer(CLCommandBuffer) {}
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
IsFinalized(false), RefCountInternal(0), RefCountExternal(0) {}

// Destructor is only declared here; its definition is not visible in this
// header.
~ur_exp_command_buffer_handle_t_();

/// Increments the internal reference count and returns the new value.
uint32_t incrementInternalReferenceCount() noexcept {
return ++RefCountInternal;
}
/// Decrements the internal reference count and returns the new value.
uint32_t decrementInternalReferenceCount() noexcept {
return --RefCountInternal;
}

/// Increments the external reference count and returns the new value.
uint32_t incrementExternalReferenceCount() noexcept {
return ++RefCountExternal;
}
/// Decrements the external reference count and returns the new value.
uint32_t decrementExternalReferenceCount() noexcept {
return --RefCountExternal;
}
/// Returns the current external reference count without modifying it.
uint32_t getExternalReferenceCount() const noexcept {
return RefCountExternal;
}
};
32 changes: 32 additions & 0 deletions source/adapters/opencl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,35 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) {
*NativeHandle = reinterpret_cast<ur_native_handle_t>(URObj);
return UR_RESULT_SUCCESS;
}

/// Queries whether an OpenCL device can back UR command-buffer kernel update.
///
/// The device qualifies when its CL_DEVICE_EXTENSIONS string mentions
/// cl_khr_command_buffer_mutable_dispatch and its
/// CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR bitfield contains every
/// capability UR update relies on (arguments, global offset, global size,
/// local size and exec info).
///
/// @param[in] Dev OpenCL device to query.
/// @param[out] Result Set to true when the device qualifies, false when it
/// does not. Only written on the paths that return CL_SUCCESS.
/// @return CL_SUCCESS once Result has been written. Each clGetDeviceInfo call
/// is wrapped in CL_RETURN_ON_FAILURE (macro defined elsewhere; presumably
/// returns the failing OpenCL error code early).
cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
                                                 bool &Result) {
    // First call only asks for the size of the extension string.
    size_t ExtSize = 0;
    CL_RETURN_ON_FAILURE(
        clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize));

    // Second call reads the extension string into a buffer of that size.
    std::string ExtStr(ExtSize, '\0');
    CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize,
                                         ExtStr.data(), nullptr));

    // Without the extension there are no capabilities to query.
    if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") ==
        std::string::npos) {
        Result = false;
        return CL_SUCCESS;
    }

    // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must
    // be supported by a device for UR update.
    cl_mutable_dispatch_fields_khr mutable_capabilities = 0;
    CL_RETURN_ON_FAILURE(clGetDeviceInfo(
        Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
        sizeof(mutable_capabilities), &mutable_capabilities, nullptr));
    const cl_mutable_dispatch_fields_khr required_caps =
        CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
        CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR |
        CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR |
        CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
    Result = (mutable_capabilities & required_caps) == required_caps;
    return CL_SUCCESS;
}
9 changes: 9 additions & 0 deletions source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR";
CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR";
CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR";

#undef CONSTFIX

Expand Down Expand Up @@ -305,6 +306,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret);

/// Function-pointer type for the clUpdateMutableCommandsKHR extension entry
/// point: takes a command-buffer and a pointer to a mutable-config
/// description, and returns a cl_int.
using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer,
const cl_mutable_base_config_khr *mutable_config);

template <typename T> struct FuncPtrCache {
std::map<cl_context, T> Map;
std::mutex Mutex;
Expand Down Expand Up @@ -344,6 +349,7 @@ struct ExtFuncPtrCacheT {
FuncPtrCache<clCommandFillBufferKHR_fn> clCommandFillBufferKHRCache;
FuncPtrCache<clEnqueueCommandBufferKHR_fn> clEnqueueCommandBufferKHRCache;
FuncPtrCache<clGetCommandBufferInfoKHR_fn> clGetCommandBufferInfoKHRCache;
FuncPtrCache<clUpdateMutableCommandsKHR_fn> clUpdateMutableCommandsKHRCache;
};
// A raw pointer is used here since the lifetime of this map has to be tied to
// piTeardown to avoid issues with static destruction order (a user application
Expand Down Expand Up @@ -414,3 +420,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context,
ur_result_t mapCLErrorToUR(cl_int Result);

ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle);

/// Checks whether the OpenCL device \p Dev can support UR command-buffer
/// kernel update. The answer is written to \p Result; the return value is an
/// OpenCL status code.
cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
bool &Result);
6 changes: 5 additions & 1 deletion source/adapters/opencl/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
std::string::npos);
}
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
return ReturnValue(false);
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
bool Supported = false;
CL_RETURN_ON_FAILURE(
deviceSupportsURCommandBufferKernelUpdate(Dev, Supported));
return ReturnValue(Supported);
}
default: {
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down
26 changes: 23 additions & 3 deletions test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,30 @@ struct BufferFillCommandTest
UUR_RETURN_ON_FATAL_FAILURE(
urUpdatableCommandBufferExpExecutionTest::SetUp());

// First argument is buffer to fill (will also be hidden accessor arg)
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE,
sizeof(val) * global_size, nullptr,
&buffer));

// TODO - Enable single code path after https://github.com/oneapi-src/unified-runtime/pull/1176
// is merged
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
// First argument is buffer to fill
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer));
} else {
// First argument is buffer to fill
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(buffer),
nullptr, &buffer));
}
// second arg is hidden accessor
struct {
size_t offsets[1] = {0};
} accessor;
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr,
&accessor));

// Second argument is scalar to fill with.
AddPodArg(val);
ASSERT_SUCCESS(
urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val));

// Append kernel command to command-buffer and close command-buffer
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
Expand Down
29 changes: 25 additions & 4 deletions test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,17 @@ struct BufferSaxpyKernelTest
0, nullptr, nullptr));
}

// Index 0 is output buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
// TODO: Enable single code path once https://github.com/oneapi-src/unified-runtime/pull/1176
// is merged
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
// Index 0 is output buffer
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
} else {
// Index 0 is output buffer
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 0, sizeof(ur_mem_handle_t), nullptr, &buffers[0]));
}
// Index 1 is output accessor
struct {
size_t offsets[1] = {0};
Expand All @@ -41,13 +50,25 @@ struct BufferSaxpyKernelTest
// Index 2 is A
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A));
// Index 3 is X buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
} else {
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 3, sizeof(ur_mem_handle_t), nullptr, &buffers[1]));
}

// Index 4 is X buffer accessor
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr,
&accessor));
// Index 5 is Y buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
} else {
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 5, sizeof(ur_mem_handle_t), nullptr, &buffers[2]));
}

// Index 6 is Y buffer accessor
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr,
Expand Down
4 changes: 4 additions & 0 deletions test/conformance/exp_command_buffer/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());

ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
sizeof(backend), &backend, nullptr));

size_t returned_size;
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0,
nullptr, &returned_size));
Expand Down Expand Up @@ -97,6 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {

ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
ur_bool_t updatable_command_buffer_support = false;
ur_platform_backend_t backend{};
};

struct urUpdatableCommandBufferExpExecutionTest
Expand Down
4 changes: 4 additions & 0 deletions test/conformance/exp_command_buffer/invalid_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ struct InvalidUpdateTest
}

void TearDown() override {
// Work around an issue with the OpenCL adapter implementing urUSMFree
// using a blocking free, which hangs
EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));

if (shared_ptr) {
EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
}
Expand Down
12 changes: 12 additions & 0 deletions test/conformance/exp_command_buffer/ndrange_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ TEST_P(NDRangeUpdateTest, Update3D) {
// Update the kernel work dimensions to 2, and update global size, local size,
// and global offset to new values.
TEST_P(NDRangeUpdateTest, Update2D) {
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
// updating the work dimension.
GTEST_SKIP();
}

// Run command-buffer prior to update and verify output
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
Expand Down Expand Up @@ -205,6 +211,12 @@ TEST_P(NDRangeUpdateTest, Update2D) {
// Update the kernel work dimensions to 1, and check that previously
// set global size, local size, and global offset update accordingly.
TEST_P(NDRangeUpdateTest, Update1D) {
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
// updating the work dimension.
GTEST_SKIP();
}

// Run command-buffer prior to update and verify output
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3];

// Update direct access flag
bool indirect_access = false;
bool indirect_access = true;
new_exec_info_descs[0] = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
nullptr, // pNext
Expand All @@ -179,14 +179,14 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
allocation_size, &new_shared_ptr));
ASSERT_NE(new_shared_ptr, nullptr);
void *pointers = {new_shared_ptr};
void *pointers[1] = {new_shared_ptr};
new_exec_info_descs[2] = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
nullptr, // pNext
UR_KERNEL_EXEC_INFO_USM_PTRS, // propName
sizeof(pointers), // propSize
nullptr, // pProperties
&pointers, // pPropValue
nullptr, // pProperties
pointers, // pPropValue
};

ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
Expand Down

0 comments on commit 6abac95

Please sign in to comment.