Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EXP][Command-buffer] OpenCL kernel command update #7

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7929,6 +7929,8 @@ typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_co
/// + `NULL == phCommandBuffer`
/// - ::UR_RESULT_ERROR_INVALID_CONTEXT
/// - ::UR_RESULT_ERROR_INVALID_DEVICE
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP.
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
UR_APIEXPORT ur_result_t UR_APICALL
Expand Down
2 changes: 2 additions & 0 deletions scripts/core/exp-command-buffer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ params:
returns:
- $X_RESULT_ERROR_INVALID_CONTEXT
- $X_RESULT_ERROR_INVALID_DEVICE
- $X_RESULT_ERROR_INVALID_OPERATION:
- "If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP."
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
- $X_RESULT_ERROR_OUT_OF_RESOURCES
--- #--------------------------------------------------------------------------
Expand Down
349 changes: 281 additions & 68 deletions source/adapters/opencl/command_buffer.cpp

Large diffs are not rendered by default.

83 changes: 81 additions & 2 deletions source/adapters/opencl/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,93 @@
#include <CL/cl_ext.h>
#include <ur/ur.hpp>

/// Handle to a kernel command.
struct ur_exp_command_buffer_command_handle_t_ {
/// Command-buffer this command belongs to.
ur_exp_command_buffer_handle_t hCommandBuffer;
/// OpenCL command-handle.
cl_mutable_command_khr CLMutableCommand;
/// Work-dimension the command was originally created with.
cl_uint WorkDim;
/// Internal & External reference counts.
/// We need to maintain these because in OpenCL a command-handle isn't
/// reference counting, but is tied to the lifetime of the parent
/// command-buffer. This is not the case in UR where a command-handle is
/// reference counted.
std::atomic_uint32_t RefCountInternal;
std::atomic_uint32_t RefCountExternal;

ur_exp_command_buffer_command_handle_t_(
ur_exp_command_buffer_handle_t hCommandBuffer,
cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim)
: hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand),
WorkDim(WorkDim), RefCountInternal(0), RefCountExternal(0) {}

uint32_t incrementInternalReferenceCount() noexcept {
return ++RefCountInternal;
}
uint32_t decrementInternalReferenceCount() noexcept {
return --RefCountInternal;
}

uint32_t incrementExternalReferenceCount() noexcept {
return ++RefCountExternal;
}
uint32_t decrementExternalReferenceCount() noexcept {
return --RefCountExternal;
}
uint32_t getExternalReferenceCount() const noexcept {
return RefCountExternal;
}
};

/// Handle to a command-buffer object.
///
/// Bundles the OpenCL command-buffer with the UR queue and context it was
/// created from, the command handles recorded into it, and manually
/// maintained internal/external reference counts.
struct ur_exp_command_buffer_handle_t_ {
/// UR queue belonging to the command-buffer, required for OpenCL creation.
ur_queue_handle_t hInternalQueue;
/// Context the command-buffer is created for.
ur_context_handle_t hContext;
/// OpenCL command-buffer object.
cl_command_buffer_khr CLCommandBuffer;
/// Set to true if the kernel commands in the command-buffer can be updated,
/// false otherwise
bool IsUpdatable;
/// Set to true if the command-buffer has been finalized, false otherwise
bool IsFinalized;
/// List of commands in the command-buffer.
std::vector<ur_exp_command_buffer_command_handle_t> CommandHandles;
/// Internal & External reference counts of the command-buffer. We do this
/// manually rather than forward to the OpenCL retain/release APIs because
/// we also need to track the lifetimes of command handle objects, which
/// extend the lifetime of a UR command-buffer even if its reference
/// count is zero.
std::atomic_uint32_t RefCountInternal;
std::atomic_uint32_t RefCountExternal;

/// Stores the queue, context, OpenCL command-buffer and updatable flag.
/// The command-buffer starts out not finalized, with both reference counts
/// at zero.
ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue,
ur_context_handle_t hContext,
// NOTE(review): the next two lines both declare the CLCommandBuffer
// parameter; they look like the removed and added sides of a diff hunk
// rendered together -- confirm that only the second (followed by
// `bool IsUpdatable`) is present in the real header.
cl_command_buffer_khr CLCommandBuffer)
cl_command_buffer_khr CLCommandBuffer,
bool IsUpdatable)
: hInternalQueue(hQueue), hContext(hContext),
// NOTE(review): likewise, the next two lines both initialize
// CLCommandBuffer; presumably only the second, which continues the
// initializer list, belongs to the final code -- confirm.
CLCommandBuffer(CLCommandBuffer) {}
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
IsFinalized(false), RefCountInternal(0), RefCountExternal(0) {}

/// Destructor; only declared here, the definition is not part of this header.
~ur_exp_command_buffer_handle_t_();

/// Atomically increments the internal reference count and returns the new
/// value.
uint32_t incrementInternalReferenceCount() noexcept {
return ++RefCountInternal;
}
/// Atomically decrements the internal reference count and returns the new
/// value.
uint32_t decrementInternalReferenceCount() noexcept {
return --RefCountInternal;
}

/// Atomically increments the external reference count and returns the new
/// value.
uint32_t incrementExternalReferenceCount() noexcept {
return ++RefCountExternal;
}
/// Atomically decrements the external reference count and returns the new
/// value.
uint32_t decrementExternalReferenceCount() noexcept {
return --RefCountExternal;
}
/// Returns the current external reference count.
uint32_t getExternalReferenceCount() const noexcept {
return RefCountExternal;
}
};
32 changes: 32 additions & 0 deletions source/adapters/opencl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,35 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) {
*NativeHandle = reinterpret_cast<ur_native_handle_t>(URObj);
return UR_RESULT_SUCCESS;
}

/// Determines whether an OpenCL device can be used for UR kernel command
/// update.
///
/// The device is reported as supporting update only if its extension string
/// contains `cl_khr_command_buffer_mutable_dispatch` and it advertises every
/// mutable-dispatch capability UR relies on (arguments, global offset, global
/// size, local size and exec info).
///
/// \param[in] Dev OpenCL device to query.
/// \param[out] Result Set to true if the device meets the requirements above,
///             false otherwise. Not written when a device query fails.
/// \return CL_SUCCESS when the queries completed; otherwise the value
///         propagated by CL_RETURN_ON_FAILURE for the failing clGetDeviceInfo
///         call (presumably the OpenCL error code -- the macro is defined
///         elsewhere).
cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
                                                 bool &Result) {
  // First call retrieves the size of the extension string, second the data.
  size_t ExtSize = 0;
  CL_RETURN_ON_FAILURE(
      clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize));

  std::string ExtStr(ExtSize, '\0');
  CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize,
                                       ExtStr.data(), nullptr));

  if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") ==
      std::string::npos) {
    Result = false;
    return CL_SUCCESS;
  }

  // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must
  // be supported by a device for UR update.
  cl_mutable_dispatch_fields_khr MutableCapabilities = 0;
  CL_RETURN_ON_FAILURE(clGetDeviceInfo(
      Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
      sizeof(MutableCapabilities), &MutableCapabilities, nullptr));
  const cl_mutable_dispatch_fields_khr RequiredCaps =
      CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
      CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR |
      CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR |
      CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
  Result = (MutableCapabilities & RequiredCaps) == RequiredCaps;
  return CL_SUCCESS;
}
9 changes: 9 additions & 0 deletions source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR";
CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR";
CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR";

#undef CONSTFIX

Expand Down Expand Up @@ -305,6 +306,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret);

/// Function-pointer type for the `clUpdateMutableCommandsKHR` extension entry
/// point: takes a command-buffer and a pointer to a mutable base
/// configuration, and returns a `cl_int` status.
using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer,
const cl_mutable_base_config_khr *mutable_config);

template <typename T> struct FuncPtrCache {
std::map<cl_context, T> Map;
std::mutex Mutex;
Expand Down Expand Up @@ -344,6 +349,7 @@ struct ExtFuncPtrCacheT {
FuncPtrCache<clCommandFillBufferKHR_fn> clCommandFillBufferKHRCache;
FuncPtrCache<clEnqueueCommandBufferKHR_fn> clEnqueueCommandBufferKHRCache;
FuncPtrCache<clGetCommandBufferInfoKHR_fn> clGetCommandBufferInfoKHRCache;
FuncPtrCache<clUpdateMutableCommandsKHR_fn> clUpdateMutableCommandsKHRCache;
};
// A raw pointer is used here since the lifetime of this map has to be tied to
// piTeardown to avoid issues with static destruction order (a user application
Expand Down Expand Up @@ -414,3 +420,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context,
ur_result_t mapCLErrorToUR(cl_int Result);

ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle);

cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
bool &Result);
6 changes: 5 additions & 1 deletion source/adapters/opencl/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
std::string::npos);
}
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
return ReturnValue(false);
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
bool Supported = false;
CL_RETURN_ON_FAILURE(
deviceSupportsURCommandBufferKernelUpdate(Dev, Supported));
return ReturnValue(Supported);
}
default: {
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down
2 changes: 2 additions & 0 deletions source/loader/ur_libapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7030,6 +7030,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp(
/// + `NULL == phCommandBuffer`
/// - ::UR_RESULT_ERROR_INVALID_CONTEXT
/// - ::UR_RESULT_ERROR_INVALID_DEVICE
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP.
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urCommandBufferCreateExp(
Expand Down
2 changes: 2 additions & 0 deletions source/ur_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5966,6 +5966,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp(
/// + `NULL == phCommandBuffer`
/// - ::UR_RESULT_ERROR_INVALID_CONTEXT
/// - ::UR_RESULT_ERROR_INVALID_DEVICE
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP.
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urCommandBufferCreateExp(
Expand Down
26 changes: 23 additions & 3 deletions test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,30 @@ struct BufferFillCommandTest
UUR_RETURN_ON_FATAL_FAILURE(
urUpdatableCommandBufferExpExecutionTest::SetUp());

// First argument is buffer to fill (will also be hidden accessor arg)
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE,
sizeof(val) * global_size, nullptr,
&buffer));

// TODO - Enable single code path after https://github.com/oneapi-src/unified-runtime/pull/1176
// is merged
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
// First argument is buffer to fill
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer));
} else {
// First argument is buffer to fill
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(buffer),
nullptr, &buffer));
}
// second arg is hidden accessor
struct {
size_t offsets[1] = {0};
} accessor;
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr,
&accessor));

// Second argument is scalar to fill with.
AddPodArg(val);
ASSERT_SUCCESS(
urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val));

// Append kernel command to command-buffer and close command-buffer
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
Expand Down
29 changes: 25 additions & 4 deletions test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,17 @@ struct BufferSaxpyKernelTest
0, nullptr, nullptr));
}

// Index 0 is output buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
// TODO: Enable single code path once https://github.com/oneapi-src/unified-runtime/pull/1176
// is merged
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
// Index 0 is output buffer
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
} else {
// Index 0 is output buffer
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 0, sizeof(ur_mem_handle_t), nullptr, &buffers[0]));
}
// Index 1 is output accessor
struct {
size_t offsets[1] = {0};
Expand All @@ -41,13 +50,25 @@ struct BufferSaxpyKernelTest
// Index 2 is A
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A));
// Index 3 is X buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
} else {
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 3, sizeof(ur_mem_handle_t), nullptr, &buffers[1]));
}

// Index 4 is X buffer accessor
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr,
&accessor));
// Index 5 is Y buffer
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
ASSERT_SUCCESS(
urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
} else {
ASSERT_SUCCESS(urKernelSetArgValue(
kernel, 5, sizeof(ur_mem_handle_t), nullptr, &buffers[2]));
}

// Index 6 is Y buffer accessor
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr,
Expand Down
4 changes: 4 additions & 0 deletions test/conformance/exp_command_buffer/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());

ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
sizeof(backend), &backend, nullptr));

size_t returned_size;
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0,
nullptr, &returned_size));
Expand Down Expand Up @@ -97,6 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {

ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
ur_bool_t updatable_command_buffer_support = false;
ur_platform_backend_t backend{};
};

struct urUpdatableCommandBufferExpExecutionTest
Expand Down
4 changes: 4 additions & 0 deletions test/conformance/exp_command_buffer/invalid_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ struct InvalidUpdateTest
}

void TearDown() override {
// Work around an issue with the OpenCL adapter implementing urUSMFree
// using a blocking free, which hangs
EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));

if (shared_ptr) {
EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
}
Expand Down
12 changes: 12 additions & 0 deletions test/conformance/exp_command_buffer/ndrange_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ TEST_P(NDRangeUpdateTest, Update3D) {
// Update the kernel work dimensions to 2, and update global size, local size,
// and global offset to new values.
TEST_P(NDRangeUpdateTest, Update2D) {
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
// updating the work dimension.
GTEST_SKIP();
}

// Run command-buffer prior to update and verify output
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
Expand Down Expand Up @@ -205,6 +211,12 @@ TEST_P(NDRangeUpdateTest, Update2D) {
// Update the kernel work dimensions to 1, and check that previously
// set global size, local size, and global offset update accordingly.
TEST_P(NDRangeUpdateTest, Update1D) {
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
// updating the work dimension.
GTEST_SKIP();
}

// Run command-buffer prior to update and verify output
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3];

// Update direct access flag
bool indirect_access = false;
bool indirect_access = true;
new_exec_info_descs[0] = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
nullptr, // pNext
Expand All @@ -179,14 +179,14 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
allocation_size, &new_shared_ptr));
ASSERT_NE(new_shared_ptr, nullptr);
void *pointers = {new_shared_ptr};
void *pointers[1] = {new_shared_ptr};
new_exec_info_descs[2] = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
nullptr, // pNext
UR_KERNEL_EXEC_INFO_USM_PTRS, // propName
sizeof(pointers), // propSize
nullptr, // pProperties
&pointers, // pPropValue
nullptr, // pProperties
pointers, // pPropValue
};

ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
Expand Down