From a9b47f950e1f9a5cc80e5b9067618b1682d5b911 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 8 May 2024 12:59:36 +0100 Subject: [PATCH] [CTS] Added test for sequencing events This tests that the `urEnqueueEventsWaitWithBarrier` function functions as a barrier. That is, it blocks until all events prior to it in the queue are completed. Since this test may fail non-deterministically, it is ran a few times. Note that current adapters seem to always execute events sequentially, so this is a no-op. --- test/conformance/device_code/CMakeLists.txt | 1 + test/conformance/device_code/sequence.cpp | 39 +++++ .../enqueue_adapter_level_zero_v2.match | 1 + .../enqueue/enqueue_adapter_native_cpu.match | 4 + .../enqueue/enqueue_adapter_opencl.match | 3 - .../urEnqueueEventsWaitWithBarrier.cpp | 159 +++++++++++++++++- .../testing/include/uur/fixtures.h | 6 +- 7 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 test/conformance/device_code/sequence.cpp delete mode 100644 test/conformance/enqueue/enqueue_adapter_opencl.match diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 912402b7a5..2120d26bf3 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -158,6 +158,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/sequence.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/linker_error.cpp) diff --git a/test/conformance/device_code/sequence.cpp b/test/conformance/device_code/sequence.cpp new file mode 100644 index 0000000000..da176d315c --- /dev/null +++ b/test/conformance/device_code/sequence.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +class Add; +class Mul; + +int main() { + sycl::queue deviceQueue; + uint32_t val = 0; + + auto buff = sycl::buffer(&val, 1); + + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = buff.get_access(cgh); + cgh.single_task([=]() { + for (uint32_t i = 0; i < 1000; i++) { + volatile uint32_t tmp = acc[0]; + acc[0] = tmp + 1; + } + }); + }); + + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = buff.get_access(cgh); + cgh.single_task([=]() { + for (uint32_t i = 0; i < 2; i++) { + volatile uint32_t tmp = acc[0]; + acc[0] = tmp * 2; + } + }); + }); + + return 0; +} diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match index e9daa41f3c..3ac58a0684 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match @@ -1,4 +1,5 @@ {{NONDETERMINISTIC}} +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index a7d6797f94..f1ef4ef6f6 100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -18,6 +18,10 @@ {{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesLaunchOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessNonEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ {{OPT}}urEnqueueKernelLaunchTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match deleted file mode 100644 index 27ae88c43d..0000000000 --- a/test/conformance/enqueue/enqueue_adapter_opencl.match +++ /dev/null @@ -1,3 +0,0 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled diff --git a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp index fe630c4018..410ecc99a1 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp @@ -1,7 +1,8 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "ur_api.h" #include struct urEnqueueEventsWaitWithBarrierTest : uur::urMultiQueueTest { @@ -36,6 +37,32 @@ struct urEnqueueEventsWaitWithBarrierTest : uur::urMultiQueueTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueEventsWaitWithBarrierTest); +struct urEnqueueEventsWaitWithBarrierOrderingTest : uur::urProgramTest { + void SetUp() override { + program_name = "sequence"; + UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + sizeof(uint32_t), nullptr, &buffer)); + + auto entry_points = + uur::KernelsEnvironment::instance->GetEntryPointNames(program_name); + std::cout << entry_points[0]; + + ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Add", &add_kernel)); + ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Mul", &mul_kernel)); + } + + void TearDown() override { uur::urProgramTest::TearDown(); } + + ur_kernel_handle_t add_kernel; + ur_kernel_handle_t mul_kernel; + ur_mem_handle_t buffer = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueEventsWaitWithBarrierOrderingTest); + TEST_P(urEnqueueEventsWaitWithBarrierTest, Success) { ur_event_handle_t event1 = nullptr; ur_event_handle_t waitEvent = nullptr; @@ -97,3 +124,133 @@ TEST_P(urEnqueueEventsWaitWithBarrierTest, InvalidNullPtrEventWaitList) { ASSERT_SUCCESS(urEventRelease(validEvent)); } + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessEventDependenciesBarrierOnly) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, &event); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessEventDependenciesLaunchOnly) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, nullptr); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 1, &event, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 1, &event, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, SuccessEventDependencies) { + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend == UR_PLATFORM_BACKEND_OPENCL) { + GTEST_SKIP() << "Causes hangs in CI"; + } + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, &event); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch( + queue, add_kernel, 1, &offset, &count, nullptr, 1, &event, &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch( + queue, mul_kernel, 1, &offset, &count, nullptr, 1, &event, &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessNonEventDependencies) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, nullptr); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 0aeee2d909..b853164fb6 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1314,6 +1314,11 @@ struct KernelLaunchHelper { sizeof(zero), 0, size, 0, nullptr, nullptr)); ASSERT_SUCCESS(urQueueFinish(queue)); + SetBuffer1DArg(mem_handle, buffer_index); + *out_buffer = mem_handle; + } + + void SetBuffer1DArg(ur_mem_handle_t mem_handle, size_t *buffer_index) { ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index, nullptr, mem_handle)); if (buffer_index) { @@ -1350,7 +1355,6 @@ struct KernelLaunchHelper { &accessor)); current_arg_index += 2; } - *out_buffer = mem_handle; } template void AddPodArg(T data) {