diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index af0bc83d8a..3849e53d32 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -159,6 +159,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/linker_error.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/increment.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/increment.cpp b/test/conformance/device_code/increment.cpp new file mode 100644 index 0000000000..da176d315c --- /dev/null +++ b/test/conformance/device_code/increment.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +class Add; +class Mul; + +int main() { + sycl::queue deviceQueue; + uint32_t val = 0; + + auto buff = sycl::buffer(&val, 1); + + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = buff.get_access(cgh); + cgh.single_task([=]() { + for (uint32_t i = 0; i < 1000; i++) { + volatile uint32_t tmp = acc[0]; + acc[0] = tmp + 1; + } + }); + }); + + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = buff.get_access(cgh); + cgh.single_task([=]() { + for (uint32_t i = 0; i < 2; i++) { + volatile uint32_t tmp = acc[0]; + acc[0] = tmp * 2; + } + }); + }); + + return 0; +} diff --git a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp index fe630c4018..a313039563 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -36,6 +36,32 @@ struct urEnqueueEventsWaitWithBarrierTest : uur::urMultiQueueTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueEventsWaitWithBarrierTest); +struct urEnqueueEventsWaitWithBarrierOrderingTest : uur::urProgramTest { + void SetUp() override { + program_name = "increment"; + UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + sizeof(uint32_t), nullptr, &buffer)); + + auto entry_points = + uur::KernelsEnvironment::instance->GetEntryPointNames(program_name); + std::cout << entry_points[0]; + + ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Add", &add_kernel)); + ASSERT_SUCCESS(urKernelCreate(program, "_ZTS3Mul", &mul_kernel)); + } + + void TearDown() override { uur::urProgramTest::TearDown(); } + + ur_kernel_handle_t add_kernel; + ur_kernel_handle_t mul_kernel; + ur_mem_handle_t buffer = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueEventsWaitWithBarrierOrderingTest); + TEST_P(urEnqueueEventsWaitWithBarrierTest, Success) { ur_event_handle_t event1 = nullptr; ur_event_handle_t waitEvent = nullptr; @@ -97,3 +123,127 @@ TEST_P(urEnqueueEventsWaitWithBarrierTest, InvalidNullPtrEventWaitList) { ASSERT_SUCCESS(urEventRelease(validEvent)); } + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessEventDependenciesBarrierOnly) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, &event); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessEventDependenciesLaunchOnly) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, nullptr); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 1, &event, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 1, &event, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, SuccessEventDependencies) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + ur_event_handle_t event; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, &event); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch( + queue, add_kernel, 1, &offset, &count, nullptr, 1, &event, &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + EXPECT_SUCCESS(urEnqueueKernelLaunch( + queue, mul_kernel, 1, &offset, &count, nullptr, 1, &event, &event)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 1, &event, &event)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} + +TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, + SuccessNonEventDependencies) { + constexpr size_t offset = 0; + constexpr size_t count = 1; + + uur::KernelLaunchHelper addHelper(platform, context, add_kernel, queue); + uur::KernelLaunchHelper mulHelper(platform, context, mul_kernel, queue); + + addHelper.SetBuffer1DArg(buffer, nullptr); + mulHelper.SetBuffer1DArg(buffer, nullptr); + + for (size_t i = 0; i < 10; i++) { + constexpr uint32_t ONE = 1; + urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, + 0, nullptr, nullptr); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, + &count, nullptr, 0, nullptr, + nullptr)); + EXPECT_SUCCESS( + urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); + } +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index e57a31584a..583a27741e 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1300,6 +1300,11 @@ struct KernelLaunchHelper { sizeof(zero), 0, size, 0, nullptr, nullptr)); ASSERT_SUCCESS(urQueueFinish(queue)); + SetBuffer1DArg(mem_handle, buffer_index); + *out_buffer = mem_handle; + } + + void SetBuffer1DArg(ur_mem_handle_t mem_handle, size_t *buffer_index) { ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index, nullptr, mem_handle)); if (buffer_index) { @@ -1336,7 +1341,6 @@ struct KernelLaunchHelper { &accessor)); current_arg_index += 2; } - *out_buffer = mem_handle; } template void AddPodArg(T data) {