From 5c59aa58b782ab3e06a8c8727bac50dac7e4282c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 10 Dec 2024 16:44:34 +0000 Subject: [PATCH] Fix validation error in WithCommandQueue.SubgroupSize (#748) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Respect maxComputeWorkgroupSubgroups limit Change-Id: I1242c9bf13dbd10dfc87351767a6698e1fa8879c Signed-off-by: Kévin Petit --- src/api.cpp | 5 +++-- src/device.cpp | 6 ++++++ src/device.hpp | 4 ++++ src/kernel.hpp | 3 +-- tests/api/subgroup_size.cpp | 22 ++++++++++++---------- tests/api/testcl.hpp | 29 +++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index ec114074..e60d63fd 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -942,9 +942,10 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev, } copy_ptr = val_subgroup_sizes.data(); size_ret = sizeof(size_t) * val_subgroup_sizes.size(); - break; + } else { + ret = CL_INVALID_VALUE; } - [[fallthrough]]; + break; default: ret = CL_INVALID_VALUE; break; diff --git a/src/device.cpp b/src/device.cpp index 12dc7e33..0678d6a1 100644 --- a/src/device.cpp +++ b/src/device.cpp @@ -1106,6 +1106,12 @@ void cvk_device::log_limits_and_memory_information() { limits.maxComputeWorkGroupSize[0], limits.maxComputeWorkGroupSize[1], limits.maxComputeWorkGroupSize[2]); + cvk_info(" Min subgroup size: %u", + m_subgroup_size_control_properties.minSubgroupSize); + cvk_info(" Max subgroup size: %u", + m_subgroup_size_control_properties.maxSubgroupSize); + cvk_info(" Max subgroups per work-group: %u", + m_subgroup_size_control_properties.maxComputeWorkgroupSubgroups); // Print memoy information cvk_info("Device has %u memory types:", m_mem_properties.memoryTypeCount); diff --git a/src/device.hpp b/src/device.hpp index a4dff861..0d2b0fd2 100644 --- a/src/device.hpp +++ b/src/device.hpp @@ -356,6 +356,10 @@ struct cvk_device : public _cl_device_id, if (!supports_subgroups()) { return 0; } + if (supports_subgroup_size_selection()) { + return m_subgroup_size_control_properties + .maxComputeWorkgroupSubgroups; + } return ceil_div(max_work_group_size(), static_cast(sub_group_size())); } diff --git a/src/kernel.hpp b/src/kernel.hpp index 533f8fe6..3d011adc 100644 --- a/src/kernel.hpp +++ b/src/kernel.hpp @@ -116,8 +116,7 @@ struct cvk_kernel : public _cl_kernel, api_object { } size_t max_num_sub_groups(const cvk_device* device) const { - return ceil_div(max_work_group_size(device), - static_cast(device->sub_group_size())); + return device->max_num_sub_groups(); } const std::array& required_work_group_size() const { diff --git a/tests/api/subgroup_size.cpp b/tests/api/subgroup_size.cpp index 6ee26a1e..d02665d6 100644 --- a/tests/api/subgroup_size.cpp +++ b/tests/api/subgroup_size.cpp @@ -25,6 +25,7 @@ static const char* program_source = R"( )"; TEST_F(WithCommandQueue, SubgroupSizes) { + REQUIRE_EXTENSION("cl_intel_required_subgroup_size"); std::vector subgroup_sizes; size_t raw_size; GetDeviceInfo(CL_DEVICE_SUB_GROUP_SIZES_INTEL, 0, nullptr, &raw_size); @@ -32,15 +33,15 @@ TEST_F(WithCommandQueue, SubgroupSizes) { GetDeviceInfo(CL_DEVICE_SUB_GROUP_SIZES_INTEL, raw_size, subgroup_sizes.data(), nullptr); - size_t max_work_group_size; - GetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), - &max_work_group_size, nullptr); + size_t max_num_sub_groups; + GetDeviceInfo(CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(size_t), + &max_num_sub_groups, nullptr); auto buffer = CreateBuffer(0, sizeof(cl_int)); - auto run = [this](const char* kernel_prefix, size_t subgroup_size, - size_t max_work_group_size, cl_mem buffer) { - cl_int expected_num_sub_groups = max_work_group_size / subgroup_size; + auto run = [this, max_num_sub_groups](const char* kernel_prefix, + size_t subgroup_size, cl_mem buffer) { + cl_int expected_num_sub_groups = max_num_sub_groups; size_t work_size = expected_num_sub_groups * subgroup_size; char source[512]; snprintf(source, sizeof(source), program_source, kernel_prefix); @@ -51,9 +52,10 @@ TEST_F(WithCommandQueue, SubgroupSizes) { auto result = EnqueueMapBuffer(buffer, true, 0, 0, sizeof(cl_int)); #if 0 - printf("\tkernel_prefix '%s' subgroup_size %lu max_work_group_size %lu " + printf("\tkernel_prefix '%s' subgroup_size %lu " + "max_num_sub_groups %lu " "result %u expected %u\n", - kernel_prefix, subgroup_size, max_work_group_size, *result, + kernel_prefix, subgroup_size, max_num_sub_groups, *result, expected_num_sub_groups); #endif ASSERT_EQ(expected_num_sub_groups, *result); @@ -65,12 +67,12 @@ TEST_F(WithCommandQueue, SubgroupSizes) { snprintf(attribute, sizeof(attribute), "__attribute__((intel_reqd_sub_group_size(%lu))) ", subgroup_size); - run(attribute, subgroup_size, max_work_group_size, buffer); + run(attribute, subgroup_size, buffer); } for (auto subgroup_size : subgroup_sizes) { auto cfg = CLVK_CONFIG_SCOPED_OVERRIDE(force_subgroup_size, uint32_t, subgroup_size, true); - run("", subgroup_size, max_work_group_size, buffer); + run("", subgroup_size, buffer); } } #endif diff --git a/tests/api/testcl.hpp b/tests/api/testcl.hpp index 878f6ab3..9eb2dfbc 100644 --- a/tests/api/testcl.hpp +++ b/tests/api/testcl.hpp @@ -775,8 +775,37 @@ class WithCommandQueue : public WithContext { param_value, param_value_size_ret); ASSERT_CL_SUCCESS(err); } + + bool HasExtension(const std::string& name) { + std::vector exts; + size_t num_extensions; + GetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, + &num_extensions); + + exts.resize(num_extensions); + + GetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, + num_extensions * sizeof(cl_name_version), exts.data(), + nullptr); + + for (auto const& ext : exts) { + if (ext.name == name) { + return true; + } + } + + return false; + } }; +#define REQUIRE_EXTENSION(name) \ + do { \ + if (!HasExtension(name)) { \ + GTEST_SKIP() << "Device does not support " << name \ + << " extension"; \ + } \ + } while (0); + class WithProfiledCommandQueue : public WithCommandQueue { protected: void SetUp() override {