From c5abf1e636f2d4ea6f6b82a4c11332f388fe0038 Mon Sep 17 00:00:00 2001 From: Syed Faaiz Date: Wed, 28 Aug 2024 10:53:56 -0700 Subject: [PATCH] use cl_arm_printf extension for testing (#671) * use cl_arm_printf extension for testing * fix space * address issues with formating and logic * downstream and upstream clangs differ * remove get_buffer_size func * revert untouched code formating * revert some more formating * hopefully last revert of formating * move get_properties_index func to context level * remove whitespace * fix clang-format issues * add new line * delete space * Add fixes to testcl * fix tests * fix space * Implement printf_callback * get rid of unused funcs and vars * fix format * remove duplication and prioritize config buff size * fix space * rework logic for string parsing * fix format * fix format * add profiling command queue back * fix format * fix format * fix include issues * add os based includes * use user_data to transmit buffer from test side * remove space * fix space * remove unused func * use user_data properly * clean up leftover code --- src/api.cpp | 3 ++- src/context.hpp | 44 +++++++++++++++++++++++++++++- src/device.cpp | 1 + src/printf.cpp | 16 ++++++++--- src/printf.hpp | 4 ++- src/program.cpp | 6 ++--- src/queue.cpp | 5 +++- src/queue.hpp | 3 ++- tests/api/printf.cpp | 64 +++++++++++++++++++++----------------------- tests/api/testcl.hpp | 36 ++++++++++++++++++++----- 10 files changed, 130 insertions(+), 52 deletions(-) diff --git a/src/api.cpp b/src/api.cpp index fc6d7863..de032465 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -1013,7 +1013,8 @@ cl_context CLVK_API_CALL clCreateContext( return nullptr; } - cl_context context = new cvk_context(icd_downcast(devices[0]), properties); + cl_context context = + new cvk_context(icd_downcast(devices[0]), properties, user_data); if (errcode_ret != nullptr) { *errcode_ret = CL_SUCCESS; diff --git a/src/context.hpp b/src/context.hpp index 3e526c2b..cbfaaa8a 100644 --- a/src/context.hpp +++ b/src/context.hpp @@ -16,6 +16,11 @@ #include "device.hpp" #include "objects.hpp" +#include "unit.hpp" + +using cvk_printf_callback_t = void(CL_CALLBACK*)(const char* buffer, size_t len, + size_t complete, + void* user_data); using cvk_context_callback_pointer_type = void(CL_CALLBACK*)(cl_context context, void* user_data); @@ -28,7 +33,8 @@ struct cvk_context : public _cl_context, refcounted, object_magic_header { - cvk_context(cvk_device* device, const cl_context_properties* props) + cvk_context(cvk_device* device, const cl_context_properties* props, + void* user_data) : m_device(device) { if (props) { @@ -41,6 +47,26 @@ struct cvk_context : public _cl_context, } m_properties.push_back(*props); } + // Get printf buffer size from extension. + auto buff_size_prop_index = + get_property_index(CL_PRINTF_BUFFERSIZE_ARM); + if (buff_size_prop_index != -1 && !config.printf_buffer_size.set) { + m_printf_buffersize = m_properties[buff_size_prop_index]; + } else { + m_printf_buffersize = config.printf_buffer_size; + } + + // Get printf callback from extension + auto printf_callback_prop_index = + get_property_index(CL_PRINTF_CALLBACK_ARM); + if (printf_callback_prop_index != -1) { + m_printf_callback = + (cvk_printf_callback_t)m_properties[printf_callback_prop_index]; + m_user_data = user_data; + } else { + m_printf_callback = nullptr; + m_user_data = nullptr; + } } virtual ~cvk_context() { @@ -73,11 +99,27 @@ struct cvk_context : public _cl_context, return size <= m_device->max_mem_alloc_size(); } + int get_property_index(const int prop) { + for (unsigned i = 0; i < m_properties.size(); i += 2) { + if (m_properties[i] == prop) { + return i + 1; + } + } + return -1; + } + + size_t get_printf_buffersize() { return m_printf_buffersize; } + cvk_printf_callback_t get_printf_callback() { return m_printf_callback; } + void* get_printf_userdata() { return m_user_data; } + private: cvk_device* m_device; std::mutex m_callbacks_lock; std::vector m_destuctor_callbacks; std::vector m_properties; + size_t m_printf_buffersize; + cvk_printf_callback_t m_printf_callback; + void* m_user_data; }; static inline cvk_context* icd_downcast(cl_context context) { diff --git a/src/device.cpp b/src/device.cpp index ab3bf5ee..09f70aa6 100644 --- a/src/device.cpp +++ b/src/device.cpp @@ -619,6 +619,7 @@ void cvk_device::build_extension_ils_list() { #endif MAKE_NAME_VERSION(1, 0, 0, "cl_khr_spirv_no_integer_wrap_decoration"), MAKE_NAME_VERSION(1, 0, 0, "cl_arm_non_uniform_work_group_size"), + MAKE_NAME_VERSION(1, 0, 0, "cl_arm_printf"), MAKE_NAME_VERSION(1, 0, 0, "cl_khr_suggested_local_work_size"), MAKE_NAME_VERSION(1, 0, 0, "cl_khr_3d_image_writes"), // MAKE_NAME_VERSION(0, 9, 0, "cl_khr_semaphore"), diff --git a/src/printf.cpp b/src/printf.cpp index 8638407d..104c620d 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -157,7 +157,8 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) { } void process_printf(char*& data, const printf_descriptor_map_t& descs, - char* data_end) { + char* data_end, cvk_printf_callback_t printf_cb, + void* printf_userdata) { uint32_t printf_id = read_inc_buff(data); auto& format_string = descs.at(printf_id).format_string; @@ -239,11 +240,18 @@ void process_printf(char*& data, const printf_descriptor_map_t& descs, arg_idx++; } - printf("%s", printf_out.str().c_str()); + auto output = printf_out.str(); + if (printf_cb != nullptr) { + auto len = output.size(); + printf_cb(output.c_str(), len, data >= data_end, printf_userdata); + } else { + printf("%s", output.c_str()); + } } cl_int cvk_printf(cvk_mem* printf_buffer, - const printf_descriptor_map_t& descriptors) { + const printf_descriptor_map_t& descriptors, + cvk_printf_callback_t printf_cb, void* printf_userdata) { CVK_ASSERT(printf_buffer); if (!printf_buffer->map()) { cvk_error("Could not map printf buffer"); @@ -258,7 +266,7 @@ cl_int cvk_printf(cvk_mem* printf_buffer, auto* data_end = data + limit; while (data < data_end) { - process_printf(data, descriptors, data_end); + process_printf(data, descriptors, data_end, printf_cb, printf_userdata); } if (buffer_size < bytes_written) { diff --git a/src/printf.hpp b/src/printf.hpp index 9941ab18..7f762453 100644 --- a/src/printf.hpp +++ b/src/printf.hpp @@ -14,6 +14,7 @@ #pragma once +#include "context.hpp" #include "memory.hpp" #include @@ -28,4 +29,5 @@ using printf_descriptor_map_t = std::unordered_map; // Process the contents of the printf buffer and print the results to stdout cl_int cvk_printf(cvk_mem* printf_buffer, - const printf_descriptor_map_t& descriptors); + const printf_descriptor_map_t& descriptors, + cvk_printf_callback_t cb_func, void* printf_userdata); diff --git a/src/program.cpp b/src/program.cpp index 61609693..cf88fda4 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -967,10 +967,10 @@ std::string cvk_program::prepare_build_options(const cvk_device* device) const { } } + auto buff_size = m_context->get_printf_buffersize(); + options += " -enable-printf "; - options += - " -printf-buffer-size=" + std::to_string(config.printf_buffer_size) + - " "; + options += " -printf-buffer-size=" + std::to_string(buff_size) + " "; #if COMPILER_AVAILABLE options += " " + config.clspv_options() + " "; diff --git a/src/queue.cpp b/src/queue.cpp index 19cbe680..861370a1 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -1126,7 +1126,10 @@ cl_int cvk_command_kernel::do_post_action() { cvk_error_fn("printf buffer was not created"); return CL_OUT_OF_RESOURCES; } - return cvk_printf(buffer, m_kernel->program()->printf_descriptors()); + + return cvk_printf(buffer, m_kernel->program()->printf_descriptors(), + m_queue->context()->get_printf_callback(), + m_queue->context()->get_printf_userdata()); } return CL_SUCCESS; diff --git a/src/queue.hpp b/src/queue.hpp index 12d96e32..d3bc593c 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -178,7 +178,8 @@ struct cvk_command_queue : public _cl_command_queue, if (!m_printf_buffer) { cl_int status; m_printf_buffer = cvk_buffer::create( - context(), 0, config.printf_buffer_size, nullptr, &status); + context(), 0, m_context->get_printf_buffersize(), nullptr, + &status); CVK_ASSERT(status == CL_SUCCESS); } return m_printf_buffer.get(); diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp index 87e2ac14..cbade14c 100644 --- a/tests/api/printf.cpp +++ b/tests/api/printf.cpp @@ -29,6 +29,12 @@ #include #endif +void printf_callback(const char* buffer, size_t len, size_t complete, + void* user_data) { + std::string* user_buffer = (std::string*)user_data; + *user_buffer += std::string(buffer); +} + static std::string stdoutFileName; #define BUFFER_SIZE 1024 @@ -128,17 +134,19 @@ TEST_F(WithCommandQueue, SimplePrintf) { ASSERT_STREQ(printf_buffer, message); } -TEST_F(WithCommandQueue, TooLongPrintf) { +TEST_F(WithCommandQueueNoSetUp, TooLongPrintf) { + std::string buffer = ""; // each print takes 12 bytes (4 for the printf_id, and 2*4 for the 2 integer // to print) + 4 for the byte written counter - auto cfg1 = - CLVK_CONFIG_SCOPED_OVERRIDE(printf_buffer_size, uint32_t, 28, true); - - temp_folder_deletion temp; - stdoutFileName = getStdoutFileName(temp); - - int fd; - ASSERT_TRUE(getStdout(fd)); + cl_context_properties properties[4] = { + CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, + CL_PRINTF_BUFFERSIZE_ARM, (cl_context_properties)28}; + WithCommandQueue::SetUpWithContextProperties( + properties, reinterpret_cast(&buffer)); + + // We only get the first 2 prints because the buffer is too small to get + // the last one. + const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n"; const char* source = R"( kernel void test_printf() { @@ -154,28 +162,23 @@ TEST_F(WithCommandQueue, TooLongPrintf) { EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr); Finish(); - releaseStdout(fd); - auto printf_buffer = getStdoutContent(); - ASSERT_NE(printf_buffer, nullptr); - - // We only get the first 2 prints because the buffer is too small to get the - // last one. - const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n"; - ASSERT_STREQ(printf_buffer, message); + ASSERT_STREQ(buffer.c_str(), message); } -TEST_F(WithCommandQueue, TooLongPrintf2) { +TEST_F(WithCommandQueueNoSetUp, TooLongPrintf2) { + std::string buffer = ""; // each print takes 12 bytes (4 for the printf_id, and 2*4 for the 2 integer // to print) + 4 for the byte written counter + 8 which are not enough for // the third print, but should not cause any issue in clvk - auto cfg1 = - CLVK_CONFIG_SCOPED_OVERRIDE(printf_buffer_size, uint32_t, 36, true); - - temp_folder_deletion temp; - stdoutFileName = getStdoutFileName(temp); - - int fd; - ASSERT_TRUE(getStdout(fd)); + cl_context_properties properties[4] = { + CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, + CL_PRINTF_BUFFERSIZE_ARM, (cl_context_properties)36}; + WithCommandQueue::SetUpWithContextProperties( + properties, reinterpret_cast(&buffer)); + + // We only get the first 2 prints because the buffer is too small to get + // the last one. + const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n"; const char* source = R"( kernel void test_printf() { @@ -191,14 +194,7 @@ TEST_F(WithCommandQueue, TooLongPrintf2) { EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr); Finish(); - releaseStdout(fd); - auto printf_buffer = getStdoutContent(); - ASSERT_NE(printf_buffer, nullptr); - - // We only get the first 2 prints because the buffer is too small to get the - // last one. - const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n"; - ASSERT_STREQ(printf_buffer, message); + ASSERT_STREQ(buffer.c_str(), message); } TEST_F(WithCommandQueue, PrintfMissingLengthModifier) { diff --git a/tests/api/testcl.hpp b/tests/api/testcl.hpp index 43c318da..b012d849 100644 --- a/tests/api/testcl.hpp +++ b/tests/api/testcl.hpp @@ -206,10 +206,16 @@ class WithContext : public ::testing::Test { cl_platform_id platform() const { return gPlatform; } - void SetUp() override { + void SetUp() override { SetUpWithContextProperties(nullptr, nullptr); } + + void SetUpWithContextProperties(const cl_context_properties* properties, + void* user_data) { cl_int err; - m_context = - clCreateContext(nullptr, 1, &gDevice, nullptr, nullptr, &err); + m_context = clCreateContext( + properties, 1, &gDevice, + [](const char* errinfo, const void* private_info, size_t cb, + void* user_data) {}, + user_data, &err); ASSERT_CL_SUCCESS(err); } @@ -510,12 +516,23 @@ class WithCommandQueue : public WithContext { #ifndef COMPILER_AVAILABLE GTEST_SKIP(); #endif - WithContext::SetUp(); auto queue = CreateCommandQueue(device(), properties); m_queue = queue.release(); } - void SetUp() override { SetUpQueue(0); } + void SetUpWithProperties(const cl_context_properties* context_properties, + const cl_command_queue_properties queue_properties, + void* user_data) { + WithContext::SetUpWithContextProperties(context_properties, user_data); + SetUpQueue(queue_properties); + } + + void SetUpWithContextProperties(const cl_context_properties* properties, + void* user_data) { + SetUpWithProperties(properties, 0, user_data); + } + + void SetUp() override { SetUpWithProperties(nullptr, 0, nullptr); } void TearDown() override { #ifdef COMPILER_AVAILABLE @@ -760,5 +777,12 @@ class WithCommandQueue : public WithContext { class WithProfiledCommandQueue : public WithCommandQueue { protected: - void SetUp() override { SetUpQueue(CL_QUEUE_PROFILING_ENABLE); } + void SetUp() override { + SetUpWithProperties(nullptr, CL_QUEUE_PROFILING_ENABLE, nullptr); + } +}; + +class WithCommandQueueNoSetUp : public WithCommandQueue { +protected: + void SetUp() override{}; };