Skip to content

Commit

Permalink
Add CTS for scratch register reading.
Browse files Browse the repository at this point in the history
  • Loading branch information
aviralni committed Oct 17, 2024
1 parent 32527cc commit 79e34c0
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 6 deletions.
123 changes: 123 additions & 0 deletions conformance_tests/tools/debug/src/test_debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "test_debug.hpp"
#include "test_debug_utils.hpp"
#include "test_harness/zet_intel_gpu_debug.h"

namespace lzt = level_zero_tests;

Expand Down Expand Up @@ -1376,6 +1377,128 @@ void zetDebugReadWriteRegistersTest::run_read_write_registers_test(
}
}

// Verifies that the thread scratch register set can be read (and is not
// advertised as writeable) on threads interrupted by the debugger.
//
// For every device in `devices` that supports debugging, the test:
//   1. launches the LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH helper process and
//      attaches to it, acknowledging the module event when required;
//   2. obtains the GPU buffer address published by the application;
//   3. interrupts all device threads and collects the stopped ones;
//   4. for each stopped thread in heapless mode, reads the
//      ZET_DEBUG_REGSET_TYPE_THREAD_SCRATCH_INTEL_GPU register set and checks
//      its readable/writeable flags (the test is skipped otherwise);
//   5. writes a zero byte to the GPU buffer, resumes the threads, and expects
//      MODULE_UNLOAD and PROCESS_EXIT events plus a zero helper exit code.
//
// `use_sub_devices` is forwarded unchanged to launch_process().
void zetDebugReadWriteRegistersTest::run_read_registers_test(
    std::vector<ze_device_handle_t> &devices, bool use_sub_devices) {
  for (auto &device : devices) {
    print_device(device);
    if (!is_debug_supported(device))
      continue;

    synchro->clear_debugger_signal();
    debugHelper = launch_process(LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
                                 device, use_sub_devices);

    zet_debug_event_t module_event;
    attach_and_get_module_event(debugHelper.id(), synchro, device, debugSession,
                                module_event);

    if (module_event.flags & ZET_DEBUG_EVENT_FLAG_NEED_ACK) {
      LOG_DEBUG << "[Debugger] Acking event: "
                << lzt::debuggerEventTypeString[module_event.type];
      lzt::debug_ack_event(debugSession, &module_event);
    }

    uint64_t gpu_buffer_va = 0;
    synchro->wait_for_application_signal();
    if (!synchro->get_app_gpu_buffer_address(gpu_buffer_va)) {
      FAIL() << "[Debugger] Could not get a valid GPU buffer VA";
    }
    synchro->clear_application_signal();

    // Value-initialize so that no descriptor field is left indeterminate.
    zet_debug_memory_space_desc_t memorySpaceDesc = {};
    memorySpaceDesc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT;
    memorySpaceDesc.address = gpu_buffer_va;

    // set buffer[0] to 0 to break the loop. See debug_loop_slm.cl
    // A vector owns the storage so every early exit below (FAIL, ASSERT_*,
    // GTEST_SKIP) releases it automatically.
    const size_t sizeToRead = 512;
    std::vector<uint8_t> kernel_buffer(sizeToRead, 0);

    ze_device_thread_t device_threads = {};
    device_threads.slice = UINT32_MAX;
    device_threads.subslice = UINT32_MAX;
    device_threads.eu = UINT32_MAX;
    device_threads.thread = UINT32_MAX;

    LOG_INFO << "[Debugger] Stopping all device threads";
    // give time to app to launch the kernel
    std::this_thread::sleep_for(std::chrono::seconds(6));
    lzt::debug_interrupt(debugSession, device_threads);

    std::vector<ze_device_thread_t> stopped_threads;
    if (!find_stopped_threads(debugSession, device, device_threads, true,
                              stopped_threads)) {
      FAIL() << "[Debugger] Did not find stopped threads";
    }

    LOG_INFO << "[Debugger] Reading/Writing Thread Scratch Register on "
                "interrupted threads";

    // The register set layout is queried per device, so fetch it once instead
    // of once per stopped thread.
    const std::vector<zet_debug_regset_properties_t> register_set_properties =
        lzt::get_register_set_properties(device);

    for (auto &stopped_thread : stopped_threads) {
      if (!lzt::is_heapless_mode(stopped_thread, device, debugSession)) {
        GTEST_SKIP() << "Test is not supported on this device";
      }

      for (const auto &register_set : register_set_properties) {
        // Only the thread scratch register set is under test; every other
        // register set is ignored rather than reported as a failure.
        if (register_set.type !=
            ZET_DEBUG_REGSET_TYPE_THREAD_SCRATCH_INTEL_GPU) {
          continue;
        }

        if (!(register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_READABLE)) {
          FAIL() << "[Debugger] Register set type " << register_set.type
                 << " is NOT readable";
        }
        LOG_DEBUG << "[Debugger] Register set type " << register_set.type
                  << " is readable";

        // Byte-sized storage: count registers of byteSize bytes each.
        const size_t reg_size_in_bytes =
            static_cast<size_t>(register_set.count) * register_set.byteSize;
        std::vector<uint8_t> thread_scratch_reg_values(reg_size_in_bytes);

        // Read the same register set whose properties were matched above.
        ASSERT_EQ(zetDebugReadRegisters(debugSession, stopped_thread,
                                        register_set.type, 0,
                                        register_set.count,
                                        thread_scratch_reg_values.data()),
                  ZE_RESULT_SUCCESS);

        if (register_set.generalFlags & ZET_DEBUG_REGSET_FLAG_WRITEABLE) {
          FAIL() << "[Debugger] Register set type " << register_set.type
                 << " should NOT be Writable";
        } else {
          LOG_INFO << "[Debugger] Register set " << register_set.type
                   << " type is NOT writeable";
        }
      }
    }

    // Only the first byte is written; it carries the loop-break value.
    lzt::debug_write_memory(debugSession, device_threads, memorySpaceDesc, 1,
                            kernel_buffer.data());

    LOG_INFO << "[Debugger] resuming interrupted threads";
    lzt::debug_resume(debugSession, device_threads);
    debugHelper.wait();

    std::vector<zet_debug_event_type_t> expectedEvents = {
        ZET_DEBUG_EVENT_TYPE_MODULE_UNLOAD, ZET_DEBUG_EVENT_TYPE_PROCESS_EXIT};

    if (!check_events(debugSession, expectedEvents)) {
      FAIL() << "[Debugger] Did not receive expected events";
    }

    lzt::debug_detach(debugSession);
    ASSERT_EQ(debugHelper.exit_code(), 0);
  }
}

// Runs the scratch register read scenario on the root devices of the default
// driver (sub-devices are not used).
TEST_F(
    zetDebugReadWriteRegistersTest,
    GivenActiveDebugSessionWhenReadingScratchRegistersThenDataReadIsDoneSuccessfully) {
  auto root_devices = lzt::get_devices(lzt::get_default_driver());
  const bool use_sub_devices = false;
  run_read_registers_test(root_devices, use_sub_devices);
}

TEST_F(
zetDebugReadWriteRegistersTest,
GivenActiveDebugSessionWhenReadingAndWritingRegistersThenValidDataReadAndDataWrittenSuccessfully) {
Expand Down
2 changes: 2 additions & 0 deletions conformance_tests/tools/debug/src/test_debug.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ class zetDebugReadWriteRegistersTest : public zetDebugMemAccessTest {
void TearDown() override { zetDebugMemAccessTest::TearDown(); }
void run_read_write_registers_test(std::vector<ze_device_handle_t> &devices,
bool use_sub_devices);
// Runs the scratch register read test on each debug-capable device in
// `devices`; `use_sub_devices` is passed on to the helper process launch.
void run_read_registers_test(std::vector<ze_device_handle_t> &devices,
bool use_sub_devices);
};

class zetDebugThreadControlTest : public zetDebugBaseSetup {
Expand Down
1 change: 1 addition & 0 deletions conformance_tests/tools/debug/src/test_debug_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ typedef enum {
ATTACH_AFTER_MODULE_DESTROYED,
LONG_RUNNING_KERNEL_INTERRUPTED,
LONG_RUNNING_KERNEL_INTERRUPTED_SLM,
LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH,
PAGE_FAULT,
MULTIPLE_THREADS,
MULTIPLE_CQ,
Expand Down
132 changes: 132 additions & 0 deletions conformance_tests/tools/debug/src/test_debug_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,133 @@ void run_long_kernel(ze_context_handle_t context, ze_device_handle_t device,
}
}

// Helper-process workload for the LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH
// scenario.
//
// Builds the module named by `options.module_name_in` with forced register
// spills, launches the "long_kernel_slm" kernel, publishes the address of the
// source device buffer to the debugger through `synchro`, and waits for the
// kernel to finish. Afterwards the destination buffer is compared against the
// source pattern and all resources are released. The process exits with code 1
// if any GoogleTest expectation failed.
void run_long_kernel_scratch(ze_context_handle_t context,
                             ze_device_handle_t device,
                             process_synchro &synchro, debug_options &options) {

  auto command_list = lzt::create_command_list(device);
  auto command_queue = lzt::create_command_queue(device);
  std::string module_name = options.module_name_in;

  std::string kernel_name = "long_kernel_slm";
  // NOTE: SLM capacity differs between SKUs, so keep this buffer small.
  size_t slm_buffer_size = 512;

  synchro.wait_for_debugger_signal();
  const char *build_flags = "-g -igc_opts 'VISAOptions=-forcespills'";
  auto module =
      lzt::create_module(device, module_name, ZE_MODULE_FORMAT_IL_SPIRV,
                         build_flags /* include debug symbols*/, nullptr);

  auto kernel = lzt::create_function(module, kernel_name);
  auto size = slm_buffer_size;

  ze_kernel_properties_t kernel_properties = {
      ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES, nullptr};
  EXPECT_EQ(ZE_RESULT_SUCCESS,
            zeKernelGetProperties(kernel, &kernel_properties));

  // Integer ceiling division. The guard avoids dividing by zero when the
  // property query above failed and left maxSubgroupSize at 0.
  const size_t max_subgroup_size = kernel_properties.maxSubgroupSize;
  const size_t threadCount =
      (max_subgroup_size != 0)
          ? (size + max_subgroup_size - 1) / max_subgroup_size
          : 0;

  LOG_INFO << "[Application] Problem size: " << size
           << ". Kernel maxSubGroupSize: " << kernel_properties.maxSubgroupSize
           << ". GPU thread count: ceil (P size/maxSubGroupSize) = "
           << threadCount;

  auto dest_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto dest_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);
  auto src_buffer_d =
      lzt::allocate_device_memory(size, size, 0, 0, device, context);
  auto src_buffer_s =
      lzt::allocate_shared_memory(size, size, 0, 0, device, context);

  void *slm_output_s = nullptr;
  slm_output_s = lzt::allocate_shared_memory(slm_buffer_size, slm_buffer_size,
                                             0, 0, device, context);

  unsigned long loop_max = 1000000000;

  auto loop_counter_d = lzt::allocate_device_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);
  auto loop_counter_s = lzt::allocate_shared_memory(
      loop_counter_alloc_size, loop_counter_alloc_size, 0, 0, device, context);

  LOG_DEBUG << "[Application] Allocated source device memory at: " << std::hex
            << src_buffer_d;
  LOG_DEBUG << "[Application] Allocated destination device memory at: "
            << std::hex << dest_buffer_d;

  std::memset(dest_buffer_s, 1, size);
  std::memset(src_buffer_s, 0, size);
  std::memset(loop_counter_s, 0, loop_counter_alloc_size);
  // Fill the source with the repeating byte pattern 1, 2, ..., 255, 0, ...
  for (size_t i = 0; i < size; i++) {
    static_cast<uint8_t *>(src_buffer_s)[i] =
        static_cast<uint8_t>((i + 1) & 0xFF);
  }

  lzt::set_argument_value(kernel, 0, sizeof(dest_buffer_d), &dest_buffer_d);
  lzt::set_argument_value(kernel, 1, sizeof(src_buffer_d), &src_buffer_d);
  lzt::set_argument_value(kernel, 2, sizeof(loop_counter_d), &loop_counter_d);
  lzt::set_argument_value(kernel, 3, sizeof(loop_max), &loop_max);
  lzt::set_argument_value(kernel, 4, sizeof(slm_output_s), &slm_output_s);

  uint32_t group_size_x = 1;
  uint32_t group_size_y = 1;
  uint32_t group_size_z = 1;
  lzt::suggest_group_size(kernel, size, 1, 1, group_size_x, group_size_y,
                          group_size_z);
  lzt::set_group_size(kernel, group_size_x, 1, 1);
  ze_group_count_t group_count = {};
  group_count.groupCountX = static_cast<uint32_t>(size / group_size_x);
  group_count.groupCountY = 1;
  group_count.groupCountZ = 1;

  lzt::append_memory_copy(command_list, src_buffer_d, src_buffer_s, size);
  lzt::append_barrier(command_list);
  lzt::append_launch_function(command_list, kernel, &group_count, nullptr, 0,
                              nullptr);
  lzt::append_barrier(command_list);
  lzt::append_memory_copy(command_list, dest_buffer_s, dest_buffer_d, size);
  lzt::append_memory_copy(command_list, loop_counter_s, loop_counter_d,
                          loop_counter_alloc_size);
  lzt::close_command_list(command_list);

  LOG_DEBUG << "[Application] launching execution of " << kernel_name;

  // Publish the source device buffer address before starting execution so the
  // debugger side can access it while the kernel is running.
  synchro.update_gpu_buffer_address(reinterpret_cast<uint64_t>(src_buffer_d));
  synchro.notify_debugger();

  lzt::execute_command_lists(command_queue, 1, &command_list, nullptr);
  lzt::synchronize(command_queue, UINT64_MAX);

  // Index 0 is deliberately not compared.
  // NOTE(review): presumably because the debugger overwrites the first byte of
  // the source device buffer to end the kernel loop - confirm against the
  // kernel source.
  for (size_t i = 1; i < size; i++) {
    EXPECT_EQ(static_cast<uint8_t *>(dest_buffer_s)[i],
              static_cast<uint8_t *>(src_buffer_s)[i]);
    if (static_cast<uint8_t *>(dest_buffer_s)[i] !=
        static_cast<uint8_t *>(src_buffer_s)[i]) {
      LOG_ERROR << "[Application] Buffer Sanity check did not pass";
      break;
    }
  }

  // cleanup
  lzt::free_memory(context, dest_buffer_s);
  lzt::free_memory(context, dest_buffer_d);
  lzt::free_memory(context, src_buffer_s);
  lzt::free_memory(context, src_buffer_d);
  lzt::free_memory(context, loop_counter_s);
  lzt::free_memory(context, loop_counter_d);
  lzt::free_memory(context, slm_output_s);

  lzt::destroy_function(kernel);
  lzt::destroy_module(module);
  lzt::destroy_command_list(command_list);
  lzt::destroy_command_queue(command_queue);

  // Propagate any recorded expectation failure to the parent test through the
  // process exit code.
  if (::testing::Test::HasFailure()) {
    exit(1);
  }
}

void run_multiple_threads(ze_context_handle_t context,
ze_device_handle_t device, process_synchro &synchro,
debug_options &options) {
Expand Down Expand Up @@ -1227,6 +1354,11 @@ int main(int argc, char **argv) {
options.kernel_name_in = "long_kernel_slm";
run_long_kernel(context, device, synchro, options);
break;
case LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH:
options.use_custom_module = true;
options.module_name_in = "debug_loop_slm.spv";
run_long_kernel_scratch(context, device, synchro, options);
break;
case MULTIPLE_THREADS:
run_multiple_threads(context, device, synchro, options);
break;
Expand Down
8 changes: 6 additions & 2 deletions utils/test_harness/tools/include/test_harness_debug.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ void debug_resume(const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread);

void clear_exceptions(const ze_device_handle_t &device,
const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread);
const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread);

void debug_read_memory(const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread,
Expand Down Expand Up @@ -79,6 +79,10 @@ void debug_write_registers(const zet_debug_session_handle_t &debug_session,

std::vector<uint8_t> get_debug_info(const zet_module_handle_t &module);

// Reads the ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU register set of
// `stopped_thread` through `debug_session` and returns true when the
// ZET_DEBUG_MODE_FLAG_HEAPLESS bit is set in its first 32-bit value.
// Returns false when `device_handle` reports no such register set.
bool is_heapless_mode(ze_device_thread_t stopped_thread,
ze_device_handle_t &device_handle,
zet_debug_session_handle_t debug_session);

}; // namespace level_zero_tests

#endif /* TEST_HARNESS_DEBUG_HPP */
34 changes: 30 additions & 4 deletions utils/test_harness/tools/src/test_harness_debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ bool get_register_set_props(ze_device_handle_t device,
}

void clear_exceptions(const ze_device_handle_t &device,
const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread) {
const zet_debug_session_handle_t &debug_session,
const ze_device_thread_t &device_thread) {
size_t reg_size_in_bytes = 0;

zet_debug_regset_properties_t cr_reg_prop;
Expand All @@ -198,8 +198,7 @@ void clear_exceptions(const ze_device_handle_t &device,
cr_reg_prop.count, cr_values),
ZE_RESULT_SUCCESS);

uint32_values[1] &=
~((1 << 26) | (1 << 30));
uint32_values[1] &= ~((1 << 26) | (1 << 30));
ASSERT_EQ(zetDebugWriteRegisters(debug_session, device_thread,
ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0,
cr_reg_prop.count, cr_values),
Expand Down Expand Up @@ -305,4 +304,31 @@ std::vector<uint8_t> get_debug_info(const zet_module_handle_t &module_handle) {
return debug_info;
}

// Reports whether `stopped_thread` runs in heapless mode.
//
// Looks up the ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU register set among
// the register sets reported for `device_handle`, reads it for
// `stopped_thread` through `debug_session`, and tests the
// ZET_DEBUG_MODE_FLAG_HEAPLESS bit of the first 32-bit value.
//
// Returns false when the device exposes no mode-flags register set. A failed
// register read is recorded as a non-fatal test failure and also yields false,
// because the buffer is zero-initialized before the read.
bool is_heapless_mode(ze_device_thread_t stopped_thread,
                      ze_device_handle_t &device_handle,
                      zet_debug_session_handle_t debug_session) {

  const std::vector<zet_debug_regset_properties_t> regset_properties =
      lzt::get_register_set_properties(device_handle);
  for (const auto &register_set : regset_properties) {
    if (register_set.type != ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU) {
      continue;
    }

    // Size the buffer in whole 32-bit words (rounded up, at least one) so the
    // flags word can be read without reinterpreting a byte array, and let the
    // vector release the storage on every path.
    const size_t reg_size_in_bytes =
        static_cast<size_t>(register_set.count) * register_set.byteSize;
    const size_t word_count =
        (reg_size_in_bytes + sizeof(uint32_t) - 1) / sizeof(uint32_t);
    std::vector<uint32_t> mode_values(word_count > 0 ? word_count : 1, 0);

    EXPECT_EQ(
        zetDebugReadRegisters(debug_session, stopped_thread,
                              ZET_DEBUG_REGSET_TYPE_MODE_FLAGS_INTEL_GPU, 0,
                              register_set.count, mode_values.data()),
        ZE_RESULT_SUCCESS);

    LOG_DEBUG << "[Debugger] mode value: " << mode_values[0];
    return (mode_values[0] & ZET_DEBUG_MODE_FLAG_HEAPLESS) != 0;
  }

  return false;
}

} // namespace level_zero_tests

0 comments on commit 79e34c0

Please sign in to comment.