Skip to content

Commit

Permalink
Fixed default so that trace buffers are not registered if trace_size = 0
Browse files: browse the repository at this point in the history
  • Loading branch information
jackl-xilinx committed Mar 8, 2025
1 parent 743b010 commit 61e86f5
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 13 deletions.
12 changes: 8 additions & 4 deletions python/utils/xrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,15 @@ def write_out_trace(trace, file_name):
f.write(out_str)


def execute(app, input_one=None, input_two=None, trace_after_output=False):
def execute(app, input_one=None, input_two=None, enable_trace=False, trace_after_output=False):
if not (input_one is None):
app.buffers[3].write(input_one)
if not (input_two is None):
app.buffers[4].write(input_two)

app.run()

if trace_after_output:
if trace_after_output or not enable_trace:
if not (input_two is None):
return app.buffers[5].read(), 0
else:
Expand All @@ -237,6 +237,9 @@ def xrt_test_run(
trace_after_output=False,
):
enable_trace = opts.trace_size > 0
if opts.verbosity >= 1:
print("trace size = ",str(opts.trace_size))
print("enable_trace = ",str(enable_trace))

app = setup_aie(
opts.xclbin,
Expand All @@ -254,10 +257,11 @@ def xrt_test_run(
)

out_size = out_volume * out_data.itemsize
# print("out_size: " + str(out_size))
if opts.verbosity >= 1:
print("out_size: " + str(out_size))

start = time.time_ns()
full_output, trace_buffer = execute(app, in1_data, in2_data, trace_after_output)
full_output, trace_buffer = execute(app, in1_data, in2_data, enable_trace, trace_after_output)
stop = time.time_ns()
npu_time = stop - start
print("npu_time: ", npu_time)
Expand Down
29 changes: 20 additions & 9 deletions runtime_lib/test_lib/xrt_test_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@ int xrt_test_run(int IN1_VOLUME, int IN2_VOLUME, int OUT_VOLUME,
kernel.group_id(4));
auto bo_out = xrt::bo(device, OUT_VOLUME * sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(5));
auto bo_trace = xrt::bo(device, myargs.trace_size, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(7));

// Workaround so we declare a really small trace buffer when one is not used
int tmp_trace_size = (myargs.trace_size > 0) ? myargs.trace_size : 1;
auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(7));

if (myargs.verbosity >= 1)
std::cout << "Writing data into buffer objects.\n";

Expand All @@ -98,14 +101,16 @@ int xrt_test_run(int IN1_VOLUME, int IN2_VOLUME, int OUT_VOLUME,
init_bufIn2(bufIn2, IN2_VOLUME);
init_bufOut(bufOut, OUT_VOLUME); // <<< what size do I pass it?

memset(bufTrace, 0, myargs.trace_size);
if (myargs.trace_size > 0)
memset(bufTrace, 0, myargs.trace_size);

// sync host to device memories
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_in2.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
if (myargs.trace_size > 0)
bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);

// ------------------------------------------------------
// Initialize run configs
Expand Down Expand Up @@ -135,7 +140,8 @@ int xrt_test_run(int IN1_VOLUME, int IN2_VOLUME, int OUT_VOLUME,
run.wait();
auto stop = std::chrono::high_resolution_clock::now();
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
if (myargs.trace_size > 0)
bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

if (iter < myargs.n_warmup_iterations)
/* Warmup iterations do not count towards average runtime. */
Expand Down Expand Up @@ -246,7 +252,9 @@ int xrt_test_run(int IN1_VOLUME, int OUT_VOLUME, struct args myargs) {
kernel.group_id(3));
auto bo_out = xrt::bo(device, OUT_VOLUME * sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(4));
auto bo_trace = xrt::bo(device, myargs.trace_size, XRT_BO_FLAGS_HOST_ONLY,
// Workaround so we declare a really small trace buffer when one is not used
int tmp_trace_size = (myargs.trace_size > 0) ? myargs.trace_size : 1;
auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(7));

if (myargs.verbosity >= 1)
Expand All @@ -264,13 +272,15 @@ int xrt_test_run(int IN1_VOLUME, int OUT_VOLUME, struct args myargs) {
init_bufIn1(bufIn1, IN1_VOLUME);
init_bufOut(bufOut,
OUT_VOLUME); // <<< what size do I pass it? reset with trace?
memset(bufTrace, 0, myargs.trace_size);
if (myargs.trace_size > 0)
memset(bufTrace, 0, myargs.trace_size);

// sync host to device memories
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
if (myargs.trace_size > 0)
bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);

// ------------------------------------------------------
// Initialize run configs
Expand Down Expand Up @@ -301,7 +311,8 @@ int xrt_test_run(int IN1_VOLUME, int OUT_VOLUME, struct args myargs) {
run.wait();
auto stop = std::chrono::high_resolution_clock::now();
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
if (myargs.trace_size > 0)
bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

if (iter < myargs.n_warmup_iterations)
/* Warmup iterations do not count towards average runtime. */
Expand Down

0 comments on commit 61e86f5

Please sign in to comment.