From c69e5ad3fb607309078888a1bc2ab5a3366a1033 Mon Sep 17 00:00:00 2001 From: rej Date: Tue, 19 Mar 2024 14:50:54 +0100 Subject: [PATCH] Get rid of indexed memory access to accumulators/accumulators_next. Slices=2 --- src/1_58bit_mul.v | 25 ++++++++++++++++++------- test/Makefile | 2 +- test/test.py | 4 ++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/1_58bit_mul.v b/src/1_58bit_mul.v index 979196d..b048cdf 100644 --- a/src/1_58bit_mul.v +++ b/src/1_58bit_mul.v @@ -4,6 +4,8 @@ */ // TODOs: +// + remove indexed memory access in out_queue and accumulators/accumulators_next +// * get rid of (slice_count - 1) wait once indexed memory access is removed // * special weights 243..255 can be used to a) initiate readout, // b) shift accumulators by 1, // c) set beta&gamma, @@ -162,18 +164,27 @@ module systolic_array #( wire [16:0] value_curr = accumulators [i*W+j]; wire [16:0] value_next = accumulators_next[i*W+j]; wire [16:0] value_queue = out_queue [i*W+j]; - wire skip = (j != slice_counter) | arg_left_zero_curr[i]; + // wire skip = (j != slice_counter) | arg_left_zero_curr[i]; + wire skip = arg_left_zero_curr[i]; wire sign = arg_left_sign_curr[i]; - wire signed [7:0] addend = $signed(arg_top_curr[j*8 +: 8]); - assign accumulators_next[i*W+j] = - reset ? 0 : - skip ? accumulators[i*W+j] + 0 : - (sign ? accumulators[i*W+j] - addend : - accumulators[i*W+j] + addend); + // wire signed [7:0] addend = $signed(arg_top_curr[j*8 +: 8]); + wire signed [7:0] addend = $signed(arg_top_curr[slice_counter*8 +: 8]); + if (j == 0) begin + assign accumulators_next[i*W+W-1] = + reset ? 0 : + skip ? accumulators[i*W+j] + 0 : + (sign ? accumulators[i*W+j] - addend : + accumulators[i*W+j] + addend); + end else begin + assign accumulators_next[i*W+j-1] = + reset ? 0 : + accumulators[i*W+j]; + end end endgenerate assign out = out_queue[0] >> 8; + // assign out = out_queue[0][7:0]; // assign out = out_queue[out_queue_counter] >> 8; // assign out = out_queue[out_queue_counter][7:0]; endmodule diff --git a/test/Makefile b/test/Makefile index fc07bfc..572affa 100644 --- a/test/Makefile +++ b/test/Makefile @@ -2,7 +2,7 @@ # See https://docs.cocotb.org/en/stable/quickstart.html for more info # global parameters -COMPUTE_SLICES ?= 1 +COMPUTE_SLICES ?= 2 COMPILE_ARGS += -DCOMPUTE_SLICES=$(COMPUTE_SLICES) # defaults diff --git a/test/test.py b/test/test.py index 58201f5..bdb9daa 100644 --- a/test/test.py +++ b/test/test.py @@ -120,7 +120,7 @@ async def gemm(dut, weights, inputs, weights_per_byte = 4, compute_block_width = # Wait until all slices have finished accunulating dut.ui_in.value = 0 dut.uio_in.value = 0 - await ClockCycles(dut.clk, compute_slices) + await ClockCycles(dut.clk, compute_slices + (compute_slices-1)) # Move accumulators to output queue dut.ena.value = 0 @@ -225,7 +225,7 @@ async def test_gemm_small(dut): inputs = random_matrix(-127, 127, (K, M)) expected = matrix_mul(weights, inputs) - await reset_run_and_validate_gemm(dut, weights, inputs, expected) + await reset_run_and_validate_gemm(dut, weights, inputs, expected, verbose=True) @cocotb.test() async def test_gemm_large(dut):