Skip to content

Commit

Permalink
Merge pull request #1135 from Xilinx/bugfix/mvu_4x4ext
Browse files Browse the repository at this point in the history
Align datatype extension capabilities between all DSP-based MVU implementations.
  • Loading branch information
auphelia authored Jul 17, 2024
2 parents 438a482 + 7503470 commit 376f704
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 16 deletions.
10 changes: 6 additions & 4 deletions finn-rtllib/mvu/mvu_4sx4u.sv
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
module mvu_4sx4u #(
int unsigned PE,
int unsigned SIMD,
int unsigned WEIGHT_WIDTH,
int unsigned ACTIVATION_WIDTH,
int unsigned ACCU_WIDTH,

int unsigned VERSION = 1, // Version 1 (DSP48E1) *must* commit to NARROW_WEIGHTS
Expand All @@ -49,8 +51,8 @@ module mvu_4sx4u #(
// Input
input logic last,
input logic zero, // ignore current inputs and force this partial product to zero
input logic signed [PE-1:0][SIMD-1:0][3:0] w, // signed weights
input logic [SIMD-1:0][3:0] a, // unsigned activations (override by SIGNED_ACTIVATIONS)
input logic signed [PE-1:0][SIMD-1:0][WEIGHT_WIDTH -1:0] w, // signed weights
input logic [SIMD-1:0][ACTIVATION_WIDTH-1:0] a, // unsigned activations (override by SIGNED_ACTIVATIONS)

// Output
output logic vld,
Expand Down Expand Up @@ -141,14 +143,14 @@ module mvu_4sx4u #(
for(genvar s = 0; s < SIMD; s++) begin : genSIMD

// Input Lane Assembly
uwire [17:0] bb = { {(14){SIGNED_ACTIVATIONS && a[s][3]}}, a[s] };
uwire [17:0] bb = { {(18-ACTIVATION_WIDTH){SIGNED_ACTIVATIONS && a[s][ACTIVATION_WIDTH-1]}}, a[s] };
logic [29:0] aa;
logic [26:0] dd;
logic [ 1:0] xx[3:1];
if(1) begin : blkVectorize
uwire signed [3:0] ww[PE_END - PE_BEG];
for(genvar pe = 0; pe < PE_END - PE_BEG; pe++) begin
assign ww[pe] = w[PE_BEG + pe][s];
assign ww[pe] = $signed(w[PE_BEG + pe][s]);
if(pe > 0) begin
if(BEHAVIORAL) assign xx[pe + PE_REM] = zero? 0 : ww[pe] * a[s];
`ifndef VERILATOR
Expand Down
4 changes: 2 additions & 2 deletions finn-rtllib/mvu/mvu_8sx8u_dsp48.sv
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
module mvu_8sx8u_dsp48 #(
int unsigned PE,
int unsigned SIMD,
int unsigned ACCU_WIDTH,
int unsigned ACTIVATION_WIDTH,
int unsigned WEIGHT_WIDTH,
int unsigned ACTIVATION_WIDTH,
int unsigned ACCU_WIDTH,

int unsigned VERSION = 1,
bit SIGNED_ACTIVATIONS = 0,
Expand Down
7 changes: 4 additions & 3 deletions finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ module mvu_vvu_8sx9_dsp58 #(
bit IS_MVU,
int unsigned PE,
int unsigned SIMD,
int unsigned ACTIVATION_WIDTH,
int unsigned WEIGHT_WIDTH,
int unsigned ACCU_WIDTH,
int unsigned WEIGHT_WIDTH,
int unsigned ACTIVATION_WIDTH,
int unsigned ACCU_WIDTH,

bit SIGNED_ACTIVATIONS = 0,
int unsigned SEGMENTLEN = 0, // Default to 0 (which implies a single segment)
bit FORCE_BEHAVIORAL = 0,
Expand Down
23 changes: 16 additions & 7 deletions finn-rtllib/mvu/mvu_vvu_axi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,22 @@ module mvu_vvu_axi #(

case(COMPUTE_CORE)
"mvu_vvu_8sx9_dsp58":
mvu_vvu_8sx9_dsp58 #(.IS_MVU(IS_MVU), .PE(PE), .SIMD(DSP_SIMD), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH),
.ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN),
.FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core (
mvu_vvu_8sx9_dsp58 #(
.IS_MVU(IS_MVU),
.PE(PE), .SIMD(DSP_SIMD),
.WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH),
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN),
.FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
) core (
.clk(dsp_clk), .rst, .en(dsp_en),
.last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a),
.vld(dsp_vld), .p(dsp_p)
);
"mvu_4sx4u_dsp48e1":
mvu_4sx4u #(
.PE(PE), .SIMD(DSP_SIMD),
.ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS),
.WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH),
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS),
.VERSION(1), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
) core (
.clk(dsp_clk), .rst, .en(dsp_en),
Expand All @@ -320,16 +325,20 @@ module mvu_vvu_axi #(
"mvu_4sx4u_dsp48e2":
mvu_4sx4u #(
.PE(PE), .SIMD(DSP_SIMD),
.ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS),
.WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH),
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS),
.VERSION(2), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
) core (
.clk(dsp_clk), .rst, .en(dsp_en),
.last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a),
.vld(dsp_vld), .p(dsp_p)
);
"mvu_8sx8u_dsp48":
mvu_8sx8u_dsp48 #(.PE(PE), .SIMD(DSP_SIMD), .ACCU_WIDTH(ACCU_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH),
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core (
mvu_8sx8u_dsp48 #(
.PE(PE), .SIMD(DSP_SIMD),
.WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH),
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
) core (
.clk(dsp_clk), .rst, .en(dsp_en),
.last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a),
.vld(dsp_vld), .p(dsp_p)
Expand Down
165 changes: 165 additions & 0 deletions finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/**
 * Self-checking testbench driving mvu_vvu_axi with the "mvu_4sx4u_dsp48e2"
 * compute core using 3-bit weights and 3-bit activations.
 *
 * A randomized MH x MW weight matrix is streamed to the DUT in an endless
 * loop. ROUNDS random activation vectors are fed while a reference
 * accumulation is computed in parallel (weights taken as signed, activations
 * zero-extended). Each DUT output beat is compared against the queued
 * reference. The test only passes if the complete input stream was accepted
 * and every expected output was observed.
 */
module mvu_3sx3u_tb;

	// Number of input vectors (full matrix-vector products) to run.
	localparam int unsigned  ROUNDS = 157;

	// Matrix dimensions and folding.
	localparam int unsigned  MH = 32;
	localparam int unsigned  MW = 60;
	localparam int unsigned  PE = 1;
	localparam int unsigned  SIMD = 1;

	// Operand widths: narrower than the 4-bit lanes suggested by the core name.
	localparam int unsigned  ACTIVATION_WIDTH = 3;
	localparam int unsigned  WEIGHT_WIDTH = 3;
	localparam int unsigned  ACCU_WIDTH = 16;


	//-----------------------------------------------------------------------
	// Global Control
	logic  clk = 1;
	always #5ns clk = !clk;

	// Hold reset for 16 rising clock edges.
	logic  rst = 1;
	initial begin
		repeat(16) @(posedge clk);
		rst <= 0;
	end

	//-----------------------------------------------------------------------
	// DUT
	logic [PE-1:0][SIMD-1:0][WEIGHT_WIDTH-1:0]  s_axis_weights_tdata;
	logic  s_axis_weights_tvalid;
	uwire  s_axis_weights_tready;

	logic [SIMD-1:0][ACTIVATION_WIDTH-1:0]  s_axis_input_tdata;
	logic  s_axis_input_tvalid;
	uwire  s_axis_input_tready;

	uwire [PE-1:0][ACCU_WIDTH-1:0]  m_axis_output_tdata;
	uwire  m_axis_output_tvalid;
	logic  m_axis_output_tready;

	mvu_vvu_axi #(
		.IS_MVU(1),
		.COMPUTE_CORE("mvu_4sx4u_dsp48e2"),
		.MH(MH), .MW(MW),
		.PE(PE), .SIMD(SIMD),

		.ACTIVATION_WIDTH(ACTIVATION_WIDTH),
		.WEIGHT_WIDTH(WEIGHT_WIDTH),
		.ACCU_WIDTH(ACCU_WIDTH)
		// Not overridden here, i.e. left at the DUT's defaults:
		//int unsigned SEGMENTLEN = 0,
		//bit FORCE_BEHAVIORAL = 0,
	) dut (
		// NOTE(review): ap_clk2x is tied to 'x - presumably unused in this
		// configuration; confirm against mvu_vvu_axi.
		.ap_clk(clk), .ap_clk2x('x), .ap_rst_n(!rst),
		.s_axis_weights_tdata, .s_axis_weights_tvalid, .s_axis_weights_tready,
		.s_axis_input_tdata,   .s_axis_input_tvalid,   .s_axis_input_tready,
		.m_axis_output_tdata,  .m_axis_output_tvalid,  .m_axis_output_tready
	);

	//-----------------------------------------------------------------------
	// Stimuli

	//- Infinite Weight Feed ------------
	typedef logic signed [WEIGHT_WIDTH-1:0]  weights_t[MH][MW];

	// Produces the randomized weight matrix used by both the DUT feed and the
	// reference model. Aborts the simulation if randomization fails, as the
	// matrix would otherwise silently remain all-X.
	function weights_t calc_WEIGHTS();
		automatic weights_t  ret;
		if(!std::randomize(ret)) begin
			$fatal(1, "Randomization of weights failed.");
		end
		return  ret;
	endfunction : calc_WEIGHTS
	weights_t  WEIGHTS = calc_WEIGHTS();

	initial begin
		s_axis_weights_tdata  = 'x;
		s_axis_weights_tvalid =  0;
		@(posedge clk iff !rst);

		// Replay the weight matrix forever, one PE x SIMD tile per accepted beat.
		forever begin
			for(int unsigned  h = 0; h < MH; h+=PE) begin
				for(int unsigned  w = 0; w < MW; w+=SIMD) begin
					for(int unsigned  pe = 0; pe < PE; pe++) begin
						for(int unsigned  simd = 0; simd < SIMD; simd++) begin
							s_axis_weights_tdata[pe][simd] <= WEIGHTS[h+pe][w+simd];
						end
					end
					s_axis_weights_tvalid <= 1;
					@(posedge clk iff s_axis_weights_tready);
					s_axis_weights_tvalid <=  0;
					s_axis_weights_tdata  <= 'x;
				end
			end
		end
	end

	//- Input Feed and Reference Computation
	typedef logic [PE-1:0][ACCU_WIDTH-1:0]  outvec_t;
	outvec_t  Q_ref[$] = {};	// expected output beats in order of production

	// Set once all ROUNDS have been accepted by the DUT. The output checker
	// requires this so that a stalled input stream cannot pass as success.
	bit  feed_done = 0;

	initial begin
		s_axis_input_tdata  = 'x;
		s_axis_input_tvalid =  0;
		@(posedge clk iff !rst);

		repeat(ROUNDS) begin : blkRounds
			automatic logic [MH-1:0][ACCU_WIDTH-1:0]  accus = '{ default: 0 };

			for(int unsigned  w = 0; w < MW; w+=SIMD) begin : blkSF
				for(int unsigned  simd = 0; simd < SIMD; simd++) begin : blkSIMD
					automatic logic [ACTIVATION_WIDTH-1:0]  act = $urandom();
					for(int unsigned  h = 0; h < MH; h++) begin : blkMH
						// Signed weight times zero-extended (non-negative) activation.
						automatic logic signed [ACCU_WIDTH-1:0]  prod = WEIGHTS[h][w+simd] * $signed({1'b0, act});
						accus[h] += prod;
					end : blkMH
					s_axis_input_tdata[simd] <= act;
				end : blkSIMD
				s_axis_input_tvalid <= 1;
				@(posedge clk iff s_axis_input_tready);
				s_axis_input_tvalid <=  0;
				s_axis_input_tdata  <= 'x;
			end : blkSF

			// Expected results become visible only after the whole vector was accepted.
			for(int unsigned  h = 0; h < MH; h+=PE) begin
				Q_ref.push_back(accus[h+:PE]);
			end

		end : blkRounds

		feed_done = 1;
	end

	//- Output Checker
	initial begin
		automatic int  timeout = 0;

		m_axis_output_tready = 0;
		@(posedge clk iff !rst);

		// Consume outputs until none has been seen for MW/SIMD+16 consecutive cycles.
		m_axis_output_tready <= 1;
		while(timeout < MW/SIMD+16) begin
			@(posedge clk);
			if(!m_axis_output_tvalid)  timeout++;
			else begin
				automatic outvec_t  exp;

				assert(Q_ref.size()) else begin
					$error("Spurious output.");
					$stop;
				end

				exp = Q_ref.pop_front();
				assert(m_axis_output_tdata === exp) else begin
					$error("Mismatched output %p instead of %p.", m_axis_output_tdata, exp);
					$stop;
				end

				timeout = 0;
			end
		end
		m_axis_output_tready <= 0;

		// An empty reference queue alone does not prove success: if the DUT
		// stopped accepting inputs, no further references were ever queued.
		assert(feed_done) else begin
			$error("Input feed stalled before completing all rounds.");
			$stop;
		end

		assert(Q_ref.size() == 0) else begin
			$error("Missing output.");
			$stop;
		end

		$display("Test completed.");
		$finish;
	end

endmodule : mvu_3sx3u_tb

0 comments on commit 376f704

Please sign in to comment.