From 77c4949a1f1318c048516ca78ce25d3d1ec2e628 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 8 Aug 2023 14:15:55 +0200 Subject: [PATCH] Add `mem_to_banks_detailed` (#194) * Add `mem_to_banks_detailed` Adds sideband data in the response path to be managed externally. `mem_to_banks` now wraps the `mem_to_banks_detailed`. * `mem_to_banks_detailed`: Rename sideband signals `atop` renamed to `wuser` `ersp` renamed to `ruser` --- Bender.yml | 4 +- README.md | 1 + common_cells.core | 4 +- src/mem_to_banks.sv | 169 ++++++-------------------- src/mem_to_banks_detailed.sv | 225 +++++++++++++++++++++++++++++++++++ src_files.yml | 4 +- 6 files changed, 274 insertions(+), 133 deletions(-) create mode 100644 src/mem_to_banks_detailed.sv diff --git a/Bender.yml b/Bender.yml index c1ed5f88..2158f642 100644 --- a/Bender.yml +++ b/Bender.yml @@ -98,9 +98,11 @@ sources: # Level 3 - src/cdc_fifo_gray_clearable.sv - src/cdc_2phase_clearable.sv - - src/mem_to_banks.sv + - src/mem_to_banks_detailed.sv - src/stream_arbiter.sv - src/stream_omega_net.sv + # Level 4 + - src/mem_to_banks.sv - target: simulation files: diff --git a/README.md b/README.md index 2737d3b2..388a0033 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ Please note that cells with status *deprecated* are not to be used for new desig | `stream_throttle` | Restrict the number of outstanding transfers in a stream. 
| active | |
 | `sub_per_hash` | Substitution-permutation hash function | active | |
 | `popcount` | Combinatorial popcount (hamming weight) | active | |
+| `mem_to_banks_detailed` | Split memory access over multiple parallel banks with detailed response signals | active | |
 | `mem_to_banks` | Split memory access over multiple parallel banks | active | |
 
 ### Data Structures
diff --git a/common_cells.core b/common_cells.core
index 179d2820..abb573b9 100644
--- a/common_cells.core
+++ b/common_cells.core
@@ -81,9 +81,11 @@ filesets:
       # Level 3
       - src/cdc_fifo_gray_clearable.sv
       - src/cdc_2phase_clearable.sv
-      - src/mem_to_banks.sv
+      - src/mem_to_banks_detailed.sv
       - src/stream_arbiter.sv
       - src/stream_omega_net.sv
+      # Level 4
+      - src/mem_to_banks.sv
     file_type : systemVerilogSource
 
   deprecated:
diff --git a/src/mem_to_banks.sv b/src/mem_to_banks.sv
index ca215bdb..7008f150 100644
--- a/src/mem_to_banks.sv
+++ b/src/mem_to_banks.sv
@@ -82,135 +82,44 @@ module mem_to_banks #(
   input  oup_data_t [NumBanks-1:0] bank_rdata_i
 );
 
-  localparam int unsigned DataBytes    = $bits(inp_strb_t);
-  localparam int unsigned BitsPerBank  = $bits(oup_data_t);
-  localparam int unsigned BytesPerBank = $bits(oup_strb_t);
+  // Thin wrapper: the former data-path logic now lives in `mem_to_banks_detailed`. `atop`
+  // travels on the request sideband (`wuser`); the response sideband (`ruser`) is tied off.
+  // Only overridable parameters are forwarded. The dependent types (`addr_t`, `inp_data_t`,
+  // `inp_strb_t`, `oup_data_t`, `oup_strb_t`) are localparams of the detailed module, derived
+  // there from `AddrWidth`, `DataWidth` and `NumBanks`; a localparam must not be overridden.
+  mem_to_banks_detailed #(
+    .AddrWidth  ( AddrWidth ),
+    .DataWidth  ( DataWidth ),
+    .WUserWidth ( AtopWidth ),
+    .RUserWidth ( 1         ),
+    .NumBanks   ( NumBanks  ),
+    .HideStrb   ( HideStrb  ),
+    .MaxTrans   ( MaxTrans  ),
+    .FifoDepth  ( FifoDepth ),
+    .wuser_t    ( atop_t    )
+  ) i_mem_to_banks_detailed (
+    .clk_i,
+    .rst_ni,
+    .req_i,
+    .gnt_o,
+    .addr_i,
+    .wdata_i,
+    .strb_i,
+    .wuser_i       ( atop_i      ),
+    .we_i,
+    .rvalid_o,
+    .rdata_o,
+    .ruser_o       (),
+    .bank_req_o,
+    .bank_gnt_i,
+    .bank_addr_o,
+    .bank_wdata_o,
+    .bank_strb_o,
+    .bank_wuser_o  ( bank_atop_o ),
+    .bank_we_o,
+    .bank_rvalid_i,
+    .bank_rdata_i,
+    .bank_ruser_i  ('0)
+  );
 
-  typedef struct packed {
-    
addr_t addr; - oup_data_t wdata; - oup_strb_t strb; - atop_t atop; - logic we; - } req_t; - - logic req_valid; - logic [NumBanks-1:0] req_ready, - resp_valid, resp_ready; - req_t [NumBanks-1:0] bank_req, - bank_oup; - logic [NumBanks-1:0] bank_req_internal, bank_gnt_internal, zero_strobe, dead_response; - logic dead_write_fifo_full; - - function automatic addr_t align_addr(input addr_t addr); - return (addr >> $clog2(DataBytes)) << $clog2(DataBytes); - endfunction - - // Handle requests. - assign req_valid = req_i & gnt_o; - for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_reqs - assign bank_req[i].addr = align_addr(addr_i) + i * BytesPerBank; - assign bank_req[i].wdata = wdata_i[i*BitsPerBank+:BitsPerBank]; - assign bank_req[i].strb = strb_i[i*BytesPerBank+:BytesPerBank]; - assign bank_req[i].atop = atop_i; - assign bank_req[i].we = we_i; - stream_fifo #( - .FALL_THROUGH ( 1'b1 ), - .DATA_WIDTH ( $bits(req_t) ), - .DEPTH ( FifoDepth ), - .T ( req_t ) - ) i_ft_reg ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .usage_o (), - .data_i ( bank_req[i] ), - .valid_i ( req_valid ), - .ready_o ( req_ready[i] ), - .data_o ( bank_oup[i] ), - .valid_o ( bank_req_internal[i] ), - .ready_i ( bank_gnt_internal[i] ) - ); - assign bank_addr_o[i] = bank_oup[i].addr; - assign bank_wdata_o[i] = bank_oup[i].wdata; - assign bank_strb_o[i] = bank_oup[i].strb; - assign bank_atop_o[i] = bank_oup[i].atop; - assign bank_we_o[i] = bank_oup[i].we; - - assign zero_strobe[i] = (bank_oup[i].strb == '0); - - if (HideStrb) begin : gen_hide_strb - assign bank_req_o[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b0 : bank_req_internal[i]; - assign bank_gnt_internal[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b1 : bank_gnt_i[i]; - end else begin : gen_legacy_strb - assign bank_req_o[i] = bank_req_internal[i]; - assign bank_gnt_internal[i] = bank_gnt_i[i]; - end - end - - // Grant output if all our requests have been granted. 
-  assign gnt_o = (&req_ready) & (&resp_ready) & !dead_write_fifo_full;
-
-  if (HideStrb) begin : gen_dead_write_fifo
-    fifo_v3 #(
-      .FALL_THROUGH ( 1'b0 ),
-      .DEPTH        ( MaxTrans+1 ),
-      .DATA_WIDTH   ( NumBanks )
-    ) i_dead_write_fifo (
-      .clk_i,
-      .rst_ni,
-      .flush_i    ( 1'b0 ),
-      .testmode_i ( 1'b0 ),
-      .full_o     ( dead_write_fifo_full ),
-      .empty_o    (),
-      .usage_o    (),
-      .data_i     ( bank_we_o & zero_strobe ),
-      .push_i     ( req_i & gnt_o ),
-      .data_o     ( dead_response ),
-      .pop_i      ( rvalid_o )
-    );
-  end else begin : gen_no_dead_write_fifo
-    assign dead_response = '0;
-    assign dead_write_fifo_full = 1'b0;
-  end
-
-  // Handle responses.
-  for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_resp_regs
-    stream_fifo #(
-      .FALL_THROUGH ( 1'b1 ),
-      .DATA_WIDTH   ( $bits(oup_data_t) ),
-      .DEPTH        ( FifoDepth ),
-      .T            ( oup_data_t )
-    ) i_ft_reg (
-      .clk_i,
-      .rst_ni,
-      .flush_i    ( 1'b0 ),
-      .testmode_i ( 1'b0 ),
-      .usage_o    (),
-      .data_i     ( bank_rdata_i[i] ),
-      .valid_i    ( bank_rvalid_i[i] ),
-      .ready_o    ( resp_ready[i] ),
-      .data_o     ( rdata_o[i*BitsPerBank+:BitsPerBank] ),
-      .valid_o    ( resp_valid[i] ),
-      .ready_i    ( rvalid_o & !dead_response[i] )
-    );
-  end
-  assign rvalid_o = &(resp_valid | dead_response);
-
-  // Assertions
-  // pragma translate_off
-  `ifndef VERILATOR
-  `ifndef SYNTHESIS
-    initial begin
-      assume (DataWidth != 0 && (DataWidth & (DataWidth - 1)) == 0)
-        else $fatal(1, "Data width must be a power of two!");
-      assume (DataWidth % NumBanks == 0)
-        else $fatal(1, "Data width must be evenly divisible over banks!");
-      assume ((DataWidth / NumBanks) % 8 == 0)
-        else $fatal(1, "Data width of each bank must be divisible into 8-bit bytes!");
-    end
-  `endif
-  `endif
-  // pragma translate_on
 endmodule
diff --git a/src/mem_to_banks_detailed.sv b/src/mem_to_banks_detailed.sv
new file mode 100644
index 00000000..2f0bc23f
--- /dev/null
+++ b/src/mem_to_banks_detailed.sv
@@ -0,0 +1,225 @@
+// Copyright (c) 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Wolfgang Roenninger
+
+/// Split a memory access over multiple parallel banks, where each bank has its own req/gnt
+/// request and rvalid response channel; `wuser`/`ruser` sidebands travel with request/response.
+module mem_to_banks_detailed #(
+  /// Input address width.
+  parameter int unsigned AddrWidth  = 32'd0,
+  /// Input data width, must be a power of two.
+  parameter int unsigned DataWidth  = 32'd0,
+  /// Request sideband width.
+  parameter int unsigned WUserWidth = 32'd0,
+  /// Response sideband width.
+  parameter int unsigned RUserWidth = 32'd0,
+  /// Number of banks at output, must evenly divide `DataWidth`.
+  parameter int unsigned NumBanks   = 32'd0,
+  /// Hide write requests with an all-zero bank strobe from the bank and answer them internally.
+  parameter bit          HideStrb   = 1'b0,
+  /// Number of outstanding transactions, sizes the dead-write FIFO (only used with `HideStrb`).
+  parameter int unsigned MaxTrans   = 32'd1,
+  /// Depth of the per-bank request and response FIFOs, must be >=1
+  parameter int unsigned FifoDepth  = 32'd1,
+  /// Request sideband type.
+  parameter  type wuser_t     = logic [WUserWidth-1:0],
+  /// Dependent parameter, do not override! Address type.
+  localparam type addr_t      = logic [AddrWidth-1:0],
+  /// Dependent parameter, do not override! Input data type.
+  localparam type inp_data_t  = logic [DataWidth-1:0],
+  /// Dependent parameter, do not override! Input write strobe type.
+  localparam type inp_strb_t  = logic [DataWidth/8-1:0],
+  /// Dependent parameter, do not override! Unsplit response sideband type, one entry per bank.
+  localparam type inp_ruser_t = logic [NumBanks-1:0][RUserWidth-1:0],
+  /// Dependent parameter, do not override! Output data type.
+  localparam type oup_data_t  = logic [DataWidth/NumBanks-1:0],
+  /// Dependent parameter, do not override! Output write strobe type.
+  localparam type oup_strb_t  = logic [DataWidth/NumBanks/8-1:0],
+  /// Dependent parameter, do not override! Output response sideband type.
+  localparam type oup_ruser_t = logic [RUserWidth-1:0]
+) (
+  /// Clock input.
+  input  logic                      clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                      rst_ni,
+  /// Memory request to split, request is valid.
+  input  logic                      req_i,
+  /// Memory request to split, request can be granted.
+  output logic                      gnt_o,
+  /// Memory request to split, request address, byte-wise.
+  input  addr_t                     addr_i,
+  /// Memory request to split, request write data.
+  input  inp_data_t                 wdata_i,
+  /// Memory request to split, request write strobe.
+  input  inp_strb_t                 strb_i,
+  /// Memory request to split, request sideband.
+  input  wuser_t                    wuser_i,
+  /// Memory request to split, request write enable, active high.
+  input  logic                      we_i,
+  /// Memory request to split, response is valid. Required for read and write requests
+  output logic                      rvalid_o,
+  /// Memory request to split, response read data.
+  output inp_data_t                 rdata_o,
+  /// Memory request to split, response sideband.
+  output inp_ruser_t                ruser_o,
+  /// Memory bank request, request is valid.
+  output logic       [NumBanks-1:0] bank_req_o,
+  /// Memory bank request, request can be granted.
+  input  logic       [NumBanks-1:0] bank_gnt_i,
+  /// Memory bank request, request address, byte-wise. Will be different for each bank.
+  output addr_t      [NumBanks-1:0] bank_addr_o,
+  /// Memory bank request, request write data.
+  output oup_data_t  [NumBanks-1:0] bank_wdata_o,
+  /// Memory bank request, request write strobe.
+  output oup_strb_t  [NumBanks-1:0] bank_strb_o,
+  /// Memory bank request, request sideband.
+  output wuser_t     [NumBanks-1:0] bank_wuser_o,
+  /// Memory bank request, request write enable, active high.
+  output logic       [NumBanks-1:0] bank_we_o,
+  /// Memory bank request, response is valid. Required for read and write requests
+  input  logic       [NumBanks-1:0] bank_rvalid_i,
+  /// Memory bank request, response read data.
+  input  oup_data_t  [NumBanks-1:0] bank_rdata_i,
+  /// Memory bank request, response sideband.
+  input  oup_ruser_t [NumBanks-1:0] bank_ruser_i
+);
+
+  localparam int unsigned DataBytes    = $bits(inp_strb_t);
+  localparam int unsigned BitsPerBank  = $bits(oup_data_t);
+  localparam int unsigned BytesPerBank = $bits(oup_strb_t);
+
+  typedef struct packed {
+    addr_t     addr;
+    oup_data_t wdata;
+    oup_strb_t strb;
+    wuser_t    wuser;
+    logic      we;
+  } req_t;
+
+  logic                req_valid;
+  logic [NumBanks-1:0] req_ready,
+                       resp_valid, resp_ready;
+  req_t [NumBanks-1:0] bank_req,
+                       bank_oup;
+  logic [NumBanks-1:0] bank_req_internal, bank_gnt_internal, zero_strobe, dead_response;
+  logic                dead_write_fifo_full;
+
+  function automatic addr_t align_addr(input addr_t addr);
+    return (addr >> $clog2(DataBytes)) << $clog2(DataBytes);
+  endfunction
+
+  // Handle requests: slice the request per bank and buffer each slice in a fall-through FIFO.
+  assign req_valid = req_i & gnt_o;
+  for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_reqs
+    assign bank_req[i].addr  = align_addr(addr_i) + i * BytesPerBank;
+    assign bank_req[i].wdata = wdata_i[i*BitsPerBank+:BitsPerBank];
+    assign bank_req[i].strb  = strb_i[i*BytesPerBank+:BytesPerBank];
+    assign bank_req[i].wuser = wuser_i;
+    assign bank_req[i].we    = we_i;
+    stream_fifo #(
+      .FALL_THROUGH ( 1'b1 ),
+      .DATA_WIDTH   ( $bits(req_t) ),
+      .DEPTH        ( FifoDepth ),
+      .T            ( req_t )
+    ) i_ft_reg (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .usage_o    (),
+      .data_i     ( bank_req[i] ),
+      .valid_i    ( req_valid ),
+      .ready_o    ( req_ready[i] ),
+      .data_o     ( bank_oup[i] ),
+      .valid_o    ( bank_req_internal[i] ),
+      .ready_i    ( bank_gnt_internal[i] )
+    );
+    assign bank_addr_o[i]  = bank_oup[i].addr;
+    assign bank_wdata_o[i] = bank_oup[i].wdata;
+    assign bank_strb_o[i]  = bank_oup[i].strb;
+    assign bank_wuser_o[i] = bank_oup[i].wuser;
+    assign bank_we_o[i]    = bank_oup[i].we;
+
+    assign zero_strobe[i] = (bank_oup[i].strb == '0);
+
+    if (HideStrb) begin : gen_hide_strb
+      assign bank_req_o[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b0 : bank_req_internal[i];
+      assign bank_gnt_internal[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b1 : bank_gnt_i[i];
+    end else begin : gen_legacy_strb
+      assign bank_req_o[i] = bank_req_internal[i];
+      assign bank_gnt_internal[i] = bank_gnt_i[i];
+    end
+  end
+
+  // Grant only if all request and response FIFOs are ready and the dead-write FIFO is not full.
+  assign gnt_o = (&req_ready) & (&resp_ready) & !dead_write_fifo_full;
+
+  if (HideStrb) begin : gen_dead_write_fifo
+    fifo_v3 #(
+      .FALL_THROUGH ( 1'b0 ),
+      .DEPTH        ( MaxTrans+1 ),
+      .DATA_WIDTH   ( NumBanks )
+    ) i_dead_write_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .full_o     ( dead_write_fifo_full ),
+      .empty_o    (),
+      .usage_o    (),
+      .data_i     ( bank_we_o & zero_strobe ),
+      .push_i     ( req_i & gnt_o ),
+      .data_o     ( dead_response ),
+      .pop_i      ( rvalid_o )
+    );
+  end else begin : gen_no_dead_write_fifo
+    assign dead_response = '0;
+    assign dead_write_fifo_full = 1'b0;
+  end
+
+  // Handle responses: buffer {rdata, ruser} per bank and pop them together on `rvalid_o`.
+  for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_resp_regs
+    stream_fifo #(
+      .FALL_THROUGH ( 1'b1 ),
+      .DATA_WIDTH   ( $bits(oup_data_t) + $bits(oup_ruser_t) ),
+      .DEPTH        ( FifoDepth )
+    ) i_ft_reg (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .usage_o    (),
+      .data_i     ( {bank_rdata_i[i], bank_ruser_i[i]} ),
+      .valid_i    ( bank_rvalid_i[i] ),
+      .ready_o    ( resp_ready[i] ),
+      .data_o     ( {rdata_o[i*BitsPerBank+:BitsPerBank], ruser_o[i]} ),
+      .valid_o    ( resp_valid[i] ),
+      .ready_i    ( rvalid_o & !dead_response[i] )
+    );
+  end
+  assign rvalid_o = &(resp_valid | dead_response);
+
+  // Assertions
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef SYNTHESIS
+    initial begin
+      assume (DataWidth != 0 && (DataWidth & (DataWidth - 1)) == 0)
+        else $fatal(1, "Data width must be a power of two!");
+      assume (DataWidth % NumBanks == 0)
+        else $fatal(1, "Data width must be evenly divisible over banks!");
+      assume ((DataWidth / NumBanks) % 8 == 0)
+        else $fatal(1, "Data width of each bank must be divisible into 8-bit bytes!");
+    end
+  `endif
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/src_files.yml b/src_files.yml
index 46a4c9c8..64204a50 100644
--- a/src_files.yml
+++ b/src_files.yml
@@ -77,9 +77,11 @@ common_cells_all:
     # Level 3
     - src/cdc_fifo_gray_clearable.sv
     - src/cdc_2phase_clearable.sv
-    - src/mem_to_banks.sv
+    - 
src/mem_to_banks_detailed.sv - src/stream_arbiter.sv - src/stream_omega_net.sv + # Level 4 + - src/mem_to_banks.sv # Deprecated modules # Level 0