Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

architectural clock gating + exe_pipeline struct field level clock gating #632

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions machines/arch_filelist.mk
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ VSOURCES += $(BASEJUMP_STL_DIR)/bsg_misc/bsg_unconcentrate_static.v
VSOURCES += $(BASEJUMP_STL_DIR)/bsg_misc/bsg_mux2_gatestack.v
VSOURCES += $(BASEJUMP_STL_DIR)/bsg_misc/bsg_counting_leading_zeros.v
VSOURCES += $(BASEJUMP_STL_DIR)/bsg_misc/bsg_mul_add_unsigned.v
VSOURCES += $(BASEJUMP_STL_DIR)/bsg_misc/bsg_icg_pos.v


VHEADERS += $(BASEJUMP_STL_DIR)/bsg_tag/bsg_tag_client.v
Expand Down Expand Up @@ -174,6 +175,7 @@ VSOURCES += $(BSG_MANYCORE_DIR)/v/vanilla_bean/regfile.v
VSOURCES += $(BSG_MANYCORE_DIR)/v/vanilla_bean/regfile_synth.v
VSOURCES += $(BSG_MANYCORE_DIR)/v/vanilla_bean/regfile_hard.v
VSOURCES += $(BSG_MANYCORE_DIR)/v/vanilla_bean/scoreboard.v
VSOURCES += $(BSG_MANYCORE_DIR)/v/vanilla_bean/exe_pipeline.v

VSOURCES += $(BSG_MANYCORE_DIR)/v/bsg_manycore_pod_ruche_array.v
VSOURCES += $(BSG_MANYCORE_DIR)/v/bsg_manycore_pod_ruche.v
Expand Down
124 changes: 124 additions & 0 deletions v/vanilla_bean/exe_pipeline.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/**
* exe_pipeline.v
*
* This module instantiates the flops for the EXE pipeline register,
* and implements the reset, clear, and per-field enable logic (used for clock gating) for each field.
*
*/


`include "bsg_vanilla_defines.vh"


module exe_pipeline
  import bsg_vanilla_pkg::*;
  #(parameter data_width_p=RV32_reg_data_width_gp
    // NOTE(review): data_width_p sizes the pc_plus4 / pred_or_jump_addr /
    // rs1_val / rs2_val flops; it presumably has to match the width of those
    // exe_signals_s fields -- confirm against bsg_vanilla_pkg before overriding.
  )
  (
    input clk_i
    , input reset_i   // synchronous reset; zeroes the control fields
    , input en_i      // advance the stage: control fields load, data fields load if their own enable is set
    , input clear_i   // when en_i is low, zeroes the control fields (data fields keep their value)

    , input exe_signals_s exe_i   // next-state value of the EXE pipeline register
    , output exe_signals_s exe_o  // registered EXE pipeline state
  );


  // Per-field enable logic.
  // Each data field is only written when the stage advances (en_i) AND the
  // incoming instruction's decode bits select that field.

  // pc_plus4 is captured for valid instructions and for icache-miss bubbles.
  wire pc_plus4_en          = en_i & (exe_i.valid | exe_i.icache_miss);

  // pred_or_jump_addr is captured for branch, JAL and JALR.
  wire pred_or_jump_addr_en = en_i & (exe_i.decode.is_branch_op
                                    | exe_i.decode.is_jal_op
                                    | exe_i.decode.is_jalr_op);

  // Control fields (decode, instruction, icache_miss, valid,
  // branch_predicted_taken) are written whenever the stage advances.
  wire ctrl_en              = en_i;

  wire rs1_val_en           = en_i & exe_i.decode.read_rs1;

  // rs2_val is captured for integer rs2 reads, CSR ops, and stores that read frs2.
  wire rs2_val_en           = en_i & (exe_i.decode.read_rs2
                                    | exe_i.decode.is_csr_op
                                    | (exe_i.decode.read_frs2 & exe_i.decode.is_store_op));

  // mem_addr_op2 is captured for load, store, LR, LR.AQ and AMO.
  wire mem_addr_op2_en      = en_i & (exe_i.decode.is_load_op
                                    | exe_i.decode.is_store_op
                                    | exe_i.decode.is_lr_op
                                    | exe_i.decode.is_lr_aq_op
                                    | exe_i.decode.is_amo_op);


  // pc_plus4 DFF
  bsg_dff_reset_en #(
    .width_p(data_width_p)
    ,.reset_val_p(0)
  ) dff_pc_plus4 (
    .clk_i(clk_i)
    ,.reset_i(reset_i)
    ,.en_i(pc_plus4_en)
    ,.data_i(exe_i.pc_plus4)
    ,.data_o(exe_o.pc_plus4)
  );

  // pred_or_jump_addr DFF
  bsg_dff_reset_en #(
    .width_p(data_width_p)
    ,.reset_val_p(0)
  ) dff_pred_or_jump_addr (
    .clk_i(clk_i)
    ,.reset_i(reset_i)
    ,.en_i(pred_or_jump_addr_en)
    ,.data_i(exe_i.pred_or_jump_addr)
    ,.data_o(exe_o.pred_or_jump_addr)
  );

  // rs1, rs2 val DFF
  bsg_dff_reset_en #(
    .width_p(data_width_p)
    ,.reset_val_p(0)
  ) rs1_val_dff (
    .clk_i(clk_i)
    ,.reset_i(reset_i)
    ,.en_i(rs1_val_en)
    ,.data_i(exe_i.rs1_val)
    ,.data_o(exe_o.rs1_val)
  );

  bsg_dff_reset_en #(
    .width_p(data_width_p)
    ,.reset_val_p(0)
  ) rs2_val_dff (
    .clk_i(clk_i)
    ,.reset_i(reset_i)
    ,.en_i(rs2_val_en)
    ,.data_i(exe_i.rs2_val)
    ,.data_o(exe_o.rs2_val)
  );

  // mem_addr_op2 DFF
  bsg_dff_reset_en #(
    .width_p(RV32_Iimm_width_gp)
    ,.reset_val_p(0)
  ) mem_addr_op2_dff (
    .clk_i(clk_i)
    ,.reset_i(reset_i)
    ,.en_i(mem_addr_op2_en)
    ,.data_i(exe_i.mem_addr_op2)
    ,.data_o(exe_o.mem_addr_op2)
  );

  // CONTROL FLOPS
  // Priority: reset_i > ctrl_en (load) > clear_i (zero) > hold.
  // clear_i only takes effect when the stage is not being loaded, so it never
  // discards an instruction that is entering EXE in the same cycle.
  always_ff @ (posedge clk_i) begin
    if (reset_i) begin
      exe_o.decode                 <= '0;
      exe_o.instruction            <= '0;
      exe_o.icache_miss            <= 1'b0;
      exe_o.valid                  <= 1'b0;
      exe_o.branch_predicted_taken <= 1'b0;
    end
    else begin
      if (ctrl_en) begin
        exe_o.decode                 <= exe_i.decode;
        exe_o.instruction            <= exe_i.instruction;
        exe_o.icache_miss            <= exe_i.icache_miss;
        exe_o.valid                  <= exe_i.valid;
        exe_o.branch_predicted_taken <= exe_i.branch_predicted_taken;
      end
      else if (clear_i) begin
        // Turn the stage into a bubble; the data flops above keep their
        // previous contents since none of their enables are asserted.
        exe_o.decode                 <= '0;
        exe_o.instruction            <= '0;
        exe_o.icache_miss            <= 1'b0;
        exe_o.valid                  <= 1'b0;
        exe_o.branch_predicted_taken <= 1'b0;
      end
    end
  end


endmodule
80 changes: 56 additions & 24 deletions v/vanilla_bean/vanilla_core.v
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,14 @@ module vanilla_core
, input [y_cord_width_p-1:0] global_y_i
);


// Hard ICG
logic clk_hard_lo;
logic clk_hard_en;
bsg_icg_pos icg0 (
.clk_i(clk_i)
,.en_i(clk_hard_en)
,.clk_o(clk_hard_lo)
);

// reset edge down detect
logic reset_r;
Expand All @@ -128,7 +135,7 @@ module vanilla_core
// pipeline signals
// ctrl signals set to zero when reset_i is high.
// data signals are not reset to zero.
logic id_en, exe_en, mem_ctrl_en, mem_data_en,
logic id_en, exe_en, exe_clear, mem_ctrl_en, mem_data_en,
fp_exe_ctrl_en, fp_exe_data_en, flw_wb_ctrl_en, flw_wb_data_en;
id_signals_s id_r, id_n;
exe_signals_s exe_r, exe_n;
Expand Down Expand Up @@ -255,7 +262,7 @@ module vanilla_core
,.num_rs_p(2)
,.x0_tied_to_zero_p(1)
) int_rf (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)

,.w_v_i(int_rf_wen)
Expand All @@ -282,7 +289,7 @@ module vanilla_core
,.num_clear_port_p(1)
,.x0_tied_to_zero_p(1)
) int_sb (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)

,.src_id_i({id_r.instruction.rs2, id_r.instruction.rs1})
Expand Down Expand Up @@ -316,7 +323,7 @@ module vanilla_core
,.num_rs_p(3)
,.x0_tied_to_zero_p(0)
) float_rf (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)

,.w_v_i(float_rf_wen)
Expand All @@ -343,7 +350,7 @@ module vanilla_core
,.num_src_port_p(3)
,.num_clear_port_p(1)
) float_sb (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)

,.src_id_i({id_r.instruction[31:27], id_r.instruction.rs2, id_r.instruction.rs1})
Expand Down Expand Up @@ -622,14 +629,13 @@ module vanilla_core
// //
//////////////////////////////

bsg_dff_reset_en #(
.width_p($bits(exe_signals_s))
) exe_pipeline (
.clk_i(clk_i)
exe_pipeline exe_pipeline (
.clk_i(clk_hard_lo)
,.reset_i(reset_i)
,.en_i(exe_en)
,.data_i(exe_n)
,.data_o(exe_r)
,.clear_i(exe_clear)
,.exe_i(exe_n)
,.exe_o(exe_r)
);


Expand Down Expand Up @@ -806,7 +812,7 @@ module vanilla_core
bsg_dff_reset_en #(
.width_p($bits(fp_exe_ctrl_signals_s))
) fp_exe_ctrl_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)
,.en_i(fp_exe_ctrl_en)
,.data_i(fp_exe_ctrl_n)
Expand All @@ -816,7 +822,7 @@ module vanilla_core
bsg_dff_en #(
.width_p($bits(fp_exe_data_signals_s))
) fp_exe_data_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.en_i(fp_exe_data_en)
,.data_i(fp_exe_data_n)
,.data_o(fp_exe_data_r)
Expand All @@ -840,7 +846,7 @@ module vanilla_core
logic [reg_addr_width_lp-1:0] fpu1_rd_r;

fpu_float fpu_float0 (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)

,.stall_fpu1_i(stall_fpu1_li)
Expand Down Expand Up @@ -926,7 +932,7 @@ module vanilla_core
bsg_dff_reset_en #(
.width_p($bits(mem_ctrl_signals_s))
) mem_ctrl_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)
,.en_i(mem_ctrl_en)
,.data_i(mem_ctrl_n)
Expand All @@ -936,7 +942,7 @@ module vanilla_core
bsg_dff_en #(
.width_p($bits(mem_data_signals_s))
) mem_data_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.en_i(mem_data_en)
,.data_i(mem_data_n)
,.data_o(mem_data_r)
Expand Down Expand Up @@ -1045,7 +1051,7 @@ module vanilla_core
bsg_dff_reset #(
.width_p($bits(wb_ctrl_signals_s))
) wb_ctrl_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)
,.data_i(wb_ctrl_n)
,.data_o(wb_ctrl_r)
Expand All @@ -1054,7 +1060,7 @@ module vanilla_core
bsg_dff #(
.width_p($bits(wb_data_signals_s))
) wb_data_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.data_i(wb_data_n)
,.data_o(wb_data_r)
);
Expand All @@ -1068,7 +1074,7 @@ module vanilla_core
bsg_dff_reset_en #(
.width_p($bits(flw_wb_ctrl_signals_s))
) flw_wb_ctrl_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.reset_i(reset_i)
,.en_i(flw_wb_ctrl_en)
,.data_i(flw_wb_ctrl_n)
Expand All @@ -1078,7 +1084,7 @@ module vanilla_core
bsg_dff_en #(
.width_p($bits(flw_wb_data_signals_s))
) flw_wb_data_pipeline (
.clk_i(clk_i)
.clk_i(clk_hard_lo)
,.en_i(flw_wb_data_en)
,.data_i(flw_wb_data_n)
,.data_o(flw_wb_data_r)
Expand Down Expand Up @@ -1587,18 +1593,21 @@ module vanilla_core

if (stall_all) begin
exe_en = 1'b0;
exe_clear = 1'b0;
npc_write_en = 1'b0;
end
else begin
npc_write_en = (exe_r.valid & mstatus_r.mie) | exe_r.decode.is_mret_op;
if (flush | stall_id) begin
exe_en = 1'b1;
exe_n = '0;
exe_en = 1'b0;
exe_clear = 1'b1;
//exe_n = '0;
end
else if (id_r.decode.is_fp_op) begin
// for fp_op, we still want to keep track of npc_r.
// so we set the valid and pc_plus4.
exe_en = 1'b1;
exe_clear = 1'b0;
exe_n = '{
pc_plus4: id_r.pc_plus4,
valid: id_r.valid,
Expand All @@ -1614,6 +1623,7 @@ module vanilla_core
end
else begin
exe_en = 1'b1;
exe_clear = 1'b0;
end
end
end
Expand Down Expand Up @@ -1953,8 +1963,30 @@ module vanilla_core
assign stall_fpu2_li = stall_remote_flw_wb;


// Backend Clk Enable logic
wire all_bubble = ~(exe_r.valid)
& ~(mem_ctrl_r.write_rd | mem_ctrl_r.write_frd)
& ~(wb_ctrl_r.write_rd)
& ~(fp_exe_ctrl_r.fp_decode.is_fpu_float_op
| fp_exe_ctrl_r.fp_decode.is_fpu_int_op
| fp_exe_ctrl_r.fp_decode.is_fdiv_op
| fp_exe_ctrl_r.fp_decode.is_fsqrt_op)
& ~(fpu1_v_r | imul_v_lo)
& ~fpu_float_v_lo
& ~flw_wb_ctrl_r.valid;


// When to disable backend clk?
// 1) pipeline is in stall ID, and it's filled up with bubbles.
// 2) icache bubble is in MEM, and it's waiting on ifetch.
assign clk_hard_en = reset_i
| int_remote_load_resp_v_i
| float_remote_load_resp_v_i
| idiv_v_lo | fdiv_fsqrt_v_lo
| ifetch_v_i
| ~((stall_id & all_bubble) | mem_ctrl_r.icache_miss);



// synopsys translate_off
always_ff @ (negedge clk_i) begin
Expand Down