-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tracking issue for implementing wide multiply #94
Comments
2024-08-03I'm starting on wide multiply. Smallest test case that uses just 2 DSPs is 16x32. I mean, there's definitely a smaller one, but that's small enough. I'll just post it here: // Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
// --------------------------------------------------------------------------------
// Tool Version: Vivado v.2022.1 (lin64) Build 3526262 Mon Apr 18 15:47:01 MDT 2022
// Date : Mon Jul 22 07:00:25 2024
// Host : aoi running 64-bit Ubuntu 18.04.6 LTS
// Command : write_verilog compiled.v -force
// Design : multiplier
// Purpose : This is a Verilog netlist of the current design or from a specific cell of the design. The output is an
// IEEE 1364-2001 compliant Verilog HDL file that contains netlist information obtained from the input
// design files.
// Device : xc7z020clg484-1
// --------------------------------------------------------------------------------
`timescale 1 ps / 1 ps
(* use_dsp = "yes" *)
(* STRUCTURAL_NETLIST = "yes" *)
module multiplier
(a,
b,
out);
input [15:0]a;
input [31:0]b;
output [63:0]out;
wire \<const0> ;
wire \<const1> ;
wire GND_2;
wire VCC_2;
wire [15:0]a;
wire [31:0]b;
wire [47:0]\^out ;
wire out__0_n_106;
wire out__0_n_107;
wire out__0_n_108;
wire out__0_n_109;
wire out__0_n_110;
wire out__0_n_111;
wire out__0_n_112;
wire out__0_n_113;
wire out__0_n_114;
wire out__0_n_115;
wire out__0_n_116;
wire out__0_n_117;
wire out__0_n_118;
wire out__0_n_119;
wire out__0_n_120;
wire out__0_n_121;
wire out__0_n_122;
wire out__0_n_123;
wire out__0_n_124;
wire out__0_n_125;
wire out__0_n_126;
wire out__0_n_127;
wire out__0_n_128;
wire out__0_n_129;
wire out__0_n_130;
wire out__0_n_131;
wire out__0_n_132;
wire out__0_n_133;
wire out__0_n_134;
wire out__0_n_135;
wire out__0_n_136;
wire out__0_n_137;
wire out__0_n_138;
wire out__0_n_139;
wire out__0_n_140;
wire out__0_n_141;
wire out__0_n_142;
wire out__0_n_143;
wire out__0_n_144;
wire out__0_n_145;
wire out__0_n_146;
wire out__0_n_147;
wire out__0_n_148;
wire out__0_n_149;
wire out__0_n_150;
wire out__0_n_151;
wire out__0_n_152;
wire out__0_n_153;
wire out__0_n_58;
wire out__0_n_59;
wire out__0_n_60;
wire out__0_n_61;
wire out__0_n_62;
wire out__0_n_63;
wire out__0_n_64;
wire out__0_n_65;
wire out__0_n_66;
wire out__0_n_67;
wire out__0_n_68;
wire out__0_n_69;
wire out__0_n_70;
wire out__0_n_71;
wire out__0_n_72;
wire out__0_n_73;
wire out__0_n_74;
wire out__0_n_75;
wire out__0_n_76;
wire out__0_n_77;
wire out__0_n_78;
wire out__0_n_79;
wire out__0_n_80;
wire out__0_n_81;
wire out__0_n_82;
wire out__0_n_83;
wire out__0_n_84;
wire out__0_n_85;
wire out__0_n_86;
wire out__0_n_87;
wire out__0_n_88;
wire out__1_n_58;
wire out__1_n_59;
wire out__1_n_60;
wire out__1_n_61;
wire out__1_n_62;
wire out__1_n_63;
wire out__1_n_64;
wire out__1_n_65;
wire out__1_n_66;
wire out__1_n_67;
wire out__1_n_68;
wire out__1_n_69;
wire out__1_n_70;
wire out__1_n_71;
wire out__1_n_72;
wire out__1_n_73;
wire out__1_n_74;
assign out[63] = \<const0> ;
assign out[62] = \<const0> ;
assign out[61] = \<const0> ;
assign out[60] = \<const0> ;
assign out[59] = \<const0> ;
assign out[58] = \<const0> ;
assign out[57] = \<const0> ;
assign out[56] = \<const0> ;
assign out[55] = \<const0> ;
assign out[54] = \<const0> ;
assign out[53] = \<const0> ;
assign out[52] = \<const0> ;
assign out[51] = \<const0> ;
assign out[50] = \<const0> ;
assign out[49] = \<const0> ;
assign out[48] = \<const0> ;
assign out[47:0] = \^out [47:0];
GND GND
(.G(\<const0> ));
GND GND_1
(.G(GND_2));
VCC VCC
(.P(\<const1> ));
VCC VCC_1
(.P(VCC_2));
(* METHODOLOGY_DRC_VIOS = "{SYNTH-13 {cell *THIS*}}" *)
DSP48E1 #(
.ACASCREG(0),
.ADREG(1),
.ALUMODEREG(0),
.AREG(0),
.AUTORESET_PATDET("NO_RESET"),
.A_INPUT("DIRECT"),
.BCASCREG(0),
.BREG(0),
.B_INPUT("DIRECT"),
.CARRYINREG(0),
.CARRYINSELREG(0),
.CREG(1),
.DREG(1),
.INMODEREG(0),
.MASK(48'h3FFFFFFFFFFF),
.MREG(0),
.OPMODEREG(0),
.PATTERN(48'h000000000000),
.PREG(0),
.SEL_MASK("MASK"),
.SEL_PATTERN("PATTERN"),
.USE_DPORT("FALSE"),
.USE_MULT("MULTIPLY"),
.USE_PATTERN_DETECT("NO_PATDET"),
.USE_SIMD("ONE48"))
out__0
(.A({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,b[16:0]}),
.ACIN({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.ALUMODE({\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.B({\<const0> ,\<const0> ,a}),
.BCIN({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.C({VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2}),
.CARRYCASCIN(\<const0> ),
.CARRYIN(\<const0> ),
.CARRYINSEL({\<const0> ,\<const0> ,\<const0> }),
.CEA1(\<const0> ),
.CEA2(\<const0> ),
.CEAD(\<const0> ),
.CEALUMODE(\<const0> ),
.CEB1(\<const0> ),
.CEB2(\<const0> ),
.CEC(\<const0> ),
.CECARRYIN(\<const0> ),
.CECTRL(\<const0> ),
.CED(\<const0> ),
.CEINMODE(\<const0> ),
.CEM(\<const0> ),
.CEP(\<const0> ),
.CLK(\<const0> ),
.D({GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2}),
.INMODE({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.MULTSIGNIN(\<const0> ),
.OPMODE({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const1> ,\<const0> ,\<const1> }),
.P({out__0_n_58,out__0_n_59,out__0_n_60,out__0_n_61,out__0_n_62,out__0_n_63,out__0_n_64,out__0_n_65,out__0_n_66,out__0_n_67,out__0_n_68,out__0_n_69,out__0_n_70,out__0_n_71,out__0_n_72,out__0_n_73,out__0_n_74,out__0_n_75,out__0_n_76,out__0_n_77,out__0_n_78,out__0_n_79,out__0_n_80,out__0_n_81,out__0_n_82,out__0_n_83,out__0_n_84,out__0_n_85,out__0_n_86,out__0_n_87,out__0_n_88,\^out [16:0]}),
.PCIN({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.PCOUT({out__0_n_106,out__0_n_107,out__0_n_108,out__0_n_109,out__0_n_110,out__0_n_111,out__0_n_112,out__0_n_113,out__0_n_114,out__0_n_115,out__0_n_116,out__0_n_117,out__0_n_118,out__0_n_119,out__0_n_120,out__0_n_121,out__0_n_122,out__0_n_123,out__0_n_124,out__0_n_125,out__0_n_126,out__0_n_127,out__0_n_128,out__0_n_129,out__0_n_130,out__0_n_131,out__0_n_132,out__0_n_133,out__0_n_134,out__0_n_135,out__0_n_136,out__0_n_137,out__0_n_138,out__0_n_139,out__0_n_140,out__0_n_141,out__0_n_142,out__0_n_143,out__0_n_144,out__0_n_145,out__0_n_146,out__0_n_147,out__0_n_148,out__0_n_149,out__0_n_150,out__0_n_151,out__0_n_152,out__0_n_153}),
.RSTA(\<const0> ),
.RSTALLCARRYIN(\<const0> ),
.RSTALUMODE(\<const0> ),
.RSTB(\<const0> ),
.RSTC(\<const0> ),
.RSTCTRL(\<const0> ),
.RSTD(\<const0> ),
.RSTINMODE(\<const0> ),
.RSTM(\<const0> ),
.RSTP(\<const0> ));
(* METHODOLOGY_DRC_VIOS = "{SYNTH-13 {cell *THIS*}}" *)
DSP48E1 #(
.ACASCREG(0),
.ADREG(1),
.ALUMODEREG(0),
.AREG(0),
.AUTORESET_PATDET("NO_RESET"),
.A_INPUT("DIRECT"),
.BCASCREG(0),
.BREG(0),
.B_INPUT("DIRECT"),
.CARRYINREG(0),
.CARRYINSELREG(0),
.CREG(1),
.DREG(1),
.INMODEREG(0),
.MASK(48'h3FFFFFFFFFFF),
.MREG(0),
.OPMODEREG(0),
.PATTERN(48'h000000000000),
.PREG(0),
.SEL_MASK("MASK"),
.SEL_PATTERN("PATTERN"),
.USE_DPORT("FALSE"),
.USE_MULT("MULTIPLY"),
.USE_PATTERN_DETECT("NO_PATDET"),
.USE_SIMD("ONE48"))
out__1
(.A({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,a}),
.ACIN({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.ALUMODE({\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.B({\<const0> ,\<const0> ,\<const0> ,b[31:17]}),
.BCIN({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.C({VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2,VCC_2}),
.CARRYCASCIN(\<const0> ),
.CARRYIN(\<const0> ),
.CARRYINSEL({\<const0> ,\<const0> ,\<const0> }),
.CEA1(\<const0> ),
.CEA2(\<const0> ),
.CEAD(\<const0> ),
.CEALUMODE(\<const0> ),
.CEB1(\<const0> ),
.CEB2(\<const0> ),
.CEC(\<const0> ),
.CECARRYIN(\<const0> ),
.CECTRL(\<const0> ),
.CED(\<const0> ),
.CEINMODE(\<const0> ),
.CEM(\<const0> ),
.CEP(\<const0> ),
.CLK(\<const0> ),
.D({GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2,GND_2}),
.INMODE({\<const0> ,\<const0> ,\<const0> ,\<const0> ,\<const0> }),
.MULTSIGNIN(\<const0> ),
.OPMODE({\<const1> ,\<const0> ,\<const1> ,\<const0> ,\<const1> ,\<const0> ,\<const1> }),
.P({out__1_n_58,out__1_n_59,out__1_n_60,out__1_n_61,out__1_n_62,out__1_n_63,out__1_n_64,out__1_n_65,out__1_n_66,out__1_n_67,out__1_n_68,out__1_n_69,out__1_n_70,out__1_n_71,out__1_n_72,out__1_n_73,out__1_n_74,\^out [47:17]}),
.PCIN({out__0_n_106,out__0_n_107,out__0_n_108,out__0_n_109,out__0_n_110,out__0_n_111,out__0_n_112,out__0_n_113,out__0_n_114,out__0_n_115,out__0_n_116,out__0_n_117,out__0_n_118,out__0_n_119,out__0_n_120,out__0_n_121,out__0_n_122,out__0_n_123,out__0_n_124,out__0_n_125,out__0_n_126,out__0_n_127,out__0_n_128,out__0_n_129,out__0_n_130,out__0_n_131,out__0_n_132,out__0_n_133,out__0_n_134,out__0_n_135,out__0_n_136,out__0_n_137,out__0_n_138,out__0_n_139,out__0_n_140,out__0_n_141,out__0_n_142,out__0_n_143,out__0_n_144,out__0_n_145,out__0_n_146,out__0_n_147,out__0_n_148,out__0_n_149,out__0_n_150,out__0_n_151,out__0_n_152,out__0_n_153}),
.RSTA(\<const0> ),
.RSTALLCARRYIN(\<const0> ),
.RSTALUMODE(\<const0> ),
.RSTB(\<const0> ),
.RSTC(\<const0> ),
.RSTCTRL(\<const0> ),
.RSTD(\<const0> ),
.RSTINMODE(\<const0> ),
.RSTM(\<const0> ),
.RSTP(\<const0> ));
endmodule First thing to notice is that PCOUT/PCIN are being used. How's it being used? I'm assuming it's beign used as an intermediate output that's then added to another intermediate product to produce the final result. Ok, seems not all that crazy. |
2024-08-04Working on multiply smoketest. Plan is this:
I'm hoping that's all I need. Again, there's the optimization using PCIN instead of C. We should get that going ASAP, or at least be thinking about how to capture it. Here's one way to rewrite: #lang rosette
(define bw 12)
(define half-bw (/ bw 2))
(define-symbolic a (bitvector half-bw))
(define-symbolic b (bitvector bw))
(define b0 (extract (- half-bw 1) 0 b))
(define b1 (extract (- bw 1) half-bw b))
(define spec (bvmul (zero-extend a (bitvector bw)) b))
(define rewritten
(bvadd
(bvmul (zero-extend a (bitvector bw)) (zero-extend b0 (bitvector bw)))
(bvshl (bvmul (zero-extend a (bitvector bw)) (zero-extend b1 (bitvector bw))) (bv half-bw bw))))
(verify (assert (bveq spec rewritten))) That's not quite how Vivado implements. I suspect because it's using a different feature of the DSP. Yeah, see: 3'b101 : qz_o_mux <= {{17{pcin_in[47]}}, pcin_in[47:17]}; in the DSP model. For now I think my rewriting will also work. 2024-08-11Something might be up with the ah, here's the problem: the verilog outputting function needs to take roots as an argument. those things will become the outputs. Ok, done. Next thing up: Add rewrite to handle DSP with three inputs. |
2024-08-12Ok so now the issue i'm running into is this:
This is a somewhat difficult problem. I don't think it's a dealbreaker, but it will require some thought. I think it might be best to use the indirect approach here. |
2024-08-19I don't know if the issue from last time makes sense. The alleged problem is that we need to get a spec for a 3-arg DSP sketch, but when we get the spec, it only has 2 args. But how could that be? No yeah, it does make sense. Because the top-level a*b == (some big expression involving subexpr mapped to c) Maybe we can just force the choice of the three args to the DSP? will that be enough? I don't think so. Consider the spec that's being currently erroneously extracted:
If we forced the choice of a, b, and the expression mapped to c, we could still legally pick all of these nodes, as those other nodes come further down the chain. I think the quick hack is to use an iteratively refining extraction that biases towards ensuring the specific nodes are extracted. Algorithm for iteratively extracting: Ok, that seems to be working. I'm now getting all three modules instantiated in the egraph. Now just struggling with extraction. what's wrong in extraction? It's saying wires aren't yet implemented. I swear I encountered this already: namely that we do need to support extracting to wires, because sometimes wires are placeholders for outputs. When did we encounter this? Why isn't it fixed? Hm, I'm not actually sure that yet applies here though. I don't think we should be extracting wires. I think it might have to do with how we convert the expression back to Churchroad commands?
Seems like the A input is a wire when it shouldn't be. Is there nothing else in that eclass? how? In the meeting, Zach was confused about the whole spec extraction problem. Fundamentally, he was confused as to why we needed a free variable. Here's an attempt at a clean explanation. The top level problem looks ROUGHLY (ignoring shifts) like
Which maps like this:
where DSP0 is programmed to implement 2-input mul and DSP1 is programmed to implement 3-input multiply accumulate. Synthesizing configurations for DSP0 and DSP1 such that
is where we run into solver issues w/ multiplication. Thus, our strategy is to rewrite the LHS:
Now, instead of sending the full
But now, what should query 2 be? Option 1 is this:
Essentially, ignore the rewritten LHS and use the original Option 2 is this:
That is, use the rewritten LHS. Lastly, option 3 is this:
Specifically, take option 2, and rewrite the In the meeting on Monday, I stated that, when calling Lakeroad to configure DSP1, we need to force the extraction of a spec looking like the LHS of Option 2 or Option 3. Zach's question in meeting was, essentially, why can't the spec look like There's another question, about whether to use Option 2 or Option 3 -- I only really realized Option 2 was an option while typing this up. I'll give it a try. |
2024-08-21Coming back to the problem at hand. Input a is getting unioned with a wire. Why? And is nothing else in that eclass with the wire? How do we pick what a, b, c are unioned with? The problem is a familiar one in the end. we have these things defined:
Then later we do:
I thought this was the issue we fixed by splicing. We could potentially fix this with rewrites, too. I am trying out the rewrites, but these introduce a cycle that then leads to a stack overflow. I really feel like there should be a way to fix this via Yosys. Can I get the problematic Verilog? Posted a question here: YosysHQ/yosys#4557 |
2024-08-25The Yosys folks told me in a meeting that there wasn't a pass to do what I'm asking for. Talking to them, though, I realized I could probably do it myself. I think the thought was this: in the churchroad backend, just compile concatenations differently. Always give them a new name. Can I describe the problem really simply? In Verilog, you can assign to a concatenation: Interestingly, we've extracted an infinite loop on the structural Verilog we extract. OK, we're getting a final extracted expression, but it's a bit weird: module top(
input [32-1:0] b,
output [32-1:0] out,
output [32-1:0] out,
output [32-1:0] out,
);
assign out = wire_57;
assign out = wire_335;
assign out = wire_414;
logic [32-1:0] wire_414 = wire_499[31:0];
logic [48-1:0] wire_499;
localparam [5*8:0] wire_181 = "XOR12";
localparam [5*8:0] wire_179 = "FALSE";
localparam [5*8:0] wire_177 = "ONE48";
localparam [9*8:0] wire_175 = "NO_PATDET";
localparam [8*8:0] wire_173 = "MULTIPLY";
localparam [7*8:0] wire_171 = "PATTERN";
localparam [4*8:0] wire_169 = "MASK";
localparam [1*8:0] wire_167 = "A";
localparam [9-1:0] wire_165 = 9'd0;
localparam [32-1:0] wire_456 = 32'd1;
localparam [1*8:0] wire_161 = "B";
localparam [6*8:0] wire_159 = "DIRECT";
localparam [5*8:0] wire_157 = "RESET";
localparam [8*8:0] wire_155 = "NO_RESET";
localparam [2*8:0] wire_153 = "AD";
localparam [32-1:0] wire_151 = 32'd0;
localparam [48-1:0] wire_453 = 48'd0;
localparam [9-1:0] wire_450 = 9'd309;
localparam [5-1:0] wire_449 = 5'd0;
logic [27-1:0] wire_400 = wire_412[26:0];
logic [32-1:0] wire_412 = wire_396[31:0];
logic [33-1:0] wire_396 = wire_398[32:0];
logic [34-1:0] wire_398 = wire_364[33:0];
logic [35-1:0] wire_364 = wire_366[34:0];
logic [36-1:0] wire_366 = wire_368[35:0];
logic [37-1:0] wire_368 = wire_370[36:0];
logic [38-1:0] wire_370 = wire_372[37:0];
logic [39-1:0] wire_372 = wire_374[38:0];
logic [40-1:0] wire_374 = wire_376[39:0];
logic [41-1:0] wire_376 = wire_378[40:0];
logic [42-1:0] wire_378 = wire_380[41:0];
logic [43-1:0] wire_380 = wire_382[42:0];
logic [44-1:0] wire_382 = wire_384[43:0];
logic [45-1:0] wire_384 = wire_386[44:0];
logic [46-1:0] wire_386 = wire_388[45:0];
logic [47-1:0] wire_388 = wire_390[46:0];
localparam [1-1:0] wire_448 = 1'd1;
localparam [3-1:0] wire_447 = 3'd6;
localparam [1-1:0] wire_446 = 1'd0;
logic [48-1:0] wire_390 = { wire_402, wire_388 };
logic [1-1:0] wire_402 = wire_368[36:36];
localparam [18-1:0] wire_445 = 18'd0;
logic [18-1:0] wire_392 = { wire_429, wire_410 };
logic [16-1:0] wire_410 = wire_392[15:0];
localparam [2-1:0] wire_429 = 2'd0;
localparam [4-1:0] wire_444 = 4'd0;
localparam [30-1:0] wire_443 = 30'd0;
logic [30-1:0] wire_394 = { wire_431, wire_408 };
logic [16-1:0] wire_408 = wire_394[15:0];
localparam [14-1:0] wire_431 = 14'd0;
logic [32-1:0] wire_335 = wire_360[31:0];
logic [48-1:0] wire_360;
localparam [9-1:0] wire_353 = 9'd53;
localparam [27-1:0] wire_351 = 27'd0;
logic [18-1:0] wire_323 = { wire_429, wire_333 };
logic [16-1:0] wire_333 = wire_13[31:16];
logic [32-1:0] wire_13 = b;
localparam [4-1:0] wire_345 = 4'd1;
logic [32-1:0] wire_57 = wire_306[31:0];
logic [48-1:0] wire_306;
DSP48E2 #(
.USE_MULT(wire_173),
.AUTORESET_PATDET(wire_155),
.IS_RSTINMODE_INVERTED(wire_446),
.PREADDINSEL(wire_167),
.IS_RSTM_INVERTED(wire_446),
.SEL_PATTERN(wire_171),
.IS_RSTB_INVERTED(wire_446),
.IS_CLK_INVERTED(wire_446),
.BMULTSEL(wire_161),
.OPMODEREG(wire_151),
.ACASCREG(wire_151),
.INMODEREG(wire_151),
.USE_PATTERN_DETECT(wire_175),
.IS_CARRYIN_INVERTED(wire_446),
.CARRYINSELREG(wire_151),
.MASK(wire_453),
.IS_OPMODE_INVERTED(wire_165),
.IS_RSTALUMODE_INVERTED(wire_446),
.IS_RSTP_INVERTED(wire_446),
.A_INPUT(wire_159),
.USE_WIDEXOR(wire_179),
.IS_RSTC_INVERTED(wire_446),
.ADREG(wire_151),
.DREG(wire_151),
.ALUMODEREG(wire_151),
.BCASCREG(wire_151),
.IS_INMODE_INVERTED(wire_449),
.IS_RSTD_INVERTED(wire_446),
.IS_ALUMODE_INVERTED(wire_444),
.RND(wire_453),
.MREG(wire_151),
.CARRYINREG(wire_151),
.AUTORESET_PRIORITY(wire_157),
.CREG(wire_151),
.IS_RSTALLCARRYIN_INVERTED(wire_446),
.AREG(wire_151),
.BREG(wire_151),
.IS_RSTA_INVERTED(wire_446),
.PATTERN(wire_453),
.PREG(wire_151),
.IS_RSTCTRL_INVERTED(wire_446),
.SEL_MASK(wire_169),
.B_INPUT(wire_159),
.USE_SIMD(wire_177),
.XORSIMD(wire_181),
.AMULTSEL(wire_153)
) module_354 (
.A(wire_394),
.ACIN(wire_443),
.ALUMODE(wire_345),
.B(wire_323),
.BCIN(wire_445),
.C(wire_453),
.CARRYCASCIN(wire_446),
.CARRYIN(wire_446),
.CARRYINSEL(wire_447),
.CEA1(wire_448),
.CEA2(wire_448),
.CEAD(wire_448),
.CEALUMODE(wire_448),
.CEB1(wire_448),
.CEB2(wire_448),
.CEC(wire_448),
.CECARRYIN(wire_448),
.CECTRL(wire_448),
.CED(wire_448),
.CEINMODE(wire_448),
.CEM(wire_448),
.CEP(wire_448),
.CLK(wire_446),
.D(wire_351),
.INMODE(wire_449),
.MULTSIGNIN(wire_446),
.OPMODE(wire_353),
.PCIN(wire_453),
.RSTA(wire_446),
.RSTALLCARRYIN(wire_446),
.RSTALUMODE(wire_446),
.RSTB(wire_446),
.RSTC(wire_446),
.RSTCTRL(wire_446),
.RSTD(wire_446),
.RSTINMODE(wire_446),
.RSTM(wire_446),
.RSTP(wire_446),
.P(wire_360));
DSP48E2 #(
.MASK(wire_453),
.MREG(wire_151),
.ADREG(wire_151),
.IS_RSTCTRL_INVERTED(wire_446),
.ALUMODEREG(wire_151),
.PREADDINSEL(wire_167),
.ACASCREG(wire_151),
.BCASCREG(wire_151),
.BMULTSEL(wire_161),
.IS_RSTINMODE_INVERTED(wire_446),
.BREG(wire_151),
.SEL_MASK(wire_169),
.SEL_PATTERN(wire_171),
.CREG(wire_151),
.USE_PATTERN_DETECT(wire_175),
.AMULTSEL(wire_153),
.USE_SIMD(wire_177),
.AREG(wire_151),
.IS_ALUMODE_INVERTED(wire_444),
.IS_OPMODE_INVERTED(wire_165),
.IS_RSTC_INVERTED(wire_446),
.PREG(wire_151),
.IS_RSTB_INVERTED(wire_446),
.CARRYINREG(wire_151),
.RND(wire_453),
.B_INPUT(wire_159),
.IS_INMODE_INVERTED(wire_449),
.IS_RSTP_INVERTED(wire_446),
.USE_WIDEXOR(wire_179),
.CARRYINSELREG(wire_151),
.A_INPUT(wire_159),
.DREG(wire_151),
.IS_RSTM_INVERTED(wire_446),
.IS_CARRYIN_INVERTED(wire_446),
.INMODEREG(wire_151),
.OPMODEREG(wire_151),
.USE_MULT(wire_173),
.IS_RSTD_INVERTED(wire_446),
.XORSIMD(wire_181),
.AUTORESET_PRIORITY(wire_157),
.IS_CLK_INVERTED(wire_446),
.AUTORESET_PATDET(wire_155),
.IS_RSTA_INVERTED(wire_446),
.PATTERN(wire_453),
.IS_RSTALLCARRYIN_INVERTED(wire_446),
.IS_RSTALUMODE_INVERTED(wire_446)
) module_102 (
.A(wire_394),
.ACIN(wire_443),
.ALUMODE(wire_345),
.B(wire_392),
.BCIN(wire_445),
.C(wire_453),
.CARRYCASCIN(wire_446),
.CARRYIN(wire_446),
.CARRYINSEL(wire_447),
.CEA1(wire_448),
.CEA2(wire_448),
.CEAD(wire_448),
.CEALUMODE(wire_448),
.CEB1(wire_448),
.CEB2(wire_448),
.CEC(wire_448),
.CECARRYIN(wire_448),
.CECTRL(wire_448),
.CED(wire_448),
.CEINMODE(wire_448),
.CEM(wire_448),
.CEP(wire_448),
.CLK(wire_446),
.D(wire_351),
.INMODE(wire_449),
.MULTSIGNIN(wire_446),
.OPMODE(wire_353),
.PCIN(wire_453),
.RSTA(wire_446),
.RSTALLCARRYIN(wire_446),
.RSTALUMODE(wire_446),
.RSTB(wire_446),
.RSTC(wire_446),
.RSTCTRL(wire_446),
.RSTD(wire_446),
.RSTINMODE(wire_446),
.RSTM(wire_446),
.RSTP(wire_446),
.P(wire_306));
DSP48E2 #(
.IS_RSTB_INVERTED(wire_446),
.IS_RSTCTRL_INVERTED(wire_446),
.ALUMODEREG(wire_151),
.IS_RSTC_INVERTED(wire_446),
.IS_RSTINMODE_INVERTED(wire_446),
.CARRYINSELREG(wire_456),
.RND(wire_453),
.PATTERN(wire_453),
.XORSIMD(wire_181),
.IS_RSTM_INVERTED(wire_446),
.ADREG(wire_151),
.A_INPUT(wire_159),
.BREG(wire_151),
.DREG(wire_151),
.AUTORESET_PRIORITY(wire_157),
.ACASCREG(wire_151),
.CREG(wire_151),
.IS_RSTALLCARRYIN_INVERTED(wire_446),
.IS_RSTP_INVERTED(wire_446),
.MASK(wire_453),
.IS_INMODE_INVERTED(wire_449),
.MREG(wire_151),
.IS_RSTA_INVERTED(wire_446),
.BCASCREG(wire_151),
.IS_RSTALUMODE_INVERTED(wire_446),
.B_INPUT(wire_159),
.OPMODEREG(wire_151),
.PREADDINSEL(wire_167),
.SEL_PATTERN(wire_171),
.USE_PATTERN_DETECT(wire_175),
.USE_MULT(wire_173),
.CARRYINREG(wire_151),
.BMULTSEL(wire_161),
.USE_SIMD(wire_177),
.INMODEREG(wire_151),
.IS_CLK_INVERTED(wire_446),
.SEL_MASK(wire_169),
.IS_RSTD_INVERTED(wire_446),
.PREG(wire_151),
.IS_ALUMODE_INVERTED(wire_444),
.AREG(wire_151),
.USE_WIDEXOR(wire_179),
.IS_CARRYIN_INVERTED(wire_446),
.AMULTSEL(wire_153),
.AUTORESET_PATDET(wire_155),
.IS_OPMODE_INVERTED(wire_165)
) module_454 (
.A(wire_394),
.ACIN(wire_443),
.ALUMODE(wire_444),
.B(wire_392),
.BCIN(wire_445),
.C(wire_390),
.CARRYCASCIN(wire_446),
.CARRYIN(wire_446),
.CARRYINSEL(wire_447),
.CEA1(wire_448),
.CEA2(wire_448),
.CEAD(wire_448),
.CEALUMODE(wire_448),
.CEB1(wire_448),
.CEB2(wire_448),
.CEC(wire_448),
.CECARRYIN(wire_448),
.CECTRL(wire_448),
.CED(wire_448),
.CEINMODE(wire_448),
.CEM(wire_448),
.CEP(wire_448),
.CLK(wire_446),
.D(wire_400),
.INMODE(wire_449),
.MULTSIGNIN(wire_446),
.OPMODE(wire_450),
.PCIN(wire_453),
.RSTA(wire_446),
.RSTALLCARRYIN(wire_446),
.RSTALUMODE(wire_446),
.RSTB(wire_446),
.RSTC(wire_446),
.RSTCTRL(wire_446),
.RSTD(wire_446),
.RSTINMODE(wire_446),
.RSTM(wire_446),
.RSTP(wire_446),
.P(wire_499));
endmodule Why are there three DSPs? I think this is doable with two. That question is a little less important to me right now, we can worry about finding more optimal designs once it's correct. Why are there three outputs? Where is our input Okay so now we're extracting cycles, which isn't a problem until you realize the generated Verilog is useless. So we should figure out how to prevent cycles. One thought: soemtimes shr/shls are ok in structural when they're shifts by constants. How do we encode that into the extractor? One thing we cdould do would be to rewrite shifts by constants. Temporary solution is to compile shls, but assert that they are shls by constants. Taht's a hacky but conservative soln. because the compiler shouldn't care whether they're shls by constants, but i care right now b/c i don't want to start accidentally allowing shifts into our structural verilog working now! Woohoo! working on an even wider mul now. assume all multiplies are full-size, i.e. nbit*mbit = n+m bit
similar for b, so if we apply both rules we get
so that's two multiplication rules (one for each side) plus a rule that distributes a shift plus a rule that simplifies shifts. |
2024-08-26Thinking about how to implement even wider mul. The interesting thing is that our representation of multiply preserves width. In the DSP we're really aiming to implement Can we just ignore the fact that our muls are How do we rewrite I think it's similar to what we had before.
Note that we're actually adding in the zero/sign extensions here, when we cast If we have a rewrite doing the same for the second argument, then we should eventually get only multiplies which are sign- or zero-extended, which we can then simplify. That is, if we then do the equivalent second-arg rewrite:
There's a quick and dirty way to implement this: just implement this exact rewrite as-is. Easy! 2024-08-27Every time I go to start implementing this, it feels a bit more complicated than expected. Also, random thought: does SMTLIB choose to use nxn=n (rather than nxn=2n) multiplication so that they don't have to worry about signedness? maybe not. i don't think you need to care about signedness if the two inputs are the same length, right? |
2024-09-01Need to just get this transformation done. So how do we do it? I wrote before (plus annotations i'm writing rn):
So primarily we need to implement
2024-09-02Made progress yesterday and today. Getting to the point where I can finally make at least one call out to Lakeroad on the wide multiply example. However, now I'm running into an issue: the spec looks a bit odd. Not odd, maybe, but not what we want.
Seems to me like the problem is that the inputs a and b are too wide. I fixed that by putting an extract on top of them when instantiating the PrimitiveInterfaceDSP. Now the problem is that the output is too wide (64 bits). I feel like that might be due to our rewrite being overly aggressive about keeping bitwidths wide. Like, when we split up the mult, we should absolutely be able to shrink the overall bitwidth eventually instead of keeping it at 64. ~~ some time later... I've gotten pretty far now. The issue I'm running into now is that the ensure extract extractor can fail---it can potentially not find ways to extract the desired subexprs, seemingly. I gotta double chekc that that's what's actually happening. Unclear what the issue is. Seems like the nodes I'm trying to force extraction of are somehow not reachable from any expression nodes. They're children of HasType/RealBitwidth/DSP nodes, but not children of any other nodes somehow. Disabling subsumes didn't fix it. Okay, everything's working okay -- next need to implement mapping to DSP3, as the Add in the top level node is still unimplemented. 2024-09-03So close. One thing that's weird, though, is that our Okay, finally got Churchroad calling out to Lakeroad for a 3-arg DSP. Synth problem is timing out. Here's the generated file (with command at the top)
I'm wondering if it's the wide mul in there that's the problem. Interesting, if i shrink it down to 16 bits (i.e. all signals 16 bits) it still times out on bitwuzla, but yices gets it. So what bitwidth here is the problem? the add bitwidth? the mul bitwidth? This still times out:
But when output and c are 40 bits, it doesn't. Terminates at 45. Seems to spin at 46. Weird -- is this really just the limit of the solvers? Or is there something more complicated (or less complicated, more stupid) happening? Regardless, I think it's probably time to use the internal shift feature inside the DSP, rather than trying to use a full wide add. Next thing to do is to add a unit test to Lakeraod to see if we can use the shift feature. |
No description provided.
The text was updated successfully, but these errors were encountered: