From cfc642f1df0bb274e1cdfbc3130621b4d4e4de2b Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:14:48 -0400
Subject: [PATCH 01/15] script to run mlp layer with variable input added

---
 test/utils/run-mlp-layer.sh | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100755 test/utils/run-mlp-layer.sh

diff --git a/test/utils/run-mlp-layer.sh b/test/utils/run-mlp-layer.sh
new file mode 100755
index 00000000..22618113
--- /dev/null
+++ b/test/utils/run-mlp-layer.sh
@@ -0,0 +1,46 @@
+set -v
+set -e
+path=`pwd` #path to your puma directory
+echo $path
+cppfile=fully-connected-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
+name=fully #name for the folder generated by compiler
+pumaenv=pumaenv #name for the environment
+fileno=0
+name=$name$fileno
+
+#layer parameter
+in=64
+out=10
+
+
+#copying mlp config file
+rm ${path}/puma-simulator/include/config.py #remove existing config file
+cp ${path}/puma-simulator/include/example-configs/config-mlp.py ${path}/puma-simulator/include/config.py #copy the mlp config file to include
+#copying model file
+rm ${path}/puma-compiler/test/${cppfile}.cpp ${path}/puma-compiler/test/${cppfile}.h
+cp ${path}/puma-simulator/test/mlp_l4_mnist/${cppfile}.cpp ${path}/puma-compiler/test/${cppfile}.cpp #copy the mlp config file to include
+cp ${path}/puma-simulator/test/mlp_l4_mnist/${cppfile}.h ${path}/puma-compiler/test/${cppfile}.h #copy the mlp config file to include
+
+cd ${path}/puma-compiler/src
+source ~/.bashrc
+conda activate ${pumaenv}
+
+make clean
+make
+
+cd ${path}/puma-compiler/test
+make clean
+make ${cppfile}.test
+export LD_LIBRARY_PATH=`pwd`/../src:$LD_LIBRARY_PATH
+./${cppfile}.test ${in} ${out} ${fileno}
+echo $cppfile
+./generate-py.sh
+cp -r ${name} ../../puma-simulator/test/testasm
+
+cd ${path}/puma-simulator/src
+
+
+python dpe.py -n ${name}
+
+
+

From 501d7826f9ad4a54ec5aaf6f57ee168db164fa77 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:15:22 -0400
Subject: [PATCH 02/15] model file for fully connected layer added

---
 test/mlp_l4_mnist/fully-connected-layer.cpp | 57 +++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 test/mlp_l4_mnist/fully-connected-layer.cpp

diff --git a/test/mlp_l4_mnist/fully-connected-layer.cpp b/test/mlp_l4_mnist/fully-connected-layer.cpp
new file mode 100644
index 00000000..4590f883
--- /dev/null
+++ b/test/mlp_l4_mnist/fully-connected-layer.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 IMPACT Research Group, University of Illinois.
+ * All rights reserved.
+ *
+ * This file is covered by the LICENSE.txt license file in the root directory.
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+#include "puma.h"
+#include "fully-connected-layer.h"
+
+int main(int argc, char** argv) {
+
+    //Model model = Model::create("fully-connected-layer");
+
+    // Process parameters
+    unsigned int in_size;
+    unsigned int out_size;
+    if(argc == 4) {
+        in_size = atoi(argv[1]);
+        out_size = atoi(argv[2]);
+    }
+
+    std:: string str=std::string("fully") + argv[3] + std::string("-connected-layer");
+    Model model = Model::create(str);
+
+    // Input
+    auto in = InputVector::create(model, "in", in_size);
+
+    // Output
+    auto out = OutputVector::create(model, "out", out_size);
+
+    // Layer
+    out = fully_connected_layer(model, "", in_size, out_size, in);
+
+    // Compile
+    model.compile();
+
+    // Bind data
+    ModelInstance modelInstance = ModelInstance::create(model);
+    float* weights = new float[in_size*out_size];
+    fully_connected_layer_bind(modelInstance, "", weights);
+    modelInstance.generateData();
+
+    // Destroy model
+    model.destroy();
+    delete[] weights;
+
+    return 0;
+
+}
+

From b878af05c5b0eeb27343176ac5264262bac49d26 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:15:55 -0400
Subject: [PATCH 03/15] model file for FC layer added

---
 test/mlp_l4_mnist/fully-connected-layer.h | 27 +++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 test/mlp_l4_mnist/fully-connected-layer.h

diff --git a/test/mlp_l4_mnist/fully-connected-layer.h b/test/mlp_l4_mnist/fully-connected-layer.h
new file mode 100644
index 00000000..b4679cbd
--- /dev/null
+++ b/test/mlp_l4_mnist/fully-connected-layer.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 IMPACT Research Group, University of Illinois.
+ * All rights reserved.
+ *
+ * This file is covered by the LICENSE.txt license file in the root directory.
+ *
+ */
+
+#ifndef _PUMA_TEST_FULLY_CONNECTED_LAYER_
+#define _PUMA_TEST_FULLY_CONNECTED_LAYER_
+
+#include "puma.h"
+
+static Vector fully_connected_layer(Model model, std::string layerName, unsigned int in_size, unsigned int out_size, Vector in) {
+
+    ConstantMatrix mat = ConstantMatrix::create(model, layerName + "mat", in_size, out_size);
+
+    return sig(mat*in);
+
+}
+
+static void fully_connected_layer_bind(ModelInstance modelInstance, std::string layerName, float* weights) {
+    modelInstance.bind(layerName + "mat", weights);
+}
+
+#endif
+

From b526770dbd0aa5f76924607166abe3cce61ac4b9 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Tue, 12 May 2020 14:32:41 -0400
Subject: [PATCH 04/15] cnn script updated

---
 test/utils/run-cnn-benchmark.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/utils/run-cnn-benchmark.sh b/test/utils/run-cnn-benchmark.sh
index 5168c1e4..bfb755da 100755
--- a/test/utils/run-cnn-benchmark.sh
+++ b/test/utils/run-cnn-benchmark.sh
@@ -5,13 +5,13 @@ echo $path
 cppfile=conv-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
 name=conv #name for the folder generated by compiler
 pumaenv=pumaenv #name for the environment
-fileno=0 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
+fileno=31 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
 name=$name$fileno
 #layer parameters
 inx=9
 iny=9
-inC=64
-outC=64
+inC=16
+outC=16
 kx=3
 ky=3
 p=1

From 960c2680550b661d85c3550ee0fd0d1ee181ff9c Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Tue, 12 May 2020 14:36:18 -0400
Subject: [PATCH 05/15] ADC dictionary updated

---
 include/constants.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/constants.py b/include/constants.py
index 85889215..c99d688d 100644
--- a/include/constants.py
+++ b/include/constants.py
@@ -148,25 +148,41 @@
 adc_lat_dict = {'1' : 12.5,
                 '2' : 25,
                 '4' : 50,
+                '5' : 62.5,
+                '6' : 75,
+                '7' : 87.5,
                 '8' : 100,
+                '9' : 112.5,
                 '16': 200}
 
 adc_pow_dyn_dict = {'1' : 0.225,
                     '2' : 0.45,
                     '4' : 0.9,
+                    '5' : 1.125,
+                    '6' : 1.35,
+                    '7' : 1.575,
                     '8' : 1.8,
+                    '9' : 2.025,
                     '16': 3.6}
 
 adc_pow_leak_dict = {'1' : 0.025,
                      '2' : 0.05,
                      '4' : 0.1,
+                     '5' : 0.125,
+                     '6' : 0.150,
+                     '7' : 0.175,
                      '8' : 0.2,
+                     '9' : 0.225,
                      '16': 0.4}
 
 adc_area_dict = {'1' : 0.0012,
                  '2' : 0.0012,
                  '4' : 0.0012,
+                 '5' : 0.0012,
+                 '6' : 0.0012,
+                 '7' : 0.0012,
                  '8' : 0.0012,
+                 '9' : 0.0012,
                  '16': 0.0012}
 
 # SNH (MVM pipeline)

From 0f0975c5293b326ded0340bdcf1650dae7ec23e9 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:49:47 -0400
Subject: [PATCH 06/15] input and weight precision variable to config

---
 include/example-configs/config-cnn.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index af9f7668..90085929 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -30,7 +30,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training 8-fw, 8-bw and 16-delta physical xbars.

From d73d2737894572e73875d24e6f04798658fd4ad7 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:49:57 -0400
Subject: [PATCH 07/15] input and weight precision variable to config

---
 include/example-configs/config-mlp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py
index 8cd88e99..ee3647fa 100644
--- a/include/example-configs/config-mlp.py
+++ b/include/example-configs/config-mlp.py
@@ -30,7 +30,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training 8-fw, 8-bw and 16-delta physical xbars.

From ac07681d161bca4776de0a3fe289ed1c10cb0ea4 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:50:28 -0400
Subject: [PATCH 08/15] input and weight precision feature added

---
 src/ima.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index ddf31af1..b9830f71 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.xbdata_width / cfg.dac_res):
+        for k in xrange (cfg.input_prec / cfg.dac_res):
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = cfg.data_width / cfg.xbar_bits
+            num_xb = cfg.weight_width / cfg.xbar_bits
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):
@@ -793,7 +793,7 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0)) #changed xbdata_width to input_prec
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
@@ -862,8 +862,9 @@ def xbComputeLatency (self, mask):
         # (EDRAM + Controller always latency >= 2) - Follow this else deisgn breaks
             if (ex_op == 'st' and self.stage_latency[sId] == 0):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
+                # changed data_width to weight_width
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
                 if (st_data_addr >= cfg.num_xbar * cfg.xbar_size):
                     for num in range (self.de_r2): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
@@ -891,13 +892,13 @@ def xbComputeLatency (self, mask):
             # Check whether datamem access for st has finished
             elif (self.de_opcode == 'st' and self.stage_cycle[sId] == self.stage_latency[sId]):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
                 if (st_data_addr >= datamem_off):
-                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
                 else:
-                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
                         ex_val1[num] = readFromXbarMem (self, st_data_addr+num)
                 # combine counter and data
                 ramstore = [str(self.de_val1), ex_val1[:]] # modified - 1st item in list: counter value, 2nd item: list of values to be written to edram

From 26cbf64f7742b350b9390f32130073d6bc87dd87 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:03:23 -0400
Subject: [PATCH 09/15] input precision changed for energy for loop

---
 src/ima.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index b9830f71..f261644b 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -793,7 +793,7 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0)) #changed xbdata_width to input_prec
+            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
@@ -862,9 +862,8 @@ def xbComputeLatency (self, mask):
         # (EDRAM + Controller always latency >= 2) - Follow this else deisgn breaks
             if (ex_op == 'st' and self.stage_latency[sId] == 0):
                 # read the data from dataMem or xb_outMem depending on address
-                # changed data_width to weight_width
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
                 if (st_data_addr >= cfg.num_xbar * cfg.xbar_size):
                     for num in range (self.de_r2): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
@@ -892,13 +891,13 @@ def xbComputeLatency (self, mask):
             # Check whether datamem access for st has finished
             elif (self.de_opcode == 'st' and self.stage_cycle[sId] == self.stage_latency[sId]):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
                 if (st_data_addr >= datamem_off):
-                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
                 else:
-                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = readFromXbarMem (self, st_data_addr+num)
                 # combine counter and data
                 ramstore = [str(self.de_val1), ex_val1[:]] # modified - 1st item in list: counter value, 2nd item: list of values to be written to edram

From a7365c3bdced38ddc0b432c545be0170d294e2c1 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:45:31 -0400
Subject: [PATCH 10/15] effect of quantization on latency term added

---
 src/ima.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ima.py b/src/ima.py
index f261644b..1b110e72 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -793,7 +793,8 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(cfg.weight_width/cfg.xbar_bits) / \
+                    math.ceil(cfg.data_width/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()

From 06767d4bd9d652bec775e76439e29b81ee7e7b18 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:46:38 -0400
Subject: [PATCH 11/15] input and weight precision term added

---
 include/example-configs/config-cnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index 90085929..d60c657e 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -28,7 +28,7 @@
 # Fixed parameters
 addr_width = 22 # Added to address larger address space for conv layers (#TODO: Compiler needs to fix shared memory reuse)
 data_width = num_bits # (in bits)
-xbdata_width = data_width # (in bits)
+xbdata_width = data_width # (in bits), equivalent to input_prec
 instrn_width = 48 # (in bits)
 # Input and Weight parameters
 input_prec = 16

From afaf088a7249254740ec200d923f67a96486c39f Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:51:32 -0400
Subject: [PATCH 12/15] ceil function for # of xbs

---
 src/ima.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 1b110e72..42514185 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.input_prec / cfg.dac_res):
+        for k in xrange (cfg.input_prec / cfg.dac_res): #quantization affects the # of streams
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = cfg.weight_width / cfg.xbar_bits
+            num_xb = math.ceil(cfg.weight_width / cfg.xbar_bits) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):

From 3754659aaf71820a1cb1ee8857d5ea26b9fe58fe Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 19:40:20 -0400
Subject: [PATCH 13/15] ceil and int added for division

---
 src/ima.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 42514185..1897b670 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.input_prec / cfg.dac_res): #quantization affects the # of streams
+        for k in xrange (int(math.ceil(cfg.input_prec / cfg.dac_res))): #quantization affects the # of streams
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = math.ceil(cfg.weight_width / cfg.xbar_bits) # # of XBs change with quantization
+            num_xb = int(math.ceil(cfg.weight_width / cfg.xbar_bits)) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):

From 13e8cc1e8b1a13917b68f8c19f46dd664f59117d Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 19:44:03 -0400
Subject: [PATCH 14/15] config reverted to original

---
 test/utils/run-cnn-benchmark.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/utils/run-cnn-benchmark.sh b/test/utils/run-cnn-benchmark.sh
index bfb755da..5168c1e4 100755
--- a/test/utils/run-cnn-benchmark.sh
+++ b/test/utils/run-cnn-benchmark.sh
@@ -5,13 +5,13 @@ echo $path
 cppfile=conv-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
 name=conv #name for the folder generated by compiler
 pumaenv=pumaenv #name for the environment
-fileno=31 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
+fileno=0 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
 name=$name$fileno
 #layer parameters
 inx=9
 iny=9
-inC=16
-outC=16
+inC=64
+outC=64
 kx=3
 ky=3
 p=1

From 5f013fe1eca10928b1cb800d6608155015e419e8 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Mon, 1 Jun 2020 19:13:56 -0400
Subject: [PATCH 15/15] updated for corner case

---
 src/ima.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 1897b670..ea257a1f 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = int(math.ceil(cfg.weight_width / cfg.xbar_bits)) # # of XBs change with quantization
+            num_xb = int(math.ceil(float(cfg.weight_width) / cfg.xbar_bits)) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):
@@ -793,8 +793,8 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(cfg.weight_width/cfg.xbar_bits) / \
-                    math.ceil(cfg.data_width/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(float(cfg.weight_width)/ \
+                    cfg.xbar_bits) /math.ceil(float(cfg.data_width)/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
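
Note on the precision knobs introduced above (illustrative only, not part of any patch): the series adds input_prec and weight_width to the config and threads them through ima.py so that the number of bit-serial input streams, the number of physical crossbars per logical matrix, and the MVM inner-product latency all scale with the chosen precisions. The standalone Python sketch below recomputes those three quantities for an example configuration; the variable names mirror the config fields, the numeric values (8-bit inputs, 6-bit weights) are arbitrary assumptions, and the float() casts reproduce the Python 2 integer-division corner case that patches 13 and 15 guard against.

# Standalone sketch -- mirrors the formulas in patches 08-15, not simulator code.
import math

# Example values; in the simulator these come from include/config.py (cfg.*).
input_prec   = 8    # bits per input operand (cfg.input_prec)
weight_width = 6    # bits per weight (cfg.weight_width)
data_width   = 16   # baseline operand width (cfg.data_width)
dac_res      = 1    # DAC resolution in bits (cfg.dac_res)
xbar_bits    = 2    # bits stored per crossbar cell (cfg.xbar_bits)

# Bit-serial input streams per MVM (the xrange bound in inner_product):
num_streams = int(math.ceil(float(input_prec) / dac_res))      # -> 8

# Physical crossbars per logical matrix (num_xb in inner_product).
# Without the float() cast, Python 2 floors 7/2 to 3 before ceil() is applied,
# which is the corner case patch 15 fixes; ceil(7.0/2) gives 4 as intended.
num_xb = int(math.ceil(float(weight_width) / xbar_bits))       # -> 3

# Relative inner-product latency versus the full-precision (data_width) baseline,
# i.e. the final factor attached to latency_ip in xbComputeLatency:
latency_scale = math.ceil(float(weight_width) / xbar_bits) / \
                math.ceil(float(data_width) / xbar_bits)       # -> 3/8 = 0.375

print("bit-serial streams per MVM : %d" % num_streams)
print("physical crossbars needed  : %d" % num_xb)
print("latency scaling factor     : %.3f" % latency_scale)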