From cfc642f1df0bb274e1cdfbc3130621b4d4e4de2b Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:14:48 -0400
Subject: [PATCH 01/15] script to run mlp layer with variable input added

---
 test/utils/run-mlp-layer.sh | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100755 test/utils/run-mlp-layer.sh

diff --git a/test/utils/run-mlp-layer.sh b/test/utils/run-mlp-layer.sh
new file mode 100755
index 00000000..22618113
--- /dev/null
+++ b/test/utils/run-mlp-layer.sh
@@ -0,0 +1,46 @@
+set -v
+set -e
+path=`pwd` #path to your puma directory
+echo $path
+cppfile=fully-connected-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
+name=fully #name for the folder generated by compiler
+pumaenv=pumaenv #name for the environment
+fileno=0
+name=$name$fileno
+
+#layer parameter
+in=64
+out=10
+
+
+#copying mlp config file
+rm ${path}/puma-simulator/include/config.py #remove existing config file
+cp ${path}/puma-simulator/include/example-configs/config-mlp.py ${path}/puma-simulator/include/config.py #copy the mlp config file to include
+#copying model file
+rm ${path}/puma-compiler/test/${cppfile}.cpp ${path}/puma-compiler/test/${cppfile}.h
+cp ${path}/puma-simulator/test/mlp_l4_mnist/${cppfile}.cpp ${path}/puma-compiler/test/${cppfile}.cpp #copy the mlp config file to include
+cp ${path}/puma-simulator/test/mlp_l4_mnist/${cppfile}.h ${path}/puma-compiler/test/${cppfile}.h #copy the mlp config file to include
+
+cd ${path}/puma-compiler/src
+source ~/.bashrc
+conda activate ${pumaenv}
+
+make clean
+make
+
+cd ${path}/puma-compiler/test
+make clean
+make ${cppfile}.test
+export LD_LIBRARY_PATH=`pwd`/../src:$LD_LIBRARY_PATH
+./${cppfile}.test ${in} ${out} ${fileno}
+echo $cppfile
+./generate-py.sh
+cp -r ${name} ../../puma-simulator/test/testasm
+
+cd ${path}/puma-simulator/src
+
+
+python dpe.py -n ${name}
+
+
+

From 501d7826f9ad4a54ec5aaf6f57ee168db164fa77 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:15:22 -0400
Subject: [PATCH 02/15] model file for fully connected layer added

---
 test/mlp_l4_mnist/fully-connected-layer.cpp | 57 +++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 test/mlp_l4_mnist/fully-connected-layer.cpp

diff --git a/test/mlp_l4_mnist/fully-connected-layer.cpp b/test/mlp_l4_mnist/fully-connected-layer.cpp
new file mode 100644
index 00000000..4590f883
--- /dev/null
+++ b/test/mlp_l4_mnist/fully-connected-layer.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 IMPACT Research Group, University of Illinois.
+ * All rights reserved.
+ *
+ * This file is covered by the LICENSE.txt license file in the root directory.
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+#include "puma.h"
+#include "fully-connected-layer.h"
+
+int main(int argc, char** argv) {
+
+    //Model model = Model::create("fully-connected-layer");
+
+    // Process parameters
+    unsigned int in_size;
+    unsigned int out_size;
+    if(argc == 4) {
+        in_size = atoi(argv[1]);
+        out_size = atoi(argv[2]);
+    }
+
+    std:: string str=std::string("fully") + argv[3] + std::string("-connected-layer");
+    Model model = Model::create(str);
+
+    // Input
+    auto in = InputVector::create(model, "in", in_size);
+
+    // Output
+    auto out = OutputVector::create(model, "out", out_size);
+
+    // Layer
+    out = fully_connected_layer(model, "", in_size, out_size, in);
+
+    // Compile
+    model.compile();
+
+    // Bind data
+    ModelInstance modelInstance = ModelInstance::create(model);
+    float* weights = new float[in_size*out_size];
+    fully_connected_layer_bind(modelInstance, "", weights);
+    modelInstance.generateData();
+
+    // Destroy model
+    model.destroy();
+    delete[] weights;
+
+    return 0;
+
+}
+

From b878af05c5b0eeb27343176ac5264262bac49d26 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Sat, 18 Apr 2020 18:15:55 -0400
Subject: [PATCH 03/15] model file for FC layer added

---
 test/mlp_l4_mnist/fully-connected-layer.h | 27 +++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 test/mlp_l4_mnist/fully-connected-layer.h

diff --git a/test/mlp_l4_mnist/fully-connected-layer.h b/test/mlp_l4_mnist/fully-connected-layer.h
new file mode 100644
index 00000000..b4679cbd
--- /dev/null
+++ b/test/mlp_l4_mnist/fully-connected-layer.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 IMPACT Research Group, University of Illinois.
+ * All rights reserved.
+ *
+ * This file is covered by the LICENSE.txt license file in the root directory.
+ *
+ */
+
+#ifndef _PUMA_TEST_FULLY_CONNECTED_LAYER_
+#define _PUMA_TEST_FULLY_CONNECTED_LAYER_
+
+#include "puma.h"
+
+static Vector fully_connected_layer(Model model, std::string layerName, unsigned int in_size, unsigned int out_size, Vector in) {
+
+    ConstantMatrix mat = ConstantMatrix::create(model, layerName + "mat", in_size, out_size);
+
+    return sig(mat*in);
+
+}
+
+static void fully_connected_layer_bind(ModelInstance modelInstance, std::string layerName, float* weights) {
+    modelInstance.bind(layerName + "mat", weights);
+}
+
+#endif
+

From b526770dbd0aa5f76924607166abe3cce61ac4b9 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Tue, 12 May 2020 14:32:41 -0400
Subject: [PATCH 04/15] cnn script updated

---
 test/utils/run-cnn-benchmark.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/utils/run-cnn-benchmark.sh b/test/utils/run-cnn-benchmark.sh
index 5168c1e4..bfb755da 100755
--- a/test/utils/run-cnn-benchmark.sh
+++ b/test/utils/run-cnn-benchmark.sh
@@ -5,13 +5,13 @@ echo $path
 cppfile=conv-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
 name=conv #name for the folder generated by compiler
 pumaenv=pumaenv #name for the environment
-fileno=0 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
+fileno=31 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
 name=$name$fileno
 #layer parameters
 inx=9
 iny=9
-inC=64
-outC=64
+inC=16
+outC=16
 kx=3
 ky=3
 p=1

From 960c2680550b661d85c3550ee0fd0d1ee181ff9c Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Tue, 12 May 2020 14:36:18 -0400
Subject: [PATCH 05/15] ADC dictionary updated

---
 include/constants.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/constants.py b/include/constants.py
index 85889215..c99d688d 100644
--- a/include/constants.py
+++ b/include/constants.py
@@ -148,25 +148,41 @@
 adc_lat_dict = {'1' : 12.5,
                 '2' : 25,
                 '4' : 50,
+                '5' : 62.5,
+                '6' : 75,
+                '7' : 87.5,
                 '8' : 100,
+                '9' : 112.5,
                 '16': 200}
 
 adc_pow_dyn_dict = {'1' : 0.225,
                     '2' : 0.45,
                     '4' : 0.9,
+                    '5' : 1.125,
+                    '6' : 1.35,
+                    '7' : 1.575,
                     '8' : 1.8,
+                    '9' : 2.025,
                     '16': 3.6}
 
 adc_pow_leak_dict = {'1' : 0.025,
                      '2' : 0.05,
                      '4' : 0.1,
+                     '5' : 0.125,
+                     '6' : 0.150,
+                     '7' : 0.175,
                      '8' : 0.2,
+                     '9' : 0.225,
                      '16': 0.4}
 
 adc_area_dict = {'1' : 0.0012,
                  '2' : 0.0012,
                  '4' : 0.0012,
+                 '5' : 0.0012,
+                 '6' : 0.0012,
+                 '7' : 0.0012,
                  '8' : 0.0012,
+                 '9' : 0.0012,
                  '16': 0.0012}
 
 # SNH (MVM pipeline)

From 0f0975c5293b326ded0340bdcf1650dae7ec23e9 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:49:47 -0400
Subject: [PATCH 06/15] input and weight precision variable to config

---
 include/example-configs/config-cnn.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index af9f7668..90085929 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -30,7 +30,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training 8-fw, 8-bw and 16-delta physical xbars.

From d73d2737894572e73875d24e6f04798658fd4ad7 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:49:57 -0400
Subject: [PATCH 07/15] input and weight precision variable to config

---
 include/example-configs/config-mlp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py
index 8cd88e99..ee3647fa 100644
--- a/include/example-configs/config-mlp.py
+++ b/include/example-configs/config-mlp.py
@@ -30,7 +30,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training 8-fw, 8-bw and 16-delta physical xbars.

From ac07681d161bca4776de0a3fe289ed1c10cb0ea4 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Fri, 15 May 2020 16:50:28 -0400
Subject: [PATCH 08/15] input and weight precision feature added

---
 src/ima.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index ddf31af1..b9830f71 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.xbdata_width / cfg.dac_res):
+        for k in xrange (cfg.input_prec / cfg.dac_res):
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = cfg.data_width / cfg.xbar_bits
+            num_xb = cfg.weight_width / cfg.xbar_bits
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):
@@ -793,7 +793,7 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0)) #changed xbdata_width to input_prec
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
@@ -862,8 +862,9 @@ def xbComputeLatency (self, mask):
         # (EDRAM + Controller always latency >= 2) - Follow this else deisgn breaks
             if (ex_op == 'st' and self.stage_latency[sId] == 0):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
+                # changed data_width to weight_width
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
                 if (st_data_addr >= cfg.num_xbar * cfg.xbar_size):
                     for num in range (self.de_r2): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
@@ -891,13 +892,13 @@ def xbComputeLatency (self, mask):
             # Check whether datamem access for st has finished
             elif (self.de_opcode == 'st' and self.stage_cycle[sId] == self.stage_latency[sId]):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
                 if (st_data_addr >= datamem_off):
-                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
                 else:
-                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
                         ex_val1[num] = readFromXbarMem (self, st_data_addr+num)
                 # combine counter and data
                 ramstore = [str(self.de_val1), ex_val1[:]] # modified - 1st item in list: counter value, 2nd item: list of values to be written to edram

From 26cbf64f7742b350b9390f32130073d6bc87dd87 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:03:23 -0400
Subject: [PATCH 09/15] input precision changed for energy for loop

---
 src/ima.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index b9830f71..f261644b 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -793,7 +793,7 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0)) #changed xbdata_width to input_prec
+            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
@@ -862,9 +862,8 @@ def xbComputeLatency (self, mask):
         # (EDRAM + Controller always latency >= 2) - Follow this else deisgn breaks
             if (ex_op == 'st' and self.stage_latency[sId] == 0):
                 # read the data from dataMem or xb_outMem depending on address
-                # changed data_width to weight_width
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
                 if (st_data_addr >= cfg.num_xbar * cfg.xbar_size):
                     for num in range (self.de_r2): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
@@ -892,13 +891,13 @@ def xbComputeLatency (self, mask):
             # Check whether datamem access for st has finished
             elif (self.de_opcode == 'st' and self.stage_cycle[sId] == self.stage_latency[sId]):
                 # read the data from dataMem or xb_outMem depending on address
-                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.weight_width) # address of data in register
-                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.weight_width)] # modified
+                st_data_addr = self.de_r1 + self.ex_vec_count * (cfg.edram_buswidth/cfg.data_width) # address of data in register
+                ex_val1 = ['' for num in range (cfg.edram_buswidth/cfg.data_width)] # modified
                 if (st_data_addr >= datamem_off):
-                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
                 else:
-                    for num in range (cfg.edram_buswidth / cfg.weight_width): # modified
+                    for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = readFromXbarMem (self, st_data_addr+num)
                 # combine counter and data
                 ramstore = [str(self.de_val1), ex_val1[:]] # modified - 1st item in list: counter value, 2nd item: list of values to be written to edram

From a7365c3bdced38ddc0b432c545be0170d294e2c1 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:45:31 -0400
Subject: [PATCH 10/15] effect of quantization on latency term added

---
 src/ima.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ima.py b/src/ima.py
index f261644b..1b110e72 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -793,7 +793,8 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.xbdata_width / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(cfg.weight_width/cfg.xbar_bits) / \
+                    math.ceil(cfg.data_width/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()

From 06767d4bd9d652bec775e76439e29b81ee7e7b18 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:46:38 -0400
Subject: [PATCH 11/15] input and weight precision term added

---
 include/example-configs/config-cnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index 90085929..d60c657e 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -28,7 +28,7 @@
 # Fixed parameters
 addr_width = 22 # Added to address larger address space for conv layers (#TODO: Compiler needs to fix shared memory reuse)
 data_width = num_bits # (in bits)
-xbdata_width = data_width # (in bits)
+xbdata_width = data_width # (in bits), equivalent to input_prec
 instrn_width = 48 # (in bits)
 # Input and Weight parameters
 input_prec = 16

From afaf088a7249254740ec200d923f67a96486c39f Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 18:51:32 -0400
Subject: [PATCH 12/15] ceil function for # of xbs

---
 src/ima.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 1b110e72..42514185 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.input_prec / cfg.dac_res):
+        for k in xrange (cfg.input_prec / cfg.dac_res): #quantization affects the # of streams
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = cfg.weight_width / cfg.xbar_bits
+            num_xb = math.ceil(cfg.weight_width / cfg.xbar_bits) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):

From 3754659aaf71820a1cb1ee8857d5ea26b9fe58fe Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 19:40:20 -0400
Subject: [PATCH 13/15] ceil and int added for division

---
 src/ima.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 42514185..1897b670 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -598,7 +598,7 @@ def inner_product (mat_id, key):
         self.xb_outMem_list[mat_id][key].reset ()
 
         ## Loop to cover all bits of inputs
-        for k in xrange (cfg.input_prec / cfg.dac_res): #quantization affects the # of streams
+        for k in xrange (int(math.ceil(cfg.input_prec / cfg.dac_res))): #quantization affects the # of streams
             #for k in xrange (1):
             # read the values from the xbar's input register
             out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = math.ceil(cfg.weight_width / cfg.xbar_bits) # # of XBs change with quantization
+            num_xb = int(math.ceil(cfg.weight_width / cfg.xbar_bits)) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):

From 13e8cc1e8b1a13917b68f8c19f46dd664f59117d Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Wed, 27 May 2020 19:44:03 -0400
Subject: [PATCH 14/15] config reverted to original

---
 test/utils/run-cnn-benchmark.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/utils/run-cnn-benchmark.sh b/test/utils/run-cnn-benchmark.sh
index bfb755da..5168c1e4 100755
--- a/test/utils/run-cnn-benchmark.sh
+++ b/test/utils/run-cnn-benchmark.sh
@@ -5,13 +5,13 @@ echo $path
 cppfile=conv-layer #name for cpp file that you want to compile ex- mlp_l4_mnist.cpp, conv-layer.cpp, convmax-layer.cpp
 name=conv #name for the folder generated by compiler
 pumaenv=pumaenv #name for the environment
-fileno=31 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
+fileno=0 #variable so that conv folder generated by compilers do not overlap (u might want to change this variable to different int values for different layers)
 name=$name$fileno
 #layer parameters
 inx=9
 iny=9
-inC=16
-outC=16
+inC=64
+outC=64
 kx=3
 ky=3
 p=1

From 5f013fe1eca10928b1cb800d6608155015e419e8 Mon Sep 17 00:00:00 2001
From: Shubham Negi
Date: Mon, 1 Jun 2020 19:13:56 -0400
Subject: [PATCH 15/15] updated for corner case

---
 src/ima.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ima.py b/src/ima.py
index 1897b670..ea257a1f 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -612,7 +612,7 @@ def inner_product (mat_id, key):
             out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
             # Do for (data_width/xbar_bits) xbars
-            num_xb = int(math.ceil(cfg.weight_width / cfg.xbar_bits)) # # of XBs change with quantization
+            num_xb = int(math.ceil(float(cfg.weight_width) / cfg.xbar_bits)) # # of XBs change with quantization
             out_xbar = [[] for x in range(num_xb)]
             out_snh = [[] for x in range(num_xb)]
             for m in range (num_xb):
@@ -793,8 +793,8 @@ def xbComputeLatency (self, mask):
             print("adccccc.adc_res", adccccc.adc_res)
             print("---")
             '''
-            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(cfg.weight_width/cfg.xbar_bits) / \
-                    math.ceil(cfg.data_width/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
+            latency_ip = lat_temp * ((cfg.input_prec / cfg.dac_res) + num_stage - 1) * float(int(fb_found>0))*(math.ceil(float(cfg.weight_width)/ \
+                    cfg.xbar_bits) /math.ceil(float(cfg.data_width)/cfg.xbar_bits)) # last term to account for the effect of quantization on latency
 ## MVM outer product occurs in 4 cycles to take care of all i/o polarities (++, +-, -+, --)
             num_phase = 4
             lat_temp = self.matrix_list[0]['f'][0].getOpLatency()
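
Note on the precision knobs introduced above (illustrative only, not part of any patch): the series adds input_prec and weight_width to the config and threads them through ima.py so that the number of bit-serial input streams, the number of physical crossbars per logical matrix, and the MVM inner-product latency all scale with the chosen precisions. The standalone Python sketch below recomputes those three quantities for an example configuration; the variable names mirror the config fields, the numeric values (8-bit inputs, 6-bit weights) are arbitrary assumptions, and the float() casts reproduce the Python 2 integer-division corner case that patches 13 and 15 guard against.

# Standalone sketch -- mirrors the formulas in patches 08-15, not simulator code.
import math

# Example values; in the simulator these come from include/config.py (cfg.*).
input_prec   = 8    # bits per input operand (cfg.input_prec)
weight_width = 6    # bits per weight (cfg.weight_width)
data_width   = 16   # baseline operand width (cfg.data_width)
dac_res      = 1    # DAC resolution in bits (cfg.dac_res)
xbar_bits    = 2    # bits stored per crossbar cell (cfg.xbar_bits)

# Bit-serial input streams per MVM (the xrange bound in inner_product):
num_streams = int(math.ceil(float(input_prec) / dac_res))      # -> 8

# Physical crossbars per logical matrix (num_xb in inner_product).
# Without the float() cast, Python 2 floors 7/2 to 3 before ceil() is applied,
# which is the corner case patch 15 fixes; ceil(7.0/2) gives 4 as intended.
num_xb = int(math.ceil(float(weight_width) / xbar_bits))       # -> 3

# Relative inner-product latency versus the full-precision (data_width) baseline,
# i.e. the final factor attached to latency_ip in xbComputeLatency:
latency_scale = math.ceil(float(weight_width) / xbar_bits) / \
                math.ceil(float(data_width) / xbar_bits)       # -> 3/8 = 0.375

print("bit-serial streams per MVM : %d" % num_streams)
print("physical crossbars needed  : %d" % num_xb)
print("latency scaling factor     : %.3f" % latency_scale)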