diff --git a/how_to_run.md b/how_to_run.md index bf1dc791..53491caa 100644 --- a/how_to_run.md +++ b/how_to_run.md @@ -99,6 +99,11 @@ num_tile_compute = 23 # number of tiles mapped by dnn (leaving input and output # Do not change this - total number of tiles num_tile = num_node * num_tile_compute + 2 # +1 for first tile (I/O tile) - dummy, others - compute -- (Line 95) ``` +#### 6.2 - Set up the constants file: + +Use the appropriate constants file from ```puma-simulator/include/example-constants/(constant file name)```. +For example, for a 128x128 crossbar use ```constants-128.py```. +Copy the file to ```puma-simulator/include/``` and rename it to ```constants.py```. ### 7. Run your model, in this example, the ```lstm-layer.cpp```: @@ -185,3 +190,12 @@ number of tiles mapped: 23 ### 10. To run Regression tests after running with weights for inference, go to simulator/test/val. ```python reg_test_1.py -n mlp``` + +### 11. Quantization: +Change the ```input_prec``` and ```weight_width``` parameters in the config file to see the effects of quantization. + +``` +# Input and Weight parameters +input_prec = 16 +weight_width = 16 +``` diff --git a/include/config.py b/include/config.py index e09ebedc..afea3bf5 100644 --- a/include/config.py +++ b/include/config.py @@ -7,7 +7,7 @@ xbar_record = 1 inference = 1 training = not(inference) -sparse_opt = 1 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) +sparse_opt = 0 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) ## Variable to define the type of MVMU # One of "Analog", "Digital_V1" or "Digital_V2" @@ -35,7 +35,7 @@ # Fixed parameters addr_width = 22 # Added to address larger address space for conv layers (#TODO: Compiler needs to fix shared memory reuse) data_width = num_bits # (in bits) -xbdata_width = data_width # (in bits) +xbdata_width = data_width # (in bits), equivalent to input_prec instrn_width = 48 # (in bits) # Input and Weight parameters input_prec = 16 @@ -50,15 +50,19 @@ num_adc_per_matrix = 2 num_adc = num_adc_per_matrix * num_matrix +#uncomment this line for homogeneous ADC precision +adc_res_new ={} + +#uncomment adc_res_new for heterogenous adcs # The idea is to have different ADC resolution value for each ADC. # The number of ADC if defined by num_adc property. Currently it is 2 * num_matrix(2) = 4 # NOTE: Only taking in account indexes 0 and 2, 1 and 3 are ignored, because ADCs 1 and 3 are assumed t be equal to 0 and 2.
-adc_res_new = { - 'matrix_adc_0' : 8, - 'matrix_adc_1' : 4, - 'matrix_adc_2' : 8, - 'matrix_adc_3' : 4 - } +#adc_res_new = { +# 'matrix_adc_0' : 8, +# 'matrix_adc_1' : 4, +# 'matrix_adc_2' : 8, +# 'matrix_adc_3' : 4 +# } num_ALU = num_matrix*2 #dataMem_size = num_matrix*(6*xbar_size) # 4 for 4 input spaces within matrix (1 for f/b each, 2 for d) diff --git a/include/constants.py b/include/constants.py index a2cfe755..5072cc32 100644 --- a/include/constants.py +++ b/include/constants.py @@ -64,41 +64,50 @@ # IMA component latency/power/area dictionary (all values in ns, mw, mm2) # XBAR - Models from ISAAC paper -xbar_lat_dict = {'2': {'32' : 32, # first indexed by xbar_bits then by xbar_size +xbar_lat_dict = {'2': {'16' : 16, + '32' : 32, # first indexed by xbar_bits then by xbar_size '64' : 64, '128': 128, '256': 256}, - '4': {'32' : 32, + '4': {'16' : 16, + '32' : 32, '64' : 64, '128': 128, '256': 256}, - '6': {'32' : 32, + '6': {'16' : 16, + '32' : 32, '64' : 64, '128': 128, '256': 256}} -xbar_pow_dict = {'2': {'32' : 0.01875, +xbar_pow_dict = {'2': {'16' : 0.0046875, + '32' : 0.01875, '64' : 0.075, '128': 0.3, '256': 1.2}, - '4': {'32' : 0.01875, + '4': {'16' : 0.0046875, + '32' : 0.01875, '64' : 0.075, '128': 0.3, '256': 1.2}, - '6': {'32' : 0.01875, + '6': {'16' : 0.0046875, + '32' : 0.01875, '64' : 0.075, '128': 0.3, '256': 1.2}} -xbar_area_dict = {'2': {'32' : 1.5625 * 10**(-6), +xbar_area_dict = {'2': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), '64' : 6.25 * 10**(-6), '128': 2.5 * 10**(-5), '256': 1.0 * 10**(-4)}, - '4': {'32' : 1.5625 * 10**(-6), + '4': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), '64' : 6.25 * 10**(-6), '128': 2.5 * 10**(-5), '256': 1.0 * 10**(-4)}, - '6': {'32' : 1.5625 * 10**(-6), + '6': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), '64' : 6.25 * 10**(-6), '128': 2.5 * 10**(-5), '256': 1.0 * 10**(-4)}} @@ -110,7 +119,7 @@ xbar_ip_lat = 100.0 #xbar_ip_pow = (1.37*2.0) # xbar_ip_pow (includes all mvmu) -xbar_ip_pow = (1.37*2.0) - 1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object) +xbar_ip_pow = (1.37*2.0) - 1.04 if cfg.training else 1.37-1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object), # Note the read and write lat/pow are for entire xbar xbar_rd_lat = 328.0 * 1000 * (1/32.0) @@ -236,126 +245,110 @@ dataMem_lat_dict = {'256' : 1, '512' : 1, '1024': 1, - '2048': 1, - '4096':1} + '2048': 1} dataMem_pow_dyn_dict = {'256' : 0.16, '512' : 0.24, '1024': 0.33, - '2048': 0.57, - '4096': 0.57} + '2048': 0.57} dataMem_pow_leak_dict = {'256' : 0.044, '512' : 0.078, '1024': 0.147, - '2048': 0.33, - '4096': 0.33} + '2048': 0.33} dataMem_area_dict = {'256' : 0.00056, '512' : 0.00108, '1024': 0.00192, - '2048': 0.00392, - '4096': 0.00392} - -dataMem_lat_dict = {'256' : 1, - '512' : 1, - '1024': 1, - '2048': 1, - '4096':1} - -dataMem_pow_dyn_dict = {'256' : 0.16, - '512' : 0.24, - '1024': 0.33, - '2048': 0.57, - '4096': 0.57} - -dataMem_pow_leak_dict = {'256' : 0.044, - '512' : 0.078, - '1024': 0.147, - '2048': 0.33, - '4096': 0.33} - -dataMem_area_dict = {'256' : 0.00056, - '512' : 0.00108, - '1024': 0.00192, - '2048': 0.00392, - '4096': 0.00392} + '2048': 0.00392} # Instruction Memory value dictionary instrnMem_lat_dict = {'512' : 1, '1024': 1, - '2048': 1, - '4096': 1, - '8192': 1} + '2048': 1} instrnMem_pow_dyn_dict = {'512' : 0.46, '1024': 0.53, - '2048': 0.65, - '4096': 0.65, - '8192': 0.65} + '2048': 0.65} instrnMem_pow_leak_dict = {'512' : 0.078, '1024': 0.147, 
- '2048': 0.33, - '4096': 0.33, - '8192': 0.33} + '2048': 0.33} instrnMem_area_dict = {'512' : 0.00108, '1024': 0.00192, - '2048': 0.0041, - '4096': 0.0041, - '8192': 0.0041} + '2048': 0.0041} # Xbar_inMem value dictionary (1 access means reading (dac_res) bits for each xbar row) # for computing average power of ima - scale dyn_pow down by xbar_size -xbar_inMem_lat_dict = {'32' : 1, # indexed with xbar size +xbar_inMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size '64' : 1, '128' : 1, '256' : 1} -xbar_inMem_pow_dyn_read_dict = {'32' : 0.3, +xbar_inMem_pow_dyn_read_dict = {'16' : 0.3, #doesn't change much as we move from 32 to 16, because these are very small memories + '32' : 0.3, '64' : 0.7, '128' : 1.7, '256' : 4.7} -xbar_inMem_pow_dyn_write_dict = {'32' : 0.1, +xbar_inMem_pow_dyn_write_dict = {'16' : 0.1, + '32' : 0.1, '64' : 0.1, '128' : 0.16, '256' : 0.2} -xbar_inMem_pow_leak_dict = {'32' : 0.009, +xbar_inMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, '64' : 0.02, '128' : 0.04, '256' : 0.075} -xbar_inMem_area_dict = {'32' : 0.00015, +xbar_inMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, '64' : 0.00033, '128' : 0.00078, '256' : 0.0019} # Xbar_outMem value dictionary -xbar_outMem_lat_dict = {'32' : 1, # indexed with xbar size +xbar_outMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size '64' : 1, '128' : 1, '256' : 1} -xbar_outMem_pow_dyn_dict = {'32' : 0.1, +xbar_outMem_pow_dyn_dict = {'16' : 0.1, + '32' : 0.1, '64' : 0.1, '128' : 0.16, '256' : 0.2} -xbar_outMem_pow_leak_dict = {'32' : 0.009, +xbar_outMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, '64' : 0.02, '128' : 0.04, '256' : 0.075} -xbar_outMem_area_dict = {'32' : 0.00015, +xbar_outMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, '64' : 0.00033, '128' : 0.00078, '256' : 0.0019} +dataMem_size_max = '2048' +if str(cfg.dataMem_size) in dataMem_lat_dict: + dataMem_size_max = str(cfg.dataMem_size) +else: + print("Warning: No values for core data memory size provided. Using values for 2048 instead.") + +instrnMem_size_max = '2048' +if str(cfg.instrnMem_size) in instrnMem_lat_dict: + instrnMem_size_max = str(cfg.instrnMem_size) +else: + print("Warning: No values for core instruction memory size provided. 
Using values for 2048 instead.") # Chosen latency based on config file - only for components whose latency is parameter dependent #xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] @@ -373,10 +366,10 @@ adc_lat = adc_lat_dict [str(cfg.adc_res)] xbar_inMem_lat = xbar_inMem_lat_dict[str(cfg.xbar_size)] xbar_outMem_lat = xbar_outMem_lat_dict[str(cfg.xbar_size)] -instrnMem_lat = instrnMem_lat_dict[str(cfg.instrnMem_size)] -dataMem_lat = dataMem_lat_dict[str(cfg.dataMem_size)] +instrnMem_lat = instrnMem_lat_dict[str(instrnMem_size_max)] +dataMem_lat = dataMem_lat_dict[str(dataMem_size_max)] -# Chosen area based on config file - only for components whose area is parameter dependent +# Chosen area based on config file - only for components whose latency is parameter dependent if cfg.MVMU_ver == "Analog": xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] else: @@ -385,8 +378,8 @@ adc_area = adc_area_dict [str(cfg.adc_res)] xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)] xbar_outMem_area = xbar_outMem_area_dict[str(cfg.xbar_size)] -instrnMem_area = instrnMem_area_dict[str(cfg.instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction -dataMem_area = dataMem_area_dict[str(cfg.dataMem_size)] +instrnMem_area = instrnMem_area_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_area = dataMem_area_dict[str(dataMem_size_max)] # Chosen dyn_power based on config file - only for components whose latency is parameter dependent #xbar_pow_dyn = xbar_pow_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] @@ -399,8 +392,8 @@ xbar_inMem_pow_dyn_read = xbar_inMem_pow_dyn_read_dict[str(cfg.xbar_size)] xbar_inMem_pow_dyn_write = xbar_inMem_pow_dyn_write_dict[str(cfg.xbar_size)] xbar_outMem_pow_dyn = xbar_outMem_pow_dyn_dict[str(cfg.xbar_size)] -instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(cfg.instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction -dataMem_pow_dyn = dataMem_pow_dyn_dict[str(cfg.dataMem_size)] +instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_dyn = dataMem_pow_dyn_dict[str(dataMem_size_max)] # Energy xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} @@ -420,8 +413,8 @@ adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)] xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)] xbar_outMem_pow_leak = xbar_outMem_pow_leak_dict[str(cfg.xbar_size)] -instrnMem_pow_leak = instrnMem_pow_leak_dict[str(cfg.instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction -dataMem_pow_leak = dataMem_pow_leak_dict[str(cfg.dataMem_size)] +instrnMem_pow_leak = instrnMem_pow_leak_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_leak = dataMem_pow_leak_dict[str(dataMem_size_max)] # Core Control unit (control unit and pipeline registers) ccu_pow = 1.25*0.2 #0.2 for activvity @@ -444,46 +437,50 @@ # EDRAM value dictionary (counter storage is not coounted) edram_lat_dict = {'8' : 2, '64' : 2, #edram access width is constant = 256 bits - '128' : 2, - '2048': 2} + '128' : 2} edram_pow_dyn_dict = {'8' : 17.2/2, '64' : 17.2/2, # (0.0172 nJ with 2 cycles access latency) - '128' : 25.35/2, - '2048': 25.35/2} + '128' : 25.35/2} edram_pow_leak_dict = {'8' : 0.46, '64' : 0.46, - '128' : 0.77, - '2048': 0.77} + '128' : 0.77} edram_area_dict = {'8' : 0.086, '64' : 0.086, - '128' : 0.121, - '2048': 0.121} + 
'128' : 0.121} # Tile Instruction Memory value dictionary tile_instrnMem_lat_dict = {'512': 1, '1024': 1, - '2048': 1, - '4096': 1} + '2048': 1} tile_instrnMem_pow_dyn_dict = {'512' : 0.46, '1024': 0.53, - '2048': 0.65, - '4096': 0.65} + '2048': 0.65} tile_instrnMem_pow_leak_dict = {'512' : 0.078, '1024': 0.147, - '2048': 0.33, - '4096': 0.33} + '2048': 0.33} tile_instrnMem_area_dict = {'512' : 0.00108, '1024': 0.00192, - '2048': 0.0041, - '4096': 0.0041} + '2048': 0.0041} + +edram_size_max = '128' +if str(cfg.edram_size) in edram_lat_dict: + edram_size_max = str(cfg.edram_size) +else: + print("Warning: No values for edram memory size provided. Using values for 128 instead.") + +tile_instrnMem_size_max = '2048' +if str(cfg.tile_instrnMem_size) in tile_instrnMem_lat_dict: + tile_instrnMem_size_max = str(cfg.tile_instrnMem_size) +else: + print("Warning: No values for tile instrn memory size provided. Using values for 2048 instead.") # counter storage (2048 Byte Scratch RAM - 1 counter entry shared by 256 bits of data (16 neurons)) # area scaling (X8) @@ -513,20 +510,20 @@ # Chosen latency based on config file - only for components whose latency is parameter dependent -edram_lat = edram_lat_dict[str(cfg.edram_size)] -tile_instrnMem_lat = tile_instrnMem_lat_dict[str(cfg.tile_instrnMem_size)] +edram_lat = edram_lat_dict[str(edram_size_max)] +tile_instrnMem_lat = tile_instrnMem_lat_dict[str(tile_instrnMem_size_max)] # Chosen area based on config file - only for components whose area is parameter dependent -edram_area = edram_area_dict[str(cfg.edram_size)] -tile_instrnMem_area = tile_instrnMem_area_dict[str(cfg.tile_instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction +edram_area = edram_area_dict[str(edram_size_max)] +tile_instrnMem_area = tile_instrnMem_area_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction # Chosen dynamic power based on config file - only for components whose dynamic power is parameter dependent -edram_pow_dyn = edram_pow_dyn_dict[str(cfg.edram_size)] -tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(cfg.tile_instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction +edram_pow_dyn = edram_pow_dyn_dict[str(edram_size_max)] +tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction # Chosen leakage power based on config file - only for components whose leakage power is parameter dependent -edram_pow_leak = edram_pow_leak_dict[str(cfg.edram_size)] -tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(cfg.tile_instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction +edram_pow_leak = edram_pow_leak_dict[str(edram_size_max)] +tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction # Tile Control unit tcu_pow = 0.25*0.2 @@ -555,7 +552,7 @@ noc_area_dict = {'4': 0.047, '8': 0.116} -# Router dynamic power - NOC will be used only if atleast oen of send_queue in node is non_empty +# Router dynamic power - NOC will be used only if atleast one of send_queue in node is non_empty noc_pow_dyn_dict = {'4': 16.13, '8': 51.48} @@ -563,7 +560,7 @@ noc_pow_leak_dict = {'4': 0.41, '8': 1.04} -# Enter component latency (Based on teh above NOC topological parameters) +# Enter component latency (Based on the above NOC topological parameters) # Inter-node Noc (router & channel) assert (cfg.noc_inj_rate <= noc_inj_rate_max), 
'Oops: reconsider NOC design and or DNN mapping, with this inj_rate, NOC data transfer throughput \ will be terrible!' diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py index 4b2bdde5..f4ce24c3 100644 --- a/include/example-configs/config-cnn.py +++ b/include/example-configs/config-cnn.py @@ -7,13 +7,13 @@ xbar_record = 1 inference = 1 training = not(inference) -sparse_opt = 1 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) +sparse_opt = 0 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) ## Variable to define the type of MVMU # One of "Analog", "Digital_V1" or "Digital_V2" # Digital_V1 has compressed inputs (Data+Offset style) # Digital_V2 has uncompressed inputs (Skips computations for 0 activation) -MVMU_ver = "Digital_V2" +MVMU_ver = "Analog" ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits num_bits = 16 @@ -50,15 +50,19 @@ num_adc_per_matrix = 2 num_adc = num_adc_per_matrix * num_matrix +#uncomment this line for homogeneous ADC precision +adc_res_new ={} + +#uncomment adc_res_new for heterogenous adcs # The idea is to have different ADC resolution value for each ADC. # The number of ADC if defined by num_adc property. Currently it is 2 * num_matrix(2) = 4 # NOTE: Only taking in account indexes 0 and 2, 1 and 3 are ignored, because ADCs 1 and 3 are assumed t be equal to 0 and 2. -adc_res_new = { - 'matrix_adc_0' : 8, - 'matrix_adc_1' : 4, - 'matrix_adc_2' : 8, - 'matrix_adc_3' : 4 - } +#adc_res_new = { +# 'matrix_adc_0' : 8, +# 'matrix_adc_1' : 4, +# 'matrix_adc_2' : 8, +# 'matrix_adc_3' : 4 +# } num_ALU = num_matrix*2 #dataMem_size = num_matrix*(6*xbar_size) # 4 for 4 input spaces within matrix (1 for f/b each, 2 for d) diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py index 02e78dd1..3c3b1952 100644 --- a/include/example-configs/config-mlp.py +++ b/include/example-configs/config-mlp.py @@ -7,13 +7,13 @@ xbar_record = 1 inference = 1 training = not(inference) -sparse_opt = 1 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) +sparse_opt = 0 # Flag for Sparsity optimisaton (Make it 0 for only dense computations) ## Variable to define the type of MVMU # One of "Analog", "Digital_V1" or "Digital_V2" # Digital_V1 has compressed inputs (Data+Offset style) # Digital_V2 has uncompressed inputs (Skips computations for 0 activation) -MVMU_ver = "Digital_V2" +MVMU_ver = "Analog" ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits num_bits = 16 @@ -50,15 +50,19 @@ num_adc_per_matrix = 2 num_adc = num_adc_per_matrix * num_matrix +#uncomment this line for homogeneous ADC precision +adc_res_new ={} + +#uncomment adc_res_new for heterogenous adcs # The idea is to have different ADC resolution value for each ADC. # The number of ADC if defined by num_adc property. Currently it is 2 * num_matrix(2) = 4 # NOTE: Only taking in account indexes 0 and 2, 1 and 3 are ignored, because ADCs 1 and 3 are assumed t be equal to 0 and 2. 
-adc_res_new = { - 'matrix_adc_0' : 8, - 'matrix_adc_1' : 4, - 'matrix_adc_2' : 8, - 'matrix_adc_3' : 4 - } +#adc_res_new = { +# 'matrix_adc_0' : 8, +# 'matrix_adc_1' : 4, +# 'matrix_adc_2' : 8, +# 'matrix_adc_3' : 4 +# } num_ALU = num_matrix*2 #dataMem_size = num_matrix*(6*xbar_size) # 4 for 4 input spaces within matrix (1 for f/b each, 2 for d) diff --git a/include/example-constants/constants-128.py b/include/example-constants/constants-128.py new file mode 100644 index 00000000..141563d9 --- /dev/null +++ b/include/example-constants/constants-128.py @@ -0,0 +1,589 @@ +## This file contains the data structures used in differnet hierarchies. +## It also holds power, area and latency numbers of different component used in DPE design +import config as cfg +import math +import constants_digital as digi_param +# Limits the number of cycles an IMA runs in case it doesn't halt +infinity = 100000 + +############################################################################################################# +## Technology/Other constants for all the modules +############################################################################################################# +# IMA - folliwng parameters are not used currently, will be used when analog functionality is implemented +cycle_time = 1 # in nanoseconds (1ns) +vdd = 0.9 +xbar_out_min = -10e-10 +xbar_out_max = 1 # think about this - ??? + +############################################################################################################# +## Define commonly used data structures +############################################################################################################# +# List of supported opcodes for tile +op_list_tile = ['send', 'receive', 'compute', 'halt'] + +# Instruction format for Tile +dummy_instrn_tile = {'opcode' : op_list_tile[0], + 'mem_addr': 0, # send/receive - edram_addr + 'r1': 0, # send-send_width, receive-receive_width + 'r2': 0, # send-target_addr, receive-counter + 'vtile_id': 0, # send/receive-neuron_id + 'ima_nma': '', # compute - a bit for each ima + 'vec': 0} # vector width + +# List of supported opcodes/aluops for IMA - cp will copy data (from data memory of ima to xbarInmem) +op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs'] +aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction + +# Instruction format for IMA +dummy_instrn = {'opcode' : op_list[0], # instrn op + 'aluop' : aluop_list[0], # alu function + 'd1' : 0, # destination + 'r1' : 0, # operand1 (stride for mvm) + 'r2' : 0, # operand2 + 'r3' : 0, # operand3 (shift) + 'vec' : 0, # vector width + 'imm' : 0, # immediate (scalar) data + 'xb_nma' : 0 } # xbar negative-mask, a xbar evaluates if neg-mask = 1 + +# List of pipeline stages - in order for IMA +stage_list = ['fet', 'dec', 'ex'] +last_stage = 'ex' + +############################################################################################################# +# IMA Hierarchy parameters + # Number of Xbars + # Crossbar Size + # Crossbar bits + # Bit resolution of ADCs and DACs + # Number of ADCs + # Number of ALUs + # Data memory size + # Size of Xbar in/out memory (Register) is dependent on Xbar size and num_bits + # Instruction memory size +############################################################################################################# + +# IMA component latency/power/area dictionary (all values in ns, mw, mm2) +# XBAR - Models from ISAAC paper +xbar_lat_dict = {'2': {'16' : 
16, + '32' : 32, # first indexed by xbar_bits then by xbar_size + '64' : 64, + '128': 128, + '256': 256}, + '4': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}, + '6': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}} + +xbar_pow_dict = {'2': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '4': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '6': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}} + +xbar_area_dict = {'2': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '4': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '6': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}} + +## New values added for xbar MVM/MTVM, OP (parallel write), serial read/write +# the following is lumped power for xbar inner/outer-product - includes peripherals +xbar_op_lat = 20.0*12.8 # with 4 VFUs +xbar_op_pow = 4.44 * 3.27 / (12.8) + +#hardcoded value +#xbar_ip_lat = 100.0 +#value depending on xb size +xbar_ip_lat = xbar_lat_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +#xbar_ip_pow = (1.37*2.0) # xbar_ip_pow (includes all mvmu) +#xbar_ip_pow = (1.37*2.0) - 1.04 if cfg.training else 1.37-1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object), + +#xbar inner product power dependence on xbar size +xbar_ip_pow = xbar_pow_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +# Note the read and write lat/pow are for entire xbar +xbar_rd_lat = 328.0 * 1000 * (1/32.0) +xbar_wr_lat = 351.0 * 1000 * (1/32.0) + +# the following is lumped power for xbar rd/wr (for whole array) - includes peripherals +xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat +xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat + +# DAC - Discuss exact values with ISSAC authors +dac_lat_dict = {'1' : 1, + '2' : 1, + '4' : 1, + '8' : 1, + '16': 1} + +dac_pow_dyn_dict = {'1' : 0.00350625, + '2' : 0.00350625, + '4' : 0.00350625, + '8' : 0.00350625, + '16': 0.00350625} + +dac_pow_leak_dict = {'1' : 0.000390625, + '2' : 0.000390625, + '4' : 0.000390625, + '8' : 0.000390625, + '16': 0.000390625} + +dac_area_dict = {'1' : 1.67 * 10**(-7), + '2' : 1.67 * 10**(-7), + '4' : 1.67 * 10**(-7), + '8' : 1.67 * 10**(-7), + '16': 1.67 * 10**(-7)} + +# ADC - Discuss exact values with ISSAC authors +# ADC Values for including sparsity +adc_lat_dict = {'1' : 12.5, + '2' : 25, + '3' : 37.5, + '4' : 50, + '5' : 62.5, + '6' : 75, + '7' : 87.5, + '8' : 100, + '9' : 112.5, + '16': 200} + +adc_pow_dyn_dict = {'1' : 0.225, + '2' : 0.45, + '3' : 0.675, + '4' : 0.9, + '5' : 1.125, + '6' : 1.35, + '7' : 1.575, + '8' : 1.8, + '9' : 2.025, + '16': 3.6} + +adc_pow_leak_dict = {'1' : 0.025, + '2' : 0.05, + '3' : 0.075, + '4' : 0.1, + '5' : 0.125, + '6' : 0.15, + '7' : 0.175, + '8' : 0.2, + '9' : 0.225, + '16': 0.4} + +adc_area_dict = {'1' : 0.0012, + '2' : 0.0012, + '3' : 0.0012, + '4' : 0.0012, + '5' : 0.00075, + '6' : 0.0009, + '7' : 0.00105, + '8' : 0.0012, + '9' : 0.0012, + '16': 0.0012} + +# SNH (MVM pipeline) +snh_lat = 1 +snh_pow_leak = 9.7 * 10**(-7) +snh_pow_dyn = 9.7 * 10**(-6) - snh_pow_leak +snh_area = 0.00004 / 8 / 128 + +# SNA (MVM pipeline) +sna_lat = 1 +sna_pow_leak = 0.005 +sna_pow_dyn = 0.05 - sna_pow_leak +sna_area = 0.00006 + +# ALU (Part of 
Vector Functional Unit) +alu_lat = 1 +alu_pow_dyn = 2.4 * 32/45 +alu_pow_div_dyn = 1.52 * 32/45 +alu_pow_mul_dyn = 0.795 * 32/45 +alu_pow_others_dyn = 0.373 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +alu_pow_leak = 0.27 * 32/45 +alu_area = 0.00567 * 32/45 + +# witout considering division +#alu_lat = 1 +#alu_pow_dyn = 1.15 * 32/45 +#alu_pow_mul_dyn = 0.796 * 32/45 +#alu_pow_others_dyn = 0.36 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +#alu_pow_leak = 0.05 * 32/45 +#alu_area = 0.002326 * 32/45 + +# Sigmoid/Tanh (Part of Vector Functional Unit) - Taken from ISAAC paper +act_lat = 1 # added for 4 exponential units +act_pow_leak = 0.026 +act_pow_dyn = 0.26 - act_pow_leak +act_area = 0.0003 # check this ??? + +# Multiplexer - These should be analog muxes +mux_lat = 0 +mux_pow_leak = 0 +mux_pow_dyn = 0 +mux_area = 0 + +# Data Memory value dictionary +dataMem_lat_dict = {'256' : 1, + '512' : 1, + '1024': 1, + '2048': 1} + +dataMem_pow_dyn_dict = {'256' : 0.16, + '512' : 0.24, + '1024': 0.33, + '2048': 0.57} + +dataMem_pow_leak_dict = {'256' : 0.044, + '512' : 0.078, + '1024': 0.147, + '2048': 0.33} + +dataMem_area_dict = {'256' : 0.00056, + '512' : 0.00108, + '1024': 0.00192, + '2048': 0.00392} + +# Instruction Memory value dictionary +instrnMem_lat_dict = {'512' : 1, + '1024': 1, + '2048': 1} + +instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65} + +instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33} + + +instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041} + + +# Xbar_inMem value dictionary (1 access means reading (dac_res) bits for each xbar row) +# for computing average power of ima - scale dyn_pow down by xbar_size +xbar_inMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_inMem_pow_dyn_read_dict = {'16' : 0.3, #doesn't change much as we move from 32 to 16, because these are very small memories + '32' : 0.3, + '64' : 0.7, + '128' : 1.7, + '256' : 4.7} + +xbar_inMem_pow_dyn_write_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_inMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_inMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +# Xbar_outMem value dictionary +xbar_outMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_outMem_pow_dyn_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_outMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_outMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +dataMem_size_max = '2048' +if str(cfg.dataMem_size) in dataMem_lat_dict: + dataMem_size_max = str(cfg.dataMem_size) +else: + print("Warning: No values for core data memory size provided. Using values for 2048 instead.") + +instrnMem_size_max = '2048' +if str(cfg.instrnMem_size) in instrnMem_lat_dict: + instrnMem_size_max = str(cfg.instrnMem_size) +else: + print("Warning: No values for core instruction memory size provided. 
Using values for 2048 instead.") + +# Chosen latency based on config file - only for components whose latency is parameter dependent +#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key, value in xbar_ip_lat_dict.items(): + xbar_ip_lat_dict[key] = xbar_ip_lat +else: + xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +xbar_op_lat = xbar_op_lat +xbar_rd_lat = xbar_rd_lat +xbar_wr_lat = xbar_wr_lat +dac_lat = dac_lat_dict [str(cfg.dac_res)] +#FIXME need to review it I can remove adc_lat property +adc_lat = adc_lat_dict [str(cfg.adc_res)] +xbar_inMem_lat = xbar_inMem_lat_dict[str(cfg.xbar_size)] +xbar_outMem_lat = xbar_outMem_lat_dict[str(cfg.xbar_size)] +instrnMem_lat = instrnMem_lat_dict[str(instrnMem_size_max)] +dataMem_lat = dataMem_lat_dict[str(dataMem_size_max)] + +# Chosen area based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] +else: + xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +dac_area = dac_area_dict [str(cfg.dac_res)] +adc_area = adc_area_dict [str(cfg.adc_res)] +xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)] +xbar_outMem_area = xbar_outMem_area_dict[str(cfg.xbar_size)] +instrnMem_area = instrnMem_area_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_area = dataMem_area_dict[str(dataMem_size_max)] + +# Chosen dyn_power based on config file - only for components whose latency is parameter dependent +#xbar_pow_dyn = xbar_pow_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_pow_dyn = xbar_ip_pow +xbar_op_pow_dyn = xbar_op_pow +xbar_rd_pow_dyn = xbar_rd_pow +xbar_wr_pow_dyn = xbar_wr_pow +dac_pow_dyn = dac_pow_dyn_dict [str(cfg.dac_res)] +adc_pow_dyn = adc_pow_dyn_dict [str(cfg.adc_res)] +xbar_inMem_pow_dyn_read = xbar_inMem_pow_dyn_read_dict[str(cfg.xbar_size)] +xbar_inMem_pow_dyn_write = xbar_inMem_pow_dyn_write_dict[str(cfg.xbar_size)] +xbar_outMem_pow_dyn = xbar_outMem_pow_dyn_dict[str(cfg.xbar_size)] +instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_dyn = dataMem_pow_dyn_dict[str(dataMem_size_max)] + +# Energy +xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key,value in xbar_ip_energy_dict.items(): + xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn +else: + xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +print('xbar_ip_energy_dict', xbar_ip_energy_dict) + +# Chosen leak_power based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_pow_leak = 0 +else: + xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)] +dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)] +adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)] +xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)] +xbar_outMem_pow_leak = xbar_outMem_pow_leak_dict[str(cfg.xbar_size)] +instrnMem_pow_leak = instrnMem_pow_leak_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_leak = dataMem_pow_leak_dict[str(dataMem_size_max)] + +# Core Control unit 
(control unit and pipeline registers) +ccu_pow = 1.25*0.2 #0.2 for activity +ccu_area = 0.00145*2.25 #taken similar as edctrl (scaled by power) + +# Added here for simplicity now (***can need modification later***) +# The latency of mem access is dependent on when can the ima find edram bys non-busy +memInterface_lat = infinity # infinite latency + +############################################################################################################# +# Tile Hierarchy + # Number of IMAs + # EDRAM size + # Shared Bus width + # Instruction memory size + # Receive Buffer size +############################################################################################################# + +# Tile component latency/pow/area +# EDRAM value dictionary (counter storage is not coounted) +edram_lat_dict = {'8' : 2, + '64' : 2, #edram access width is constant = 256 bits + '128' : 2} + +edram_pow_dyn_dict = {'8' : 17.2/2, + '64' : 17.2/2, # (0.0172 nJ with 2 cycles access latency) + '128' : 25.35/2} + +edram_pow_leak_dict = {'8' : 0.46, + '64' : 0.46, + '128' : 0.77} + +edram_area_dict = {'8' : 0.086, + '64' : 0.086, + '128' : 0.121} + +# Tile Instruction Memory value dictionary +tile_instrnMem_lat_dict = {'512': 1, + '1024': 1, + '2048': 1} + +tile_instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65} + +tile_instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33} + + +tile_instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041} + + +edram_size_max = '128' +if str(cfg.edram_size) in edram_lat_dict: + edram_size_max = str(cfg.edram_size) +else: + print("Warning: No values for edram memory size provided. Using values for 128 instead.") + +tile_instrnMem_size_max = '2048' +if str(cfg.tile_instrnMem_size) in tile_instrnMem_lat_dict: + tile_instrnMem_size_max = str(cfg.tile_instrnMem_size) +else: + print("Warning: No values for tile instrn memory size provided. 
Using values for 2048 instead.") + +# counter storage (2048 Byte Scratch RAM - 1 counter entry shared by 256 bits of data (16 neurons)) +# area scaling (X8) +counter_buff_lat = 1 * math.sqrt(8) +counter_buff_pow_dyn = 0.65/2 * math.sqrt(8) +counter_buff_pow_leak = 0.33/2 * math.sqrt(8) +counter_buff_area = 0.0041 * math.sqrt(8) + +# EDRAM to IMA bus values +edram_bus_lat = 1 +edram_bus_pow_dyn = 6/2 #bus width = 384, same as issac (over two cycles) +edram_bus_pow_leak = 1/2 #bus width = 384, same as issac +edram_bus_area = 0.090 + +# EDRAM controller values +edram_ctrl_lat = 1 +edram_ctrl_pow_dyn = 0.475 +edram_ctrl_pow_leak = 0.05 +edram_ctrl_area = 0.00145 + +# Receive buffer value dictionary - 16 entries (Need to make this a dictionary) +# Increasing to 64 entries +receive_buffer_lat = 1 * math.sqrt(4) +receive_buffer_pow_dyn = 4.48 * math.sqrt(4) # (0.2*256/16) +receive_buffer_pow_leak = 0.09 * math.sqrt(4) +receive_buffer_area = 0.0022 *math.sqrt(4) + + +# Chosen latency based on config file - only for components whose latency is parameter dependent +edram_lat = edram_lat_dict[str(edram_size_max)] +tile_instrnMem_lat = tile_instrnMem_lat_dict[str(tile_instrnMem_size_max)] + +# Chosen area based on config file - only for components whose area is parameter dependent +edram_area = edram_area_dict[str(edram_size_max)] +tile_instrnMem_area = tile_instrnMem_area_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen dynamic power based on config file - only for components whose dynamic power is parameter dependent +edram_pow_dyn = edram_pow_dyn_dict[str(edram_size_max)] +tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen leakage power based on config file - only for components whose leakage power is parameter dependent +edram_pow_leak = edram_pow_leak_dict[str(edram_size_max)] +tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Tile Control unit +tcu_pow = 0.25*0.2 +tcu_area = 0.00145 #taken similar as edctrl + +############################################################################################################# +# Node Hierarchy + # Number of Tiles + # NOC - Topology (Currently assumes a cmesh (c=4, same as ISSAC)) + # n = number of dimension\ + # k = number of tiles in each dimension + # c = concentartion (tiles/router) + # average injection rate (0.25 - a tile injects a new packet for each destination in every four cycles) +############################################################################################################# + +# NOC latency dictionary (in terms of flit cycle) +# Note - if inj_rate (packet injection -1 packet - 16 neurons) exceeds 0.025 - there's a problem, NoC needs to be redesigned else network latency will be killing! 
+# Hence, not provided for +noc_inj_rate_max = 0.025 +noc_lat_dict = {'0.001': 29, + '0.005': 31, + '0.01' : 34, + '0.02' : 54, + '0.025': 115} + +noc_area_dict = {'4': 0.047, + '8': 0.116} + +# Router dynamic power - NOC will be used only if atleast one of send_queue in node is non_empty +noc_pow_dyn_dict = {'4': 16.13, + '8': 51.48} + +# Router leakage power - NOC will be used only if atleast oen of send_queue in node is non_empty +noc_pow_leak_dict = {'4': 0.41, + '8': 1.04} + +# Enter component latency (Based on the above NOC topological parameters) +# Inter-node Noc (router & channel) +assert (cfg.noc_inj_rate <= noc_inj_rate_max), 'Oops: reconsider NOC design and or DNN mapping, with this inj_rate, NOC data transfer throughput \ +will be terrible!' + +noc_intra_lat = noc_lat_dict[str(cfg.noc_inj_rate)] +noc_intra_pow_dyn = noc_pow_dyn_dict[str(cfg.noc_num_port)] # per router +noc_intra_pow_leak = noc_pow_leak_dict[str(cfg.noc_num_port)]# per router +noc_intra_area = noc_area_dict[str(cfg.noc_num_port)] # per router + +# Hypertransport network (HT) +# Note HT is external to a node, but we consider all tiles in one +# virtual node itself for simplicity +# HT numbers from ISAAC = 6.4GB/s = 6.4B/ ns = 1packet(16*2 Bytes) = 5ns +ht_lat = 5 #latency per packet +noc_inter_lat = ht_lat + noc_intra_lat #navigate to the node, then to tile within node +noc_inter_pow_dyn = 10400 #10.4W +noc_inter_pow_leak = 0 +noc_inter_area = 22.88 + diff --git a/include/example-constants/constants-16.py b/include/example-constants/constants-16.py new file mode 100644 index 00000000..54db2161 --- /dev/null +++ b/include/example-constants/constants-16.py @@ -0,0 +1,614 @@ +## This file contains the data structures used in differnet hierarchies. +## It also holds power, area and latency numbers of different component used in DPE design +import config as cfg +import math +import constants_digital as digi_param +# Limits the number of cycles an IMA runs in case it doesn't halt +infinity = 100000 + +############################################################################################################# +## Technology/Other constants for all the modules +############################################################################################################# +# IMA - folliwng parameters are not used currently, will be used when analog functionality is implemented +cycle_time = 1 # in nanoseconds (1ns) +vdd = 0.9 +xbar_out_min = -10e-10 +xbar_out_max = 1 # think about this - ??? 
+ +############################################################################################################# +## Define commonly used data structures +############################################################################################################# +# List of supported opcodes for tile +op_list_tile = ['send', 'receive', 'compute', 'halt'] + +# Instruction format for Tile +dummy_instrn_tile = {'opcode' : op_list_tile[0], + 'mem_addr': 0, # send/receive - edram_addr + 'r1': 0, # send-send_width, receive-receive_width + 'r2': 0, # send-target_addr, receive-counter + 'vtile_id': 0, # send/receive-neuron_id + 'ima_nma': '', # compute - a bit for each ima + 'vec': 0} # vector width + +# List of supported opcodes/aluops for IMA - cp will copy data (from data memory of ima to xbarInmem) +op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs'] +aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction + +# Instruction format for IMA +dummy_instrn = {'opcode' : op_list[0], # instrn op + 'aluop' : aluop_list[0], # alu function + 'd1' : 0, # destination + 'r1' : 0, # operand1 (stride for mvm) + 'r2' : 0, # operand2 + 'r3' : 0, # operand3 (shift) + 'vec' : 0, # vector width + 'imm' : 0, # immediate (scalar) data + 'xb_nma' : 0 } # xbar negative-mask, a xbar evaluates if neg-mask = 1 + +# List of pipeline stages - in order for IMA +stage_list = ['fet', 'dec', 'ex'] +last_stage = 'ex' + +############################################################################################################# +# IMA Hierarchy parameters + # Number of Xbars + # Crossbar Size + # Crossbar bits + # Bit resolution of ADCs and DACs + # Number of ADCs + # Number of ALUs + # Data memory size + # Size of Xbar in/out memory (Register) is dependent on Xbar size and num_bits + # Instruction memory size +############################################################################################################# + +# IMA component latency/power/area dictionary (all values in ns, mw, mm2) +# XBAR - Models from ISAAC paper +xbar_lat_dict = {'2': {'16' : 16, + '32' : 32, # first indexed by xbar_bits then by xbar_size + '64' : 64, + '128': 128, + '256': 256}, + '4': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}, + '6': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}} + +xbar_pow_dict = {'2': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '4': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '6': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}} + +xbar_area_dict = {'2': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '4': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '6': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}} + +## New values added for xbar MVM/MTVM, OP (parallel write), serial read/write +# the following is lumped power for xbar inner/outer-product - includes peripherals +xbar_op_lat = 20.0*12.8 # with 4 VFUs +xbar_op_pow = 4.44 * 3.27 / (12.8) + +#hardcoded value +#xbar_ip_lat = 100.0 +#value depending on xb size +xbar_ip_lat = xbar_lat_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +#xbar_ip_pow = (1.37*2.0) # xbar_ip_pow 
(includes all mvmu) +#xbar_ip_pow = (1.37*2.0) - 1.04 if cfg.training else 1.37-1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object), + +#xbar inner product power dependence on xbar size +xbar_ip_pow = xbar_pow_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +# Note the read and write lat/pow are for entire xbar +xbar_rd_lat = 328.0 * 1000 * (1/32.0) +xbar_wr_lat = 351.0 * 1000 * (1/32.0) + +# the following is lumped power for xbar rd/wr (for whole array) - includes peripherals +xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat +xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat + +# DAC - Discuss exact values with ISSAC authors +dac_lat_dict = {'1' : 1, + '2' : 1, + '4' : 1, + '8' : 1, + '16': 1} + +dac_pow_dyn_dict = {'1' : 0.00350625, + '2' : 0.00350625, + '4' : 0.00350625, + '8' : 0.00350625, + '16': 0.00350625} + +dac_pow_leak_dict = {'1' : 0.000390625, + '2' : 0.000390625, + '4' : 0.000390625, + '8' : 0.000390625, + '16': 0.000390625} + +dac_area_dict = {'1' : 1.67 * 10**(-7), + '2' : 1.67 * 10**(-7), + '4' : 1.67 * 10**(-7), + '8' : 1.67 * 10**(-7), + '16': 1.67 * 10**(-7)} + +# ADC - Discuss exact values with ISSAC authors +# ADC Values for including sparsity +adc_lat_dict = {'1' : 12.5, + '2' : 25, + '3' : 37.5, + '4' : 50, + '5' : 62.5, + '6' : 75, + '7' : 87.5, + '8' : 100, + '9' : 112.5, + '16': 200} + +adc_pow_dyn_dict = {'1' : 0.225, + '2' : 0.45, + '3' : 0.675, + '4' : 0.9, + '5' : 1.125, + '6' : 1.35, + '7' : 1.575, + '8' : 1.8, + '9' : 2.025, + '16': 3.6} + +adc_pow_leak_dict = {'1' : 0.025, + '2' : 0.05, + '3' : 0.075, + '4' : 0.1, + '5' : 0.125, + '6' : 0.15, + '7' : 0.175, + '8' : 0.2, + '9' : 0.225, + '16': 0.4} + +adc_area_dict = {'1' : 0.0012, + '2' : 0.0012, + '3' : 0.0012, + '4' : 0.0012, + '5' : 0.00075, + '6' : 0.0009, + '7' : 0.00105, + '8' : 0.0012, + '9' : 0.0012, + '16': 0.0012} + +# SNH (MVM pipeline) +snh_lat = 1 +snh_pow_leak = 9.7 * 10**(-7) +snh_pow_dyn = 9.7 * 10**(-6) - snh_pow_leak +snh_area = 0.00004 / 8 / 128 + +# SNA (MVM pipeline) +sna_lat = 1 +sna_pow_leak = 0.005 +sna_pow_dyn = 0.05 - sna_pow_leak +sna_area = 0.00006 + +# ALU (Part of Vector Functional Unit) +alu_lat = 1 +alu_pow_dyn = 2.4 * 32/45 +alu_pow_div_dyn = 1.52 * 32/45 +alu_pow_mul_dyn = 0.795 * 32/45 +alu_pow_others_dyn = 0.373 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +alu_pow_leak = 0.27 * 32/45 +alu_area = 0.00567 * 32/45 + +# witout considering division +#alu_lat = 1 +#alu_pow_dyn = 1.15 * 32/45 +#alu_pow_mul_dyn = 0.796 * 32/45 +#alu_pow_others_dyn = 0.36 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +#alu_pow_leak = 0.05 * 32/45 +#alu_area = 0.002326 * 32/45 + +# Sigmoid/Tanh (Part of Vector Functional Unit) - Taken from ISAAC paper +act_lat = 1 # added for 4 exponential units +act_pow_leak = 0.026 +act_pow_dyn = 0.26 - act_pow_leak +act_area = 0.0003 # check this ??? 
+ +# Multiplexer - These should be analog muxes +mux_lat = 0 +mux_pow_leak = 0 +mux_pow_dyn = 0 +mux_area = 0 + +# Data Memory value dictionary +dataMem_lat_dict = {'256' : 1, + '512' : 1, + '1024': 1, + '2048': 1, + '4096':1, + '16384':1, + '65536':1} + +dataMem_pow_dyn_dict = {'256' : 0.16, + '512' : 0.24, + '1024': 0.33, + '2048': 0.57, + '4096': 0.74, + '16384':1.6, + '65536':3.4} + +dataMem_pow_leak_dict = {'256' : 0.044, + '512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096': 0.489, + '16384':1.28, + '65536':2.741} + +dataMem_area_dict = {'256' : 0.00056, + '512' : 0.00108, + '1024': 0.00192, + '2048': 0.00392, + '4096': 0.020691, + '16384':0.0666, + '65536':0.2684} + +# Instruction Memory value dictionary +instrnMem_lat_dict = {'512' : 1, + '1024': 1, + '2048': 1, + '4096':1, + '16384':1, + '65536':1} + +instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65, + '4096':0.74, + '16384':1.6, + '65536':3.4} + +instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096':0.489, + '16384':1.28, + '65536':2.741} + + +instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041, + '4096':0.020691, + '16384':0.0666, + '65536':0.2684} + + +# Xbar_inMem value dictionary (1 access means reading (dac_res) bits for each xbar row) +# for computing average power of ima - scale dyn_pow down by xbar_size +xbar_inMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_inMem_pow_dyn_read_dict = {'16' : 0.3, #doesn't change much as we move from 32 to 16, because these are very small memories + '32' : 0.3, + '64' : 0.7, + '128' : 1.7, + '256' : 4.7} + +xbar_inMem_pow_dyn_write_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_inMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_inMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +# Xbar_outMem value dictionary +xbar_outMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_outMem_pow_dyn_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_outMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_outMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +dataMem_size_max = '65536' +if str(cfg.dataMem_size) in dataMem_lat_dict: + dataMem_size_max = str(cfg.dataMem_size) +else: + print("Warning: No values for core data memory size provided. Using values for 2048 instead.") + +instrnMem_size_max = '65536' +if str(cfg.instrnMem_size) in instrnMem_lat_dict: + instrnMem_size_max = str(cfg.instrnMem_size) +else: + print("Warning: No values for core instruction memory size provided. 
Using values for 2048 instead.") + +# Chosen latency based on config file - only for components whose latency is parameter dependent +#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +#xbar_ip_lat = xbar_ip_lat +xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key, value in xbar_ip_lat_dict.items(): + xbar_ip_lat_dict[key] = xbar_ip_lat +else: + xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +xbar_op_lat = xbar_op_lat +xbar_rd_lat = xbar_rd_lat +xbar_wr_lat = xbar_wr_lat +dac_lat = dac_lat_dict [str(cfg.dac_res)] +#FIXME need to review it I can remove adc_lat property +adc_lat = adc_lat_dict [str(cfg.adc_res)] +xbar_inMem_lat = xbar_inMem_lat_dict[str(cfg.xbar_size)] +xbar_outMem_lat = xbar_outMem_lat_dict[str(cfg.xbar_size)] +instrnMem_lat = instrnMem_lat_dict[str(instrnMem_size_max)] +dataMem_lat = dataMem_lat_dict[str(dataMem_size_max)] + +# Chosen area based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] +else: + xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +dac_area = dac_area_dict [str(cfg.dac_res)] +adc_area = adc_area_dict [str(cfg.adc_res)] +xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)] +xbar_outMem_area = xbar_outMem_area_dict[str(cfg.xbar_size)] +instrnMem_area = instrnMem_area_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_area = dataMem_area_dict[str(dataMem_size_max)] + +# Chosen dyn_power based on config file - only for components whose latency is parameter dependent +#xbar_pow_dyn = xbar_pow_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_pow_dyn = xbar_ip_pow +xbar_op_pow_dyn = xbar_op_pow +xbar_rd_pow_dyn = xbar_rd_pow +xbar_wr_pow_dyn = xbar_wr_pow +dac_pow_dyn = dac_pow_dyn_dict [str(cfg.dac_res)] +adc_pow_dyn = adc_pow_dyn_dict [str(cfg.adc_res)] +xbar_inMem_pow_dyn_read = xbar_inMem_pow_dyn_read_dict[str(cfg.xbar_size)] +xbar_inMem_pow_dyn_write = xbar_inMem_pow_dyn_write_dict[str(cfg.xbar_size)] +xbar_outMem_pow_dyn = xbar_outMem_pow_dyn_dict[str(cfg.xbar_size)] +instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_dyn = dataMem_pow_dyn_dict[str(dataMem_size_max)] + +# Energy +xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key,value in xbar_ip_energy_dict.items(): + xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn +else: + xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +print('xbar_ip_energy_dict', xbar_ip_energy_dict) + +# Chosen leak_power based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_pow_leak = 0 +else: + xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)] +dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)] +adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)] +xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)] +xbar_outMem_pow_leak = xbar_outMem_pow_leak_dict[str(cfg.xbar_size)] +instrnMem_pow_leak = instrnMem_pow_leak_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_leak = dataMem_pow_leak_dict[str(dataMem_size_max)] 
+
+# Core Control unit (control unit and pipeline registers)
+ccu_pow = 1.25*0.2 #0.2 for activity
+ccu_area = 0.00145*2.25 #taken similar as edctrl (scaled by power)
+
+# Added here for simplicity now (***may need modification later***)
+# The latency of mem access depends on when the IMA finds the eDRAM bus non-busy
+memInterface_lat = infinity # infinite latency
+
+#############################################################################################################
+# Tile Hierarchy
+    # Number of IMAs
+    # EDRAM size
+    # Shared Bus width
+    # Instruction memory size
+    # Receive Buffer size
+#############################################################################################################
+
+# Tile component latency/pow/area
+# EDRAM value dictionary (counter storage is not counted)
+edram_lat_dict = {'8' : 2,
+                  '64' : 2, #edram access width is constant = 256 bits
+                  '128' : 2}
+
+edram_pow_dyn_dict = {'8' : 17.2/2,
+                      '64' : 17.2/2, # (0.0172 nJ with 2 cycles access latency)
+                      '128' : 25.35/2}
+
+edram_pow_leak_dict = {'8' : 0.46,
+                       '64' : 0.46,
+                       '128' : 0.77}
+
+edram_area_dict = {'8' : 0.086,
+                   '64' : 0.086,
+                   '128' : 0.121}
+
+# Tile Instruction Memory value dictionary
+tile_instrnMem_lat_dict = {'512': 1,
+                           '1024': 1,
+                           '2048': 1}
+
+tile_instrnMem_pow_dyn_dict = {'512' : 0.46,
+                               '1024': 0.53,
+                               '2048': 0.65}
+
+tile_instrnMem_pow_leak_dict = {'512' : 0.078,
+                                '1024': 0.147,
+                                '2048': 0.33}
+
+
+tile_instrnMem_area_dict = {'512' : 0.00108,
+                            '1024': 0.00192,
+                            '2048': 0.0041}
+
+
+edram_size_max = '128'
+if str(cfg.edram_size) in edram_lat_dict:
+    edram_size_max = str(cfg.edram_size)
+else:
+    print("Warning: No values for edram memory size provided. Using values for 128 instead.")
+
+tile_instrnMem_size_max = '2048'
+if str(cfg.tile_instrnMem_size) in tile_instrnMem_lat_dict:
+    tile_instrnMem_size_max = str(cfg.tile_instrnMem_size)
+else:
+    print("Warning: No values for tile instrn memory size provided. Using values for 2048 instead.")
+
+# counter storage (2048 Byte Scratch RAM - 1 counter entry shared by 256 bits of data (16 neurons))
+# area scaling (X8)
+counter_buff_lat = 1 * math.sqrt(8)
+counter_buff_pow_dyn = 0.65/2 * math.sqrt(8)
+counter_buff_pow_leak = 0.33/2 * math.sqrt(8)
+counter_buff_area = 0.0041 * math.sqrt(8)
+
+# EDRAM to IMA bus values
+edram_bus_lat = 1
+edram_bus_pow_dyn = 6/2 #bus width = 384, same as ISAAC (over two cycles)
+edram_bus_pow_leak = 1/2 #bus width = 384, same as ISAAC
+edram_bus_area = 0.090
+
+# EDRAM controller values
+edram_ctrl_lat = 1
+edram_ctrl_pow_dyn = 0.475
+edram_ctrl_pow_leak = 0.05
+edram_ctrl_area = 0.00145
+
+# Receive buffer value dictionary - 16 entries (Need to make this a dictionary)
+# Increasing to 64 entries
+receive_buffer_lat = 1 * math.sqrt(4)
+receive_buffer_pow_dyn = 4.48 * math.sqrt(4) # (0.2*256/16)
+receive_buffer_pow_leak = 0.09 * math.sqrt(4)
+receive_buffer_area = 0.0022 *math.sqrt(4)
+
+
+# Chosen latency based on config file - only for components whose latency is parameter dependent
+edram_lat = edram_lat_dict[str(edram_size_max)]
+tile_instrnMem_lat = tile_instrnMem_lat_dict[str(tile_instrnMem_size_max)]
+
+# Chosen area based on config file - only for components whose area is parameter dependent
+edram_area = edram_area_dict[str(edram_size_max)]
+tile_instrnMem_area = tile_instrnMem_area_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction
+
+# Chosen dynamic power based on config file - only for components whose dynamic power is parameter dependent
+edram_pow_dyn = edram_pow_dyn_dict[str(edram_size_max)]
+tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction
+
+# Chosen leakage power based on config file - only for components whose leakage power is parameter dependent
+edram_pow_leak = edram_pow_leak_dict[str(edram_size_max)]
+tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction
+
+# Tile Control unit
+tcu_pow = 0.25*0.2
+tcu_area = 0.00145 #taken similar as edctrl
+
+#############################################################################################################
+# Node Hierarchy
+    # Number of Tiles
+    # NOC - Topology (Currently assumes a cmesh (c=4, same as ISAAC))
+        # n = number of dimensions
+        # k = number of tiles in each dimension
+        # c = concentration (tiles/router)
+    # average injection rate (0.25 - a tile injects a new packet for each destination in every four cycles)
+#############################################################################################################
+
+# NOC latency dictionary (in terms of flit cycle)
+# Note - if inj_rate (packet injection - 1 packet - 16 neurons) exceeds 0.025 there's a problem, the NoC needs to be redesigned else network latency will be crippling!
+# Hence, higher injection rates are not provided for
+noc_inj_rate_max = 0.025
+noc_lat_dict = {'0.001': 29,
+                '0.005': 31,
+                '0.01' : 34,
+                '0.02' : 54,
+                '0.025': 115}
+
+noc_area_dict = {'4': 0.047,
+                 '8': 0.116}
+
+# Router dynamic power - NOC will be used only if at least one of the send_queues in the node is non-empty
+noc_pow_dyn_dict = {'4': 16.13,
+                    '8': 51.48}
+
+# Router leakage power - NOC will be used only if at least one of the send_queues in the node is non-empty
+noc_pow_leak_dict = {'4': 0.41,
+                     '8': 1.04}
+
+# Enter component latency (Based on the above NOC topological parameters)
+# Inter-node NoC (router & channel)
+assert (cfg.noc_inj_rate <= noc_inj_rate_max), 'Oops: reconsider NOC design and/or DNN mapping, with this inj_rate, NOC data transfer throughput \
+will be terrible!'
+
+noc_intra_lat = noc_lat_dict[str(cfg.noc_inj_rate)]
+noc_intra_pow_dyn = noc_pow_dyn_dict[str(cfg.noc_num_port)] # per router
+noc_intra_pow_leak = noc_pow_leak_dict[str(cfg.noc_num_port)]# per router
+noc_intra_area = noc_area_dict[str(cfg.noc_num_port)] # per router
+
+# Hypertransport network (HT)
+# Note HT is external to a node, but we consider all tiles in one
+# virtual node itself for simplicity
+# HT numbers from ISAAC = 6.4GB/s = 6.4B/ns = 1 packet (16*2 Bytes) = 5ns
+ht_lat = 5 #latency per packet
+noc_inter_lat = ht_lat + noc_intra_lat #navigate to the node, then to the tile within the node
+noc_inter_pow_dyn = 10400 #10.4W
+noc_inter_pow_leak = 0
+noc_inter_area = 22.88
+
diff --git a/include/example-constants/constants-32.py b/include/example-constants/constants-32.py
new file mode 100644
index 00000000..b9158e5a
--- /dev/null
+++ b/include/example-constants/constants-32.py
@@ -0,0 +1,606 @@
+## This file contains the data structures used in the different hierarchies.
+## It also holds the power, area and latency numbers of the different components used in the DPE design
+import config as cfg
+import math
+import constants_digital as digi_param
+# Limits the number of cycles an IMA runs in case it doesn't halt
+infinity = 100000
+
+#############################################################################################################
+## Technology/Other constants for all the modules
+#############################################################################################################
+# IMA - the following parameters are not used currently; they will be used when analog functionality is implemented
+cycle_time = 1 # in nanoseconds (1ns)
+vdd = 0.9
+xbar_out_min = -10e-10
+xbar_out_max = 1 # think about this - ???
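+
+# Note: the crossbar latency/power/area dictionaries further down in this file include a '16'
+# entry so that a 16x16 crossbar can also be simulated; judging from the values, latency grows
+# linearly with xbar_size while power and area grow roughly 4x for every doubling of the size.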
+ +############################################################################################################# +## Define commonly used data structures +############################################################################################################# +# List of supported opcodes for tile +op_list_tile = ['send', 'receive', 'compute', 'halt'] + +# Instruction format for Tile +dummy_instrn_tile = {'opcode' : op_list_tile[0], + 'mem_addr': 0, # send/receive - edram_addr + 'r1': 0, # send-send_width, receive-receive_width + 'r2': 0, # send-target_addr, receive-counter + 'vtile_id': 0, # send/receive-neuron_id + 'ima_nma': '', # compute - a bit for each ima + 'vec': 0} # vector width + +# List of supported opcodes/aluops for IMA - cp will copy data (from data memory of ima to xbarInmem) +op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs'] +aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction + +# Instruction format for IMA +dummy_instrn = {'opcode' : op_list[0], # instrn op + 'aluop' : aluop_list[0], # alu function + 'd1' : 0, # destination + 'r1' : 0, # operand1 (stride for mvm) + 'r2' : 0, # operand2 + 'r3' : 0, # operand3 (shift) + 'vec' : 0, # vector width + 'imm' : 0, # immediate (scalar) data + 'xb_nma' : 0 } # xbar negative-mask, a xbar evaluates if neg-mask = 1 + +# List of pipeline stages - in order for IMA +stage_list = ['fet', 'dec', 'ex'] +last_stage = 'ex' + +############################################################################################################# +# IMA Hierarchy parameters + # Number of Xbars + # Crossbar Size + # Crossbar bits + # Bit resolution of ADCs and DACs + # Number of ADCs + # Number of ALUs + # Data memory size + # Size of Xbar in/out memory (Register) is dependent on Xbar size and num_bits + # Instruction memory size +############################################################################################################# + +# IMA component latency/power/area dictionary (all values in ns, mw, mm2) +# XBAR - Models from ISAAC paper +xbar_lat_dict = {'2': {'16' : 16, + '32' : 32, # first indexed by xbar_bits then by xbar_size + '64' : 64, + '128': 128, + '256': 256}, + '4': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}, + '6': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}} + +xbar_pow_dict = {'2': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '4': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '6': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}} + +xbar_area_dict = {'2': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '4': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '6': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}} + +## New values added for xbar MVM/MTVM, OP (parallel write), serial read/write +# the following is lumped power for xbar inner/outer-product - includes peripherals +xbar_op_lat = 20.0*12.8 # with 4 VFUs +xbar_op_pow = 4.44 * 3.27 / (12.8) + +#hardcoded value +#xbar_ip_lat = 100.0 +#value depending on xb size +xbar_ip_lat = xbar_lat_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +#xbar_ip_pow = (1.37*2.0) # xbar_ip_pow 
(includes all mvmu) +#xbar_ip_pow = (1.37*2.0) - 1.04 if cfg.training else 1.37-1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object), + +#xbar inner product power dependence on xbar size +xbar_ip_pow = xbar_pow_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +# Note the read and write lat/pow are for entire xbar +xbar_rd_lat = 328.0 * 1000 * (1/32.0) +xbar_wr_lat = 351.0 * 1000 * (1/32.0) + +# the following is lumped power for xbar rd/wr (for whole array) - includes peripherals +xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat +xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat + +# DAC - Discuss exact values with ISSAC authors +dac_lat_dict = {'1' : 1, + '2' : 1, + '4' : 1, + '8' : 1, + '16': 1} + +dac_pow_dyn_dict = {'1' : 0.00350625, + '2' : 0.00350625, + '4' : 0.00350625, + '8' : 0.00350625, + '16': 0.00350625} + +dac_pow_leak_dict = {'1' : 0.000390625, + '2' : 0.000390625, + '4' : 0.000390625, + '8' : 0.000390625, + '16': 0.000390625} + +dac_area_dict = {'1' : 1.67 * 10**(-7), + '2' : 1.67 * 10**(-7), + '4' : 1.67 * 10**(-7), + '8' : 1.67 * 10**(-7), + '16': 1.67 * 10**(-7)} + +# ADC - Discuss exact values with ISSAC authors +# ADC Values for including sparsity +adc_lat_dict = {'1' : 12.5, + '2' : 25, + '3' : 37.5, + '4' : 50, + '5' : 62.5, + '6' : 75, + '7' : 87.5, + '8' : 100, + '9' : 112.5, + '16': 200} + +adc_pow_dyn_dict = {'1' : 0.225, + '2' : 0.45, + '3' : 0.675, + '4' : 0.9, + '5' : 1.125, + '6' : 1.35, + '7' : 1.575, + '8' : 1.8, + '9' : 2.025, + '16': 3.6} + +adc_pow_leak_dict = {'1' : 0.025, + '2' : 0.05, + '3' : 0.075, + '4' : 0.1, + '5' : 0.125, + '6' : 0.15, + '7' : 0.175, + '8' : 0.2, + '9' : 0.225, + '16': 0.4} + +adc_area_dict = {'1' : 0.0012, + '2' : 0.0012, + '3' : 0.0012, + '4' : 0.0012, + '5' : 0.00075, + '6' : 0.0009, + '7' : 0.00105, + '8' : 0.0012, + '9' : 0.0012, + '16': 0.0012} + +# SNH (MVM pipeline) +snh_lat = 1 +snh_pow_leak = 9.7 * 10**(-7) +snh_pow_dyn = 9.7 * 10**(-6) - snh_pow_leak +snh_area = 0.00004 / 8 / 128 + +# SNA (MVM pipeline) +sna_lat = 1 +sna_pow_leak = 0.005 +sna_pow_dyn = 0.05 - sna_pow_leak +sna_area = 0.00006 + +# ALU (Part of Vector Functional Unit) +alu_lat = 1 +alu_pow_dyn = 2.4 * 32/45 +alu_pow_div_dyn = 1.52 * 32/45 +alu_pow_mul_dyn = 0.795 * 32/45 +alu_pow_others_dyn = 0.373 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +alu_pow_leak = 0.27 * 32/45 +alu_area = 0.00567 * 32/45 + +# witout considering division +#alu_lat = 1 +#alu_pow_dyn = 1.15 * 32/45 +#alu_pow_mul_dyn = 0.796 * 32/45 +#alu_pow_others_dyn = 0.36 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +#alu_pow_leak = 0.05 * 32/45 +#alu_area = 0.002326 * 32/45 + +# Sigmoid/Tanh (Part of Vector Functional Unit) - Taken from ISAAC paper +act_lat = 1 # added for 4 exponential units +act_pow_leak = 0.026 +act_pow_dyn = 0.26 - act_pow_leak +act_area = 0.0003 # check this ??? 
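+
+# The ADC dictionaries above are keyed by resolution in bits. With the sparsity optimisation,
+# ima_modules.py steps the effective resolution down from cfg.adc_res by up to 7 bits, which is
+# presumably why keys '1' through '9' are populated; the hw_stats.py change later in this patch
+# guards each lookup so that a resolution of 0 or less contributes no dynamic power.
+# A minimal sketch of that lookup pattern (illustrative only; 'shift' is a hypothetical name):
+#   shift = 3                                   # 0..7, chosen from the sparsity of the input slice
+#   bits = cfg.adc_res - shift
+#   adc_energy = adc_lat_dict[str(bits)] * adc_pow_dyn_dict[str(bits)] if bits > 0 else 0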
+ +# Multiplexer - These should be analog muxes +mux_lat = 0 +mux_pow_leak = 0 +mux_pow_dyn = 0 +mux_area = 0 + +# Data Memory value dictionary +dataMem_lat_dict = {'256' : 1, + '512' : 1, + '1024': 1, + '2048': 1, + '4096':1, + '16384':1} + +dataMem_pow_dyn_dict = {'256' : 0.16, + '512' : 0.24, + '1024': 0.33, + '2048': 0.57, + '4096': 0.74, + '16384':1.6} + +dataMem_pow_leak_dict = {'256' : 0.044, + '512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096': 0.489, + '16384':1.28} + +dataMem_area_dict = {'256' : 0.00056, + '512' : 0.00108, + '1024': 0.00192, + '2048': 0.00392, + '4096': 0.020691, + '16384':0.0666} + +# Instruction Memory value dictionary +instrnMem_lat_dict = {'512' : 1, + '1024': 1, + '2048': 1, + '4096':1, + '16384':1} + +instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65, + '4096':0.74, + '16384':1.6} + +instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096':0.489, + '16384':1.28} + + +instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041, + '4096':0.020691, + '16384':0.0666} + + +# Xbar_inMem value dictionary (1 access means reading (dac_res) bits for each xbar row) +# for computing average power of ima - scale dyn_pow down by xbar_size +xbar_inMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_inMem_pow_dyn_read_dict = {'16' : 0.3, #doesn't change much as we move from 32 to 16, because these are very small memories + '32' : 0.3, + '64' : 0.7, + '128' : 1.7, + '256' : 4.7} + +xbar_inMem_pow_dyn_write_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_inMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_inMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +# Xbar_outMem value dictionary +xbar_outMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_outMem_pow_dyn_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_outMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_outMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +dataMem_size_max = '16384' +if str(cfg.dataMem_size) in dataMem_lat_dict: + dataMem_size_max = str(cfg.dataMem_size) +else: + print("Warning: No values for core data memory size provided. Using values for 2048 instead.") + +instrnMem_size_max = '16384' +if str(cfg.instrnMem_size) in instrnMem_lat_dict: + instrnMem_size_max = str(cfg.instrnMem_size) +else: + print("Warning: No values for core instruction memory size provided. 
Using values for 2048 instead.") + +# Chosen latency based on config file - only for components whose latency is parameter dependent +#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +#xbar_ip_lat = xbar_ip_lat +xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key, value in xbar_ip_lat_dict.items(): + xbar_ip_lat_dict[key] = xbar_ip_lat +else: + xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +xbar_op_lat = xbar_op_lat +xbar_rd_lat = xbar_rd_lat +xbar_wr_lat = xbar_wr_lat +dac_lat = dac_lat_dict [str(cfg.dac_res)] +#FIXME need to review it I can remove adc_lat property +adc_lat = adc_lat_dict [str(cfg.adc_res)] +xbar_inMem_lat = xbar_inMem_lat_dict[str(cfg.xbar_size)] +xbar_outMem_lat = xbar_outMem_lat_dict[str(cfg.xbar_size)] +instrnMem_lat = instrnMem_lat_dict[str(instrnMem_size_max)] +dataMem_lat = dataMem_lat_dict[str(dataMem_size_max)] + +# Chosen area based on config file - only for components whose area is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] +else: + xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +dac_area = dac_area_dict [str(cfg.dac_res)] +adc_area = adc_area_dict [str(cfg.adc_res)] +xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)] +xbar_outMem_area = xbar_outMem_area_dict[str(cfg.xbar_size)] +instrnMem_area = instrnMem_area_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_area = dataMem_area_dict[str(dataMem_size_max)] + +# Chosen dyn_power based on config file - only for components whose latency is parameter dependent +#xbar_pow_dyn = xbar_pow_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_pow_dyn = xbar_ip_pow +xbar_op_pow_dyn = xbar_op_pow +xbar_rd_pow_dyn = xbar_rd_pow +xbar_wr_pow_dyn = xbar_wr_pow +dac_pow_dyn = dac_pow_dyn_dict [str(cfg.dac_res)] +adc_pow_dyn = adc_pow_dyn_dict [str(cfg.adc_res)] +xbar_inMem_pow_dyn_read = xbar_inMem_pow_dyn_read_dict[str(cfg.xbar_size)] +xbar_inMem_pow_dyn_write = xbar_inMem_pow_dyn_write_dict[str(cfg.xbar_size)] +xbar_outMem_pow_dyn = xbar_outMem_pow_dyn_dict[str(cfg.xbar_size)] +instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_dyn = dataMem_pow_dyn_dict[str(dataMem_size_max)] + +# Energy +xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key,value in xbar_ip_energy_dict.items(): + xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn +else: + xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +print('xbar_ip_energy_dict', xbar_ip_energy_dict) + +# Chosen leak_power based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_pow_leak = 0 +else: + xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)] +dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)] +adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)] +xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)] +xbar_outMem_pow_leak = xbar_outMem_pow_leak_dict[str(cfg.xbar_size)] +instrnMem_pow_leak = instrnMem_pow_leak_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_leak = dataMem_pow_leak_dict[str(dataMem_size_max)] + 
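+
+# All of the "Chosen ..." assignments above follow the same pattern: the configured size from
+# config.py is used as a string key into the matching dictionary, and if that size has no entry
+# the *_size_max fallback (the largest size provisioned in this file) is used after printing a
+# warning. Equivalent restatement of the data-memory case, using the names already defined above:
+#   size_key = str(cfg.dataMem_size)
+#   if size_key not in dataMem_lat_dict:
+#       size_key = '16384'                # the dataMem_size_max default in this 32x32 variant
+#   dataMem_lat = dataMem_lat_dict[size_key]
+# Note that the warning text still says "Using values for 2048 instead." even though the fallback
+# key in this file is '16384'.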
+# Core Control unit (control unit and pipeline registers) +ccu_pow = 1.25*0.2 #0.2 for activvity +ccu_area = 0.00145*2.25 #taken similar as edctrl (scaled by power) + +# Added here for simplicity now (***can need modification later***) +# The latency of mem access is dependent on when can the ima find edram bys non-busy +memInterface_lat = infinity # infinite latency + +############################################################################################################# +# Tile Hierarchy + # Number of IMAs + # EDRAM size + # Shared Bus width + # Instruction memory size + # Receive Buffer size +############################################################################################################# + +# Tile component latency/pow/area +# EDRAM value dictionary (counter storage is not coounted) +edram_lat_dict = {'8' : 2, + '64' : 2, #edram access width is constant = 256 bits + '128' : 2} + +edram_pow_dyn_dict = {'8' : 17.2/2, + '64' : 17.2/2, # (0.0172 nJ with 2 cycles access latency) + '128' : 25.35/2} + +edram_pow_leak_dict = {'8' : 0.46, + '64' : 0.46, + '128' : 0.77} + +edram_area_dict = {'8' : 0.086, + '64' : 0.086, + '128' : 0.121} + +# Tile Instruction Memory value dictionary +tile_instrnMem_lat_dict = {'512': 1, + '1024': 1, + '2048': 1} + +tile_instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65} + +tile_instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33} + + +tile_instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041} + + +edram_size_max = '128' +if str(cfg.edram_size) in edram_lat_dict: + edram_size_max = str(cfg.edram_size) +else: + print("Warning: No values for edram memory size provided. Using values for 128 instead.") + +tile_instrnMem_size_max = '2048' +if str(cfg.tile_instrnMem_size) in tile_instrnMem_lat_dict: + tile_instrnMem_size_max = str(cfg.tile_instrnMem_size) +else: + print("Warning: No values for tile instrn memory size provided. 
Using values for 2048 instead.") + +# counter storage (2048 Byte Scratch RAM - 1 counter entry shared by 256 bits of data (16 neurons)) +# area scaling (X8) +counter_buff_lat = 1 * math.sqrt(8) +counter_buff_pow_dyn = 0.65/2 * math.sqrt(8) +counter_buff_pow_leak = 0.33/2 * math.sqrt(8) +counter_buff_area = 0.0041 * math.sqrt(8) + +# EDRAM to IMA bus values +edram_bus_lat = 1 +edram_bus_pow_dyn = 6/2 #bus width = 384, same as issac (over two cycles) +edram_bus_pow_leak = 1/2 #bus width = 384, same as issac +edram_bus_area = 0.090 + +# EDRAM controller values +edram_ctrl_lat = 1 +edram_ctrl_pow_dyn = 0.475 +edram_ctrl_pow_leak = 0.05 +edram_ctrl_area = 0.00145 + +# Receive buffer value dictionary - 16 entries (Need to make this a dictionary) +# Increasing to 64 entries +receive_buffer_lat = 1 * math.sqrt(4) +receive_buffer_pow_dyn = 4.48 * math.sqrt(4) # (0.2*256/16) +receive_buffer_pow_leak = 0.09 * math.sqrt(4) +receive_buffer_area = 0.0022 *math.sqrt(4) + + +# Chosen latency based on config file - only for components whose latency is parameter dependent +edram_lat = edram_lat_dict[str(edram_size_max)] +tile_instrnMem_lat = tile_instrnMem_lat_dict[str(tile_instrnMem_size_max)] + +# Chosen area based on config file - only for components whose area is parameter dependent +edram_area = edram_area_dict[str(edram_size_max)] +tile_instrnMem_area = tile_instrnMem_area_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen dynamic power based on config file - only for components whose dynamic power is parameter dependent +edram_pow_dyn = edram_pow_dyn_dict[str(edram_size_max)] +tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen leakage power based on config file - only for components whose leakage power is parameter dependent +edram_pow_leak = edram_pow_leak_dict[str(edram_size_max)] +tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Tile Control unit +tcu_pow = 0.25*0.2 +tcu_area = 0.00145 #taken similar as edctrl + +############################################################################################################# +# Node Hierarchy + # Number of Tiles + # NOC - Topology (Currently assumes a cmesh (c=4, same as ISSAC)) + # n = number of dimension\ + # k = number of tiles in each dimension + # c = concentartion (tiles/router) + # average injection rate (0.25 - a tile injects a new packet for each destination in every four cycles) +############################################################################################################# + +# NOC latency dictionary (in terms of flit cycle) +# Note - if inj_rate (packet injection -1 packet - 16 neurons) exceeds 0.025 - there's a problem, NoC needs to be redesigned else network latency will be killing! 
+# Hence, not provided for +noc_inj_rate_max = 0.025 +noc_lat_dict = {'0.001': 29, + '0.005': 31, + '0.01' : 34, + '0.02' : 54, + '0.025': 115} + +noc_area_dict = {'4': 0.047, + '8': 0.116} + +# Router dynamic power - NOC will be used only if atleast one of send_queue in node is non_empty +noc_pow_dyn_dict = {'4': 16.13, + '8': 51.48} + +# Router leakage power - NOC will be used only if atleast oen of send_queue in node is non_empty +noc_pow_leak_dict = {'4': 0.41, + '8': 1.04} + +# Enter component latency (Based on teh above NOC topological parameters) +# Inter-node Noc (router & channel) +assert (cfg.noc_inj_rate <= noc_inj_rate_max), 'Oops: reconsider NOC design and or DNN mapping, with this inj_rate, NOC data transfer throughput \ +will be terrible!' + +noc_intra_lat = noc_lat_dict[str(cfg.noc_inj_rate)] +noc_intra_pow_dyn = noc_pow_dyn_dict[str(cfg.noc_num_port)] # per router +noc_intra_pow_leak = noc_pow_leak_dict[str(cfg.noc_num_port)]# per router +noc_intra_area = noc_area_dict[str(cfg.noc_num_port)] # per router + +# Hypertransport network (HT) +# Note HT is external to a node, but we consider all tiles in one +# virtual node itself for simplicity +# HT numbers from ISAAC = 6.4GB/s = 6.4B/ ns = 1packet(16*2 Bytes) = 5ns +ht_lat = 5 #latency per packet +noc_inter_lat = ht_lat + noc_intra_lat #navigate to the node, then to tile within node +noc_inter_pow_dyn = 10400 #10.4W +noc_inter_pow_leak = 0 +noc_inter_area = 22.88 + diff --git a/include/example-constants/constants-64.py b/include/example-constants/constants-64.py new file mode 100644 index 00000000..ae589dfd --- /dev/null +++ b/include/example-constants/constants-64.py @@ -0,0 +1,596 @@ +## This file contains the data structures used in differnet hierarchies. +## It also holds power, area and latency numbers of different component used in DPE design +import config as cfg +import math +import constants_digital as digi_param +# Limits the number of cycles an IMA runs in case it doesn't halt +infinity = 100000 + +############################################################################################################# +## Technology/Other constants for all the modules +############################################################################################################# +# IMA - folliwng parameters are not used currently, will be used when analog functionality is implemented +cycle_time = 1 # in nanoseconds (1ns) +vdd = 0.9 +xbar_out_min = -10e-10 +xbar_out_max = 1 # think about this - ??? 
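+
+# This 64x64 example file largely mirrors constants-32.py; the main visible difference is in the
+# data/instruction memory dictionaries further below, which here top out at 4096 entries (and the
+# *_size_max fallbacks default to '4096' rather than '16384').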
+ +############################################################################################################# +## Define commonly used data structures +############################################################################################################# +# List of supported opcodes for tile +op_list_tile = ['send', 'receive', 'compute', 'halt'] + +# Instruction format for Tile +dummy_instrn_tile = {'opcode' : op_list_tile[0], + 'mem_addr': 0, # send/receive - edram_addr + 'r1': 0, # send-send_width, receive-receive_width + 'r2': 0, # send-target_addr, receive-counter + 'vtile_id': 0, # send/receive-neuron_id + 'ima_nma': '', # compute - a bit for each ima + 'vec': 0} # vector width + +# List of supported opcodes/aluops for IMA - cp will copy data (from data memory of ima to xbarInmem) +op_list = ['ld', 'cp', 'st', 'set', 'nop', 'alu', 'alui', 'mvm', 'vvo', 'hlt', 'jmp', 'beq', 'alu_int', 'crs'] +aluop_list = ['add', 'sub', 'sna', 'mul', 'sigmoid'] # sna is also used by mvm isntruction + +# Instruction format for IMA +dummy_instrn = {'opcode' : op_list[0], # instrn op + 'aluop' : aluop_list[0], # alu function + 'd1' : 0, # destination + 'r1' : 0, # operand1 (stride for mvm) + 'r2' : 0, # operand2 + 'r3' : 0, # operand3 (shift) + 'vec' : 0, # vector width + 'imm' : 0, # immediate (scalar) data + 'xb_nma' : 0 } # xbar negative-mask, a xbar evaluates if neg-mask = 1 + +# List of pipeline stages - in order for IMA +stage_list = ['fet', 'dec', 'ex'] +last_stage = 'ex' + +############################################################################################################# +# IMA Hierarchy parameters + # Number of Xbars + # Crossbar Size + # Crossbar bits + # Bit resolution of ADCs and DACs + # Number of ADCs + # Number of ALUs + # Data memory size + # Size of Xbar in/out memory (Register) is dependent on Xbar size and num_bits + # Instruction memory size +############################################################################################################# + +# IMA component latency/power/area dictionary (all values in ns, mw, mm2) +# XBAR - Models from ISAAC paper +xbar_lat_dict = {'2': {'16' : 16, + '32' : 32, # first indexed by xbar_bits then by xbar_size + '64' : 64, + '128': 128, + '256': 256}, + '4': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}, + '6': {'16' : 16, + '32' : 32, + '64' : 64, + '128': 128, + '256': 256}} + +xbar_pow_dict = {'2': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '4': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}, + '6': {'16' : 0.0046875, + '32' : 0.01875, + '64' : 0.075, + '128': 0.3, + '256': 1.2}} + +xbar_area_dict = {'2': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '4': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}, + '6': {'16' : 3.90625 * 10**(-7), + '32' : 1.5625 * 10**(-6), + '64' : 6.25 * 10**(-6), + '128': 2.5 * 10**(-5), + '256': 1.0 * 10**(-4)}} + +## New values added for xbar MVM/MTVM, OP (parallel write), serial read/write +# the following is lumped power for xbar inner/outer-product - includes peripherals +xbar_op_lat = 20.0*12.8 # with 4 VFUs +xbar_op_pow = 4.44 * 3.27 / (12.8) + +#hardcoded value +#xbar_ip_lat = 100.0 +#value depending on xb size +xbar_ip_lat = xbar_lat_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +#xbar_ip_pow = (1.37*2.0) # xbar_ip_pow 
(includes all mvmu) +#xbar_ip_pow = (1.37*2.0) - 1.04 if cfg.training else 1.37-1.04 # xbar_ip_pow (includes all mvmu except ADC - uncomment num_access for ADC object), +#xbar inner product power dependence on xbar size +xbar_ip_pow = xbar_pow_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] + +# Note the read and write lat/pow are for entire xbar +xbar_rd_lat = 328.0 * 1000 * (1/32.0) +xbar_wr_lat = 351.0 * 1000 * (1/32.0) + +# the following is lumped power for xbar rd/wr (for whole array) - includes peripherals +xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat +xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat + +# DAC - Discuss exact values with ISSAC authors +dac_lat_dict = {'1' : 1, + '2' : 1, + '4' : 1, + '8' : 1, + '16': 1} + +dac_pow_dyn_dict = {'1' : 0.00350625, + '2' : 0.00350625, + '4' : 0.00350625, + '8' : 0.00350625, + '16': 0.00350625} + +dac_pow_leak_dict = {'1' : 0.000390625, + '2' : 0.000390625, + '4' : 0.000390625, + '8' : 0.000390625, + '16': 0.000390625} + +dac_area_dict = {'1' : 1.67 * 10**(-7), + '2' : 1.67 * 10**(-7), + '4' : 1.67 * 10**(-7), + '8' : 1.67 * 10**(-7), + '16': 1.67 * 10**(-7)} + +# ADC - Discuss exact values with ISSAC authors +# ADC Values for including sparsity +adc_lat_dict = {'1' : 12.5, + '2' : 25, + '3' : 37.5, + '4' : 50, + '5' : 62.5, + '6' : 75, + '7' : 87.5, + '8' : 100, + '9' : 112.5, + '16': 200} + +adc_pow_dyn_dict = {'1' : 0.225, + '2' : 0.45, + '3' : 0.675, + '4' : 0.9, + '5' : 1.125, + '6' : 1.35, + '7' : 1.575, + '8' : 1.8, + '9' : 2.025, + '16': 3.6} + +adc_pow_leak_dict = {'1' : 0.025, + '2' : 0.05, + '3' : 0.075, + '4' : 0.1, + '5' : 0.125, + '6' : 0.15, + '7' : 0.175, + '8' : 0.2, + '9' : 0.225, + '16': 0.4} + +adc_area_dict = {'1' : 0.0012, + '2' : 0.0012, + '3' : 0.0012, + '4' : 0.0012, + '5' : 0.00075, + '6' : 0.0009, + '7' : 0.00105, + '8' : 0.0012, + '9' : 0.0012, + '16': 0.0012} + +# SNH (MVM pipeline) +snh_lat = 1 +snh_pow_leak = 9.7 * 10**(-7) +snh_pow_dyn = 9.7 * 10**(-6) - snh_pow_leak +snh_area = 0.00004 / 8 / 128 + +# SNA (MVM pipeline) +sna_lat = 1 +sna_pow_leak = 0.005 +sna_pow_dyn = 0.05 - sna_pow_leak +sna_area = 0.00006 + +# ALU (Part of Vector Functional Unit) +alu_lat = 1 +alu_pow_dyn = 2.4 * 32/45 +alu_pow_div_dyn = 1.52 * 32/45 +alu_pow_mul_dyn = 0.795 * 32/45 +alu_pow_others_dyn = 0.373 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +alu_pow_leak = 0.27 * 32/45 +alu_area = 0.00567 * 32/45 + +# witout considering division +#alu_lat = 1 +#alu_pow_dyn = 1.15 * 32/45 +#alu_pow_mul_dyn = 0.796 * 32/45 +#alu_pow_others_dyn = 0.36 * 32/45 # logical, eq, relu, add, sub, lsh, rsh +#alu_pow_leak = 0.05 * 32/45 +#alu_area = 0.002326 * 32/45 + +# Sigmoid/Tanh (Part of Vector Functional Unit) - Taken from ISAAC paper +act_lat = 1 # added for 4 exponential units +act_pow_leak = 0.026 +act_pow_dyn = 0.26 - act_pow_leak +act_area = 0.0003 # check this ??? 
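+
+# The 32/45 factor applied to the ALU numbers above looks like a linear scaling of 45 nm reference
+# values to a 32 nm technology node (an inference from the factor itself, not stated in the
+# original); the commented-out block gives the same parameters without the divider unit.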
+ +# Multiplexer - These should be analog muxes +mux_lat = 0 +mux_pow_leak = 0 +mux_pow_dyn = 0 +mux_area = 0 + +# Data Memory value dictionary +dataMem_lat_dict = {'256' : 1, + '512' : 1, + '1024': 1, + '2048': 1, + '4096':1} + +dataMem_pow_dyn_dict = {'256' : 0.16, + '512' : 0.24, + '1024': 0.33, + '2048': 0.57, + '4096': 0.74} + +dataMem_pow_leak_dict = {'256' : 0.044, + '512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096': 0.489} + +dataMem_area_dict = {'256' : 0.00056, + '512' : 0.00108, + '1024': 0.00192, + '2048': 0.00392, + '4096': 0.020691} + +# Instruction Memory value dictionary +instrnMem_lat_dict = {'512' : 1, + '1024': 1, + '2048': 1, + '4096':1} + +instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65, + '4096':0.74} + +instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33, + '4096':0.489} + + +instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041, + '4096':0.020691} + + +# Xbar_inMem value dictionary (1 access means reading (dac_res) bits for each xbar row) +# for computing average power of ima - scale dyn_pow down by xbar_size +xbar_inMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_inMem_pow_dyn_read_dict = {'16' : 0.3, #doesn't change much as we move from 32 to 16, because these are very small memories + '32' : 0.3, + '64' : 0.7, + '128' : 1.7, + '256' : 4.7} + +xbar_inMem_pow_dyn_write_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_inMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_inMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +# Xbar_outMem value dictionary +xbar_outMem_lat_dict = {'16' : 1, + '32' : 1, # indexed with xbar size + '64' : 1, + '128' : 1, + '256' : 1} + +xbar_outMem_pow_dyn_dict = {'16' : 0.1, + '32' : 0.1, + '64' : 0.1, + '128' : 0.16, + '256' : 0.2} + +xbar_outMem_pow_leak_dict = {'16' : 0.009, + '32' : 0.009, + '64' : 0.02, + '128' : 0.04, + '256' : 0.075} + +xbar_outMem_area_dict = {'16' : 0.00015, + '32' : 0.00015, + '64' : 0.00033, + '128' : 0.00078, + '256' : 0.0019} + +dataMem_size_max = '4096' +if str(cfg.dataMem_size) in dataMem_lat_dict: + dataMem_size_max = str(cfg.dataMem_size) +else: + print("Warning: No values for core data memory size provided. Using values for 2048 instead.") + +instrnMem_size_max = '4096' +if str(cfg.instrnMem_size) in instrnMem_lat_dict: + instrnMem_size_max = str(cfg.instrnMem_size) +else: + print("Warning: No values for core instruction memory size provided. 
Using values for 2048 instead.") + +# Chosen latency based on config file - only for components whose latency is parameter dependent +#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key, value in xbar_ip_lat_dict.items(): + xbar_ip_lat_dict[key] = xbar_ip_lat +else: + xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +xbar_op_lat = xbar_op_lat +xbar_rd_lat = xbar_rd_lat +xbar_wr_lat = xbar_wr_lat +dac_lat = dac_lat_dict [str(cfg.dac_res)] +#FIXME need to review it I can remove adc_lat property +adc_lat = adc_lat_dict [str(cfg.adc_res)] +xbar_inMem_lat = xbar_inMem_lat_dict[str(cfg.xbar_size)] +xbar_outMem_lat = xbar_outMem_lat_dict[str(cfg.xbar_size)] +instrnMem_lat = instrnMem_lat_dict[str(instrnMem_size_max)] +dataMem_lat = dataMem_lat_dict[str(dataMem_size_max)] + +# Chosen area based on config file - only for components whose area is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)] +else: + xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +dac_area = dac_area_dict [str(cfg.dac_res)] +adc_area = adc_area_dict [str(cfg.adc_res)] +xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)] +xbar_outMem_area = xbar_outMem_area_dict[str(cfg.xbar_size)] +instrnMem_area = instrnMem_area_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_area = dataMem_area_dict[str(dataMem_size_max)] + +# Chosen dyn_power based on config file - only for components whose latency is parameter dependent +#xbar_pow_dyn = xbar_pow_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)] +xbar_ip_pow_dyn = xbar_ip_pow +xbar_op_pow_dyn = xbar_op_pow +xbar_rd_pow_dyn = xbar_rd_pow +xbar_wr_pow_dyn = xbar_wr_pow +dac_pow_dyn = dac_pow_dyn_dict [str(cfg.dac_res)] +adc_pow_dyn = adc_pow_dyn_dict [str(cfg.adc_res)] +xbar_inMem_pow_dyn_read = xbar_inMem_pow_dyn_read_dict[str(cfg.xbar_size)] +xbar_inMem_pow_dyn_write = xbar_inMem_pow_dyn_write_dict[str(cfg.xbar_size)] +xbar_outMem_pow_dyn = xbar_outMem_pow_dyn_dict[str(cfg.xbar_size)] +instrnMem_pow_dyn = instrnMem_pow_dyn_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_dyn = dataMem_pow_dyn_dict[str(dataMem_size_max)] + +# Energy +xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0} +if cfg.MVMU_ver == "Analog": + for key,value in xbar_ip_energy_dict.items(): + xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn +else: + xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)] +print('xbar_ip_energy_dict', xbar_ip_energy_dict) + +# Chosen leak_power based on config file - only for components whose latency is parameter dependent +if cfg.MVMU_ver == "Analog": + xbar_pow_leak = 0 +else: + xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)] +dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)] +adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)] +xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)] +xbar_outMem_pow_leak = xbar_outMem_pow_leak_dict[str(cfg.xbar_size)] +instrnMem_pow_leak = instrnMem_pow_leak_dict[str(instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction +dataMem_pow_leak = dataMem_pow_leak_dict[str(dataMem_size_max)] + +# Core Control unit 
(control unit and pipeline registers) +ccu_pow = 1.25*0.2 #0.2 for activvity +ccu_area = 0.00145*2.25 #taken similar as edctrl (scaled by power) + +# Added here for simplicity now (***can need modification later***) +# The latency of mem access is dependent on when can the ima find edram bys non-busy +memInterface_lat = infinity # infinite latency + +############################################################################################################# +# Tile Hierarchy + # Number of IMAs + # EDRAM size + # Shared Bus width + # Instruction memory size + # Receive Buffer size +############################################################################################################# + +# Tile component latency/pow/area +# EDRAM value dictionary (counter storage is not coounted) +edram_lat_dict = {'8' : 2, + '64' : 2, #edram access width is constant = 256 bits + '128' : 2} + +edram_pow_dyn_dict = {'8' : 17.2/2, + '64' : 17.2/2, # (0.0172 nJ with 2 cycles access latency) + '128' : 25.35/2} + +edram_pow_leak_dict = {'8' : 0.46, + '64' : 0.46, + '128' : 0.77} + +edram_area_dict = {'8' : 0.086, + '64' : 0.086, + '128' : 0.121} + +# Tile Instruction Memory value dictionary +tile_instrnMem_lat_dict = {'512': 1, + '1024': 1, + '2048': 1} + +tile_instrnMem_pow_dyn_dict = {'512' : 0.46, + '1024': 0.53, + '2048': 0.65} + +tile_instrnMem_pow_leak_dict = {'512' : 0.078, + '1024': 0.147, + '2048': 0.33} + + +tile_instrnMem_area_dict = {'512' : 0.00108, + '1024': 0.00192, + '2048': 0.0041} + + +edram_size_max = '128' +if str(cfg.edram_size) in edram_lat_dict: + edram_size_max = str(cfg.edram_size) +else: + print("Warning: No values for edram memory size provided. Using values for 128 instead.") + +tile_instrnMem_size_max = '2048' +if str(cfg.tile_instrnMem_size) in tile_instrnMem_lat_dict: + tile_instrnMem_size_max = str(cfg.tile_instrnMem_size) +else: + print("Warning: No values for tile instrn memory size provided. 
Using values for 2048 instead.") + +# counter storage (2048 Byte Scratch RAM - 1 counter entry shared by 256 bits of data (16 neurons)) +# area scaling (X8) +counter_buff_lat = 1 * math.sqrt(8) +counter_buff_pow_dyn = 0.65/2 * math.sqrt(8) +counter_buff_pow_leak = 0.33/2 * math.sqrt(8) +counter_buff_area = 0.0041 * math.sqrt(8) + +# EDRAM to IMA bus values +edram_bus_lat = 1 +edram_bus_pow_dyn = 6/2 #bus width = 384, same as issac (over two cycles) +edram_bus_pow_leak = 1/2 #bus width = 384, same as issac +edram_bus_area = 0.090 + +# EDRAM controller values +edram_ctrl_lat = 1 +edram_ctrl_pow_dyn = 0.475 +edram_ctrl_pow_leak = 0.05 +edram_ctrl_area = 0.00145 + +# Receive buffer value dictionary - 16 entries (Need to make this a dictionary) +# Increasing to 64 entries +receive_buffer_lat = 1 * math.sqrt(4) +receive_buffer_pow_dyn = 4.48 * math.sqrt(4) # (0.2*256/16) +receive_buffer_pow_leak = 0.09 * math.sqrt(4) +receive_buffer_area = 0.0022 *math.sqrt(4) + + +# Chosen latency based on config file - only for components whose latency is parameter dependent +edram_lat = edram_lat_dict[str(edram_size_max)] +tile_instrnMem_lat = tile_instrnMem_lat_dict[str(tile_instrnMem_size_max)] + +# Chosen area based on config file - only for components whose area is parameter dependent +edram_area = edram_area_dict[str(edram_size_max)] +tile_instrnMem_area = tile_instrnMem_area_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen dynamic power based on config file - only for components whose dynamic power is parameter dependent +edram_pow_dyn = edram_pow_dyn_dict[str(edram_size_max)] +tile_instrnMem_pow_dyn = tile_instrnMem_pow_dyn_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Chosen leakage power based on config file - only for components whose leakage power is parameter dependent +edram_pow_leak = edram_pow_leak_dict[str(edram_size_max)] +tile_instrnMem_pow_leak = tile_instrnMem_pow_leak_dict[str(tile_instrnMem_size_max)] * math.sqrt(8) #area scaling for 8 bytes per instruction + +# Tile Control unit +tcu_pow = 0.25*0.2 +tcu_area = 0.00145 #taken similar as edctrl + +############################################################################################################# +# Node Hierarchy + # Number of Tiles + # NOC - Topology (Currently assumes a cmesh (c=4, same as ISSAC)) + # n = number of dimension\ + # k = number of tiles in each dimension + # c = concentartion (tiles/router) + # average injection rate (0.25 - a tile injects a new packet for each destination in every four cycles) +############################################################################################################# + +# NOC latency dictionary (in terms of flit cycle) +# Note - if inj_rate (packet injection -1 packet - 16 neurons) exceeds 0.025 - there's a problem, NoC needs to be redesigned else network latency will be killing! 
+# Hence, not provided for +noc_inj_rate_max = 0.025 +noc_lat_dict = {'0.001': 29, + '0.005': 31, + '0.01' : 34, + '0.02' : 54, + '0.025': 115} + +noc_area_dict = {'4': 0.047, + '8': 0.116} + +# Router dynamic power - NOC will be used only if atleast one of send_queue in node is non_empty +noc_pow_dyn_dict = {'4': 16.13, + '8': 51.48} + +# Router leakage power - NOC will be used only if atleast oen of send_queue in node is non_empty +noc_pow_leak_dict = {'4': 0.41, + '8': 1.04} + +# Enter component latency (Based on teh above NOC topological parameters) +# Inter-node Noc (router & channel) +assert (cfg.noc_inj_rate <= noc_inj_rate_max), 'Oops: reconsider NOC design and or DNN mapping, with this inj_rate, NOC data transfer throughput \ +will be terrible!' + +noc_intra_lat = noc_lat_dict[str(cfg.noc_inj_rate)] +noc_intra_pow_dyn = noc_pow_dyn_dict[str(cfg.noc_num_port)] # per router +noc_intra_pow_leak = noc_pow_leak_dict[str(cfg.noc_num_port)]# per router +noc_intra_area = noc_area_dict[str(cfg.noc_num_port)] # per router + +# Hypertransport network (HT) +# Note HT is external to a node, but we consider all tiles in one +# virtual node itself for simplicity +# HT numbers from ISAAC = 6.4GB/s = 6.4B/ ns = 1packet(16*2 Bytes) = 5ns +ht_lat = 5 #latency per packet +noc_inter_lat = ht_lat + noc_intra_lat #navigate to the node, then to tile within node +noc_inter_pow_dyn = 10400 #10.4W +noc_inter_pow_leak = 0 +noc_inter_area = 22.88 + diff --git a/src/hw_stats.py b/src/hw_stats.py index 6970de88..f9e54419 100644 --- a/src/hw_stats.py +++ b/src/hw_stats.py @@ -30,14 +30,14 @@ 'xbar_wr':param.xbar_wr_pow_dyn*param.xbar_wr_lat, 'dac':param.dac_pow_dyn, 'snh':param.snh_pow_dyn, \ 'mux1':param.mux_pow_dyn, 'mux2':param.mux_pow_dyn, \ - 'adc':{ 'n' : param.adc_pow_dyn_dict[str(cfg.adc_res)], \ - 'n/2': param.adc_pow_dyn_dict[str(cfg.adc_res-1)], \ - 'n/4': param.adc_pow_dyn_dict[str(cfg.adc_res-2)], \ - 'n/8': param.adc_pow_dyn_dict[str(cfg.adc_res-3)], \ - 'n/16': param.adc_pow_dyn_dict[str(cfg.adc_res-4)], \ - 'n/32': param.adc_pow_dyn_dict[str(cfg.adc_res-5)], \ - 'n/64': param.adc_pow_dyn_dict[str(cfg.adc_res-6)], \ - 'n/128': param.adc_pow_dyn_dict[str(cfg.adc_res-7)]}, \ + 'adc':{ 'n' : param.adc_pow_dyn_dict[str(cfg.adc_res)] if cfg.adc_res>0 else 0, \ + 'n/2': param.adc_pow_dyn_dict[str(cfg.adc_res-1)] if cfg.adc_res-1>0 else 0, \ + 'n/4': param.adc_pow_dyn_dict[str(cfg.adc_res-2)] if cfg.adc_res-2>0 else 0, \ + 'n/8': param.adc_pow_dyn_dict[str(cfg.adc_res-3)] if cfg.adc_res-3>0 else 0, \ + 'n/16': param.adc_pow_dyn_dict[str(cfg.adc_res-4)] if cfg.adc_res-4>0 else 0, \ + 'n/32': param.adc_pow_dyn_dict[str(cfg.adc_res-5)] if cfg.adc_res-5>0 else 0, \ + 'n/64': param.adc_pow_dyn_dict[str(cfg.adc_res-6)] if cfg.adc_res-6>0 else 0, \ + 'n/128': param.adc_pow_dyn_dict[str(cfg.adc_res-7)] if cfg.adc_res-7>0 else 0}, \ 'alu_div': param.alu_pow_div_dyn, 'alu_mul':param.alu_pow_mul_dyn, \ 'alu_act': param.act_pow_dyn, 'alu_other':param.alu_pow_others_dyn, \ 'alu_sna': param.sna_pow_dyn, \ @@ -105,7 +105,6 @@ def get_hw_stats (fid, node_dut, cycle): for j in range (cfg.num_ima): sum_num_cycle_ima += node_dut.tile_list[i].ima_list[j].cycle_count # used for leakage energy of imas - mvmu_type = ['f', 'b', 'd'] for k in range (cfg.num_matrix): for mvmu_t in mvmu_type: @@ -187,8 +186,8 @@ def get_hw_stats (fid, node_dut, cycle): hw_comp_access['dmem'] += node_dut.tile_list[i].ima_list[j].dataMem.num_access # Added for core and tile control units - hw_comp_access['core_control'] = sum_num_cycle_tile - 
hw_comp_access['tile_control'] = sum_num_cycle_ima + hw_comp_access['core_control'] = sum_num_cycle_ima + hw_comp_access['tile_control'] = sum_num_cycle_tile total_energy = 0 total_adc_energy = 0 diff --git a/src/ima.py b/src/ima.py index ea54481a..b3864426 100644 --- a/src/ima.py +++ b/src/ima.py @@ -502,7 +502,12 @@ def do_execute (self, ex_op, fid): # check if data is a list if (type(data) != list): data = ['0'*cfg.data_width]*self.de_r2 + elif (len(data)= datamem_off): self.dataMem.write (dst_addr, data[i]) @@ -721,7 +726,8 @@ def outer_product (mat_id, key): if (cfg.inference): for i in xrange(cfg.num_matrix): - if self.de_xb_nma[i]: + if int(self.de_xb_nma[i]): + #if self.de_xb_nma[i]: print ("ima_id: " +str(self.ima_id) + " mat_id: " +str(i) + " MVM") inner_product(i,'f') diff --git a/src/ima_modules.py b/src/ima_modules.py index b0d5b2d0..f98e2afe 100644 --- a/src/ima_modules.py +++ b/src/ima_modules.py @@ -280,7 +280,7 @@ def propagate_dummy (self, inp, sparsity = 0): else: self.num_access['n/128'] += 1 self.adc_res = cfg.adc_res-7 - if(self.adc_res<0): + if(self.adc_res<=0): self.adc_res = 1 return inp @@ -740,4 +740,3 @@ def rdRequest (self, addr, rd_width): ## For DEBUG of IMA only #self.ramload = self.edram.memfile[addr] - diff --git a/src/tile.py b/src/tile.py index a849dd35..b895feef 100644 --- a/src/tile.py +++ b/src/tile.py @@ -109,7 +109,7 @@ def tile_init (self, instrnpath, tracepath): def tile_compute (self, cycle): ## Simulate a cycle if IMA(s) that haven't halted - if (not all(self.halt_list)): # A tile halts whwn all IMAs (within the tile) halt + if (not all(self.halt_list)): # A tile halts when all IMAs (within the tile) halt for i in range (cfg.num_ima): if ((not self.halt_list[i]) and self.ima_nma_list[i]): self.ima_list[i].pipe_run (cycle, self.fid_list[i]) diff --git a/src/tile_modules.py b/src/tile_modules.py index 417f95e2..55e9670d 100644 --- a/src/tile_modules.py +++ b/src/tile_modules.py @@ -92,8 +92,12 @@ def read (self, addr, width = 1): # read edram_buswidth/data_width of continuous # returns a list of entries (list has one entry - Typical case) assert (width < cfg.edram_buswidth/cfg.data_width+1), \ 'read edram width exceeds' - return self.memfile[(addr - self.addr_start) : \ - (addr - self.addr_start + width)][:] + data = self.memfile[(addr - self.addr_start) : \ + (addr - self.addr_start + width)][:] + assert (len(data) == width), 'data length not same as requested width' + return data +# return self.memfile[(addr - self.addr_start) : \ +# (addr - self.addr_start + width)][:] # redefine the write assertion def write (self, addr, data, width = 1): # write (edram_buswidth/data_width) to continuous writes to edram @@ -185,8 +189,10 @@ def propagate (self, ren_list, wen_list, rd_width_list, wr_width_list, ramstore_ self.counter[addr+i] = self.counter[addr+i] - 1 if (self.counter[addr+i] <= 0): #modified self.valid[addr+i] = 0 - # read the data and send to ima - if found is 0, ramload is junk - ramload = self.mem.read (addr, rd_width_list[idx]) + # read the data and send to ima - if found is 0, ramload is junk + ramload = self.mem.read (addr, rd_width_list[idx]) + else: + ramload = 0 #if found=0 implies set ramload as dummy 0 return [found, idx, ramload] else: # ST instruction diff --git a/test/cnn/conv-layer-stride.cpp b/test/cnn/conv-layer-stride.cpp index 1c13dee8..70a6e26e 100644 --- a/test/cnn/conv-layer-stride.cpp +++ b/test/cnn/conv-layer-stride.cpp @@ -19,14 +19,14 @@ int main(int argc, char** argv) { // Model model = Model::create("conv3-layer"); 
// Process parameter - unsigned int in_size_x ; - unsigned int in_size_y ; - unsigned int in_channels ; - unsigned int out_channels ; - unsigned int k_size_x ; - unsigned int k_size_y ; - unsigned int padding ; - unsigned int stride ; + unsigned int in_size_x=9 ; + unsigned int in_size_y=9 ; + unsigned int in_channels=128 ; + unsigned int out_channels=256 ; + unsigned int k_size_x=3 ; + unsigned int k_size_y=3 ; + unsigned int padding=1 ; + unsigned int stride=1 ; if(argc == 10) { in_size_x = atoi(argv[1]); @@ -35,14 +35,14 @@ int main(int argc, char** argv) { out_channels = atoi(argv[4]); k_size_x = atoi(argv[5]); k_size_y = atoi(argv[6]); - padding = atoi(argv[7]); - stride = atoi(argv[8]); + padding = atoi(argv[7]); + stride = atoi(argv[8]); } std:: string str=std::string("conv") + argv[9] + std::string("-layer"); Model model = Model::create(str); // Input stream - auto in_stream = InputImagePixelStream::create(model, "in_stream", in_size_x, in_size_y, in_channels); + auto in_stream = InputImagePixelStream::create(model, "in_stream", in_size_x, in_size_y, in_channels, stride); // Output stream unsigned int out_size_x = (in_size_x - k_size_x + 2*padding)/stride + 1;
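+    // Usage sketch (inferred from the argc == 10 check above; the placeholder names below are
+    // descriptive, not part of the original source):
+    //   conv-layer-stride <in_size_x> <in_size_y> <in_channels> <out_channels>
+    //                     <k_size_x> <k_size_y> <padding> <stride> <layer_id>
+    // The first eight values override the hard-coded defaults (9x9 input, 128 input / 256 output
+    // channels, 3x3 kernel, padding 1, stride 1); argv[9] is appended to the model name, giving
+    // "conv<layer_id>-layer".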