From 73c6adfc13797d80f381ca14b143175c640b6a5f Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Tue, 1 Oct 2019 16:24:07 -0400
Subject: [PATCH 01/15] Added a testbench for MVM test; added an API for
 reading weights from weight files

---
 include/config.py   | 12 +++++--
 src/dnn_wt_p.py     | 63 ++++++++++++++++++++++++++++++++++
 test/mvm_ip_test.py | 83 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 src/dnn_wt_p.py
 create mode 100644 test/mvm_ip_test.py

diff --git a/include/config.py b/include/config.py
index fe652f4a..ebb4f323 100644
--- a/include/config.py
+++ b/include/config.py
@@ -2,9 +2,11 @@
 ## All user specified parameters are provided by this file only
 
 ## Debug - 0 (1): dpe simulation will (won't) produce ima/tile traces while simulating
-cycles_max = 5000000 # Put both these to very large numbers (when design is bug-free)!
+cycles_max = 500000 # Put both these to very large numbers (when design is bug-free)!
 debug = 1
 xbar_record = 1
+inference =1
+# training = 0
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
@@ -55,7 +57,11 @@
 instrnMem_size = 512 #in entries
 
 # This depends on above parameters
-datamem_off = xbar_size * (num_matrix*6) # each matrix has 6 memory spaces (1 for f/b, 2 for d)
+if(not inference):
+  datamem_off = xbar_size * (num_matrix*6) # each matrix has 6 memory spaces (1 for f/b, 2 for d)
+if(inference):
+  datamem_off = xbar_size * (num_matrix*2) # each matrix has 2 memory spaces (for f only)
+# datamem_off = xbar_size * (num_matrix*6) # each matrix has 6 memory spaces (1 for f/b, 2 for d)
 phy2log_ratio = num_bits / xbar_bits # ratio of physical to logical xbar #vaulue is 8
 lr = 0.25 # learning rate for updates to d-xbar
 
@@ -96,7 +102,7 @@
 # (b bit of address = logN, N is the number of nodes)
 
 # Change here - Specify the Node parameters here
-num_tile_compute = 23 # number of tiles mapped by dnn (leaving input and output tiles)
+num_tile_compute = 7 # number of tiles mapped by dnn (leaving input and output tiles)
 num_tile_max = 168.0 # maximum number of tiles per node
 num_inj_max = num_tile_max # [conservative] max number of packet injections that can occur in a cycle (each tile injects a packet into NOC each cycle)
 noc_inj_rate = 0.005
diff --git a/src/dnn_wt_p.py b/src/dnn_wt_p.py
new file mode 100644
index 00000000..046789e0
--- /dev/null
+++ b/src/dnn_wt_p.py
@@ -0,0 +1,63 @@
+from functools import partial
+from multiprocessing import Pool
+
+#****************************************************************************************
+# Designed by - Aayush Ankit
+#               School of Elctrical and Computer Engineering
+#               Nanoelectronics Research Laboratory
+#               Purdue University
+#               (aankit at purdue dot edu)
+#
+# DPEsim - Dot-Product Engine Simulator
+#
+# Input Tile (tile_id = 0) - has instructions to send input layer data to tiles
+#       -> Dump the SEND instructions correponding to input data in this tile
+#
+# Output Tile (tile_id = num_tile) - has instructions to receive output data from tiles
+#       -> Dump the data in EDRAM - that's your DNN output
+#
+# Other tiles (0 < tile_id < num_tile) - physical tiles used in computations
+#****************************************************************************************
+
+import time
+
+import sys
+import getopt
+import os
+import argparse
+
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+src_dir = os.path.join(root_dir, "src")
+include_dir = os.path.join(root_dir, "include")
+test_dir = os.path.join(root_dir, "test")
+
+sys.path.insert(0, include_dir)
+sys.path.insert(0, src_dir)
+sys.path.insert(0, root_dir)
+
+# Set the instruction & trace paths (create the folder hierarchy)
+# Assumption: All instructions for all TILEs and IMAs have already been generated
+from node_dump import *
+import numpy as np
+
+class dnn_wt:
+
+    def prog_dnn_wt(self, instrnpath, node_dut):  
+
+        ## Program DNN weights on the xbars
+        for i in range(1, cfg.num_tile):
+            print ('Programming weights of tile no: ', i)
+            for j in range(cfg.num_ima):
+                print ('Programming ima no: ', j)
+                for k in range(cfg.num_matrix):
+                    for l in range(cfg.phy2log_ratio):
+                        wt_filename = instrnpath + 'weights/tile' + str(i) + '/core'+str(j)+\
+                                '/mat'+str(k)+'-phy_xbar'+str(l)+'.npy'
+                        if (os.path.exists(wt_filename)):  # check if weights for the xbar exist
+                            print ('wtfile exits: ' + 'tile ' + str(i) +
+                                   'ima ' + str(j) + 'matrix ' + str(k) + 'xbar' + str(l))
+                            wt_temp = np.load(wt_filename)
+                            node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
+                            node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
+
+
diff --git a/test/mvm_ip_test.py b/test/mvm_ip_test.py
new file mode 100644
index 00000000..2e58c9c9
--- /dev/null
+++ b/test/mvm_ip_test.py
@@ -0,0 +1,83 @@
+# API for testing MVM inner product operation
+import sys
+import os
+import numpy as np
+
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, root_dir)
+
+from src.data_convert import *
+import src.ima as ima
+from src.instrn_proto import *
+import include.config as cfg
+
+#change the core and mvmu id'd here:
+# tile_ID = 2
+# core_ID = 1
+# matrix_ID = 0
+
+for tile_ID in range(2, cfg.num_tile):
+    for core_ID in range(cfg.num_ima):
+        for matrix_ID in range(cfg.num_matrix):
+
+            path = 'testasm/mlp/'
+            wt_path = path +'weights/tile'+ str(tile_ID)+ '/core'+ str(core_ID)+ '/' 
+            inst_file = path + 'tile'+ str(tile_ID)+ '/core_imem'+ str(core_ID)+ '.npy'
+            trace_path = 'traces/mlp/'
+            trace_file = trace_path + 'tile'+ str(tile_ID)+ '/ima_trace'+ str(core_ID)+ '.txt'
+            dump_file = trace_path + 'tile'+ str(tile_ID)+ '/memsim.txt'
+
+            datamem_off = cfg.datamem_off # each matrix has 6 memory spaces (1 for f/b, 2 for d)
+            phy2log_ratio = cfg.phy2log_ratio # ratio of physical to logical xbar
+            
+            if (os.path.exists(wt_path)):  # check if weights for the xbar exist
+                # print ('wtfile exits: ' + 'tile' + str(tile_ID) +' core ' + str(core_ID) + 'matrix ' + str(matrix_ID))
+            
+                xbar_input = ['']*cfg.xbar_size
+                xbar_output = ['']*cfg.xbar_size
+                with open(dump_file, 'r') as file:
+                    lines=file.readlines()
+
+                for i in range (len(lines)):
+                    if(lines[i] == 'Xbar Input Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:f contents\n'):
+                        ip_start=i+1
+                    if(lines[i] == 'Xbar Output Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:f contents\n'):
+                        op_start=i+1
+                        ip_end=i-1
+                    if(lines[i] == 'Xbar Input Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:b contents\n'):
+                        op_end=i-1
+
+                # print(ip_start)
+                # print(ip_end)
+                # print(op_start)
+                # print(op_end)
+                # print('Length of input=',ip_end-ip_start+1 )
+                # print('Length of output=',op_end-op_start+1 )
+
+                for j in range (ip_end-ip_start+1):
+                    xbar_input[j] = float(lines[ip_start+j])
+                for j in range (op_end-op_start+1):
+                    xbar_output[j] = float(lines[op_start+j])
+
+                # print(xbar_input)
+                # print(xbar_output)
+
+                ## Testcases for Functionality Debug of MVM (1,2,3,4)
+                ## 1. compare golden output to ima output
+                wt_gold = np.load(wt_path+'log_xbar0.npy')
+                # print(wt_gold)
+                # out_gold = np.dot (ima.dataMem.memfile_float, wt_gold)
+                if(ip_end-ip_start+1 == 128):
+
+                    out_gold = np.dot (np.asarray(xbar_input), wt_gold)
+                    out_exp = np.asarray(xbar_output)
+
+                    # print (out_gold)
+                    # print (out_exp)
+
+                    err = np.tanh(out_gold) - np.tanh(out_exp)
+                    print ("error for tile"+ str(tile_ID) +" core" + str(core_ID) + " matrix" + str(matrix_ID)+ " has mean= " + str(np.average(err)) + " and stdev= " + \
+                            str(np.std(err)))
+                            
+                else:
+                    print("No or less than length 128 input available for tile"+ str(tile_ID) +" core" + str(core_ID) + " matrix" + str(matrix_ID)+".")

From 2014ee324f4e968c17e6453e81099600fd0f5da8 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Tue, 1 Oct 2019 16:41:47 -0400
Subject: [PATCH 02/15] Added modified node dump

---
 src/dpe.py          |  36 +++++-----
 src/ima.py          | 161 +++++++++++++++++++++++++++++++-------------
 src/ima_modules.py  |  10 +--
 src/instrn_proto.py |  19 ++++--
 src/node_dump.py    |  17 +++--
 src/record_xbar.py  |   8 ++-
 6 files changed, 172 insertions(+), 79 deletions(-)

diff --git a/src/dpe.py b/src/dpe.py
index f7009603..1145ee6c 100644
--- a/src/dpe.py
+++ b/src/dpe.py
@@ -55,6 +55,7 @@
 import ima_metrics
 import tile_metrics
 import node_metrics
+import dnn_wt_p
 
 compiler_path = os.path.join(root_dir, "test/testasm/")
 trace_path = os.path.join(root_dir, "test/traces/")
@@ -110,21 +111,23 @@ def run(self, net):
             node_dut.tile_list[inp_tileId].edram_controller.valid[i] = int(
                 inp['valid'][i])
 
-        ## Program DNN weights on the xbars
-        for i in range(1, cfg.num_tile):
-            print ('Programming weights of tile no: ', i)
-            for j in range(cfg.num_ima):
-                print ('Programming ima no: ', j)
-                for k in range(cfg.num_matrix):
-                    for l in range(cfg.phy2log_ratio):
-                        wt_filename = self.instrnpath + 'weights/tile' + str(i) + '/core'+str(j)+\
-                                '/mat'+str(k)+'-phy_xbar'+str(l)+'.npy'
-                        if (os.path.exists(wt_filename)):  # check if weights for the xbar exist
-                            print ('wtfile exits: ' + 'tile ' + str(i) +
-                                   'ima ' + str(j) + 'matrix ' + str(k) + 'xbar' + str(l))
-                            wt_temp = np.load(wt_filename)
-                            node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
-                            node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
+        dnn_wt_p.dnn_wt().prog_dnn_wt(self.instrnpath, node_dut)
+
+        # ## Program DNN weights on the xbars
+        # for i in range(1, cfg.num_tile):
+        #     print ('Programming weights of tile no: ', i)
+        #     for j in range(cfg.num_ima):
+        #         print ('Programming ima no: ', j)
+        #         for k in range(cfg.num_matrix):
+        #             for l in range(cfg.phy2log_ratio):
+        #                 wt_filename = self.instrnpath + 'weights/tile' + str(i) + '/core'+str(j)+\
+        #                         '/mat'+str(k)+'-phy_xbar'+str(l)+'.npy'
+        #                 if (os.path.exists(wt_filename)):  # check if weights for the xbar exist
+        #                     print ('wtfile exits: ' + 'tile ' + str(i) +
+        #                            'ima ' + str(j) + 'matrix ' + str(k) + 'xbar' + str(l))
+        #                     wt_temp = np.load(wt_filename)
+        #                     node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
+        #                     node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
 
         #raw_input ('Press Enter')
 
@@ -143,6 +146,9 @@ def run(self, net):
         if (cfg.debug):
             node_dump(node_dut, self.tracepath)
 
+        if (cfg.xbar_record):
+            record_xbar(node_dut)
+
         # Dump the contents of output tile (DNN output) to output file (output.txt)
         output_file = self.tracepath + 'output.txt'
         fid = open(output_file, 'w')
diff --git a/src/ima.py b/src/ima.py
index 152e7053..40cb8748 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -6,7 +6,7 @@
 # import dependancy files
 import numpy as np
 import math
-import config as cfg
+import include.config as cfg
 #import include.configTest as cfg
 import include.constants as param
 import src.ima_modules as imod
@@ -348,7 +348,7 @@ def do_decode (self, dec_op):
                 assert (self.fd_instrn['r2'] >= datamem_off), 'operand2 for beq comes from data memory'
                 self.de_val1 = self.dataMem.read(self.fd_instrn['r1'])
                 self.de_val2 = self.dataMem.read(self.fd_instrn['r2'])
-
+                
             elif (dec_op == 'alu_int'):
                 self.de_aluop = self.fd_instrn['aluop']
                 self.de_d1 = self.fd_instrn['d1'] # addr for rf
@@ -356,7 +356,7 @@ def do_decode (self, dec_op):
                 assert (self.fd_instrn['r2'] >= datamem_off), 'operand2 for alu_int comes from data memory'
                 self.de_val1 = self.dataMem.read(self.fd_instrn['r1'])
                 self.de_val2 = self.dataMem.read(self.fd_instrn['r2'])
-
+               
             # do nothing for halt/jmp in decode (just propagate to ex when applicable)
 
 
@@ -417,32 +417,61 @@ def execute (self, update_ready, fid):
         #    xbar_addr = matrix_addr % cfg.xbar_size
         #    return [num_matrix, xbar_type, mem_addr, xbar_addr]
 
-        def getXbarAddr (data_addr):
-            # find i or o
-            if (data_addr < cfg.num_matrix*3*cfg.xbar_size):
-                mem_addr = 0
-            else:
-                mem_addr = 128
-
-            # find xbar_addr
-            xbar_addr = data_addr % cfg.xbar_size
-
-            # find matrix_addr
-            num_matrix = (data_addr / (3*cfg.xbar_size)) % cfg.num_matrix
-
-            # find xbar_type
-            temp_val = (data_addr % (cfg.num_matrix*3*cfg.xbar_size))
-            temp_val1 = temp_val % (3*cfg.xbar_size)
-            if (temp_val1 < cfg.xbar_size):
-                xbar_type = 'f'
-            elif (temp_val1 < 2*cfg.xbar_size):
-                xbar_type = 'b'
-            elif (temp_val1 < 3*cfg.xbar_size):
-                xbar_type = 'd'
-            else:
-                assert (1==0), "xbar memory addressing failed"
+        if(not cfg.inference):
+            def getXbarAddr (data_addr):
+                # find i or o
+                if (data_addr < cfg.num_matrix*3*cfg.xbar_size):
+                    mem_addr = 0
+                else:
+                    mem_addr = 128
+
+                # find xbar_addr
+                xbar_addr = data_addr % cfg.xbar_size
+
+                # find matrix_addr
+                num_matrix = (data_addr / (3*cfg.xbar_size)) % cfg.num_matrix
+
+                # find xbar_type
+                temp_val = (data_addr % (cfg.num_matrix*3*cfg.xbar_size))
+                temp_val1 = temp_val % (3*cfg.xbar_size)
+                if (temp_val1 < cfg.xbar_size):
+                    xbar_type = 'f'
+                elif (temp_val1 < 2*cfg.xbar_size):
+                    xbar_type = 'b'
+                elif (temp_val1 < 3*cfg.xbar_size):
+                    xbar_type = 'd'
+                else:
+                    assert (1==0), "xbar memory addressing failed"
+
+                return [num_matrix, xbar_type, mem_addr, xbar_addr]
 
-            return [num_matrix, xbar_type, mem_addr, xbar_addr]
+        if(cfg.inference):
+            def getXbarAddr (data_addr):
+                # find i or o
+                if (data_addr < cfg.num_matrix*1*cfg.xbar_size):
+                    mem_addr = 0
+                else:
+                    mem_addr = 128
+
+                # find xbar_addr
+                xbar_addr = data_addr % cfg.xbar_size
+
+                # find matrix_addr
+                num_matrix = (data_addr / (1*cfg.xbar_size)) % cfg.num_matrix
+
+                # find xbar_type
+                temp_val = (data_addr % (cfg.num_matrix*1*cfg.xbar_size))
+                temp_val1 = temp_val % (1*cfg.xbar_size)
+                if (temp_val1 < cfg.xbar_size):
+                    xbar_type = 'f'
+                # elif (temp_val1 < 2*cfg.xbar_size):
+                #     xbar_type = 'b'
+                # elif (temp_val1 < 3*cfg.xbar_size):
+                #     xbar_type = 'd'
+                else:
+                    assert (1==0), "xbar memory addressing failed"
+
+                return [num_matrix, xbar_type, mem_addr, xbar_addr]
 
         # write to the xbar memory (in/out) space depending on the address
         def writeToXbarMem (self, data_addr, data):
@@ -465,9 +494,12 @@ def readFromXbarMem (self, data_addr):
                 return self.xb_outMem_list[matrix_id][xbar_type].read (xbar_addr)
 
         # Define what to do in execute (done for conciseness)
+        
+        #set_trace()
         def do_execute (self, ex_op, fid):
 
             if (ex_op == 'ld'):
+                # print('In Load')
                 self.ldAccess_done = 0
                 data = self.mem_interface.ramload
                 # based on the address write to dataMem or xb_inMem
@@ -477,15 +509,17 @@ def do_execute (self, ex_op, fid):
                     data = ['0'*cfg.data_width]*self.de_r2
                 for i in range (self.de_r2):
                     dst_addr = data_addr + i
+                    # print('Destination Address in load', dst_addr)
                     if (dst_addr >= datamem_off):
-                        self.dataMem.write (dst_addr, data[i])
+                      self.dataMem.write (dst_addr, data[i])
                     else:
-                        writeToXbarMem (self, dst_addr, data[i])
+                      writeToXbarMem (self, dst_addr, data[i])
 
             elif (ex_op == 'st'): #nothing to be done by ima for st here
                 return 1
 
             elif (ex_op == 'set'):
+                # print('In Set')
                 for i in range (self.de_vec):
                     # write to dataMem - check if addr is a valid datamem address
                     dst_addr = self.de_d1 + i
@@ -495,8 +529,10 @@ def do_execute (self, ex_op, fid):
                         writeToXbarMem (self, dst_addr, self.de_val1)
 
             elif (ex_op == 'cp'):
+                # print('In Copy')
                 for i in range (self.de_vec):
                     src_addr = self.de_r1 + i
+                    # print('Source Address',src_addr)
                     # based on address read from dataMem or xb_inMem
                     if (src_addr >= datamem_off):
                         ex_val1 = self.dataMem.read (src_addr)
@@ -505,6 +541,8 @@ def do_execute (self, ex_op, fid):
 
                     dst_addr = self.de_d1 + i
                     # based on the address write to dataMem or xb_inMem
+                    # print('Destination Address',dst_addr)
+                    # print('data', ex_val1)
                     if (dst_addr >= datamem_off):
                         self.dataMem.write (dst_addr, ex_val1)
                     else:
@@ -539,6 +577,7 @@ def do_execute (self, ex_op, fid):
                     else:
                         writeToXbarMem (self, dst_addr, ex_val1)
 
+
             elif (ex_op == 'alui'):
                 for i in range (self.de_vec):
                     # read val 2 either from data memory or xbar_outmem
@@ -565,8 +604,16 @@ def do_execute (self, ex_op, fid):
                 ## Define function to perform inner-product on specified mvmu
                 # Note: Inner product with shift and add (shift-sub with last bit), works for 2s complement
                 # representation for positive and negative numbers
+                #import pdb; pdb.set_trace();
+
+                 #print('AHA Do Execute')
+                 #for k in range(cfg.num_matrix):
+                  #      for l in range(cfg.phy2log_ratio):
+                   #         print(self.matrix_list[k]['f'][l].get_value())
+
                 def inner_product (mat_id, key):
                     # reset the xb out memory before starting to accumulate
+                    #import pdb; pdb.set_trace()
                     self.xb_outMem_list[mat_id][key].reset ()
 
                     ## Loop to cover all bits of inputs
@@ -581,7 +628,7 @@ def inner_product (mat_id, key):
                         #*************************************** HACK *********************************************
 
                         # convert digital values to analog
-                        out_dac = self.dacArray_list[mat_id][key].propagate_dummy (out_xb_inMem) #pass through
+                        out_dac = self.dacArray_list[mat_id][key].propagate_dummy(out_xb_inMem) #pass through
 
                         # Do for (data_width/xbar_bits) xbars
                         num_xb = cfg.data_width / cfg.xbar_bits
@@ -589,9 +636,13 @@ def inner_product (mat_id, key):
                         out_snh = [[] for x in range(num_xb)]
                         for m in range (num_xb):
                             # compute dot-product
-                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy (out_dac)
+                            #print('check dac/wt')
+                            #print(out_dac)
+                           # print(self.matrix_list[mat_id][key][m].get_value())
+                           # import pdb; pdb.set_trace()
+                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy(out_dac)        
                             # do sampling and hold
-                            out_snh[m] = self.snh_list[mat_id*num_xb+m].propagate_dummy (out_xbar[m])
+                            out_snh[m] = self.snh_list[mat_id*num_xb+m].propagate_dummy(out_xbar[m])
 
                         # each of the num_xb produce shifted bits of output (weight bits have been distributed)
                         for j in xrange (cfg.xbar_size): # this 'for' across xbar outs to adc happens via mux
@@ -600,9 +651,9 @@ def inner_product (mat_id, key):
                             for m in range (num_xb):
                                 # convert from analog to digital
                                 adc_id = (mat_id*num_xb + m) % cfg.num_adc
-                                out_mux1 = self.mux1_list[mat_id].propagate_dummy (out_snh[m][j]) # i is the ith xbar
-                                out_mux2 = self.mux2_list[mat_id % cfg.num_adc].propagate_dummy (out_mux1)
-                                out_adc = self.adc_list[adc_id].propagate_dummy (out_mux2)
+                                out_mux1 = self.mux1_list[mat_id].propagate_dummy(out_snh[m][j]) # i is the ith xbar
+                                out_mux2 = self.mux2_list[mat_id % cfg.num_adc].propagate_dummy(out_mux1)
+                                out_adc = self.adc_list[adc_id].propagate_dummy(out_mux2)
 
                                 # shift and add outputs from difefrent wt_bits
                                 alu_op = 'sna'
@@ -659,19 +710,26 @@ def outer_product (mat_id, key):
                             self.matrix_list[mat_id][key][m].propagate_op_dummy (out_dac1, out_dac2, cfg.lr)
 
                 ## Traverse through the matrices in a core
-                for i in xrange (cfg.num_matrix):
+                if (not cfg.inference):
+                    for i in xrange (cfg.num_matrix):
                     # traverse through f/b/d mvmu(s) for the matrix and execute if applicable
-                    mask_temp = self.de_xb_nma[i]
-                    if (mask_temp[0] == '1'):
+                        mask_temp = self.de_xb_nma[i]
+                        if (mask_temp[0] == '1'):
                         # foward xbar operation
-                        #print ("ima_id: " + str(self.ima_id) + " mat_id: "  + str(i) + " MVM")
-                        inner_product (i, 'f')
-                    if (mask_temp[1] == '1'):
+                            print ("ima_id: " + str(self.ima_id) + " mat_id: "  + str(i) + " MVM")
+                            inner_product (i, 'f')
+                        if (mask_temp[1] == '1'):
                         #print ("ima_id: " + str(self.ima_id) + " mat_id: "  + str(i) + " MTVM")
                         # backward xbar operation
-                        inner_product (i, 'b')
-                    if (mask_temp[2] == '1'):
-                        outer_product (i, 'd')
+                            inner_product (i, 'b')
+                        if (mask_temp[2] == '1'):
+                            outer_product (i, 'd')
+
+                if (cfg.inference):
+                   for i in xrange(cfg.num_matrix):
+                       if self.de_xb_nma[i]:
+                           print ("ima_id: " +str(self.ima_id) + " mat_id: "  +str(i) + " MVM")
+                           inner_product(i,'f')
 
             elif (ex_op == 'crs'):
                 # read weights from delta-xbar, synchronize, write to f/b xbars
@@ -861,6 +919,7 @@ def xbComputeLatency (self, mask):
                 if (st_data_addr >= datamem_off):
                     for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = self.dataMem.read (st_data_addr+num) # modified
+
                 else:
                     for num in range (cfg.edram_buswidth / cfg.data_width): # modified
                         ex_val1[num] = readFromXbarMem (self, st_data_addr+num)
@@ -877,6 +936,7 @@ def xbComputeLatency (self, mask):
                   (self.de_opcode == 'ld' and self.stage_cycle[sId] >= self.stage_latency[sId]-1 and self.ex_vec_count == (self.de_vec-1) and update_ready)):
                 ex_op = self.de_opcode
                 #print ("doing exe stage for op: " + ex_op)
+                #import pdb ; pdb.set_trace()
                 do_execute (self, ex_op, fid)
                 self.stage_done[sId] = 1
                 self.stage_cycle[sId] = 0
@@ -947,6 +1007,17 @@ def pipe_run (self, cycle, fid = ''): # fid is tracefile's id
                 update_ready = self.stage_done[i+1]
 
             # run the stage based on its update_ready argument
+            #print('check weights in pipe_run')
+            #print(imod.xbar.xbar_value)
+           # print('AHA Pipe Run')
+             #for j in range(cfg.num_ima):
+           # for k in range(cfg.num_matrix):
+               # for l in range(cfg.phy2log_ratio):
+                  #  print(self.matrix_list[k]['f'][l].get_value())
+
+
+            #if (i == 2):
+               # import pdb; pdb.set_trace()
             stage_function[i] (update_ready, fid)
 
         # If specified, print thetrace (pipeline stage information)
diff --git a/src/ima_modules.py b/src/ima_modules.py
index d9d263d8..fe14ba0c 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -6,8 +6,8 @@
 import sys
 
 import numpy as np
-import constants as param
-import config as cfg
+import include.constants as param
+import include.config as cfg
 import math
 from data_convert import *
 
@@ -233,14 +233,14 @@ def real2bin (self, inp, num_bits):
         return ('0'*(num_bits - len(bin_value)) + bin_value)
 
     def propagate (self, inp):
-        self.num_access += 1
+        #self.num_access += 1
         assert (type(inp) in [float, np.float32, np.float64]), 'adc input type mismatch (float, np.float32, np.float64 expected)'
         num_bits = self.adc_res
         return self.real2bin (inp, num_bits)
 
     # HACK - until propagate doesn't have correct analog functionality
     def propagate_dummy (self, inp):
-        self.num_access += 1
+        #self.num_access += 1
         return inp
 
 # Doesn't replicate the exact (sample and hold) functionality (just does hold)
@@ -649,7 +649,7 @@ def __init__ (self):
         self.rd_width = 0
         self.addr = 0 # add sent by ima to mem controller
         self.ramload = 0 # data (for LD) sent by edram to ima
-        self.ramstore = 0 # data (for ST) sent by ima to men controller
+        self.ramstore = 0 # data (for ST) sent by ima to mem controller
 
         ## For DEBUG of IMA only - define a memory element and preload some values
         #self.edram = memory (cfg.dataMem_size, 0)
diff --git a/src/instrn_proto.py b/src/instrn_proto.py
index 17a9cf15..2172ad0a 100644
--- a/src/instrn_proto.py
+++ b/src/instrn_proto.py
@@ -6,8 +6,6 @@
 import include.config as cfg
 import include.constants as param
 
-from src.data_convert import *
-
 # Define nstruction prototypes
 # generate load prototype - load data from edram to (datamem/xbinmem)
 phy2log_ratio = cfg.num_bits/cfg.xbar_bits
@@ -92,10 +90,23 @@ def i_alui (aluop, d1, r1, imm, vec = 1):
 #    return i_temp
 
 # TODO: just a hack for now, but eventually opcode will be different in i_mvm and i_train
-def i_mvm (xb_nma = cfg.num_matrix*['000'], r1=0, r2=0): # r1 is displacement, r2 is length of a continuum of data
+# def i_mvm (xb_nma = cfg.num_matrix*['000'], r1=0, r2=0): # r1 is displacement, r2 is length of a continuum of data
+#     xb_nma_str = xb_nma[0]
+#     #xb_nma_str = xb_nma
+#     xb_nma_list = [xb_nma_str[i*3:(i+1)*3] for i in range(len(xb_nma_str)/3)] # split into list of 3-bit masks
+#     assert (len(xb_nma_list) == cfg.num_matrix) # each matrix in a core has a 3-bit mask
+#     i_temp = param.dummy_instrn.copy()
+#     i_temp['opcode'] = 'mvm'
+#     i_temp['r1'] = r1
+#     i_temp['r2'] = r2
+#     i_temp['xb_nma'] = xb_nma_list
+#     return i_temp
+
+#Defined to take the xb_nma as string instead of list of strings
+def i_mvm (xb_nma = cfg.num_matrix*'0', r1=0, r2=0): # r1 is displacement, r2 is length of a continuum of data
     xb_nma_str = xb_nma[0]
     #xb_nma_str = xb_nma
-    xb_nma_list = [xb_nma_str[i*3:(i+1)*3] for i in range(len(xb_nma_str)/3)] # split into list of 3-bit masks
+    xb_nma_list = [xb_nma_str[i]+'00' for i in range(len(xb_nma_str))] # split into list of 3-bit masks
     assert (len(xb_nma_list) == cfg.num_matrix) # each matrix in a core has a 3-bit mask
     i_temp = param.dummy_instrn.copy()
     i_temp['opcode'] = 'mvm'
diff --git a/src/node_dump.py b/src/node_dump.py
index 7154cedb..3af03630 100644
--- a/src/node_dump.py
+++ b/src/node_dump.py
@@ -21,15 +21,18 @@ def mem_dump (fid, memfile, name, node = '', tile_id = ''):
         # to print in float format
         if (memfile[addr] != ''):
             temp_val = fixed2float (memfile[addr], cfg.int_bits, cfg.frac_bits)
-            # use this for debugging/viewing addresses
-            #temp_val = bin2int (memfile[addr], cfg.num_bits)
-        #else: # not printing zero values for ease of view
-        #    temp_val = 0.0
             if (name == 'EDRAM' and (node != '') and (tile_id != '')): # for EDRAM also show counter/valid
                 fid.write ('valid: ' + str(node.tile_list[tile_id].edram_controller.valid[addr]) \
-                        + ' | counter: ' + str(node.tile_list[tile_id].edram_controller.counter[addr]) + ' | ')
+                    + ' | counter: ' + str(node.tile_list[tile_id].edram_controller.counter[addr]) + ' | ')
+                fid.write(str(temp_val) + '\n')
+            # use this for debugging/viewing addresses
+            #temp_val = bin2int (memfile[addr], cfg.num_bits)
+        else: # not printing zero values for ease of view
+            temp_val = 0.0
+        if (name != 'EDRAM'):
             fid.write(str(temp_val) + '\n')
 
+
 def node_dump (node, filepath = ''):
     assert (filepath != ''), 'Debug flag is set, filepath cannot be nil'
     for i in range(len(node.tile_list)):
@@ -53,10 +56,10 @@ def node_dump (node, filepath = ''):
                 for mvmu_t in mvmu_list:
                     # dump the xbar input memory
                     mem_dump (fid, node.tile_list[i].ima_list[j].xb_inMem_list[k][mvmu_t].memfile, \
-                            'Xbar Input Memory: matrixId: ' + str(k) + 'mvmu_type: ' + mvmu_t, 'Xbar Input Memory')
+                            'Xbar Input Memory: imaId:'+ str(j) +' matrixId:' + str(k) + ' mvmu_type:' + mvmu_t, 'Xbar Input Memory')
                     # dump the xbar output memory
                     mem_dump (fid, node.tile_list[i].ima_list[j].xb_outMem_list[k][mvmu_t].memfile, \
-                            'Xbar Output Memory: matrixId: ' + str(k) + 'mvmu_type: ' + mvmu_t, 'Xbar Output Memory')
+                            'Xbar Output Memory: imaId:'+ str(j) +' matrixId:' + str(k) + ' mvmu_type:' + mvmu_t, 'Xbar Output Memory')
 
         fid.close()
 
diff --git a/src/record_xbar.py b/src/record_xbar.py
index 8b8025f1..4dc3448b 100644
--- a/src/record_xbar.py
+++ b/src/record_xbar.py
@@ -9,10 +9,12 @@ def record_xbar (node):
     for i in range (len(node.tile_list)):
         print ('Dumping xbar currents from tile num: ', i)
         for j in range (len(node.tile_list[0].ima_list)):
-            for k in range (len(node.tile_list[0].ima_list[0].xbar_list)):
+            for k in range (len(node.tile_list[0].ima_list[0].matrix_list)):
                 # check for empty list
-                if (node.tile_list[i].ima_list[j].xbar_list[k].xb_record != []):
-                    xbar_currents.append(node.tile_list[i].ima_list[j].xbar_list[k].xb_record)
+                for l in (node.tile_list[i].ima_list[j].matrix_list[k]['f']):
+                    if (l.xb_record != []):
+                        xbar_currents.append(l.xb_record)
+                      #print(l)
 
     xbar_currents_arr = np.asarray (xbar_currents)
 

From 76da76691affb71d0e79f79cad2509734fba5e35 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Mon, 23 Mar 2020 15:24:14 -0400
Subject: [PATCH 03/15] Synchronising with upstream

---
 include/config.py | 122 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 include/config.py

diff --git a/include/config.py b/include/config.py
new file mode 100644
index 00000000..7456665e
--- /dev/null
+++ b/include/config.py
@@ -0,0 +1,122 @@
+# This file contains the configurable parameters in DPE (all hierarchies - IMA, Tile, Node)
+## All user specified parameters are provided by this file only
+
+## Debug - 0 (1): dpe simulation will (won't) produce ima/tile traces while simulating
+cycles_max = 5000000 # Put both these to very large numbers (when design is bug-free)!
+debug = 1
+xbar_record = 1
+inference = 1
+training = not(inference)
+
+## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
+num_bits = 16
+int_bits = 4
+frac_bits = num_bits - int_bits
+
+## IMA configurable parameters (permissible values for each parameter provided here)
+## Instruction generation - affected by xbar_bits, num_xbar, xbar_size.
+# xbar_bits: 2, 4, 6
+# num_xbar: positive integer
+# xbar_size: 32, 64, 128, 256
+# dac_res: positive integer <= num_bits
+# adc_res: positive integer <= num_bits
+# num_adc: positive integer <= num_xbar (doesn't allow more than one ADC per xbar)
+# num_ALU: positive integer
+# dataMem_size: (in Bytes) - 256, 512, 1024, 2048 (affects instrn width, hence capped)
+# instrnMem_size: (in Bytes) - 512, 1024, 2048
+
+# Fixed parameters
+data_width = num_bits # (in bits)
+xbdata_width = data_width # (in bits)
+instrn_width = 48 # (in bits)
+
+# Change here - Specify the IMA parameters here
+xbar_bits = 2
+num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training  8-fw, 8-bw and 16-delta physical xbars.
+xbar_size = 128
+dac_res = 1
+# ADC configuration
+adc_res = 8 # ADC resolution; typically around 4 to 8
+num_adc_per_matrix = 2
+num_adc = num_adc_per_matrix * num_matrix
+
+# The idea is to have different ADC resolution value for each ADC.
+# The number of ADCs is defined by the num_adc property. Currently it is 2 * num_matrix(2) = 4
+# NOTE: Only indexes 0 and 2 are taken into account; 1 and 3 are ignored, because ADCs 1 and 3 are assumed to be equal to 0 and 2.
+adc_res_new = {
+                'matrix_adc_0' : 8,
+                'matrix_adc_1' : 4,
+                'matrix_adc_2' : 8,
+                'matrix_adc_3' : 4
+              }
+
+num_ALU = num_matrix*2
+#dataMem_size = num_matrix*(6*xbar_size) # 4 for 4 input spaces within matrix (1 for f/b each, 2 for d)
+dataMem_size = 2048 # 2048 is larger than num_matrix*(6*xbar_size)
+instrnMem_size = 512 #in entries
+
+# This depends on above parameters
+if (training):
+    datamem_off = xbar_size * (num_matrix*6) # each matrix has 6 memory spaces (1 for f/b, 2 for d)
+
+if (inference):
+    datamem_off = xbar_size * (num_matrix*2) # each matrix has 2 memory spaces ( 1 input Xbar memory and 1 output Xbar memory) 
+
+phy2log_ratio = num_bits / xbar_bits # ratio of physical to logical xbar # value is 8
+lr = 0.25 # learning rate for updates to d-xbar
+
+## Tile configurable parameters (permissible values for each parameter provided here)
+## Instruction generation - affected by num_ima
+# num_ima: positive integer
+# edram buswidth: positive integer <= 16 (actual buswidth - this integer*data_width)
+# edram_size: (in KiloBytes) - 64, 128, 256, 512
+# receive_buffer_depth: 4, 8, 12, 16, 32 (number of edram buffer entries (each entry maps to a virtual tile)) \
+#        puts a cap on the maximum number of tiles that can send data to a tile in next layer
+# receive_buffer_width: edram_buswidth/data_width (Fixed - in terms of number of neurons)
+# tile_instrnMem_size: 256, 512, 1024 (in Bytes)
+
+# Fixed parameters
+instrn_width = 48 # bits (op-2, vtile_id-6, send/receive_width-8, target_addr/counter-16, vw-8, mem_addr-16)
+edram_buswidth = 256 # in bits
+#receive_buffer_depth = 16
+receive_buffer_depth = 150 #set equal to num_tile_max
+receive_buffer_width =  edram_buswidth / num_bits # size of receive buffer entry (in terms of number of neurons)
+
+# Change here - Specify the Tile parameters here
+num_ima = 8
+edram_size = 64 # in Kilobytes (64 KB - same as ISAAC)
+tile_instrnMem_size = 2048 # in entries
+
+## Node configurable parameters (permissible values for each parameter provided here)
+## Instruction generation - affected by num_tile
+# num_tile_compute =  positive integer
+# inj_rate < 0.2 (depends on the mapping)
+# num_port: 4, 8
+
+# Fixed parameters
+# NOC topology: cmesh (n=2, k=4, c=4) - can fit k*n*c tiles
+cmesh_c = 4
+num_bits_tileId =32
+flit_width = 32
+packet_width = edram_buswidth/data_width #in multiples of flits (data considered only - booksim consider address itself)
+# (b bit of address = logN, N is the number of nodes)
+
+# Change here - Specify the Node parameters here
+num_tile_compute = 7 # number of tiles mapped by dnn (leaving input and output tiles)
+num_tile_max = 168.0 # maximum number of tiles per node
+num_inj_max = num_tile_max # [conservative] max number of packet injections that can occur in a cycle (each tile injects a packet into NOC each cycle)
+noc_inj_rate = 0.005
+noc_num_port = 4
+
+## Node parameters - Our way of simulation just assumes all tile in one actual node
+num_node = 1
+
+# Do not change this - total number of tiles
+num_tile = num_node * num_tile_compute + 2 # +1 for first tile (I/O tile) - dummy, others - compute
+
+#Security parameters - Used to verify if the model used is encrypted or authenticated (set by dpe.py)
+#Do not change
+encrypted = False
+authenticated = False
+cypher_name = ''
+cypher_hash = ''

From ea1bbc419e687198eb5b20632bb1707dd1f7ec8d Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Wed, 27 May 2020 00:23:35 -0400
Subject: [PATCH 04/15] Merging changes for digital MVMU energy numbers

---
 src/hw_stats.py    | 251 ++++++++++++++++++++++++++++++++++++++-------
 src/ima.py         |  43 ++++++--
 src/ima_metrics.py |  35 ++++---
 src/ima_modules.py |  86 ++++++++++++++--
 4 files changed, 347 insertions(+), 68 deletions(-)

diff --git a/src/hw_stats.py b/src/hw_stats.py
index 8b38cb2d..6f0df0c3 100644
--- a/src/hw_stats.py
+++ b/src/hw_stats.py
@@ -14,11 +14,47 @@
 
 # Copied from /include/constants.py file
 # Enlists components at core, tile, and node levels
-hw_comp_energy = {'xbar_mvm':param.xbar_ip_pow_dyn*param.xbar_ip_lat, 'xbar_op':param.xbar_op_pow_dyn*param.xbar_op_lat,
-                  'xbar_mtvm':param.xbar_ip_pow_dyn*param.xbar_ip_lat,
+hw_comp_energy = {'xbar_mvm':{  '100':param.xbar_ip_energy_dict['100'], \
+                                '90': param.xbar_ip_energy_dict['90'], \
+                                '80': param.xbar_ip_energy_dict['80'], \
+                                '70': param.xbar_ip_energy_dict['70'], \
+                                '60': param.xbar_ip_energy_dict['60'], \
+                                '50': param.xbar_ip_energy_dict['50'], \
+                                '40': param.xbar_ip_energy_dict['40'], \
+                                '30': param.xbar_ip_energy_dict['30'], \
+                                '20': param.xbar_ip_energy_dict['20'], \
+                                '10': param.xbar_ip_energy_dict['10']}, \
+                  'xbar_op':{  '100': param.xbar_ip_energy_dict['100'], \
+                                '90': param.xbar_ip_energy_dict['90'], \
+                                '80': param.xbar_ip_energy_dict['80'], \
+                                '70': param.xbar_ip_energy_dict['70'], \
+                                '60': param.xbar_ip_energy_dict['60'], \
+                                '50': param.xbar_ip_energy_dict['50'], \
+                                '40': param.xbar_ip_energy_dict['40'], \
+                                '30': param.xbar_ip_energy_dict['30'], \
+                                '20': param.xbar_ip_energy_dict['20'], \
+                                '10': param.xbar_ip_energy_dict['10']}, \
+                  'xbar_mtvm':{ '100':param.xbar_ip_energy_dict['100'], \
+                                '90': param.xbar_ip_energy_dict['90'], \
+                                '80': param.xbar_ip_energy_dict['80'], \
+                                '70': param.xbar_ip_energy_dict['70'], \
+                                '60': param.xbar_ip_energy_dict['60'], \
+                                '50': param.xbar_ip_energy_dict['50'], \
+                                '40': param.xbar_ip_energy_dict['40'], \
+                                '30': param.xbar_ip_energy_dict['30'], \
+                                '20': param.xbar_ip_energy_dict['20'], \
+                                '10': param.xbar_ip_energy_dict['10']}, \
         'xbar_rd':param.xbar_rd_pow_dyn*param.xbar_rd_lat, 'xbar_wr':param.xbar_wr_pow_dyn*param.xbar_wr_lat,
         'dac':param.dac_pow_dyn, 'snh':param.snh_pow_dyn, \
-        'mux1':param.mux_pow_dyn, 'mux2':param.mux_pow_dyn, 'adc':param.adc_pow_dyn, \
+        'mux1':param.mux_pow_dyn, 'mux2':param.mux_pow_dyn, 'adc':{ 'n' :       param.adc_pow_dyn_dict[str(cfg.adc_res)], \
+                                                                    'n/2':      param.adc_pow_dyn_dict[str(cfg.adc_res-1)], \
+                                                                    '3n/4':     param.adc_pow_dyn_dict[str(cfg.adc_res-2)], \
+                                                                    '7n/8':     param.adc_pow_dyn_dict[str(cfg.adc_res-3)], \
+                                                                    '15n/16':   param.adc_pow_dyn_dict[str(cfg.adc_res-4)], \
+                                                                    '31n/32':   param.adc_pow_dyn_dict[str(cfg.adc_res-5)], \
+                                                                    '63n/64':   param.adc_pow_dyn_dict[str(cfg.adc_res-6)], \
+                                                                    '127n/128': param.adc_pow_dyn_dict[str(cfg.adc_res-7)], \
+                                                                    '255n/256': param.adc_pow_dyn_dict[str(cfg.adc_res-7)]}, \
         'alu_div': param.alu_pow_div_dyn, 'alu_mul':param.alu_pow_mul_dyn, \
         'alu_act': param.act_pow_dyn, 'alu_other':param.alu_pow_others_dyn, \
         'alu_sna': param.sna_pow_dyn, \
@@ -38,11 +74,47 @@
 def get_hw_stats (fid, node_dut, cycle):
 
     # List of all components that dissipate power
-    hw_comp_access = {'xbar_mvm':0, 'xbar_op':0,
-                      'xbar_mtvm':0,
-            'xbar_rd':0, 'xbar_wr':0,
+    hw_comp_access = {'xbar_mvm':{  '100':0, \
+                                    '90': 0, \
+                                    '80': 0, \
+                                    '70': 0, \
+                                    '60': 0, \
+                                    '50': 0, \
+                                    '40': 0, \
+                                    '30': 0, \
+                                    '20': 0, \
+                                    '10': 0}, \
+                    'xbar_op':{     '100':0, \
+                                    '90': 0, \
+                                    '80': 0, \
+                                    '70': 0, \
+                                    '60': 0, \
+                                    '50': 0, \
+                                    '40': 0, \
+                                    '30': 0, \
+                                    '20': 0, \
+                                    '10': 0}, \
+                    'xbar_mtvm':{  '100':0,  \
+                                    '90': 0, \
+                                    '80': 0, \
+                                    '70': 0, \
+                                    '60': 0, \
+                                    '50': 0, \
+                                    '40': 0, \
+                                    '30': 0, \
+                                    '20': 0, \
+                                    '10': 0}, \
+            'xbar_rd':0, 'xbar_wr':0, \
             'dac':0, 'snh':0, \
-            'mux1':0, 'mux2':0, 'adc':0, \
+            'mux1':0, 'mux2':0, 'adc':{ 'n' :       0, \
+                                        'n/2':      0, \
+                                        '3n/4':     0, \
+                                        '7n/8':     0, \
+                                        '15n/16':   0, \
+                                        '31n/32':   0, \
+                                        '63n/64':   0, \
+                                        '127n/128': 0, \
+                                        '255n/256': 0}, \
             'alu_div':0, 'alu_mul':0, \
             'alu_act':0, 'alu_other':0, \
             'alu_sna':0, \
@@ -84,32 +156,56 @@ def get_hw_stats (fid, node_dut, cycle):
             for k in range (cfg.num_matrix):
                 for mvmu_t in mvmu_type:
                     # Xbar accesses
-                    for m in range(cfg.phy2log_ratio):
+                    if cfg.MVMU_ver == "Analog":
+                        for m in range(cfg.phy2log_ratio):
+                            if (mvmu_t == 'd'):
+                                for key,value in hw_comp_access['xbar_op'].items():
+                                    hw_comp_access['xbar_op'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
+                            elif (mvmu_t == 'b'):
+                                for key,value in hw_comp_access['xbar_mtvm'].items():
+                                    hw_comp_access['xbar_mtvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
+                            else:
+                                for key,value in hw_comp_access['xbar_mvm'].items():
+                                    hw_comp_access['xbar_mvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
+                            hw_comp_access['xbar_rd'] += \
+                            node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access_rd / (cfg.xbar_size**2)
+                            hw_comp_access['xbar_wr'] += \
+                            node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access_wr / (cfg.xbar_size**2)
+                    
+                    else:
                         if (mvmu_t == 'd'):
-                            hw_comp_access['xbar_op'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access
+                            for key,value in hw_comp_access['xbar_op'].items():
+                                hw_comp_access['xbar_op'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
                         elif (mvmu_t == 'b'):
-                            hw_comp_access['xbar_mtvm'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access
+                            for key,value in hw_comp_access['xbar_mtvm'].items():
+                                hw_comp_access['xbar_mtvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
                         else:
-                            hw_comp_access['xbar_mvm'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access
+                            for key,value in hw_comp_access['xbar_mvm'].items():
+                                hw_comp_access['xbar_mvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
                         hw_comp_access['xbar_rd'] += \
-                        node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access_rd / (cfg.xbar_size**2)
+                        node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access_rd / (cfg.xbar_size**2)
                         hw_comp_access['xbar_wr'] += \
-                        node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access_wr / (cfg.xbar_size**2)
+                        node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access_wr / (cfg.xbar_size**2)
+
                     # Xb_InMem accesses
-                    hw_comp_access['xbInmem_rd'] += node_dut.tile_list[i].ima_list[j].xb_inMem_list[k][mvmu_t].num_access_read
+                    if cfg.MVMU_ver == "Analog":
+                        hw_comp_access['xbInmem_rd'] += node_dut.tile_list[i].ima_list[j].xb_inMem_list[k][mvmu_t].num_access_read
                     hw_comp_access['xbInmem_wr'] += node_dut.tile_list[i].ima_list[j].xb_inMem_list[k][mvmu_t].num_access_write
                     # Xb_OutMem accesses
-                    hw_comp_access['xbOutmem'] += node_dut.tile_list[i].ima_list[j].xb_outMem_list[k][mvmu_t].num_access
+                    if cfg.MVMU_ver == "Analog":
+                        hw_comp_access['xbOutmem'] += node_dut.tile_list[i].ima_list[j].xb_outMem_list[k][mvmu_t].num_access
 
             for k in range(cfg.num_matrix):
                 dac_type = ['f', 'b', 'd_r', 'd_c']
                 for dac_t in dac_type:
                     for l in range(cfg.xbar_size):
-                        hw_comp_access['dac'] += node_dut.tile_list[i].ima_list[j].dacArray_list[k][dac_t].dac_list[l].num_access
+                        if cfg.MVMU_ver == "Analog":
+                            hw_comp_access['dac'] += node_dut.tile_list[i].ima_list[j].dacArray_list[k][dac_t].dac_list[l].num_access
 
-            for k in range (2*cfg.num_matrix*cfg.phy2log_ratio):
-                hw_comp_access['snh'] += (node_dut.tile_list[i].ima_list[j].snh_list[k].num_access * cfg.xbar_size) # each snh is
-                # basically an array of multiple snhs (individual power in constants file must be for one discerete snh)
+            if cfg.MVMU_ver == "Analog":
+                for k in range (2*cfg.num_matrix*cfg.phy2log_ratio):
+                    hw_comp_access['snh'] += (node_dut.tile_list[i].ima_list[j].snh_list[k].num_access * cfg.xbar_size) # each snh is
+                    # basically an array of multiple snhs (individual power in constants file must be for one discrete snh)
 
             for k in range (2*cfg.num_matrix):
                 hw_comp_access['mux1'] += node_dut.tile_list[i].ima_list[j].mux1_list[k].num_access
@@ -117,8 +213,10 @@ def get_hw_stats (fid, node_dut, cycle):
             for k in range (cfg.num_adc):
                 hw_comp_access['mux2'] += node_dut.tile_list[i].ima_list[j].mux1_list[k].num_access
 
-            for k in range (cfg.num_adc):
-                hw_comp_access['adc'] += node_dut.tile_list[i].ima_list[j].adc_list[k].num_access
+            if cfg.MVMU_ver == "Analog":
+                for k in range (cfg.num_adc):
+                    for key,value in hw_comp_access['adc'].items():
+                        hw_comp_access['adc'][key] += node_dut.tile_list[i].ima_list[j].adc_list[k].num_access[key]
 
             for k in range (cfg.num_ALU):
                 hw_comp_access['alu_div'] += node_dut.tile_list[i].ima_list[j].alu_list[k].num_access_div + \
@@ -143,19 +241,92 @@ def get_hw_stats (fid, node_dut, cycle):
     hw_comp_access['tile_control'] = sum_num_cycle_ima
 
     total_energy = 0
+    total_adc_energy = 0
+    total_adc_access = 0
+    total_mvm_energy = 0
+    total_mvm_access = 0
+    total_mtvm_access = 0
+    total_mtvm_energy = 0
+    total_op_access = 0
+    total_op_energy = 0
     # Compute the total dynamic energy consumption
-    for key, value in hw_comp_access.items():
-        total_energy += value * hw_comp_energy[key]
+    if cfg.MVMU_ver == "Analog":
+        for key, value in hw_comp_access.items():
+            if key == 'adc':
+                for key1, value1 in hw_comp_access['adc'].items():
+                    total_energy += value1*hw_comp_energy['adc'][key1]
+                    total_adc_energy +=  value1*hw_comp_energy['adc'][key1] # Not needed for function but for output visualisation
+                    total_adc_access += value1
+            elif key == 'xbar_mvm':
+                for key1, value1 in hw_comp_access['xbar_mvm'].items():
+                    total_energy += value1*hw_comp_energy['xbar_mvm'][key1]
+                    total_mvm_energy +=  value1*hw_comp_energy['xbar_mvm'][key1] # Not needed for function but for output visualisation
+                    total_mvm_access += value1
+            elif key == 'xbar_mtvm':
+                for key1, value1 in hw_comp_access['xbar_mtvm'].items():
+                    total_energy += value1*hw_comp_energy['xbar_mtvm'][key1]
+                    total_mvm_energy +=  value1*hw_comp_energy['xbar_mtvm'][key1] # Not needed for function but for output visualisation
+                    total_mvm_access += value1
+            elif key == 'xbar_op':
+                for key1, value1 in hw_comp_access['xbar_op'].items():
+                    total_energy += value1*hw_comp_energy['xbar_op'][key1]
+                    total_op_energy +=  value1*hw_comp_energy['xbar_op'][key1] # Not needed for function but for output visualisation
+                    total_op_access += value1
+            else:
+                total_energy += value * hw_comp_energy[key]
+    else:
+        for key, value in hw_comp_access.items():
+            if key == 'adc':
+                for key1, value1 in hw_comp_access['adc'].items():
+                    total_energy += value1*hw_comp_energy['adc'][key1]
+                    total_adc_energy +=  value1*hw_comp_energy['adc'][key1] # Not needed for function but for output visualisation
+                    total_adc_access += value1
+            elif key == 'xbar_mvm':
+                for key1, value1 in hw_comp_access['xbar_mvm'].items():
+                    total_energy += (value1/16)*hw_comp_energy['xbar_mvm'][key1]
+                    total_mvm_energy +=  (value1/16)*hw_comp_energy['xbar_mvm'][key1] # Not needed for function but for output visualisation
+                    total_mvm_access += (value1/16)
+            elif key == 'xbar_mtvm':
+                for key1, value1 in hw_comp_access['xbar_mtvm'].items():
+                    total_energy += (value1/16)*hw_comp_energy['xbar_mtvm'][key1]
+                    total_mvm_energy +=  (value1/16)*hw_comp_energy['xbar_mtvm'][key1] # Not needed for function but for output visualisation
+                    total_mvm_access += (value1/16)
+            elif key == 'xbar_op':
+                for key1, value1 in hw_comp_access['xbar_op'].items():
+                    total_energy += (value1/16)*hw_comp_energy['xbar_op'][key1]
+                    total_op_energy +=  (value1/16)*hw_comp_energy['xbar_op'][key1] # Not needed for function but for output visualisation
+                    total_op_access += (value1/16)
+            else:
+                total_energy += value * hw_comp_energy[key]
 
     # Write the dict comp_access & energy proportion to a file for visualization
+    fid.write ("MVMU Type : " + cfg.MVMU_ver + "\n")
     fid.write ('Access and energy distribution of dynamic energy: \n')
     fid.write ('Component                 num_access              percent\n')
     for key, value in hw_comp_access.items():
         # put extra spaces for better visulalization of values
         bl_spc1 = (28-len(key)) * ' '
-        bl_spc2 = (22-len(str(value))) * ' '
-        fid.write (key + bl_spc1 + str(value) + bl_spc2 +\
-                    (str(value*hw_comp_energy[key]/total_energy*100))[0:4] + ' %\n')
+        # bl_spc2 = (22-len(str(value))) * ' '
+        if key == 'adc':
+            bl_spc2 = (22-len(str(total_adc_access))) * ' '
+            fid.write (key + bl_spc1 + str(total_adc_access) + bl_spc2 +\
+                        (str(total_adc_energy/total_energy*100))[0:4] + ' %\n')
+        elif key == 'xbar_mvm':
+            bl_spc2 = (22-len(str(total_mvm_access))) * ' '
+            fid.write (key + bl_spc1 + str(total_mvm_access) + bl_spc2 +\
+                        (str(total_mvm_energy/total_energy*100))[0:4] + ' %\n')
+        elif key == 'xbar_mtvm':
+            bl_spc2 = (22-len(str(total_mtvm_access))) * ' '
+            fid.write (key + bl_spc1 + str(total_mtvm_access) + bl_spc2 +\
+                        (str(total_mtvm_energy/total_energy*100))[0:4] + ' %\n')
+        elif key == 'xbar_op':
+            bl_spc2 = (22-len(str(total_op_access))) * ' '
+            fid.write (key + bl_spc1 + str(total_op_access) + bl_spc2 +\
+                        (str(total_op_energy/total_energy*100))[0:4] + ' %\n')
+        else:
+            bl_spc2 = (22-len(str(value))) * ' '
+            fid.write (key + bl_spc1 + str(value) + bl_spc2 +\
+                        (str(value*hw_comp_energy[key]/total_energy*100))[0:4] + ' %\n')
 
     fid.write ('\n')
 
@@ -168,17 +339,17 @@ def get_hw_stats (fid, node_dut, cycle):
 
     # Write the leakage energy(J), total_energy(J), average_power (mW), peak_power (mW),
     # area (mm2), cycles and time (seconds) to a dict & file
-    metric_dict = {'leakage_energy':0.0,
-            'dynamic_energy':0.0,
-            'total_energy':0.0,
-            'average_power':0.0,
-            'peak_power':0.0,
-            'leakage_power':0.0,
-            'node_area':0.0,
-            'tile_area':0.0,
-            'core_area':0.0,
-            'cycles':0,
-            'time':0.0}
+    metric_dict = { 'leakage_energy':0.0,
+                    'dynamic_energy':0.0,
+                    'total_energy':0.0,
+                    'average_power':0.0,
+                    'peak_power':0.0,
+                    'leakage_power':0.0,
+                    'node_area':0.0,
+                    'tile_area':0.0,
+                    'core_area':0.0,
+                    'cycles':0,
+                    'time':0.0}
 
     metric_dict['leakage_power'] = node_metrics.compute_pow_leak () # in mW
     metric_dict['peak_power'] = node_metrics.compute_pow_peak () # in mW
@@ -187,10 +358,14 @@ def get_hw_stats (fid, node_dut, cycle):
     metric_dict['core_area'] = ima_metrics.compute_area ()# in mm2
     metric_dict['cycles'] = cycle
     metric_dict['time'] = cycle * param.cycle_time * (10**(-9)) # in sec
-    metric_dict['dynamic_energy'] = total_energy * ns * mw # in joule
     #metric_dict['leakage_enegy'] = metric_dict['leakage_power'] * mw * metric_dict['time'] # in joule
     metric_dict['leakage_energy'] =  leakage_energy * ns * mw # in joule
+    # if cfg.MVMU_ver == "Analog":
+    metric_dict['dynamic_energy'] = total_energy * ns * mw # in joule
     metric_dict['total_energy'] = metric_dict['dynamic_energy'] + metric_dict['leakage_energy']
+    # else:
+    #     metric_dict['total_energy'] = total_energy * ns * mw # in joule
+    #     metric_dict['dynamic_energy'] = metric_dict['total_energy']
     metric_dict['average_power'] = metric_dict['total_energy'] / metric_dict['time'] * (10**(3)) # in mW
 
     for key, value in metric_dict.items():
diff --git a/src/ima.py b/src/ima.py
index b7f5c33e..303b73a6 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -39,6 +39,7 @@ def __init__ (self):
         self.matrix_list = [] # list of dicts of mvmu(s)
         self.xb_inMem_list = [] # list of dicts of xbar input memory
         self.xb_outMem_list = [] # list of dicts of xbar output memory
+        self.xbar_inMem_Sparsity_list = [] # list of sparsity of xbar in mem (may have to be removed if found redundant)
 
         for i in xrange(cfg.num_matrix):
             # each matrix represents three mvmus - 1 mvmu for fw, 1 mvmu for bw, 1 mvmu (2X width) for delta
@@ -465,7 +466,7 @@ def getXbarAddr (data_addr):
                 else:
                     assert (1==0), "xbar memory addressing failed"   
 
-	    return [num_matrix, xbar_type, mem_addr, xbar_addr]     
+	    return [num_matrix, xbar_type, mem_addr, xbar_addr]
 
         # write to the xbar memory (in/out) space depending on the address
         def writeToXbarMem (self, data_addr, data):
@@ -594,12 +595,24 @@ def inner_product (mat_id, key):
                     # reset the xb out memory before starting to accumulate
                     self.xb_outMem_list[mat_id][key].reset ()
 
+                    xbar_inMem = self.xb_inMem_list[mat_id][key].read_all ()
+                    # print ("xb_inMem", xbar_inMem)
+                    # calculate sparsity of xbar_in_mem
+                    non_0_val = 0
+                    for i in range(cfg.xbar_size):
+                        if xbar_inMem[i] != '0':
+                            non_0_val = non_0_val +1
+                    sparsity = (cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size
+                    # print ("non_0_val", non_0_val)
+                    # print ("Sparsity", sparsity)
+
                     ## Loop to cover all bits of inputs
                     for k in xrange (cfg.xbdata_width / cfg.dac_res):
                     #for k in xrange (1):
                         # read the values from the xbar's input register
                         out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
-
+                        # print("out_xb_inMem", out_xb_inMem)
+                        
                         #*************************************** HACK *********************************************
                         ###### CAUTION: Not replicated exact "functional" circuit behaviour for analog parts
                         ###### Use propagate (not propagate_hack) for DAC, Xbar, TIA, SNH, ADC when above is done
@@ -614,7 +627,7 @@ def inner_product (mat_id, key):
                         out_snh = [[] for x in range(num_xb)]
                         for m in range (num_xb):
                             # compute dot-product
-                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy(out_dac)        
+                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy(non_0_val, out_dac)        
                             # do sampling and hold
                             out_snh[m] = self.snh_list[mat_id*num_xb+m].propagate_dummy(out_xbar[m])
 
@@ -627,7 +640,7 @@ def inner_product (mat_id, key):
                                 adc_id = (mat_id*num_xb + m) % cfg.num_adc
                                 out_mux1 = self.mux1_list[mat_id].propagate_dummy(out_snh[m][j]) # i is the ith xbar
                                 out_mux2 = self.mux2_list[mat_id % cfg.num_adc].propagate_dummy(out_mux1)
-                                out_adc = self.adc_list[adc_id].propagate_dummy(out_mux2)
+                                out_adc = self.adc_list[adc_id].propagate_dummy(out_mux2, non_0_val)
 
                                 # shift and add outputs from difefrent wt_bits
                                 alu_op = 'sna'
@@ -781,7 +794,7 @@ def xbComputeLatency (self, mask):
                 lat_temp = 0
                 # We assume all ADCs in a matrix has the same resolution
                 adc_idx = idx*cfg.num_adc_per_matrix
-                lat_temp = self.adc_list[adc_idx].getLatency()
+                lat_temp = self.adc_list[adc_idx].getLatency(cfg.xbar_size)
                 '''
                 print("adc_idx", adc_idx)
                 print("lat_temp", lat_temp)
@@ -842,7 +855,25 @@ def xbComputeLatency (self, mask):
 
                 elif (ex_op == 'mvm'):
                     mask_temp = self.de_xb_nma
-                    self.stage_latency[sId] = xbComputeLatency (self, mask_temp) # mask tells which of ip/op or both is occurring
+                    if (cfg.MVMU_ver == "Analog"):
+                        self.stage_latency[sId] = xbComputeLatency (self, mask_temp) # mask tells which of ip/op or both is occurring
+                    else:
+                        mvm_lat_temp = 0
+                        if (cfg.inference):
+                            for p in xrange(cfg.num_matrix):
+                                if self.de_xb_nma[p]:
+                                    xbar_inMem = self.xb_inMem_list[p]['f'].read_all ()
+                                    non_0_val = 0
+                                    for i in range(cfg.xbar_size):
+                                        if xbar_inMem[i] != '0':
+                                            non_0_val = non_0_val +1
+                                    print ("non_0_val", non_0_val)
+                                    nval_percent = int(non_0_val*100/128)
+                                    if (nval_percent%10!=0):
+                                        nval_percent = nval_percent + 10
+                                    mvm_lat_temp += param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)][str(nval_percent)]
+                        self.stage_latency[sId] = mvm_lat_temp
+                    print("MVM Latency", self.stage_latency[sId])
 
                 # Needs update - use xbar serial read latency
                 elif (ex_op == 'crs'):
diff --git a/src/ima_metrics.py b/src/ima_metrics.py
index e68dd112..0f3a1ea1 100644
--- a/src/ima_metrics.py
+++ b/src/ima_metrics.py
@@ -11,12 +11,13 @@
 def compute_area (): #in mm2
     area = 0.0
     area += (cfg.num_matrix*3) * param.xbar_inMem_area # xbar_inMem one each for f/b/d xbars
-    area += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_area # 1 dac for input of f/b/d xbars, each phy xbar in d-xbar will have a dac_array, hence 8
+    if cfg.MVMU_ver == "Analog":
+        area += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_area # 1 dac for input of f/b/d xbars, each phy xbar in d-xbar will have a dac_array, hence 8
+        area += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_area # snh for f/b xbars
+        area += (cfg.num_matrix*2) * param.sna_area # sna for one each f/b xbars
+        area += cfg.num_adc * param.adc_area # adc
+        area += (cfg.num_matrix*3) * param.xbar_outMem_area # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
     area += (cfg.num_matrix*4) * param.xbar_area # d-xbar has 2X xbars than f/b
-    area += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_area # snh for f/b xbars
-    area += cfg.num_adc * param.adc_area # adc
-    area += (cfg.num_matrix*2) * param.sna_area # sna for one each f/b xbars
-    area += (cfg.num_matrix*3) * param.xbar_outMem_area # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
     area += param.instrnMem_area # instrnMem
     area += param.dataMem_area # dataMem
     area += param.alu_area # alu
@@ -35,12 +36,13 @@ def compute_area (): #in mm2
 def compute_pow_leak ():
     leak_pow = 0.0
     leak_pow += (cfg.num_matrix*3) * param.xbar_inMem_pow_leak # xbar_inMem
-    leak_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_leak # dac
+    if cfg.MVMU_ver == "Analog":
+        leak_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_leak # dac
+        leak_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_leak # snh
+        leak_pow += cfg.num_adc * param.adc_pow_leak # adc
+        leak_pow += (cfg.num_matrix*2) * param.sna_pow_leak # sna
+        leak_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_leak # xbar_outMem
     leak_pow += (cfg.num_matrix*4) * param.xbar_pow_leak # xbar area
-    leak_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_leak # snh
-    leak_pow += cfg.num_adc * param.adc_pow_leak # adc
-    leak_pow += (cfg.num_matrix*2) * param.sna_pow_leak # sna
-    leak_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_leak # xbar_outMem
     leak_pow += param.instrnMem_pow_leak # instrnMem
     leak_pow += param.dataMem_pow_leak # dataMem
     leak_pow += param.alu_pow_leak # alu
@@ -51,15 +53,16 @@ def compute_pow_leak ():
 # Peak dynamic power (assumes all components are being accessed in each cycle)
 def compute_pow_dyn ():
     dyn_pow = 0.0
-    dyn_pow += (cfg.num_matrix*3) * (param.xbar_inMem_pow_dyn_write + param.xbar_inMem_pow_dyn_read/cfg.xbar_size) # xbar_inMem - num_xbar * dac_res bits will be
+    if cfg.MVMU_ver == "Analog":
+        dyn_pow += (cfg.num_matrix*3) * (param.xbar_inMem_pow_dyn_write + param.xbar_inMem_pow_dyn_read/cfg.xbar_size) # xbar_inMem - num_xbar * dac_res bits will be
                     #   read from xb_inMem in an interval that equals xbar_access time
     # dyn_pow += cfg.num_xbar/2 * 1.2 # (adding dyn pow the way issac does for comparison)
-    dyn_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_dyn # dac
+        dyn_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_dyn # dac
+        dyn_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_dyn # snh
+        dyn_pow += cfg.num_adc * param.adc_pow_dyn # adc
+        dyn_pow += (cfg.num_matrix*2) * param.sna_pow_dyn # sna
+        dyn_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_dyn # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
     dyn_pow += (cfg.num_matrix*4) * param.xbar_ip_pow_dyn # xbar ip power considred as ip>op power
-    dyn_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_dyn # snh
-    dyn_pow += cfg.num_adc * param.adc_pow_dyn # adc
-    dyn_pow += (cfg.num_matrix*2) * param.sna_pow_dyn # sna
-    dyn_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_dyn # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
     dyn_pow += param.instrnMem_pow_dyn # instrnMem
     dyn_pow += param.dataMem_pow_dyn # dataMem
     dyn_pow += param.alu_pow_dyn # alu
diff --git a/src/ima_modules.py b/src/ima_modules.py
index 7a574817..1aa6f3c3 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -15,7 +15,18 @@
 class xbar (object):
     def __init__ (self, xbar_size, xbar_value= 'nil' ):
         # define num_accesses for different operations
-        self.num_access = 0 # parallel reads (inner-product)
+        # parallel reads (inner-product)
+        self.num_access = { '100':0, \
+                            '90': 0, \
+                            '80': 0, \
+                            '70': 0, \
+                            '60': 0, \
+                            '50': 0, \
+                            '40': 0, \
+                            '30': 0, \
+                            '20': 0, \
+                            '10': 0} \
+
         self.num_access_rd = 0 # serial reads
         self.num_access_wr = 0 # serial writes
 
@@ -90,11 +101,36 @@ def propagate (self, inp = 'nil'):
         return out
 
     # HACK - until propagate doesn't have correct analog functionality
-    def propagate_dummy (self, inp = 'nil'):
+    def propagate_dummy (self, n_val, inp = 'nil'):
         # data input is list of bit strings (of length dac_res) - fixed point binary
         assert (inp != 'nil'), 'propagate needs a non-nil input'
         assert (len(inp) == self.xbar_size), 'xbar input size mismatch'
-        self.num_access += 1
+        
+        #Modification to accommodate sparsity and digital crossbars
+        if cfg.MVMU_ver == "Analog":
+            self.num_access['100'] += 1
+        else:
+            if n_val>cfg.xbar_size*9/10.0:
+                self.num_access['100'] += 1
+            elif n_val>cfg.xbar_size*8/10.0:
+                self.num_access['90'] += 1
+            elif n_val>cfg.xbar_size*7/10.0:
+                self.num_access['80'] += 1
+            elif n_val>cfg.xbar_size*6/10.0:
+                self.num_access['70'] += 1
+            elif n_val>cfg.xbar_size*5/10.0:
+                self.num_access['60'] += 1
+            elif n_val>cfg.xbar_size*4/10.0:
+                self.num_access['50'] += 1
+            elif n_val>cfg.xbar_size*3/10.0:
+                self.num_access['40'] += 1
+            elif n_val>cfg.xbar_size*2/10.0:
+                self.num_access['30'] += 1
+            elif n_val>cfg.xbar_size*1/10.0:
+                self.num_access['20'] += 1
+            else:
+                self.num_access['10'] += 1
+
         # convert input from fixed point binary (string) to float
         inp_float = [0.0] * self.xbar_size
         for i in range(len(inp)):
@@ -222,14 +258,23 @@ def propagate_dummy (self, inp_list):
 class adc (object):
     def __init__ (self, adc_res):
         # define num_access
-        self.num_access = 0
+        self.num_access = { 'n' :       0,
+                            'n/2':      0,
+                            '3n/4':     0,
+                            '7n/8':     0,
+                            '15n/16':   0,
+                            '31n/32':   0,
+                            '63n/64':   0,
+                            '127n/128': 0,
+                            '255n/256': 0}
 
         # define latency
-        self.latency = param.adc_lat_dict[str(adc_res)]
+        # self.latency = param.adc_lat_dict[str(adc_res)]
 
         self.adc_res = adc_res
 
-    def getLatency (self):
+    def getLatency (self, n_val):
+        self.latency = param.adc_lat_dict[str(self.adc_res)]
         return self.latency
 
     def real2bin (self, inp, num_bits):
@@ -246,8 +291,26 @@ def propagate (self, inp):
         return self.real2bin (inp, num_bits)
 
     # HACK - until propagate doesn't have correct analog functionality
-    def propagate_dummy (self, inp):
-        #self.num_access += 1
+    def propagate_dummy (self, inp, n_val):
+        if n_val>cfg.xbar_size/2.0:
+            self.num_access['n'] += 1
+        elif n_val>cfg.xbar_size/4.0:
+            self.num_access['n/2'] += 1
+        elif n_val>cfg.xbar_size/8.0:
+            self.num_access['3n/4'] += 1
+        elif n_val>cfg.xbar_size/16.0:
+            self.num_access['7n/8'] += 1
+        elif n_val>cfg.xbar_size/32.0:
+            self.num_access['15n/16'] += 1
+        elif n_val>cfg.xbar_size/64.0:
+            self.num_access['31n/32'] += 1
+        elif n_val>cfg.xbar_size/128.0:
+            self.num_access['63n/64'] += 1
+        elif n_val>cfg.xbar_size/256.0:
+            self.num_access['127n/128'] += 1
+        else:
+            self.num_access['255n/256'] += 1
+
         return inp
 
 # Doesn't replicate the exact (sample and hold) functionality (just does hold)
@@ -512,6 +575,13 @@ def read (self, num_bits):
             out_list.append(value[-1*num_bits:])
         return out_list
 
+    def read_all (self):
+        out_list = []
+        for i in xrange(self.xbar_size):
+            value = self.memfile[i]
+            out_list.append(value)
+        return out_list
+
     def write (self, addr, data):
         self.num_access_write += 1
         assert (type(addr) == int), 'addr type should be int'

From 3645ad129395398de79d5793bb85471a8e6de26d Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Wed, 27 May 2020 00:24:41 -0400
Subject: [PATCH 05/15] Merging changes for digital MVMU energy numbers

---
 include/config.py    |   8 +-
 include/constants.py | 226 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 230 insertions(+), 4 deletions(-)

diff --git a/include/config.py b/include/config.py
index 7456665e..80567c82 100644
--- a/include/config.py
+++ b/include/config.py
@@ -5,9 +5,15 @@
 cycles_max = 5000000 # Put both these to very large numbers (when design is bug-free)!
 debug = 1
 xbar_record = 1
-inference = 1
+inference = 1       # For training change this flag
 training = not(inference)
 
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Digital_V2" 
+
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
 int_bits = 4
diff --git a/include/constants.py b/include/constants.py
index 4246f10e..ee931fc0 100644
--- a/include/constants.py
+++ b/include/constants.py
@@ -119,6 +119,182 @@
 xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat
 xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat
 
+## Adding power area and latency for Digital MVMU V1 and V2
+Digital_xbar_lat_dict = {'Digital_V1': {'32': { '100':130, # first indexed by version then by xbar_size and then by % of non_0 values
+                                                '90': 114, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
+                                                '80': 102, 
+                                                '70': 90, 
+                                                '60': 78, 
+                                                '50': 66, 
+                                                '40': 50, 
+                                                '30': 38, 
+                                                '20': 26, 
+                                                '10': 14},      
+                                        '64': { '100':258, 
+                                                '90': 230,
+                                                '80': 206, 
+                                                '70': 178, 
+                                                '60': 154, 
+                                                '50': 130, 
+                                                '40': 102, 
+                                                '30': 78, 
+                                                '20': 50, 
+                                                '10': 26},      
+                                        '128':{ '100':514,
+                                                '90': 462, 
+                                                '80': 410, 
+                                                '70': 358, 
+                                                '60': 306, 
+                                                '50': 258, 
+                                                '40': 206, 
+                                                '30': 154, 
+                                                '20': 102, 
+                                                '10': 50},     
+                                        '256':{ '100':1026, 
+                                                '90': 922,
+                                                '80': 818, 
+                                                '70': 718, 
+                                                '60': 614, 
+                                                '50': 514, 
+                                                '40': 410, 
+                                                '30': 306, 
+                                                '20': 206, 
+                                                '10': 102}},
+                         'Digital_V2': {'32' :{ '100':130,
+                                                '90': 118,
+                                                '80': 109, 
+                                                '70': 100, 
+                                                '60': 91, 
+                                                '50': 82, 
+                                                '40': 70, 
+                                                '30': 61, 
+                                                '20': 52, 
+                                                '10': 43},  
+                                        '64' :{ '100':258,
+                                                '90': 237,  
+                                                '80': 219, 
+                                                '70': 198, 
+                                                '60': 180, 
+                                                '50': 162, 
+                                                '40': 141, 
+                                                '30': 123, 
+                                                '20': 102, 
+                                                '10': 84},
+                                        '128':{ '100':514,
+                                                '90': 475,  
+                                                '80': 436, 
+                                                '70': 397, 
+                                                '60': 358, 
+                                                '50': 322, 
+                                                '40': 283, 
+                                                '30': 244, 
+                                                '20': 205, 
+                                                '10': 166},
+                                        '256':{ '100':1026,
+                                                '90': 948,  
+                                                '80': 870, 
+                                                '70': 795, 
+                                                '60': 717, 
+                                                '50': 642, 
+                                                '40': 564, 
+                                                '30': 486, 
+                                                '20': 411, 
+                                                '10': 333}}}
+
+Digital_xbar_area_dict = {'Digital_V1': { '32' : 0.16977,   # first indexed by version then by xbar_size
+                                          '64' : 0.27701,
+                                          '128': 1.74020,
+                                          '256': 7.29481},
+                          'Digital_V2': { '32' : 0.16949,  
+                                          '64' : 0.27645,
+                                          '128': 1.73908,
+                                          '256': 7.29257}}
+
+Digital_xbar_energy_dict = {'Digital_V1':{'32':{'100':5261.43744,  # first indexed by version then by xbar_size and then by % of non_0 values
+                                                '90': 4613.872832, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
+                                                '80': 4128.199376, # in pJ
+                                                '70': 3642.52592, 
+                                                '60': 3156.852464, 
+                                                '50': 2671.179008, 
+                                                '40': 2023.6144, 
+                                                '30': 1537.940944, 
+                                                '20': 1052.267488, 
+                                                '10': 566.594032},      
+                                          '64':{'100':20844.00864, 
+                                                '90': 18581.86252,
+                                                '80': 16642.88014, 
+                                                '70': 14380.73402, 
+                                                '60': 12441.75163, 
+                                                '50': 10502.76925, 
+                                                '40': 8240.623131, 
+                                                '30': 6301.640745, 
+                                                '20': 4039.494628, 
+                                                '10': 2100.512242},      
+                                        '128':{'100': 83018.14464,
+                                                '90': 74619.39346, 
+                                                '80': 66220.64228, 
+                                                '70': 57821.8911, 
+                                                '60': 49423.13992, 
+                                                '50': 41670.44653, 
+                                                '40': 33271.69535, 
+                                                '30': 24872.94417, 
+                                                '20': 16474.19299, 
+                                                '10': 8075.441812},     
+                                        '256':{'100': 331639.0958, 
+                                                '90': 298022.5268,
+                                                '80': 264405.9578, 
+                                                '70': 232082.3337, 
+                                                '60': 198465.7647, 
+                                                '50': 166142.1407, 
+                                                '40': 132525.5717, 
+                                                '30': 98909.00265, 
+                                                '20': 66585.3786, 
+                                                '10': 32968.80959}},
+                            'Digital_V2':{'32':{'100':4466.744263,
+                                                '90': 4053.765767,
+                                                '80': 3744.031895, 
+                                                '70': 3434.298023, 
+                                                '60': 3124.564151, 
+                                                '50': 2814.830279, 
+                                                '40': 2401.851783, 
+                                                '30': 2092.117911, 
+                                                '20': 1782.384039, 
+                                                '10': 1472.650167},  
+                                          '64':{'100':17654.27322,
+                                                '90': 16216.06481,  
+                                                '80': 14983.31474, 
+                                                '70': 13545.10633, 
+                                                '60': 12312.35626, 
+                                                '50': 11079.6062, 
+                                                '40': 9641.397787, 
+                                                '30': 8408.647721, 
+                                                '20': 6970.439311, 
+                                                '10': 5737.689245},
+                                        '128':{'100': 70237.24474,
+                                                '90': 64904.19392,  
+                                                '80': 59571.14309, 
+                                                '70': 54238.09226, 
+                                                '60': 48905.04144, 
+                                                '50': 43982.22529, 
+                                                '40': 38649.17446, 
+                                                '30': 33316.12363, 
+                                                '20': 27983.07281, 
+                                                '10': 22650.02198},
+                                        '256':{'100': 280471.5471,
+                                                '90': 259128.5,  
+                                                '80': 237785.453, 
+                                                '70': 217263.2925, 
+                                                '60': 195920.2454, 
+                                                '50': 175398.0849, 
+                                                '40': 154055.0379, 
+                                                '30': 132711.9909, 
+                                                '20': 112189.8303, 
+                                                '10': 90846.78326}}}
+Digital_xbar_pow_leak_dict = {  '32' :5.575928889,          #in mW 
+                                '64' :12.82466678,
+                                '128':40.24037556,
+                                '256':120.2098611}
+
 # DAC - Discuss exact values with ISSAC authors
 dac_lat_dict = {'1' : 1,
                 '2' : 1,
@@ -145,27 +321,44 @@
                  '16': 1.67 * 10**(-7)}
 
 # ADC - Discuss exact values with ISSAC authors
+# ADC Values for including sparsity
 adc_lat_dict = {'1' : 12.5,
                 '2' : 25,
+                '3' : 37.5,
                 '4' : 50,
+                '5' : 62.5,
+                '6' : 75,
+                '7' : 87.5,
                 '8' : 100,
                 '16': 200}
 
 adc_pow_dyn_dict = {'1' : 0.225,
                     '2' : 0.45,
+                    '3' : 0.675,
                     '4' : 0.9,
+                    '5' : 1.125,
+                    '6' : 1.35,
+                    '7' : 1.575,
                     '8' : 1.8,
                     '16': 3.6}
 
 adc_pow_leak_dict = {'1' : 0.025,
                      '2' : 0.05,
+                     '3' : 0.075,
                      '4' : 0.1,
+                     '5' : 0.125,
+                     '6' : 0.15,
+                     '7' : 0.175,
                      '8' : 0.2,
                      '16': 0.4}
 
 adc_area_dict = {'1' : 0.0012,
                  '2' : 0.0012,
+                 '3' : 0.0012,
                  '4' : 0.0012,
+                 '5' : 0.0012,
+                 '6' : 0.0012,
+                 '7' : 0.0012,
                  '8' : 0.0012,
                  '16': 0.0012}
 
@@ -319,7 +512,19 @@
 
 # Chosen latency based on config file - only for components whose latency is parameter dependent
 #xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
-xbar_ip_lat = xbar_ip_lat
+# xbar_innerp_lat_dict = {'32':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
+#                         '64':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
+#                         '128':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
+#                         '256':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}}
+xbar_ip_lat_dict = {'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+if cfg.MVMU_ver == "Analog":
+      for key, value in xbar_ip_lat_dict.items():
+            xbar_ip_lat_dict[key] = xbar_ip_lat
+else:
+      xbar_ip_lat_dict = Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
+print("xbar_ip_lat_dict",xbar_ip_lat_dict)
+
+
 xbar_op_lat = xbar_op_lat
 xbar_rd_lat = xbar_rd_lat
 xbar_wr_lat = xbar_wr_lat
@@ -332,7 +537,10 @@
 dataMem_lat =  dataMem_lat_dict[str(cfg.dataMem_size)]
 
 # Chosen area based on config file - only for components whose latency is parameter dependent
-xbar_area = xbar_area_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
+if cfg.MVMU_ver == "Analog":
+        xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)]
+else:
+        xbar_area = Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 dac_area = dac_area_dict [str(cfg.dac_res)]
 adc_area = adc_area_dict [str(cfg.adc_res)]
 xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)]
@@ -354,8 +562,20 @@
 instrnMem_pow_dyn =  instrnMem_pow_dyn_dict[str(cfg.instrnMem_size)] * math.sqrt(8) #area scaling for 8 bytes per instruction
 dataMem_pow_dyn =  dataMem_pow_dyn_dict[str(cfg.dataMem_size)]
 
+# Energy
+xbar_ip_energy_dict = {'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+if cfg.MVMU_ver == "Analog":
+        for key,value in xbar_ip_energy_dict.items():
+                xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn
+else:
+        xbar_ip_energy_dict = Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
+print('xbar_ip_energy_dict', xbar_ip_energy_dict)
+
 # Chosen leak_power based on config file - only for components whose latency is parameter dependent
-xbar_pow_leak = 0
+if cfg.MVMU_ver == "Analog":
+        xbar_pow_leak = 0
+else:
+        xbar_pow_leak = Digital_xbar_pow_leak_dict[str(cfg.xbar_size)]
 dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)]
 adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)]
 xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)]

From b231a28f6f7b048558855572e4123ef08d672808 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Wed, 10 Jun 2020 16:51:53 -0400
Subject: [PATCH 06/15] Corrected area computation for ima

---
 src/ima_metrics.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/ima_metrics.py b/src/ima_metrics.py
index 0f3a1ea1..f05f6087 100644
--- a/src/ima_metrics.py
+++ b/src/ima_metrics.py
@@ -10,14 +10,16 @@
 # Area is computed as the summation of all component area (doesn't consider physical layout)
 def compute_area (): #in mm2
     area = 0.0
-    area += (cfg.num_matrix*3) * param.xbar_inMem_area # xbar_inMem one each for f/b/d xbars
     if cfg.MVMU_ver == "Analog":
         area += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_area # 1 dac for input of f/b/d xbars, each phy xbar in d-xbar will have a dac_array, hence 8
         area += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_area # snh for f/b xbars
         area += (cfg.num_matrix*2) * param.sna_area # sna for one each f/b xbars
         area += cfg.num_adc * param.adc_area # adc
         area += (cfg.num_matrix*3) * param.xbar_outMem_area # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
-    area += (cfg.num_matrix*4) * param.xbar_area # d-xbar has 2X xbars than f/b
+        area += (cfg.num_matrix*4) * cfg.phy2log_ratio * param.xbar_area # d-xbar has 2X xbars than f/b
+    else:
+        area += (cfg.num_matrix*2) * param.xbar_area # d-xbars are not needed in Digital MVMUs, so only the f/b xbars are counted
+    area += (cfg.num_matrix*3) * param.xbar_inMem_area # xbar_inMem one each for f/b/d xbars
     area += param.instrnMem_area # instrnMem
     area += param.dataMem_area # dataMem
     area += param.alu_area # alu

From b34045f8c7436c98d3223ea91d905e7977eb173a Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Thu, 11 Jun 2020 22:09:00 -0400
Subject: [PATCH 07/15] Merging changes for digital MVMUs

---
 include/config.py                     | 8 +++++---
 include/example-configs/config-cnn.py | 6 ++++++
 include/example-configs/config-mlp.py | 6 ++++++
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/config.py b/include/config.py
index 46226f52..086b2416 100644
--- a/include/config.py
+++ b/include/config.py
@@ -5,14 +5,14 @@
 cycles_max = 5000000 # Put both these to very large numbers (when design is bug-free)!
 debug = 1
 xbar_record = 1
-inference = 1       # For training change this flag
+inference = 1
 training = not(inference)
 
 ## Variable to define the type of MVMU
 # One of "Analog", "Digital_V1" or "Digital_V2" 
 # Digital_V1 has compressed inputs (Data+Offset style)
 # Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
-MVMU_ver = "Digital_V2" 
+MVMU_ver = "Digital_V2"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
@@ -36,7 +36,9 @@
 data_width = num_bits # (in bits)
 xbdata_width = data_width # (in bits)
 instrn_width = 48 # (in bits)
-
+# Input and Weight parameters
+input_prec = 16
+weight_width = 16
 # Change here - Specify the IMA parameters here
 xbar_bits = 2
 num_matrix = 2 # each matrix is 1-fw logical xbar for inference and 1-fw, 1-bw, and 1 delta logical xbar for training. Each logical xbar for inference is 8-fw physical xbar and for training  8-fw, 8-bw and 16-delta physical xbars.
diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index d60c657e..10e2e418 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -8,6 +8,12 @@
 inference = 1
 training = not(inference)
 
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Digital_V2"
+
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
 int_bits = 4
diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py
index ee3647fa..086b2416 100644
--- a/include/example-configs/config-mlp.py
+++ b/include/example-configs/config-mlp.py
@@ -8,6 +8,12 @@
 inference = 1
 training = not(inference)
 
+## Variable to define the type of MVMU
+# One of "Analog", "Digital_V1" or "Digital_V2" 
+# Digital_V1 has compressed inputs (Data+Offset style)
+# Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
+MVMU_ver = "Digital_V2"
+
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
 int_bits = 4

From 15f1748a754aee27b496dfcb3530725116ded7c4 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Fri, 12 Jun 2020 00:08:33 -0400
Subject: [PATCH 08/15] Merging changes for digital MVMUs

---
 src/ima_metrics.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/ima_metrics.py b/src/ima_metrics.py
index f05f6087..8cfb1c4e 100644
--- a/src/ima_metrics.py
+++ b/src/ima_metrics.py
@@ -37,14 +37,16 @@ def compute_area (): #in mm2
 # Leakage power is computed as sum of leakage powers of all components
 def compute_pow_leak ():
     leak_pow = 0.0
-    leak_pow += (cfg.num_matrix*3) * param.xbar_inMem_pow_leak # xbar_inMem
     if cfg.MVMU_ver == "Analog":
         leak_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_leak # dac
         leak_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_leak # snh
         leak_pow += cfg.num_adc * param.adc_pow_leak # adc
         leak_pow += (cfg.num_matrix*2) * param.sna_pow_leak # sna
         leak_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_leak # xbar_outMem
-    leak_pow += (cfg.num_matrix*4) * param.xbar_pow_leak # xbar area
+        leak_pow += (cfg.num_matrix*4) * param.xbar_pow_leak # xbar area
+    else:
+        leak_pow += (cfg.num_matrix*2) * param.xbar_pow_leak # xbar area
+    leak_pow += (cfg.num_matrix*3) * param.xbar_inMem_pow_leak # xbar_inMem
     leak_pow += param.instrnMem_pow_leak # instrnMem
     leak_pow += param.dataMem_pow_leak # dataMem
     leak_pow += param.alu_pow_leak # alu

From 4b308d348af9e51e8258c2d04705458b1eb7b87f Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Fri, 12 Jun 2020 00:36:21 -0400
Subject: [PATCH 09/15] Merging changes for digital MVMUs

---
 src/ima_modules.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ima_modules.py b/src/ima_modules.py
index 84c16cd1..922e61af 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -6,8 +6,8 @@
 import sys
 
 import numpy as np
-import include.constants as param
-import include.config as cfg
+import constants as param
+import config as cfg
 import math
 from data_convert import *
 

From 1f053a058afcc8a1c64ee76f031c5744b685150c Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 01:21:11 -0400
Subject: [PATCH 10/15] Committing after suggested changes

---
 include/config.py                     |   3 +-
 include/constants.py                  | 199 ++------------------------
 include/constants_digital.py          | 175 ++++++++++++++++++++++
 include/example-configs/config-cnn.py |   1 +
 include/example-configs/config-mlp.py |   1 +
 src/dnn_wt_p.py                       |   1 -
 src/hw_stats.py                       | 144 ++++---------------
 src/ima.py                            |  72 +++++-----
 src/ima_metrics.py                    |  14 +-
 src/ima_modules.py                    |  96 +++++--------
 src/node_dump.py                      |   4 +-
 test/mvm_ip_test.py                   |  83 -----------
 12 files changed, 299 insertions(+), 494 deletions(-)
 create mode 100644 include/constants_digital.py
 delete mode 100644 test/mvm_ip_test.py

diff --git a/include/config.py b/include/config.py
index 086b2416..e09ebedc 100644
--- a/include/config.py
+++ b/include/config.py
@@ -7,12 +7,13 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for Sparsity optimisation (Make it 0 for only dense computations)
 
 ## Variable to define the type of MVMU
 # One of "Analog", "Digital_V1" or "Digital_V2" 
 # Digital_V1 has compressed inputs (Data+Offset style)
 # Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
-MVMU_ver = "Digital_V2"
+MVMU_ver = "Analog"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
diff --git a/include/constants.py b/include/constants.py
index 55ea43f1..d16395df 100644
--- a/include/constants.py
+++ b/include/constants.py
@@ -2,6 +2,7 @@
 ## It also holds power, area and latency numbers of different component used in DPE design
 import config as cfg
 import math
+import constants_digital as digi_param
 # Limits the number of cycles an IMA runs in case it doesn't halt
 infinity = 100000
 
@@ -119,182 +120,6 @@
 xbar_rd_pow = 208.0 * 1000 * (1/32.0) / xbar_rd_lat
 xbar_wr_pow = 676.0 * 1000 * (1/32.0) / xbar_rd_lat
 
-## Adding power area and latency for Digital MVMU V1 and V2
-Digital_xbar_lat_dict = {'Digital_V1': {'32': { '100':130, # first indexed by version then by xbar_size and then by % of non_0 values
-                                                '90': 114, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
-                                                '80': 102, 
-                                                '70': 90, 
-                                                '60': 78, 
-                                                '50': 66, 
-                                                '40': 50, 
-                                                '30': 38, 
-                                                '20': 26, 
-                                                '10': 14},      
-                                        '64': { '100':258, 
-                                                '90': 230,
-                                                '80': 206, 
-                                                '70': 178, 
-                                                '60': 154, 
-                                                '50': 130, 
-                                                '40': 102, 
-                                                '30': 78, 
-                                                '20': 50, 
-                                                '10': 26},      
-                                        '128':{ '100':514,
-                                                '90': 462, 
-                                                '80': 410, 
-                                                '70': 358, 
-                                                '60': 306, 
-                                                '50': 258, 
-                                                '40': 206, 
-                                                '30': 154, 
-                                                '20': 102, 
-                                                '10': 50},     
-                                        '256':{ '100':1026, 
-                                                '90': 922,
-                                                '80': 818, 
-                                                '70': 718, 
-                                                '60': 614, 
-                                                '50': 514, 
-                                                '40': 410, 
-                                                '30': 306, 
-                                                '20': 206, 
-                                                '10': 102}},
-                         'Digital_V2': {'32' :{ '100':130,
-                                                '90': 118,
-                                                '80': 109, 
-                                                '70': 100, 
-                                                '60': 91, 
-                                                '50': 82, 
-                                                '40': 70, 
-                                                '30': 61, 
-                                                '20': 52, 
-                                                '10': 43},  
-                                        '64' :{ '100':258,
-                                                '90': 237,  
-                                                '80': 219, 
-                                                '70': 198, 
-                                                '60': 180, 
-                                                '50': 162, 
-                                                '40': 141, 
-                                                '30': 123, 
-                                                '20': 102, 
-                                                '10': 84},
-                                        '128':{ '100':514,
-                                                '90': 475,  
-                                                '80': 436, 
-                                                '70': 397, 
-                                                '60': 358, 
-                                                '50': 322, 
-                                                '40': 283, 
-                                                '30': 244, 
-                                                '20': 205, 
-                                                '10': 166},
-                                        '256':{ '100':1026,
-                                                '90': 948,  
-                                                '80': 870, 
-                                                '70': 795, 
-                                                '60': 717, 
-                                                '50': 642, 
-                                                '40': 564, 
-                                                '30': 486, 
-                                                '20': 411, 
-                                                '10': 333}}}
-
-Digital_xbar_area_dict = {'Digital_V1': { '32' : 0.16977,   # first indexed by version then by xbar_size
-                                          '64' : 0.27701,
-                                          '128': 1.74020,
-                                          '256': 7.29481},
-                          'Digital_V2': { '32' : 0.16949,  
-                                          '64' : 0.27645,
-                                          '128': 1.73908,
-                                          '256': 7.29257}}
-
-Digital_xbar_energy_dict = {'Digital_V1':{'32':{'100':5261.43744,  # first indexed by version then by xbar_size and then by % of non_0 values
-                                                '90': 4613.872832, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
-                                                '80': 4128.199376, # in pJ
-                                                '70': 3642.52592, 
-                                                '60': 3156.852464, 
-                                                '50': 2671.179008, 
-                                                '40': 2023.6144, 
-                                                '30': 1537.940944, 
-                                                '20': 1052.267488, 
-                                                '10': 566.594032},      
-                                          '64':{'100':20844.00864, 
-                                                '90': 18581.86252,
-                                                '80': 16642.88014, 
-                                                '70': 14380.73402, 
-                                                '60': 12441.75163, 
-                                                '50': 10502.76925, 
-                                                '40': 8240.623131, 
-                                                '30': 6301.640745, 
-                                                '20': 4039.494628, 
-                                                '10': 2100.512242},      
-                                        '128':{'100': 83018.14464,
-                                                '90': 74619.39346, 
-                                                '80': 66220.64228, 
-                                                '70': 57821.8911, 
-                                                '60': 49423.13992, 
-                                                '50': 41670.44653, 
-                                                '40': 33271.69535, 
-                                                '30': 24872.94417, 
-                                                '20': 16474.19299, 
-                                                '10': 8075.441812},     
-                                        '256':{'100': 331639.0958, 
-                                                '90': 298022.5268,
-                                                '80': 264405.9578, 
-                                                '70': 232082.3337, 
-                                                '60': 198465.7647, 
-                                                '50': 166142.1407, 
-                                                '40': 132525.5717, 
-                                                '30': 98909.00265, 
-                                                '20': 66585.3786, 
-                                                '10': 32968.80959}},
-                            'Digital_V2':{'32':{'100':4466.744263,
-                                                '90': 4053.765767,
-                                                '80': 3744.031895, 
-                                                '70': 3434.298023, 
-                                                '60': 3124.564151, 
-                                                '50': 2814.830279, 
-                                                '40': 2401.851783, 
-                                                '30': 2092.117911, 
-                                                '20': 1782.384039, 
-                                                '10': 1472.650167},  
-                                          '64':{'100':17654.27322,
-                                                '90': 16216.06481,  
-                                                '80': 14983.31474, 
-                                                '70': 13545.10633, 
-                                                '60': 12312.35626, 
-                                                '50': 11079.6062, 
-                                                '40': 9641.397787, 
-                                                '30': 8408.647721, 
-                                                '20': 6970.439311, 
-                                                '10': 5737.689245},
-                                        '128':{'100': 70237.24474,
-                                                '90': 64904.19392,  
-                                                '80': 59571.14309, 
-                                                '70': 54238.09226, 
-                                                '60': 48905.04144, 
-                                                '50': 43982.22529, 
-                                                '40': 38649.17446, 
-                                                '30': 33316.12363, 
-                                                '20': 27983.07281, 
-                                                '10': 22650.02198},
-                                        '256':{'100': 280471.5471,
-                                                '90': 259128.5,  
-                                                '80': 237785.453, 
-                                                '70': 217263.2925, 
-                                                '60': 195920.2454, 
-                                                '50': 175398.0849, 
-                                                '40': 154055.0379, 
-                                                '30': 132711.9909, 
-                                                '20': 112189.8303, 
-                                                '10': 90846.78326}}}
-Digital_xbar_pow_leak_dict = {  '32' :5.575928889,          #in mW 
-                                '64' :12.82466678,
-                                '128':40.24037556,
-                                '256':120.2098611}
-
 # DAC - Discuss exact values with ISSAC authors
 dac_lat_dict = {'1' : 1,
                 '2' : 1,
@@ -533,20 +358,12 @@
 
 
 # Chosen latency based on config file - only for components whose latency is parameter dependent
-#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
-# xbar_innerp_lat_dict = {'32':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
-#                         '64':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
-#                         '128':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0},
-#                         '256':{'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}}
-xbar_ip_lat_dict = {'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0}
 if cfg.MVMU_ver == "Analog":
       for key, value in xbar_ip_lat_dict.items():
             xbar_ip_lat_dict[key] = xbar_ip_lat
 else:
-      xbar_ip_lat_dict = Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
-print("xbar_ip_lat_dict",xbar_ip_lat_dict)
-
-
+      xbar_ip_lat_dict = digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 xbar_op_lat = xbar_op_lat
 xbar_rd_lat = xbar_rd_lat
 xbar_wr_lat = xbar_wr_lat
@@ -558,11 +375,11 @@
 instrnMem_lat =  instrnMem_lat_dict[str(cfg.instrnMem_size)]
 dataMem_lat =  dataMem_lat_dict[str(cfg.dataMem_size)]
 
-# Chosen area based on config file - only for components whose latency is parameter dependent
+# Chosen area based on config file - only for components whose area is parameter dependent
 if cfg.MVMU_ver == "Analog":
         xbar_area = xbar_area_dict[str(cfg.xbar_bits)][str(cfg.xbar_size)]
 else:
-        xbar_area = Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
+        xbar_area = digi_param.Digital_xbar_area_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 dac_area = dac_area_dict [str(cfg.dac_res)]
 adc_area = adc_area_dict [str(cfg.adc_res)]
 xbar_inMem_area = xbar_inMem_area_dict[str(cfg.xbar_size)]
@@ -585,19 +402,19 @@
 dataMem_pow_dyn =  dataMem_pow_dyn_dict[str(cfg.dataMem_size)]
 
 # Energy
-xbar_ip_energy_dict = {'100':0, '90':0, '80':0, '70':0, '60':0, '70':0, '50':0, '40':0, '30':0, '20':0, '10':0}
+xbar_ip_energy_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0}
 if cfg.MVMU_ver == "Analog":
         for key,value in xbar_ip_energy_dict.items():
                 xbar_ip_energy_dict[key] = xbar_ip_lat*xbar_ip_pow_dyn
 else:
-        xbar_ip_energy_dict = Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
+        xbar_ip_energy_dict = digi_param.Digital_xbar_energy_dict[cfg.MVMU_ver][str(cfg.xbar_size)]
 print('xbar_ip_energy_dict', xbar_ip_energy_dict)
 
 # Chosen leak_power based on config file - only for components whose latency is parameter dependent
 if cfg.MVMU_ver == "Analog":
         xbar_pow_leak = 0
 else:
-        xbar_pow_leak = Digital_xbar_pow_leak_dict[str(cfg.xbar_size)]
+        xbar_pow_leak = digi_param.Digital_xbar_pow_leak_dict[str(cfg.xbar_size)]
 dac_pow_leak = dac_pow_leak_dict [str(cfg.dac_res)]
 adc_pow_leak = adc_pow_leak_dict [str(cfg.adc_res)]
 xbar_inMem_pow_leak = xbar_inMem_pow_leak_dict[str(cfg.xbar_size)]
diff --git a/include/constants_digital.py b/include/constants_digital.py
new file mode 100644
index 00000000..abe5310b
--- /dev/null
+++ b/include/constants_digital.py
@@ -0,0 +1,175 @@
+## This file contains the power, area and latency numbers of Digital MVMUs for two versions
+Digital_xbar_lat_dict = {'Digital_V1': {'32': { '0':  130, # first indexed by version then by xbar_size and then by % sparsity
+                                                '10': 114, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
+                                                '20': 102, 
+                                                '30': 90, 
+                                                '40': 78, 
+                                                '50': 66, 
+                                                '60': 50, 
+                                                '70': 38, 
+                                                '80': 26, 
+                                                '90': 14},      
+                                        '64': { '0' : 258, 
+                                                '10': 230,
+                                                '20': 206, 
+                                                '30': 178, 
+                                                '40': 154, 
+                                                '50': 130, 
+                                                '60': 102, 
+                                                '70': 78, 
+                                                '80': 50, 
+                                                '90': 26},      
+                                        '128':{ '0' : 514,
+                                                '10': 462, 
+                                                '20': 410, 
+                                                '30': 358, 
+                                                '40': 306, 
+                                                '50': 258, 
+                                                '60': 206, 
+                                                '70': 154, 
+                                                '80': 102, 
+                                                '90': 50},     
+                                        '256':{ '0' : 1026, 
+                                                '10': 922,
+                                                '20': 818, 
+                                                '30': 718, 
+                                                '40': 614, 
+                                                '50': 514, 
+                                                '60': 410, 
+                                                '70': 306, 
+                                                '80': 206, 
+                                                '90': 102}},
+                         'Digital_V2': {'32' :{ '0' : 130,
+                                                '10': 118,
+                                                '20': 109, 
+                                                '30': 100, 
+                                                '40': 91, 
+                                                '50': 82, 
+                                                '60': 70, 
+                                                '70': 61, 
+                                                '80': 52, 
+                                                '90': 43},  
+                                        '64' :{ '0' : 258,
+                                                '10': 237,  
+                                                '20': 219, 
+                                                '30': 198, 
+                                                '40': 180, 
+                                                '50': 162, 
+                                                '60': 141, 
+                                                '70': 123, 
+                                                '80': 102, 
+                                                '90': 84},
+                                        '128':{ '0' : 514,
+                                                '10': 475,  
+                                                '20': 436, 
+                                                '30': 397, 
+                                                '40': 358, 
+                                                '50': 322, 
+                                                '60': 283, 
+                                                '70': 244, 
+                                                '80': 205, 
+                                                '90': 166},
+                                        '256':{ '0' : 1026,
+                                                '10': 948,  
+                                                '20': 870, 
+                                                '30': 795, 
+                                                '40': 717, 
+                                                '50': 642, 
+                                                '60': 564, 
+                                                '70': 486, 
+                                                '80': 411, 
+                                                '90': 333}}}
+
+Digital_xbar_area_dict = {'Digital_V1': { '32' : 0.16977,   # first indexed by version then by xbar_size
+                                          '64' : 0.27701,
+                                          '128': 1.74020,
+                                          '256': 7.29481},
+                          'Digital_V2': { '32' : 0.16949,  
+                                          '64' : 0.27645,
+                                          '128': 1.73908,
+                                          '256': 7.29257}}
+
+Digital_xbar_energy_dict = {'Digital_V1':{'32':{'0' : 5261.43744,  # first indexed by version then by xbar_size and then by % sparsity
+                                                '10': 4613.872832, # For V1 it is (4n+2)*T and for V2 it is (3n+2+xbar_size)*T
+                                                '20': 4128.199376, # in pJ
+                                                '30': 3642.52592, 
+                                                '40': 3156.852464, 
+                                                '50': 2671.179008, 
+                                                '60': 2023.6144, 
+                                                '70': 1537.940944, 
+                                                '80': 1052.267488, 
+                                                '90': 566.594032},      
+                                          '64':{'0' : 20844.00864, 
+                                                '10': 18581.86252,
+                                                '20': 16642.88014, 
+                                                '30': 14380.73402, 
+                                                '40': 12441.75163, 
+                                                '50': 10502.76925, 
+                                                '60': 8240.623131, 
+                                                '70': 6301.640745, 
+                                                '80': 4039.494628, 
+                                                '90': 2100.512242},      
+                                        '128':{ '0' : 83018.14464,
+                                                '10': 74619.39346, 
+                                                '20': 66220.64228, 
+                                                '30': 57821.8911, 
+                                                '40': 49423.13992, 
+                                                '50': 41670.44653, 
+                                                '60': 33271.69535, 
+                                                '70': 24872.94417, 
+                                                '80': 16474.19299, 
+                                                '90': 8075.441812},     
+                                        '256':{ '0' : 331639.0958, 
+                                                '10': 298022.5268,
+                                                '20': 264405.9578, 
+                                                '30': 232082.3337, 
+                                                '40': 198465.7647, 
+                                                '50': 166142.1407, 
+                                                '60': 132525.5717, 
+                                                '70': 98909.00265, 
+                                                '80': 66585.3786, 
+                                                '90': 32968.80959}},
+                            'Digital_V2':{'32':{'0' : 4466.744263,
+                                                '10': 4053.765767,
+                                                '20': 3744.031895, 
+                                                '30': 3434.298023, 
+                                                '40': 3124.564151, 
+                                                '50': 2814.830279, 
+                                                '60': 2401.851783, 
+                                                '70': 2092.117911, 
+                                                '80': 1782.384039, 
+                                                '90': 1472.650167},  
+                                          '64':{'0' : 17654.27322,
+                                                '10': 16216.06481,  
+                                                '20': 14983.31474, 
+                                                '30': 13545.10633, 
+                                                '40': 12312.35626, 
+                                                '50': 11079.6062, 
+                                                '60': 9641.397787, 
+                                                '70': 8408.647721, 
+                                                '80': 6970.439311, 
+                                                '90': 5737.689245},
+                                        '128':{ '0' : 70237.24474,
+                                                '10': 64904.19392,  
+                                                '20': 59571.14309, 
+                                                '30': 54238.09226, 
+                                                '40': 48905.04144, 
+                                                '50': 43982.22529, 
+                                                '60': 38649.17446, 
+                                                '70': 33316.12363, 
+                                                '80': 27983.07281, 
+                                                '90': 22650.02198},
+                                        '256':{ '0' : 280471.5471,
+                                                '10': 259128.5,  
+                                                '20': 237785.453, 
+                                                '30': 217263.2925, 
+                                                '40': 195920.2454, 
+                                                '50': 175398.0849, 
+                                                '60': 154055.0379, 
+                                                '70': 132711.9909, 
+                                                '80': 112189.8303, 
+                                                '90': 90846.78326}}}
+Digital_xbar_pow_leak_dict = {  '32' :5.575928889,          #in mW 
+                                '64' :12.82466678,
+                                '128':40.24037556,
+                                '256':120.2098611}
\ No newline at end of file
diff --git a/include/example-configs/config-cnn.py b/include/example-configs/config-cnn.py
index 10e2e418..4b2bdde5 100644
--- a/include/example-configs/config-cnn.py
+++ b/include/example-configs/config-cnn.py
@@ -7,6 +7,7 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for Sparsity optimisation (Make it 0 for only dense computations)
 
 ## Variable to define the type of MVMU
 # One of "Analog", "Digital_V1" or "Digital_V2" 
diff --git a/include/example-configs/config-mlp.py b/include/example-configs/config-mlp.py
index 086b2416..02e78dd1 100644
--- a/include/example-configs/config-mlp.py
+++ b/include/example-configs/config-mlp.py
@@ -7,6 +7,7 @@
 xbar_record = 1
 inference = 1
 training = not(inference)
+sparse_opt = 1 # Flag for Sparsity optimisation (Make it 0 for only dense computations)
 
 ## Variable to define the type of MVMU
 # One of "Analog", "Digital_V1" or "Digital_V2" 
diff --git a/src/dnn_wt_p.py b/src/dnn_wt_p.py
index 3d19cb04..df1b4a08 100644
--- a/src/dnn_wt_p.py
+++ b/src/dnn_wt_p.py
@@ -28,4 +28,3 @@ def prog_dnn_wt(self, instrnpath, node_dut):
                             wt_temp = np.load(wt_filename)
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
-
diff --git a/src/hw_stats.py b/src/hw_stats.py
index 6f0df0c3..6970de88 100644
--- a/src/hw_stats.py
+++ b/src/hw_stats.py
@@ -14,7 +14,7 @@
 
 # Copied from /include/constants.py file
 # Enlists components at core, tile, and node levels
-hw_comp_energy = {'xbar_mvm':{  '100':param.xbar_ip_energy_dict['100'], \
+hw_comp_energy = {'xbar_mvm':{  '0':param.xbar_ip_energy_dict['0'], \
                                 '90': param.xbar_ip_energy_dict['90'], \
                                 '80': param.xbar_ip_energy_dict['80'], \
                                 '70': param.xbar_ip_energy_dict['70'], \
@@ -24,37 +24,20 @@
                                 '30': param.xbar_ip_energy_dict['30'], \
                                 '20': param.xbar_ip_energy_dict['20'], \
                                 '10': param.xbar_ip_energy_dict['10']}, \
-                  'xbar_op':{  '100': param.xbar_ip_energy_dict['100'], \
-                                '90': param.xbar_ip_energy_dict['90'], \
-                                '80': param.xbar_ip_energy_dict['80'], \
-                                '70': param.xbar_ip_energy_dict['70'], \
-                                '60': param.xbar_ip_energy_dict['60'], \
-                                '50': param.xbar_ip_energy_dict['50'], \
-                                '40': param.xbar_ip_energy_dict['40'], \
-                                '30': param.xbar_ip_energy_dict['30'], \
-                                '20': param.xbar_ip_energy_dict['20'], \
-                                '10': param.xbar_ip_energy_dict['10']}, \
-                  'xbar_mtvm':{ '100':param.xbar_ip_energy_dict['100'], \
-                                '90': param.xbar_ip_energy_dict['90'], \
-                                '80': param.xbar_ip_energy_dict['80'], \
-                                '70': param.xbar_ip_energy_dict['70'], \
-                                '60': param.xbar_ip_energy_dict['60'], \
-                                '50': param.xbar_ip_energy_dict['50'], \
-                                '40': param.xbar_ip_energy_dict['40'], \
-                                '30': param.xbar_ip_energy_dict['30'], \
-                                '20': param.xbar_ip_energy_dict['20'], \
-                                '10': param.xbar_ip_energy_dict['10']}, \
-        'xbar_rd':param.xbar_rd_pow_dyn*param.xbar_rd_lat, 'xbar_wr':param.xbar_wr_pow_dyn*param.xbar_wr_lat,
+        'xbar_op':param.xbar_ip_energy_dict['0'], \
+        'xbar_mtvm':param.xbar_ip_energy_dict['0'], \
+        'xbar_rd':param.xbar_rd_pow_dyn*param.xbar_rd_lat, \
+        'xbar_wr':param.xbar_wr_pow_dyn*param.xbar_wr_lat,
         'dac':param.dac_pow_dyn, 'snh':param.snh_pow_dyn, \
-        'mux1':param.mux_pow_dyn, 'mux2':param.mux_pow_dyn, 'adc':{ 'n' :       param.adc_pow_dyn_dict[str(cfg.adc_res)], \
-                                                                    'n/2':      param.adc_pow_dyn_dict[str(cfg.adc_res-1)], \
-                                                                    '3n/4':     param.adc_pow_dyn_dict[str(cfg.adc_res-2)], \
-                                                                    '7n/8':     param.adc_pow_dyn_dict[str(cfg.adc_res-3)], \
-                                                                    '15n/16':   param.adc_pow_dyn_dict[str(cfg.adc_res-4)], \
-                                                                    '31n/32':   param.adc_pow_dyn_dict[str(cfg.adc_res-5)], \
-                                                                    '63n/64':   param.adc_pow_dyn_dict[str(cfg.adc_res-6)], \
-                                                                    '127n/128': param.adc_pow_dyn_dict[str(cfg.adc_res-7)], \
-                                                                    '255n/256': param.adc_pow_dyn_dict[str(cfg.adc_res-7)]}, \
+        'mux1':param.mux_pow_dyn, 'mux2':param.mux_pow_dyn, \
+        'adc':{ 'n' :       param.adc_pow_dyn_dict[str(cfg.adc_res)], \
+                'n/2':      param.adc_pow_dyn_dict[str(cfg.adc_res-1)], \
+                'n/4':     param.adc_pow_dyn_dict[str(cfg.adc_res-2)], \
+                'n/8':     param.adc_pow_dyn_dict[str(cfg.adc_res-3)], \
+                'n/16':   param.adc_pow_dyn_dict[str(cfg.adc_res-4)], \
+                'n/32':   param.adc_pow_dyn_dict[str(cfg.adc_res-5)], \
+                'n/64':   param.adc_pow_dyn_dict[str(cfg.adc_res-6)], \
+                'n/128': param.adc_pow_dyn_dict[str(cfg.adc_res-7)]}, \
         'alu_div': param.alu_pow_div_dyn, 'alu_mul':param.alu_pow_mul_dyn, \
         'alu_act': param.act_pow_dyn, 'alu_other':param.alu_pow_others_dyn, \
         'alu_sna': param.sna_pow_dyn, \
@@ -74,47 +57,18 @@
 def get_hw_stats (fid, node_dut, cycle):
 
     # List of all components that dissipate power
-    hw_comp_access = {'xbar_mvm':{  '100':0, \
-                                    '90': 0, \
-                                    '80': 0, \
-                                    '70': 0, \
-                                    '60': 0, \
-                                    '50': 0, \
-                                    '40': 0, \
-                                    '30': 0, \
-                                    '20': 0, \
-                                    '10': 0}, \
-                    'xbar_op':{     '100':0, \
-                                    '90': 0, \
-                                    '80': 0, \
-                                    '70': 0, \
-                                    '60': 0, \
-                                    '50': 0, \
-                                    '40': 0, \
-                                    '30': 0, \
-                                    '20': 0, \
-                                    '10': 0}, \
-                    'xbar_mtvm':{  '100':0,  \
-                                    '90': 0, \
-                                    '80': 0, \
-                                    '70': 0, \
-                                    '60': 0, \
-                                    '50': 0, \
-                                    '40': 0, \
-                                    '30': 0, \
-                                    '20': 0, \
-                                    '10': 0}, \
+    hw_comp_access = {'xbar_mvm':{  '0':0, '90': 0,'80': 0,'70': 0,'60': 0,'50': 0,'40': 0,'30': 0,'20': 0,'10': 0}, \
+            'xbar_op':0, 'xbar_mtvm':0, \
             'xbar_rd':0, 'xbar_wr':0, \
             'dac':0, 'snh':0, \
-            'mux1':0, 'mux2':0, 'adc':{ 'n' :       0, \
-                                        'n/2':      0, \
-                                        '3n/4':     0, \
-                                        '7n/8':     0, \
-                                        '15n/16':   0, \
-                                        '31n/32':   0, \
-                                        '63n/64':   0, \
-                                        '127n/128': 0, \
-                                        '255n/256': 0}, \
+            'mux1':0, 'mux2':0, 'adc':{ 'n' :    0, \
+                                        'n/2':   0, \
+                                        'n/4':   0, \
+                                        'n/8':   0, \
+                                        'n/16':  0, \
+                                        'n/32':  0, \
+                                        'n/64':  0, \
+                                        'n/128': 0}, \
             'alu_div':0, 'alu_mul':0, \
             'alu_act':0, 'alu_other':0, \
             'alu_sna':0, \
@@ -159,11 +113,9 @@ def get_hw_stats (fid, node_dut, cycle):
                     if cfg.MVMU_ver == "Analog":
                         for m in range(cfg.phy2log_ratio):
                             if (mvmu_t == 'd'):
-                                for key,value in hw_comp_access['xbar_op'].items():
-                                    hw_comp_access['xbar_op'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
+                                hw_comp_access['xbar_op'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access['0']
                             elif (mvmu_t == 'b'):
-                                for key,value in hw_comp_access['xbar_mtvm'].items():
-                                    hw_comp_access['xbar_mtvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
+                                hw_comp_access['xbar_mtvm'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access['0']
                             else:
                                 for key,value in hw_comp_access['xbar_mvm'].items():
                                     hw_comp_access['xbar_mvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][m].num_access[key]
@@ -174,11 +126,9 @@ def get_hw_stats (fid, node_dut, cycle):
                     
                     else:
                         if (mvmu_t == 'd'):
-                            for key,value in hw_comp_access['xbar_op'].items():
-                                hw_comp_access['xbar_op'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
+                            hw_comp_access['xbar_op'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access['0']
                         elif (mvmu_t == 'b'):
-                            for key,value in hw_comp_access['xbar_mtvm'].items():
-                                hw_comp_access['xbar_mtvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
+                            hw_comp_access['xbar_mtvm'] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access['0']
                         else:
                             for key,value in hw_comp_access['xbar_mvm'].items():
                                 hw_comp_access['xbar_mvm'][key] += node_dut.tile_list[i].ima_list[j].matrix_list[k][mvmu_t][0].num_access[key]
@@ -245,10 +195,6 @@ def get_hw_stats (fid, node_dut, cycle):
     total_adc_access = 0
     total_mvm_energy = 0
     total_mvm_access = 0
-    total_mtvm_access = 0
-    total_mtvm_energy = 0
-    total_op_access = 0
-    total_op_energy = 0
     # Compute the total dynamic energy consumption
     if cfg.MVMU_ver == "Analog":
         for key, value in hw_comp_access.items():
@@ -262,16 +208,6 @@ def get_hw_stats (fid, node_dut, cycle):
                     total_energy += value1*hw_comp_energy['xbar_mvm'][key1]
                     total_mvm_energy +=  value1*hw_comp_energy['xbar_mvm'][key1] # Not needed for function but for output visualisation
                     total_mvm_access += value1
-            elif key == 'xbar_mtvm':
-                for key1, value1 in hw_comp_access['xbar_mtvm'].items():
-                    total_energy += value1*hw_comp_energy['xbar_mtvm'][key1]
-                    total_mvm_energy +=  value1*hw_comp_energy['xbar_mtvm'][key1] # Not needed for function but for output visualisation
-                    total_mvm_access += value1
-            elif key == 'xbar_op':
-                for key1, value1 in hw_comp_access['xbar_op'].items():
-                    total_energy += value1*hw_comp_energy['xbar_op'][key1]
-                    total_op_energy +=  value1*hw_comp_energy['xbar_op'][key1] # Not needed for function but for output visualisation
-                    total_op_access += value1
             else:
                 total_energy += value * hw_comp_energy[key]
     else:
@@ -286,16 +222,6 @@ def get_hw_stats (fid, node_dut, cycle):
                     total_energy += (value1/16)*hw_comp_energy['xbar_mvm'][key1]
                     total_mvm_energy +=  (value1/16)*hw_comp_energy['xbar_mvm'][key1] # Not needed for function but for output visualisation
                     total_mvm_access += (value1/16)
-            elif key == 'xbar_mtvm':
-                for key1, value1 in hw_comp_access['xbar_mtvm'].items():
-                    total_energy += (value1/16)*hw_comp_energy['xbar_mtvm'][key1]
-                    total_mvm_energy +=  (value1/16)*hw_comp_energy['xbar_mtvm'][key1] # Not needed for function but for output visualisation
-                    total_mvm_access += (value1/16)
-            elif key == 'xbar_op':
-                for key1, value1 in hw_comp_access['xbar_op'].items():
-                    total_energy += (value1/16)*hw_comp_energy['xbar_op'][key1]
-                    total_op_energy +=  (value1/16)*hw_comp_energy['xbar_op'][key1] # Not needed for function but for output visualisation
-                    total_op_access += (value1/16)
             else:
                 total_energy += value * hw_comp_energy[key]
 
@@ -315,14 +241,6 @@ def get_hw_stats (fid, node_dut, cycle):
             bl_spc2 = (22-len(str(total_mvm_access))) * ' '
             fid.write (key + bl_spc1 + str(total_mvm_access) + bl_spc2 +\
                         (str(total_mvm_energy/total_energy*100))[0:4] + ' %\n')
-        elif key == 'xbar_mtvm':
-            bl_spc2 = (22-len(str(total_mtvm_access))) * ' '
-            fid.write (key + bl_spc1 + str(total_mtvm_access) + bl_spc2 +\
-                        (str(total_mtvm_energy/total_energy*100))[0:4] + ' %\n')
-        elif key == 'xbar_op':
-            bl_spc2 = (22-len(str(total_op_access))) * ' '
-            fid.write (key + bl_spc1 + str(total_op_access) + bl_spc2 +\
-                        (str(total_op_energy/total_energy*100))[0:4] + ' %\n')
         else:
             bl_spc2 = (22-len(str(value))) * ' '
             fid.write (key + bl_spc1 + str(value) + bl_spc2 +\
@@ -358,14 +276,10 @@ def get_hw_stats (fid, node_dut, cycle):
     metric_dict['core_area'] = ima_metrics.compute_area ()# in mm2
     metric_dict['cycles'] = cycle
     metric_dict['time'] = cycle * param.cycle_time * (10**(-9)) # in sec
+    metric_dict['dynamic_energy'] = total_energy * ns * mw # in joule
     #metric_dict['leakage_enegy'] = metric_dict['leakage_power'] * mw * metric_dict['time'] # in joule
     metric_dict['leakage_energy'] =  leakage_energy * ns * mw # in joule
-    # if cfg.MVMU_ver == "Analog":
-    metric_dict['dynamic_energy'] = total_energy * ns * mw # in joule
     metric_dict['total_energy'] = metric_dict['dynamic_energy'] + metric_dict['leakage_energy']
-    # else:
-    #     metric_dict['total_energy'] = total_energy * ns * mw # in joule
-    #     metric_dict['dynamic_energy'] = metric_dict['total_energy']
     metric_dict['average_power'] = metric_dict['total_energy'] / metric_dict['time'] * (10**(3)) # in mW
 
     for key, value in metric_dict.items():
diff --git a/src/ima.py b/src/ima.py
index 99815cb6..1759ed19 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -9,6 +9,7 @@
 import include.config as cfg
 #import include.configTest as cfg
 import include.constants as param
+import constants_digital as digi_param
 import src.ima_modules as imod
 
 from data_convert import *
@@ -39,7 +40,6 @@ def __init__ (self):
         self.matrix_list = [] # list of dicts of mvmu(s)
         self.xb_inMem_list = [] # list of dicts of xbar input memory
         self.xb_outMem_list = [] # list of dicts of xbar output memory
-        self.xbar_inMem_Sparsity_list = [] # list of sparsity od xbar in mem (may have to be removed if found redundant)
 
         for i in xrange(cfg.num_matrix):
             # each matrix represents three mvmus - 1 mvmu for fw, 1 mvmu for bw, 1 mvmu (2X width) for delta
@@ -505,9 +505,9 @@ def do_execute (self, ex_op, fid):
                 for i in range (self.de_r2):
                     dst_addr = data_addr + i
                     if (dst_addr >= datamem_off):
-                      self.dataMem.write (dst_addr, data[i])
+                        self.dataMem.write (dst_addr, data[i])
                     else:
-                      writeToXbarMem (self, dst_addr, data[i])
+                        writeToXbarMem (self, dst_addr, data[i])
 
             elif (ex_op == 'st'): #nothing to be done by ima for st here
                 return 1
@@ -599,22 +599,23 @@ def inner_product (mat_id, key):
                     self.xb_outMem_list[mat_id][key].reset ()
 
                     xbar_inMem = self.xb_inMem_list[mat_id][key].read_all ()
-                    # print ("xb_inMem", xbar_inMem)
-                    # calculate sparsity of xbar_in_mem
                     non_0_val = 0
                     for i in range(cfg.xbar_size):
-                        if xbar_inMem[i] != '0':
+                        if xbar_inMem[i] != '0000000000000000':
                             non_0_val = non_0_val +1
-                    sparsity = (cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size
-                    # print ("non_0_val", non_0_val)
-                    # print ("Sparsity", sparsity)
+                    sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
+                    sparsity_adc = sparsity
+                    if (sparsity%10!=0):
+                        sparsity = sparsity-(sparsity%10)
+                    else:
+                        if (sparsity == 100):
+                            sparsity = sparsity-10
 
                     ## Loop to cover all bits of inputs
                     for k in xrange (int(math.ceil(cfg.input_prec / cfg.dac_res))): #quantization affects the # of streams
                     #for k in xrange (1):
                         # read the values from the xbar's input register
                         out_xb_inMem = self.xb_inMem_list[mat_id][key].read (cfg.dac_res)
-                        # print("out_xb_inMem", out_xb_inMem)
                         
                         #*************************************** HACK *********************************************
                         ###### CAUTION: Not replicated exact "functional" circuit behaviour for analog parts
@@ -630,7 +631,7 @@ def inner_product (mat_id, key):
                         out_snh = [[] for x in range(num_xb)]
                         for m in range (num_xb):
                             # compute dot-product
-                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy(non_0_val, out_dac)        
+                            out_xbar[m] = self.matrix_list[mat_id][key][m].propagate_dummy(out_dac, sparsity)        
                             # do sampling and hold
                             out_snh[m] = self.snh_list[mat_id*num_xb+m].propagate_dummy(out_xbar[m])
 
@@ -643,7 +644,7 @@ def inner_product (mat_id, key):
                                 adc_id = (mat_id*num_xb + m) % cfg.num_adc
                                 out_mux1 = self.mux1_list[mat_id].propagate_dummy(out_snh[m][j]) # i is the ith xbar
                                 out_mux2 = self.mux2_list[mat_id % cfg.num_adc].propagate_dummy(out_mux1)
-                                out_adc = self.adc_list[adc_id].propagate_dummy(out_mux2, non_0_val)
+                                out_adc = self.adc_list[adc_id].propagate_dummy(out_mux2, sparsity_adc)
 
                                 # shift and add outputs from difefrent wt_bits
                                 alu_op = 'sna'
@@ -772,8 +773,8 @@ def outer_product (mat_id, key):
             # do nothing for nop instruction
 
 
-        # Computes the latency for mvm instruction based on DPE configuration
-        def xbComputeLatency (self, mask):
+        # Computes the latency for Analog mvm instruction based on DPE configuration
+        def xbComputeLatency_Analog (self, mask):
             latency_out_list = []
             fb_found = 0
             d_found = 0
@@ -797,7 +798,7 @@ def xbComputeLatency (self, mask):
                 lat_temp = 0
                 # We assume all ADCs in a matrix has the same resolution
                 adc_idx = idx*cfg.num_adc_per_matrix
-                lat_temp = self.adc_list[adc_idx].getLatency(cfg.xbar_size)
+                lat_temp = self.adc_list[adc_idx].getLatency()
                 '''
                 print("adc_idx", adc_idx)
                 print("lat_temp", lat_temp)
@@ -822,6 +823,26 @@ def xbComputeLatency (self, mask):
                 latency_out_list.append(latency_out)
             return max(latency_out_list)
 
+        # Computes the latency for Digital mvm instruction based on input sparsity of each active matrix
+        def xbComputeLatency_Digital (self):
+            mvm_lat_temp = 0
+            if (cfg.inference):
+                for p in xrange(cfg.num_matrix):
+                    if self.de_xb_nma[p]:
+                        xbar_inMem = self.xb_inMem_list[p]['f'].read_all ()
+                        non_0_val = 0
+                        for i in range(cfg.xbar_size):
+                            if xbar_inMem[i] != '0000000000000000':
+                                non_0_val = non_0_val +1
+                        sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
+                        if (sparsity%10!=0):
+                            sparsity = sparsity-(sparsity%10)
+                        else:
+                            if (sparsity == 100):
+                                sparsity = sparsity-10
+                        mvm_lat_temp += digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)][str(sparsity)]
+            return mvm_lat_temp
+
         # State machine runs only if the stage is non-empty
         # Describe the functionality on a cycle basis
         if (self.stage_empty[sId] != 1):
@@ -860,24 +881,9 @@ def xbComputeLatency (self, mask):
                 elif (ex_op == 'mvm'):
                     mask_temp = self.de_xb_nma
                     if (cfg.MVMU_ver == "Analog"):
-                        self.stage_latency[sId] = xbComputeLatency (self, mask_temp) # mask tells which of ip/op or both is occurring
+                        self.stage_latency[sId] = xbComputeLatency_Analog (self, mask_temp) # mask tells which of ip/op or both is occurring
                     else:
-                        mvm_lat_temp = 0
-                        if (cfg.inference):
-                            for p in xrange(cfg.num_matrix):
-                                if self.de_xb_nma[p]:
-                                    xbar_inMem = self.xb_inMem_list[p]['f'].read_all ()
-                                    non_0_val = 0
-                                    for i in range(cfg.xbar_size):
-                                        if xbar_inMem[i] != '0':
-                                            non_0_val = non_0_val +1
-                                    print ("non_0_val", non_0_val)
-                                    nval_percent = int(non_0_val*100/128)
-                                    if (nval_percent%10!=0):
-                                        nval_percent = nval_percent + 10
-                                    mvm_lat_temp += param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)][str(nval_percent)]
-                        self.stage_latency[sId] = mvm_lat_temp
-                    print("MVM Latency", self.stage_latency[sId])
+                        self.stage_latency[sId] = xbComputeLatency_Digital(self)
 
                 # Needs update - use xbar serial read latency
                 elif (ex_op == 'crs'):
@@ -1014,7 +1020,7 @@ def pipe_run (self, cycle, fid = ''): # fid is tracefile's id
                 update_ready = self.stage_done[i+1]
 
             # run the stage based on its update_ready argument
-
+           
             stage_function[i] (update_ready, fid)
 
         # If specified, print thetrace (pipeline stage information)
diff --git a/src/ima_metrics.py b/src/ima_metrics.py
index 8cfb1c4e..f7ec20ac 100644
--- a/src/ima_metrics.py
+++ b/src/ima_metrics.py
@@ -18,7 +18,7 @@ def compute_area (): #in mm2
         area += (cfg.num_matrix*3) * param.xbar_outMem_area # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
         area += (cfg.num_matrix*4) * cfg.phy2log_ratio * param.xbar_area # d-xbar has 2X xbars than f/b
     else:
-        area += (cfg.num_matrix*2) * param.xbar_area # d-xbar are not needed in Digital MVMUs xbars than f/b
+        area += (cfg.num_matrix*2) * param.xbar_area # d-xbars are not needed in Digital MVMUs; only f and b xbars are present
     area += (cfg.num_matrix*3) * param.xbar_inMem_area # xbar_inMem one each for f/b/d xbars
     area += param.instrnMem_area # instrnMem
     area += param.dataMem_area # dataMem
@@ -43,9 +43,9 @@ def compute_pow_leak ():
         leak_pow += cfg.num_adc * param.adc_pow_leak # adc
         leak_pow += (cfg.num_matrix*2) * param.sna_pow_leak # sna
         leak_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_leak # xbar_outMem
-        leak_pow += (cfg.num_matrix*4) * param.xbar_pow_leak # xbar area
+        leak_pow += (cfg.num_matrix*4) * param.xbar_pow_leak # xbar leakage power for analog
     else:
-        leak_pow += (cfg.num_matrix*2) * param.xbar_pow_leak # xbar area
+        leak_pow += (cfg.num_matrix*2) * param.xbar_pow_leak # d-xbars are not needed in Digital MVMUs; only f and b xbars are present
     leak_pow += (cfg.num_matrix*3) * param.xbar_inMem_pow_leak # xbar_inMem
     leak_pow += param.instrnMem_pow_leak # instrnMem
     leak_pow += param.dataMem_pow_leak # dataMem
@@ -58,15 +58,17 @@ def compute_pow_leak ():
 def compute_pow_dyn ():
     dyn_pow = 0.0
     if cfg.MVMU_ver == "Analog":
-        dyn_pow += (cfg.num_matrix*3) * (param.xbar_inMem_pow_dyn_write + param.xbar_inMem_pow_dyn_read/cfg.xbar_size) # xbar_inMem - num_xbar * dac_res bits will be
-                    #   read from xb_inMem in an interval that equals xbar_access time
     # dyn_pow += cfg.num_xbar/2 * 1.2 # (adding dyn pow the way issac does for comparison)
         dyn_pow += (cfg.num_matrix*11) * cfg.xbar_size * param.dac_pow_dyn # dac
         dyn_pow += (cfg.num_matrix*2) * cfg.xbar_size * param.snh_pow_dyn # snh
         dyn_pow += cfg.num_adc * param.adc_pow_dyn # adc
         dyn_pow += (cfg.num_matrix*2) * param.sna_pow_dyn # sna
         dyn_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_dyn # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
-    dyn_pow += (cfg.num_matrix*4) * param.xbar_ip_pow_dyn # xbar ip power considred as ip>op power
+        dyn_pow += (cfg.num_matrix*4) * param.xbar_ip_pow_dyn # xbar ip power considered as ip>op power
+    else:
+        dyn_pow += (cfg.num_matrix*2) * param.xbar_ip_pow_dyn # xbar ip power considered as ip>op power
+    dyn_pow += (cfg.num_matrix*3) * (param.xbar_inMem_pow_dyn_write + param.xbar_inMem_pow_dyn_read/cfg.xbar_size) # xbar_inMem - num_xbar * dac_res bits will be
+        #   read from xb_inMem in an interval that equals xbar_access time
     dyn_pow += param.instrnMem_pow_dyn # instrnMem
     dyn_pow += param.dataMem_pow_dyn # dataMem
     dyn_pow += param.alu_pow_dyn # alu
diff --git a/src/ima_modules.py b/src/ima_modules.py
index 922e61af..a7dac03f 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -16,16 +16,7 @@ class xbar (object):
     def __init__ (self, xbar_size, xbar_value= 'nil' ):
         # define num_accesses for different operations
         # parallel reads (inner-product)
-        self.num_access = { '100':0, \
-                            '90': 0, \
-                            '80': 0, \
-                            '70': 0, \
-                            '60': 0, \
-                            '50': 0, \
-                            '40': 0, \
-                            '30': 0, \
-                            '20': 0, \
-                            '10': 0} \
+        self.num_access = { '0':0, '90': 0,'80': 0,'70': 0,'60': 0,'50': 0,'40': 0,'30': 0,'20': 0,'10': 0}
 
         self.num_access_rd = 0 # serial reads
         self.num_access_wr = 0 # serial writes
@@ -101,35 +92,16 @@ def propagate (self, inp = 'nil'):
         return out
 
     # HACK - until propagate doesn't have correct analog functionality
-    def propagate_dummy (self, n_val, inp = 'nil'):
+    def propagate_dummy (self, inp = 'nil', sparsity = 0):
         # data input is list of bit strings (of length dac_res) - fixed point binary
         assert (inp != 'nil'), 'propagate needs a non-nil input'
         assert (len(inp) == self.xbar_size), 'xbar input size mismatch'
         
         #Modification to accomodate sparsity and digital crossbars
         if cfg.MVMU_ver == "Analog":
-            self.num_access['100'] += 1
+            self.num_access['0'] += 1
         else:
-            if n_val>cfg.xbar_size*9/10.0:
-                self.num_access['100'] += 1
-            elif n_val>cfg.xbar_size*8/10.0:
-                self.num_access['90'] += 1
-            elif n_val>cfg.xbar_size*7/10.0:
-                self.num_access['80'] += 1
-            elif n_val>cfg.xbar_size*6/10.0:
-                self.num_access['70'] += 1
-            elif n_val>cfg.xbar_size*5/10.0:
-                self.num_access['60'] += 1
-            elif n_val>cfg.xbar_size*4/10.0:
-                self.num_access['50'] += 1
-            elif n_val>cfg.xbar_size*3/10.0:
-                self.num_access['40'] += 1
-            elif n_val>cfg.xbar_size*2/10.0:
-                self.num_access['30'] += 1
-            elif n_val>cfg.xbar_size*1/10.0:
-                self.num_access['20'] += 1
-            else:
-                self.num_access['10'] += 1
+            self.num_access[str(sparsity)] +=1
 
         # convert input from fixed point binary (string) to float
         inp_float = [0.0] * self.xbar_size
@@ -159,7 +131,7 @@ class xbar_op (xbar):
     # add function for outer_product computation
     def propagate_op_dummy (self, inp1 = 'nil', inp2 = 'nil', lr=1, in1_bit=cfg.dac_res, in2_bit=cfg.xbar_bits):
         # inner-product and outer_product functions should have different energies (and other metrics) - NEEDS UPDATE
-        self.num_access += 1
+        self.num_access['0'] += 1
         # check both data inputs
         assert (inp1 != 'nil' and inp2 != 'nil'), 'propagate needs a non-nil inputs'
         assert ((len(inp1) == self.xbar_size) and (len(inp1[0]) == in1_bit)), 'inp1 size mismatch - should be \
@@ -258,22 +230,14 @@ def propagate_dummy (self, inp_list):
 class adc (object):
     def __init__ (self, adc_res):
         # define num_access
-        self.num_access = { 'n' :       0,
-                            'n/2':      0,
-                            '3n/4':     0,
-                            '7n/8':     0,
-                            '15n/16':   0,
-                            '31n/32':   0,
-                            '63n/64':   0,
-                            '127n/128': 0,
-                            '255n/256': 0}
-
+        self.num_access = { 'n':0, 'n/2': 0,'n/4': 0,'n/8': 0,'n/16': 0,'n/32': 0,'n/64': 0,'n/128': 0}
+        
         # define latency
-        # self.latency = param.adc_lat_dict[str(adc_res)]
+        self.latency = param.adc_lat_dict[str(adc_res)]
 
         self.adc_res = adc_res
 
-    def getLatency (self, n_val):
+    def getLatency (self):
         self.latency = param.adc_lat_dict[str(self.adc_res)]
         return self.latency
 
@@ -285,31 +249,39 @@ def real2bin (self, inp, num_bits):
         return ('0'*(num_bits - len(bin_value)) + bin_value)
 
     def propagate (self, inp):
-        #self.num_access += 1
+        self.num_access += 1
         assert (type(inp) in [float, np.float32, np.float64]), 'adc input type mismatch (float, np.float32, np.float64 expected)'
         num_bits = self.adc_res
         return self.real2bin (inp, num_bits)
 
     # HACK - until propagate doesn't have correct analog functionality
-    def propagate_dummy (self, inp, n_val):
-        if n_val>cfg.xbar_size/2.0:
+    def propagate_dummy (self, inp, sparsity):
+        if sparsity>50:
             self.num_access['n'] += 1
-        elif n_val>cfg.xbar_size/4.0:
+            self.adc_res = cfg.adc_res
+        elif sparsity>25:
             self.num_access['n/2'] += 1
-        elif n_val>cfg.xbar_size/8.0:
-            self.num_access['3n/4'] += 1
-        elif n_val>cfg.xbar_size/16.0:
-            self.num_access['7n/8'] += 1
-        elif n_val>cfg.xbar_size/32.0:
-            self.num_access['15n/16'] += 1
-        elif n_val>cfg.xbar_size/64.0:
-            self.num_access['31n/32'] += 1
-        elif n_val>cfg.xbar_size/128.0:
-            self.num_access['63n/64'] += 1
-        elif n_val>cfg.xbar_size/256.0:
-            self.num_access['127n/128'] += 1
+            self.adc_res = cfg.adc_res-1
+        elif sparsity>12.5:
+            self.num_access['n/4'] += 1
+            self.adc_res = cfg.adc_res-2
+        elif sparsity>6.25:
+            self.num_access['n/8'] += 1
+            self.adc_res = cfg.adc_res-3
+        elif sparsity>3.125:
+            self.num_access['n/16'] += 1
+            self.adc_res = cfg.adc_res-4
+        elif sparsity>1.5625:
+            self.num_access['n/32'] += 1
+            self.adc_res = cfg.adc_res-5
+        elif sparsity>0.78125:
+            self.num_access['n/64'] += 1
+            self.adc_res = cfg.adc_res-6
         else:
-            self.num_access['255n/256'] += 1
+            self.num_access['n/128'] += 1
+            self.adc_res = cfg.adc_res-7
+        if(self.adc_res<0):
+            self.adc_res = 1
 
         return inp
 
diff --git a/src/node_dump.py b/src/node_dump.py
index 0dd27441..281f1d15 100644
--- a/src/node_dump.py
+++ b/src/node_dump.py
@@ -25,8 +25,8 @@ def mem_dump (fid, memfile, name, node = '', tile_id = ''):
             #temp_val = bin2int (memfile[addr], cfg.num_bits)
             if (name == 'EDRAM' and (node != '') and (tile_id != '')): # for EDRAM also show counter/valid
                 fid.write ('valid: ' + str(node.tile_list[tile_id].edram_controller.valid[addr]) \
-                    + ' | counter: ' + str(node.tile_list[tile_id].edram_controller.counter[addr]) + ' | ')
-                fid.write(str(temp_val) + '\n')
+                        + ' | counter: ' + str(node.tile_list[tile_id].edram_controller.counter[addr]) + ' | ')
+            fid.write(str(temp_val) + '\n')
         else: # not printing zero values for ease of view
             temp_val = 0.0
         if (name != 'EDRAM'):
diff --git a/test/mvm_ip_test.py b/test/mvm_ip_test.py
deleted file mode 100644
index 2e58c9c9..00000000
--- a/test/mvm_ip_test.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# API for testing MVM inner product operation
-import sys
-import os
-import numpy as np
-
-root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.insert(0, root_dir)
-
-from src.data_convert import *
-import src.ima as ima
-from src.instrn_proto import *
-import include.config as cfg
-
-#change the core and mvmu id'd here:
-# tile_ID = 2
-# core_ID = 1
-# matrix_ID = 0
-
-for tile_ID in range(2, cfg.num_tile):
-    for core_ID in range(cfg.num_ima):
-        for matrix_ID in range(cfg.num_matrix):
-
-            path = 'testasm/mlp/'
-            wt_path = path +'weights/tile'+ str(tile_ID)+ '/core'+ str(core_ID)+ '/' 
-            inst_file = path + 'tile'+ str(tile_ID)+ '/core_imem'+ str(core_ID)+ '.npy'
-            trace_path = 'traces/mlp/'
-            trace_file = trace_path + 'tile'+ str(tile_ID)+ '/ima_trace'+ str(core_ID)+ '.txt'
-            dump_file = trace_path + 'tile'+ str(tile_ID)+ '/memsim.txt'
-
-            datamem_off = cfg.datamem_off # each matrix has 6 memory spaces (1 for f/b, 2 for d)
-            phy2log_ratio = cfg.phy2log_ratio # ratio of physical to logical xbar
-            
-            if (os.path.exists(wt_path)):  # check if weights for the xbar exist
-                # print ('wtfile exits: ' + 'tile' + str(tile_ID) +' core ' + str(core_ID) + 'matrix ' + str(matrix_ID))
-            
-                xbar_input = ['']*cfg.xbar_size
-                xbar_output = ['']*cfg.xbar_size
-                with open(dump_file, 'r') as file:
-                    lines=file.readlines()
-
-                for i in range (len(lines)):
-                    if(lines[i] == 'Xbar Input Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:f contents\n'):
-                        ip_start=i+1
-                    if(lines[i] == 'Xbar Output Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:f contents\n'):
-                        op_start=i+1
-                        ip_end=i-1
-                    if(lines[i] == 'Xbar Input Memory: imaId:'+ str(core_ID)+ ' matrixId:'+ str(matrix_ID)+ ' mvmu_type:b contents\n'):
-                        op_end=i-1
-
-                # print(ip_start)
-                # print(ip_end)
-                # print(op_start)
-                # print(op_end)
-                # print('Length of input=',ip_end-ip_start+1 )
-                # print('Length of output=',op_end-op_start+1 )
-
-                for j in range (ip_end-ip_start+1):
-                    xbar_input[j] = float(lines[ip_start+j])
-                for j in range (op_end-op_start+1):
-                    xbar_output[j] = float(lines[op_start+j])
-
-                # print(xbar_input)
-                # print(xbar_output)
-
-                ## Testcases for Functionality Debug of MVM (1,2,3,4)
-                ## 1. compare golden output to ima output
-                wt_gold = np.load(wt_path+'log_xbar0.npy')
-                # print(wt_gold)
-                # out_gold = np.dot (ima.dataMem.memfile_float, wt_gold)
-                if(ip_end-ip_start+1 == 128):
-
-                    out_gold = np.dot (np.asarray(xbar_input), wt_gold)
-                    out_exp = np.asarray(xbar_output)
-
-                    # print (out_gold)
-                    # print (out_exp)
-
-                    err = np.tanh(out_gold) - np.tanh(out_exp)
-                    print ("error for tile"+ str(tile_ID) +" core" + str(core_ID) + " matrix" + str(matrix_ID)+ " has mean= " + str(np.average(err)) + " and stdev= " + \
-                            str(np.std(err)))
-                            
-                else:
-                    print("No or less than length 128 input available for tile"+ str(tile_ID) +" core" + str(core_ID) + " matrix" + str(matrix_ID)+".")

From 4302e61902361809af34a85363ed33f1af9353a5 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 03:09:50 -0400
Subject: [PATCH 11/15] Committing after suggested changes in PR

---
 include/config.py    |  2 +-
 include/constants.py |  1 +
 src/dnn_wt_p.py      |  1 +
 src/ima.py           | 55 ++++++++++++++++++++++++--------------------
 src/ima_metrics.py   |  2 +-
 src/ima_modules.py   |  2 +-
 6 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/include/config.py b/include/config.py
index e09ebedc..02e78dd1 100644
--- a/include/config.py
+++ b/include/config.py
@@ -13,7 +13,7 @@
 # One of "Analog", "Digital_V1" or "Digital_V2" 
 # Digital_V1 has compressed inputs (Data+Offset style)
 # Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
-MVMU_ver = "Analog"
+MVMU_ver = "Digital_V2"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
diff --git a/include/constants.py b/include/constants.py
index d16395df..a2cfe755 100644
--- a/include/constants.py
+++ b/include/constants.py
@@ -358,6 +358,7 @@
 
 
 # Chosen latency based on config file - only for components whose latency is parameter dependent
+#xbar_lat = xbar_lat_dict [str(cfg.xbar_bits)][str(cfg.xbar_size)]
 xbar_ip_lat_dict = {'0':0, '90':0, '80':0, '70':0, '60':0, '50':0, '40':0, '30':0, '20':0, '10':0}
 if cfg.MVMU_ver == "Analog":
       for key, value in xbar_ip_lat_dict.items():
diff --git a/src/dnn_wt_p.py b/src/dnn_wt_p.py
index df1b4a08..3d19cb04 100644
--- a/src/dnn_wt_p.py
+++ b/src/dnn_wt_p.py
@@ -28,3 +28,4 @@ def prog_dnn_wt(self, instrnpath, node_dut):
                             wt_temp = np.load(wt_filename)
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
+
diff --git a/src/ima.py b/src/ima.py
index 1759ed19..ea54481a 100644
--- a/src/ima.py
+++ b/src/ima.py
@@ -351,7 +351,7 @@ def do_decode (self, dec_op):
                 assert (self.fd_instrn['r2'] >= datamem_off), 'operand2 for beq comes from data memory'
                 self.de_val1 = self.dataMem.read(self.fd_instrn['r1'])
                 self.de_val2 = self.dataMem.read(self.fd_instrn['r2'])
-                
+
             elif (dec_op == 'alu_int'):
                 self.de_aluop = self.fd_instrn['aluop']
                 self.de_d1 = self.fd_instrn['d1'] # addr for rf
@@ -359,7 +359,7 @@ def do_decode (self, dec_op):
                 assert (self.fd_instrn['r2'] >= datamem_off), 'operand2 for alu_int comes from data memory'
                 self.de_val1 = self.dataMem.read(self.fd_instrn['r1'])
                 self.de_val2 = self.dataMem.read(self.fd_instrn['r2'])
-               
+
             # do nothing for halt/jmp in decode (just propagate to ex when applicable)
 
 
@@ -598,18 +598,21 @@ def inner_product (mat_id, key):
                     # reset the xb out memory before starting to accumulate
                     self.xb_outMem_list[mat_id][key].reset ()
 
-                    xbar_inMem = self.xb_inMem_list[mat_id][key].read_all ()
-                    non_0_val = 0
-                    for i in range(cfg.xbar_size):
-                        if xbar_inMem[i] != '0000000000000000':
-                            non_0_val = non_0_val +1
-                    sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
-                    sparsity_adc = sparsity
-                    if (sparsity%10!=0):
-                        sparsity = sparsity-(sparsity%10)
-                    else:
-                        if (sparsity == 100):
-                            sparsity = sparsity-10
+                    sparsity=0
+                    sparsity_adc=0
+                    if cfg.sparse_opt:
+                        xbar_inMem = self.xb_inMem_list[mat_id][key].read_all ()
+                        non_0_val = 0
+                        for i in range(cfg.xbar_size):
+                            if xbar_inMem[i] != '0000000000000000':
+                                non_0_val = non_0_val +1
+                        sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
+                        sparsity_adc = sparsity
+                        if (sparsity%10!=0):
+                            sparsity = sparsity-(sparsity%10)
+                        else:
+                            if (sparsity == 100):
+                                sparsity = sparsity-10
 
                     ## Loop to cover all bits of inputs
                     for k in xrange (int(math.ceil(cfg.input_prec / cfg.dac_res))): #quantization affects the # of streams
@@ -829,17 +832,19 @@ def xbComputeLatency_Digital (self):
             if (cfg.inference):
                 for p in xrange(cfg.num_matrix):
                     if self.de_xb_nma[p]:
-                        xbar_inMem = self.xb_inMem_list[p]['f'].read_all ()
-                        non_0_val = 0
-                        for i in range(cfg.xbar_size):
-                            if xbar_inMem[i] != '0000000000000000':
-                                non_0_val = non_0_val +1
-                        sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
-                        if (sparsity%10!=0):
-                            sparsity = sparsity-(sparsity%10)
-                        else:
-                            if (sparsity == 100):
-                                sparsity = sparsity-10
+                        sparsity=0
+                        if cfg.sparse_opt:
+                            xbar_inMem = self.xb_inMem_list[p]['f'].read_all ()
+                            non_0_val = 0
+                            for i in range(cfg.xbar_size):
+                                if xbar_inMem[i] != '0000000000000000':
+                                    non_0_val = non_0_val +1
+                            sparsity = int((cfg.xbar_size-non_0_val)*100.0/cfg.xbar_size)
+                            if (sparsity%10!=0):
+                                sparsity = sparsity-(sparsity%10)
+                            else:
+                                if (sparsity == 100):
+                                    sparsity = sparsity-10
                         mvm_lat_temp += digi_param.Digital_xbar_lat_dict[cfg.MVMU_ver][str(cfg.xbar_size)][str(sparsity)]
             return mvm_lat_temp
 
diff --git a/src/ima_metrics.py b/src/ima_metrics.py
index f7ec20ac..c0314e9d 100644
--- a/src/ima_metrics.py
+++ b/src/ima_metrics.py
@@ -66,7 +66,7 @@ def compute_pow_dyn ():
         dyn_pow += (cfg.num_matrix*3) * param.xbar_outMem_pow_dyn # xbar_outMem (1 OR for 8 xbars - 16 bit weights, 2 bit xbars)
         dyn_pow += (cfg.num_matrix*4) * param.xbar_ip_pow_dyn # xbar ip power considred as ip>op power
     else:
-        dyn_pow += (cfg.num_matrix*2) * param.xbar_ip_pow_dyn # xbar ip power considred as ip>op power
+        dyn_pow += (cfg.num_matrix*2) * param.xbar_ip_pow_dyn # xbar ip power considred as ip>op power # d-xbar are not needed in Digital MVMUs only f and b are there
     dyn_pow += (cfg.num_matrix*3) * (param.xbar_inMem_pow_dyn_write + param.xbar_inMem_pow_dyn_read/cfg.xbar_size) # xbar_inMem - num_xbar * dac_res bits will be
         #   read from xb_inMem in an interval that equals xbar_access time
     dyn_pow += param.instrnMem_pow_dyn # instrnMem
diff --git a/src/ima_modules.py b/src/ima_modules.py
index a7dac03f..8080896a 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -255,7 +255,7 @@ def propagate (self, inp):
         return self.real2bin (inp, num_bits)
 
     # HACK - until propagate doesn't have correct analog functionality
-    def propagate_dummy (self, inp, sparsity):
+    def propagate_dummy (self, inp, sparsity = 0):
         if sparsity>50:
             self.num_access['n'] += 1
             self.adc_res = cfg.adc_res

From 9f506352b9d97791e0f26aad02d07c0794a59ba6 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 03:15:06 -0400
Subject: [PATCH 12/15] Committing after suggested changes in PR

---
 src/dnn_wt_p.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dnn_wt_p.py b/src/dnn_wt_p.py
index 3d19cb04..820d5ee3 100644
--- a/src/dnn_wt_p.py
+++ b/src/dnn_wt_p.py
@@ -29,3 +29,4 @@ def prog_dnn_wt(self, instrnpath, node_dut):
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['f'][l].program(wt_temp)
                             node_dut.tile_list[i].ima_list[j].matrix_list[k]['b'][l].program(wt_temp)
 
+

From fd778f552f849bf110255345a4bd28c7ea7e2ccc Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 03:25:23 -0400
Subject: [PATCH 13/15] Updated how_to_run.md

---
 how_to_run.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/how_to_run.md b/how_to_run.md
index f9a06abd..f4bcce92 100644
--- a/how_to_run.md
+++ b/how_to_run.md
@@ -82,6 +82,9 @@ cp -R <example> puma-simulator/test/testasm/
 
 #### 6.1 - Setup config file :
 
+Use the appropriate config file from ```puma-simulator/include/example-configs/(config file name)```.
+For example: for mlp use ```config-mlp.py```.
+Copy the file to ```puma-simulator/include/``` and rename it to ```config.py```. 
 Config file - ```puma-simulator/include/config.py```.
 
 Update ```num_tile_compute``` in config file based on the number of tiles generated in your ```<example>``` model.

From 05b48982d1588776d929bdffff93e52a53984a6a Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 03:28:03 -0400
Subject: [PATCH 14/15] Updated how_to_run.md

---
 how_to_run.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/how_to_run.md b/how_to_run.md
index f4bcce92..bf1dc791 100644
--- a/how_to_run.md
+++ b/how_to_run.md
@@ -85,7 +85,6 @@ cp -R <example> puma-simulator/test/testasm/
 Use the appropriate config file from ```puma-simulator/include/example-configs/(config file name)```.
 For example: for mlp use ```config-mlp.py```.
 Copy the file to ```puma-simulator/include/``` and rename it to ```config.py```. 
-Config file - ```puma-simulator/include/config.py```.
 
 Update ```num_tile_compute``` in config file based on the number of tiles generated in your ```<example>``` model.
 

From af3516a8d307919931532a3598820004025c49e8 Mon Sep 17 00:00:00 2001
From: Deepika Sharma <sharm444@.purdue.edu>
Date: Sat, 13 Jun 2020 16:08:57 -0400
Subject: [PATCH 15/15] corrected sparsity comparison function in adc module

---
 include/config.py  |  2 +-
 src/ima_modules.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/config.py b/include/config.py
index 02e78dd1..e09ebedc 100644
--- a/include/config.py
+++ b/include/config.py
@@ -13,7 +13,7 @@
 # One of "Analog", "Digital_V1" or "Digital_V2" 
 # Digital_V1 has compressed inputs (Data+Offset style)
 # Digital_V2 has uncompressed inputs (Skips computations for 0 activation)
-MVMU_ver = "Digital_V2"
+MVMU_ver = "Analog"
 
 ## Operand precision (fixed point allowed only): num_bits = int_bits + frac_bits
 num_bits = 16
diff --git a/src/ima_modules.py b/src/ima_modules.py
index 8080896a..b0d5b2d0 100644
--- a/src/ima_modules.py
+++ b/src/ima_modules.py
@@ -256,25 +256,25 @@ def propagate (self, inp):
 
     # HACK - until propagate doesn't have correct analog functionality
     def propagate_dummy (self, inp, sparsity = 0):
-        if sparsity>50:
+        if sparsity<50:
             self.num_access['n'] += 1
             self.adc_res = cfg.adc_res
-        elif sparsity>25:
+        elif sparsity<75:
             self.num_access['n/2'] += 1
             self.adc_res = cfg.adc_res-1
-        elif sparsity>12.5:
+        elif sparsity<87.5:
             self.num_access['n/4'] += 1
             self.adc_res = cfg.adc_res-2
-        elif sparsity>6.25:
+        elif sparsity<93.75:
             self.num_access['n/8'] += 1
             self.adc_res = cfg.adc_res-3
-        elif sparsity>3.125:
+        elif sparsity<96.875:
             self.num_access['n/16'] += 1
             self.adc_res = cfg.adc_res-4
-        elif sparsity>1.5625:
+        elif sparsity<98.4375:
             self.num_access['n/32'] += 1
             self.adc_res = cfg.adc_res-5
-        elif sparsity>0.78125:
+        elif sparsity<99.21875:
             self.num_access['n/64'] += 1
             self.adc_res = cfg.adc_res-6
         else: