From 04fd18e3ebe2e3240474f5208258e2fd8ea48dc8 Mon Sep 17 00:00:00 2001
From: auphelia
Date: Tue, 1 Aug 2023 10:50:58 +0100
Subject: [PATCH 01/18] [CustomOp] Remove outdated stream depth pragma from decoupled mode

---
 src/finn/custom_op/fpgadataflow/matrixvectoractivation.py | 3 ---
 src/finn/custom_op/fpgadataflow/vectorvectoractivation.py | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index 204a41e21c..7c180534b1 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -1274,9 +1274,6 @@ def pragmas(self):
             self.code_gen_dict["$PRAGMAS$"].append(
                 "#pragma HLS INTERFACE axis port=weights_" + self.hls_sname()
             )
-            self.code_gen_dict["$PRAGMAS$"].append(
-                "#pragma HLS stream depth=8 variable=weights_" + self.hls_sname()
-            )
 
         else:
             raise Exception(
diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index f817751852..58a85b29ee 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -979,9 +979,6 @@ def pragmas(self):
             self.code_gen_dict["$PRAGMAS$"].append(
                 "#pragma HLS INTERFACE axis port=weights_" + self.hls_sname()
             )
-            self.code_gen_dict["$PRAGMAS$"].append(
-                "#pragma HLS stream depth=8 variable=weights_" + self.hls_sname()
-            )
         else:
             raise Exception(
                 """Please set mem_mode to "const", "decoupled", or external,

From 2c929b959ab1e5696a4d982bf6f35a40d72a61eb Mon Sep 17 00:00:00 2001
From: Felix Jentzsch
Date: Tue, 1 Aug 2023 19:33:49 +0200
Subject: [PATCH 02/18] Fix verilator_fifosim for RTL SWG component

---
 src/finn/util/pyverilator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/finn/util/pyverilator.py b/src/finn/util/pyverilator.py
index 86cf2eed14..73c8755bfb 100644
--- a/src/finn/util/pyverilator.py
+++ b/src/finn/util/pyverilator.py
@@ -188,7 +188,8 @@ def verilator_fifosim(model, n_inputs, max_iters=100000000):
     xpm_memory = f"{vivado_path}/data/ip/xpm/xpm_memory/hdl/xpm_memory.sv"
     xpm_cdc = f"{vivado_path}/data/ip/xpm/xpm_cdc/hdl/xpm_cdc.sv"
     xpm_fifo = f"{vivado_path}/data/ip/xpm/xpm_fifo/hdl/xpm_fifo.sv"
-    verilog_file_arg = ["finn_design_wrapper.v", xpm_memory, xpm_cdc, xpm_fifo]
+    swg_pkg = os.environ["FINN_ROOT"] + "/finn-rtllib/swg/swg_pkg.sv"
+    verilog_file_arg = [swg_pkg, "finn_design_wrapper.v", xpm_memory, xpm_cdc, xpm_fifo]
 
     verilator_args = [
         "perl",

From d2c682759de2c874116946000fdf207a2cdab5c9 Mon Sep 17 00:00:00 2001
From: auphelia
Date: Wed, 2 Aug 2023 16:06:54 +0100
Subject: [PATCH 03/18] [Util] Update Alveo platforms

---
 src/finn/util/basic.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 3bc5b803db..05f748d3bb 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -62,10 +62,10 @@
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
 
 alveo_default_platform = dict()
-alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_201920_3"
-alveo_default_platform["U200"] = "xilinx_u200_xdma_201830_2"
-alveo_default_platform["U250"] = "xilinx_u250_gen3x16_xdma_2_1_202010_1"
-alveo_default_platform["U280"] = "xilinx_u280_xdma_201920_3"
+alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"
+alveo_default_platform["U200"] = "xilinx_u200_gen3x16_xdma_2_202110_1"
+alveo_default_platform["U250"] = "xilinx_u250_gen3x16_xdma_4_1_202210_1"
+alveo_default_platform["U280"] = "xilinx_u280_gen3x16_xdma_1_202211_1"
 
 
 def get_rtlsim_trace_depth():

From 121e893f73edabc370a102f3227b322db918e253 Mon Sep 17 00:00:00 2001
From: icolbert
Date: Tue, 9 May 2023 14:11:09 -0700
Subject: [PATCH 04/18] [MVAU] Handling minimize acc bw for no-activation nodes

---
 .../fpgadataflow/matrixvectoractivation.py | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index 7c180534b1..2f99ddca77 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -655,14 +655,28 @@ def minimize_accumulator_width(self, model):
             adt = DataType.get_smallest_possible(-acc_max - 1)
         else:
             adt = DataType.get_smallest_possible(acc_max)
-        # if this is the last node in the graph, then ensure the datatype is
-        # divisibly by 8 bits
+        # if this is the last node in the graph, then ensure the datatype of the
+        # output is divisible by 8
         if model.find_direct_successors(self.onnx_node) is None:
-            bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
-            new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-            adt = DataType[new_adt_name]
-            # for no-activation nodes, output dt = acc dt
-            self.set_nodeattr("outputDataType", adt.name)
+            if self.get_nodeattr("noActivation"):
+                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
+                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+                adt = DataType[new_adt_name]
+                # for no-activation nodes, output dt = acc dt
+                self.set_nodeattr("outputDataType", adt.name)
+            else:
+                odt = DataType[self.get_nodeattr("outputDataType")]
+                bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
+                new_odt_name = odt.name.replace(str(odt.bitwidth()), str(bw))
+                if bw != odt.bitwidth():
+                    warn_str = "outputDataType changing for %s: %s -> %s " % (
+                        self.onnx_node.name,
+                        odt.name,
+                        new_odt_name,
+                    )
+                    warnings.warn(warn_str)
+                odt = DataType[new_odt_name]
+                self.set_nodeattr("outputDataType", odt.name)
         self.set_nodeattr("accDataType", adt.name)
         return DataType[self.get_nodeattr("accDataType")]
 

From 93d9cdb3b38d706222cc122d17638b9a9828a24e Mon Sep 17 00:00:00 2001
From: icolbert
Date: Tue, 9 May 2023 14:11:35 -0700
Subject: [PATCH 05/18] [VVAU] Handling minimize acc bw for no-activation nodes

---
 .../fpgadataflow/vectorvectoractivation.py | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index 58a85b29ee..773c49915f 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -182,14 +182,28 @@ def minimize_accumulator_width(self, model):
             adt = DataType.get_smallest_possible(-acc_max - 1)
         else:
             adt = DataType.get_smallest_possible(acc_max)
-        # if this is the last node in the graph, then ensure the datatype is
-        # divisibly by 8 bits
+        # if this is the last node in the graph, then ensure the datatype of the
+        # output is divisible by 8
         if model.find_direct_successors(self.onnx_node) is None:
-            bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
-            new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-            adt = DataType[new_adt_name]
-            # for no-activation nodes, output dt = acc dt
-            self.set_nodeattr("outputDataType", adt.name)
+            if self.get_nodeattr("noActivation"):
+                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
+                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+                adt = DataType[new_adt_name]
+                # for no-activation nodes, output dt = acc dt
+                self.set_nodeattr("outputDataType", adt.name)
+            else:
+                odt = DataType[self.get_nodeattr("outputDataType")]
+                bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
+                new_odt_name = odt.name.replace(str(odt.bitwidth()), str(bw))
+                if bw != odt.bitwidth():
+                    warn_str = "outputDataType changing for %s: %s -> %s " % (
+                        self.onnx_node.name,
+                        odt.name,
+                        new_odt_name,
+                    )
+                    warnings.warn(warn_str)
+                odt = DataType[new_odt_name]
+                self.set_nodeattr("outputDataType", odt.name)
         self.set_nodeattr("accDataType", adt.name)
         return DataType[self.get_nodeattr("accDataType")]
 

From 0123d2629d0c89f384a5243b22a96a5a33daeac7 Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 31 May 2023 09:36:11 -0700
Subject: [PATCH 06/18] [MVAU] Fixing to maintain prior functionality

---
 src/finn/custom_op/fpgadataflow/matrixvectoractivation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index 2f99ddca77..cef336bdd4 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -667,7 +667,10 @@ def minimize_accumulator_width(self, model):
             else:
                 odt = DataType[self.get_nodeattr("outputDataType")]
                 bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-                new_odt_name = odt.name.replace(str(odt.bitwidth()), str(bw))
+                # NOTE: keeping previous functionality of converting outputDataType
+                # to accDataType on the last node. May want to preserve outputDataType
+                # in the future by replacing adt with odt below.
+                new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
                 if bw != odt.bitwidth():
                     warn_str = "outputDataType changing for %s: %s -> %s " % (
                         self.onnx_node.name,

From aa345e333df5c81ab28f134063b67d4ca7ccb14f Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 31 May 2023 09:36:24 -0700
Subject: [PATCH 07/18] [VVAU] Fixing to maintain prior functionality

---
 src/finn/custom_op/fpgadataflow/vectorvectoractivation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index 773c49915f..29bf9651f0 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -194,7 +194,10 @@ def minimize_accumulator_width(self, model):
             else:
                 odt = DataType[self.get_nodeattr("outputDataType")]
                 bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-                new_odt_name = odt.name.replace(str(odt.bitwidth()), str(bw))
+                # NOTE: keeping previous functionality of converting outputDataType
+                # to accDataType on the last node. May want to preserve outputDataType
+                # in the future by replacing adt with odt below.
+                new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
                 if bw != odt.bitwidth():
                     warn_str = "outputDataType changing for %s: %s -> %s " % (
                         self.onnx_node.name,
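
Both fixes above rely on the convention that a FINN DataType name embeds its bit width (e.g. INT26), so rounding a type up to a byte boundary is done by rewriting the name string. Below is a minimal sketch of that idiom; the helper is re-implemented here purely as a stand-in for FINN's roundup_to_integer_multiple, and the 26-bit accumulator is made up:

    import math

    def roundup_to_integer_multiple(x, factor):
        # round x up to the nearest integer multiple of factor
        return int(math.ceil(x / factor) * factor)

    adt_name, adt_bits = "INT26", 26  # hypothetical accumulator datatype
    bw = roundup_to_integer_multiple(adt_bits, 8)  # -> 32
    new_adt_name = adt_name.replace(str(adt_bits), str(bw))
    assert new_adt_name == "INT32"

Note that the string replace only works for names that carry a numeric width, which is why the NOTE comments above flag Bipolar and Ternary datatypes as potentially problematic.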
From 3f05b634b6a837159bb4a0f1ccc83b97f1705e8f Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 31 May 2023 09:38:25 -0700
Subject: [PATCH 08/18] Updating unit test to check correct functionality

---
 tests/fpgadataflow/test_minimize_bit_width.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/fpgadataflow/test_minimize_bit_width.py b/tests/fpgadataflow/test_minimize_bit_width.py
index 805578018c..ad7b1cdf86 100644
--- a/tests/fpgadataflow/test_minimize_bit_width.py
+++ b/tests/fpgadataflow/test_minimize_bit_width.py
@@ -297,10 +297,11 @@ def test_minimize_accumulator_width(wdt: DataType, idt: DataType, tdt: DataType,
     assert cur_adt.bitwidth() <= exp_adt.bitwidth(), "Mismatched accumulation data types"
     if model.find_direct_successors(inst.onnx_node) is None:
         assert (
-            cur_adt.bitwidth() % 8
-        ) == 0, "bit width of last node needs to be divisible by 8"
-        assert (
-            cur_adt.bitwidth() == cur_odt.bitwidth()
-        ), "outputDataType and accDataType should be equal"
+            cur_odt.bitwidth() % 8
+        ) == 0, "output bit width of last node needs to be divisible by 8"
+        if inst.get_nodeattr("noActivation"):
+            assert (
+                cur_adt.bitwidth() == cur_odt.bitwidth()
+            ), "outputDataType and accDataType should be equal"
     else:
         assert cur_odt.bitwidth() == idt.bitwidth(), "outputDataType should not be changed"
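
For orientation, the assertions being reworked here run after the minimization pass has rewritten the model. A sketch of that flow, assuming FINN's MinimizeAccumulatorWidth transformation and a hypothetical pre-built model file:

    from qonnx.core.modelwrapper import ModelWrapper
    from finn.transformation.fpgadataflow.minimize_accumulator_width import (
        MinimizeAccumulatorWidth,
    )

    # hypothetical dataflow model containing MVAU/VVAU nodes
    model = ModelWrapper("dataflow_model.onnx")
    model = model.transform(MinimizeAccumulatorWidth())
    # the pass calls minimize_accumulator_width() on each node, after which
    # accDataType and outputDataType carry the widths the test asserts on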
From d853ab50ab2c2811f9001b003d9f527082100bfe Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 7 Jun 2023 10:54:46 -0700
Subject: [PATCH 09/18] [MVAU] updating minimize_accumulator logic

---
 .../fpgadataflow/matrixvectoractivation.py | 50 +++++++++++--------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index cef336bdd4..1d6a6f5576 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -655,31 +655,37 @@ def minimize_accumulator_width(self, model):
             adt = DataType.get_smallest_possible(-acc_max - 1)
         else:
             adt = DataType.get_smallest_possible(acc_max)
-        # if this is the last node in the graph, then ensure the datatype of the
-        # output is divisible by 8
-        if model.find_direct_successors(self.onnx_node) is None:
-            if self.get_nodeattr("noActivation"):
+
+        is_last_node = model.find_direct_successors(self.onnx_node) is None
+
+        # if no activation, output and accumulator datatypes are the same
+        if self.get_nodeattr("noActivation"):
+            # if last node, we need to round the accumulator datatype (adt)
+            # up to the nearest 8 and set the output datatype (odt)
+            if is_last_node:
                 bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
                 new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
                 adt = DataType[new_adt_name]
-                # for no-activation nodes, output dt = acc dt
-                self.set_nodeattr("outputDataType", adt.name)
-            else:
-                odt = DataType[self.get_nodeattr("outputDataType")]
-                bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-                # NOTE: keeping previous functionality of converting outputDataType
-                # to accDataType on the last node. May want to preserve outputDataType
-                # in the future by replacing adt with odt below.
-                new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-                if bw != odt.bitwidth():
-                    warn_str = "outputDataType changing for %s: %s -> %s " % (
-                        self.onnx_node.name,
-                        odt.name,
-                        new_odt_name,
-                    )
-                    warnings.warn(warn_str)
-                odt = DataType[new_odt_name]
-                self.set_nodeattr("outputDataType", odt.name)
+            self.set_nodeattr("outputDataType", adt.name)
+
+        # if last node has activation, then ensure the output datatype is divisible by 8
+        if not self.get_nodeattr("noActivation") and is_last_node:
+            odt = DataType[self.get_nodeattr("outputDataType")]
+            bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
+            # NOTE: keeping previous functionality of converting odt to adt on the last
+            # node, could preserve odt in the future by replacing adt with odt. This
+            # may yield unfavorable functionality for Bipolar and/or Ternary datatypes
+            new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+            if bw != odt.bitwidth():
+                warn_str = "outputDataType changing for %s: %s -> %s " % (
+                    self.onnx_node.name,
+                    odt.name,
+                    new_odt_name,
+                )
+                warnings.warn(warn_str)
+            odt = DataType[new_odt_name]
+            self.set_nodeattr("outputDataType", odt.name)
+
         self.set_nodeattr("accDataType", adt.name)
         return DataType[self.get_nodeattr("accDataType")]
 
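The restructured control flow above (mirrored for the VVAU in the next patch) separates the two last-node cases. A simplified sketch of the resulting decision table, using plain bit widths in place of DataType objects and deliberately ignoring the odt-to-adt name conversion quirk that the NOTE comment flags:

    def last_node_bitwidths(acc_bits, out_bits, no_activation, is_last_node):
        # returns (accumulator width, output width) after the logic above
        if no_activation:
            if is_last_node:
                acc_bits = 8 * ((acc_bits + 7) // 8)  # pad to a byte multiple
            return acc_bits, acc_bits  # output dt = acc dt
        if is_last_node:
            out_bits = 8 * ((out_bits + 7) // 8)  # only the output is padded
        return acc_bits, out_bits

    assert last_node_bitwidths(27, 4, True, True) == (32, 32)
    assert last_node_bitwidths(27, 4, False, True) == (27, 8)
    assert last_node_bitwidths(27, 4, False, False) == (27, 4)
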
From a3ee3a37289ceef2395c1867aca5edbe8813a27b Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 7 Jun 2023 10:54:57 -0700
Subject: [PATCH 10/18] [VVAU] updating minimize_accumulator logic

---
 .../fpgadataflow/vectorvectoractivation.py | 51 ++++++++++---------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index 29bf9651f0..09e749be57 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -182,32 +182,37 @@ def minimize_accumulator_width(self, model):
             adt = DataType.get_smallest_possible(-acc_max - 1)
         else:
             adt = DataType.get_smallest_possible(acc_max)
-        # if this is the last node in the graph, then ensure the datatype of the
-        # output is divisible by 8
-        if model.find_direct_successors(self.onnx_node) is None:
-            if self.get_nodeattr("noActivation"):
+
+        is_last_node = model.find_direct_successors(self.onnx_node) is None
+
+        # if no activation, output and accumulator datatypes are the same
+        if self.get_nodeattr("noActivation"):
+            # if last node, we need to round the accumulator datatype (adt)
+            # up to the nearest 8 and set the output datatype (odt)
+            if is_last_node:
                 bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
                 new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
                 adt = DataType[new_adt_name]
-                # for no-activation nodes, output dt = acc dt
-                self.set_nodeattr("outputDataType", adt.name)
-            else:
-                odt = DataType[self.get_nodeattr("outputDataType")]
-                bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-                # NOTE: keeping previous functionality of converting outputDataType
-                # to accDataType on the last node. May want to preserve outputDataType
-                # in the future by replacing adt with odt below.
-                new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-                if bw != odt.bitwidth():
-                    warn_str = "outputDataType changing for %s: %s -> %s " % (
-                        self.onnx_node.name,
-                        odt.name,
-                        new_odt_name,
-                    )
-                    warnings.warn(warn_str)
-                odt = DataType[new_odt_name]
-                self.set_nodeattr("outputDataType", odt.name)
-        self.set_nodeattr("accDataType", adt.name)
+            self.set_nodeattr("outputDataType", adt.name)
+
+        # if last node has activation, then ensure the output datatype is divisible by 8
+        if not self.get_nodeattr("noActivation") and is_last_node:
+            odt = DataType[self.get_nodeattr("outputDataType")]
+            bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
+            # NOTE: keeping previous functionality of converting odt to adt on the last
+            # node, could preserve odt in the future by replacing adt with odt. This
+            # may yield unfavorable functionality for Bipolar and/or Ternary datatypes
+            new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+            if bw != odt.bitwidth():
+                warn_str = "outputDataType changing for %s: %s -> %s " % (
+                    self.onnx_node.name,
+                    odt.name,
+                    new_odt_name,
+                )
+                warnings.warn(warn_str)
+            odt = DataType[new_odt_name]
+            self.set_nodeattr("outputDataType", odt.name)
+
         return DataType[self.get_nodeattr("accDataType")]
 
     def minimize_weight_bit_width(self, model):

From f172adc8fcc19338756b8b936a78a2de5620e142 Mon Sep 17 00:00:00 2001
From: icolbert
Date: Thu, 8 Jun 2023 10:37:14 -0700
Subject: [PATCH 11/18] [VVAU] fixing bug with setting accDataType

---
 src/finn/custom_op/fpgadataflow/vectorvectoractivation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index 09e749be57..af2591f703 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -213,6 +213,7 @@ def minimize_accumulator_width(self, model):
             odt = DataType[new_odt_name]
             self.set_nodeattr("outputDataType", odt.name)
 
+        self.set_nodeattr("accDataType", adt.name)
         return DataType[self.get_nodeattr("accDataType")]
 
     def minimize_weight_bit_width(self, model):

From 1e5cbc8d65690f8c3c684506fd5f3778e017a027 Mon Sep 17 00:00:00 2001
From: icolbert
Date: Thu, 8 Jun 2023 10:37:59 -0700
Subject: [PATCH 12/18] Fixing test_minimize_bit_width unit test

---
 tests/fpgadataflow/test_minimize_bit_width.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/tests/fpgadataflow/test_minimize_bit_width.py b/tests/fpgadataflow/test_minimize_bit_width.py
index ad7b1cdf86..0427bbd4d8 100644
--- a/tests/fpgadataflow/test_minimize_bit_width.py
+++ b/tests/fpgadataflow/test_minimize_bit_width.py
@@ -294,14 +294,12 @@ def test_minimize_accumulator_width(wdt: DataType, idt: DataType, tdt: DataType,
     # bit width minimization logic in the MVAU and VVAU is exact and should be
     # less than or equal to this calculation
     exp_adt = calculate_accumulator_bit_width(inst, model)
-    assert cur_adt.bitwidth() <= exp_adt.bitwidth(), "Mismatched accumulation data types"
-    if model.find_direct_successors(inst.onnx_node) is None:
+    assert (
+        cur_adt.bitwidth() <= exp_adt.bitwidth()
+    ), "Mismatched accumulation data types"
+
+    # if there is no activation, outputDataType = accDataType
+    if inst.get_nodeattr("noActivation"):
         assert (
-            cur_odt.bitwidth() % 8
-        ) == 0, "output bit width of last node needs to be divisible by 8"
-        if inst.get_nodeattr("noActivation"):
-            assert (
-                cur_adt.bitwidth() == cur_odt.bitwidth()
-            ), "outputDataType and accDataType should be equal"
-    else:
-        assert cur_odt.bitwidth() == idt.bitwidth(), "outputDataType should not be changed"
+            cur_adt.bitwidth() == cur_odt.bitwidth()
+        ), "outputDataType and accDataType should be equal"

From d9e4654c79b4461189fe6381163f17eaf8037597 Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 14 Jun 2023 18:20:37 -0700
Subject: [PATCH 13/18] [MVAU] Updating minimize_accumulator_width logic

---
 .../fpgadataflow/matrixvectoractivation.py | 92 ++++++------------
 1 file changed, 29 insertions(+), 63 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index cef336bdd4..1d6a6f5576 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -589,11 +589,14 @@ def minimize_accumulator_width(self, model):
         # for the bipolar case they need to be converted to bipolar
         if self.get_nodeattr("binaryXnorMode"):
             weights = 2 * weights - 1
+
+        thresholds = None
         if len(self.onnx_node.input) > 2:
             thresholds = model.get_initializer(self.onnx_node.input[2])
-        else:
-            thresholds = None
+
         idt = self.get_input_datatype()
+
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
         # if runtime-writeable weights, then the values of the weights can
         # change and we need to use the worst-case values from the datatypes
         if self.get_nodeattr("runtime_writeable_weights"):
@@ -604,11 +607,7 @@ def minimize_accumulator_width(self, model):
             upper_range = calculate_matvec_accumulator_range(upper_worst, idt)
             acc_min = min(min(lower_range), min(upper_range))
             acc_max = max(max(upper_range), max(upper_range))
-        # if not runtime-writeable weights, then we can calculate the min
-        # and max values of the accumulation range using knowledge of the
-        # weights and input data types since they are fixed
-        else:
-            (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+
         # if the thresholds can be used to determine range, then adjust the range
         # according to the known values of the thresholds
         if thresholds is not None:
@@ -617,76 +616,43 @@ def minimize_accumulator_width(self, model):
             min_threshold = thresholds.min()
             max_threshold = thresholds.max()
             # clip threshold values
-            clip_upper = None
-            clip_lower = None
-            if max_threshold > acc_max + 1:
-                clip_upper = acc_max + 1
-            if min_threshold < acc_min:
-                clip_lower = acc_min
-            if (clip_lower is not None) or (clip_upper is not None):
+            if max_threshold > acc_max or min_threshold < acc_min:
                 warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name)
-                thresholds = np.clip(thresholds, clip_lower, clip_upper)
+                thresholds = np.clip(thresholds, acc_min, acc_max)
                 model.set_initializer(self.onnx_node.input[2], thresholds)
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                 min_threshold = thresholds.min()
                 max_threshold = thresholds.max()
-            # get range required by threshold values
-            tdt_min = min(acc_min, min_threshold)
-            tdt_max = max(acc_max, max_threshold)
-            if tdt_min < 0:
-                if abs(tdt_min) > tdt_max:
-                    tdt = DataType.get_smallest_possible(tdt_min)
-                else:
-                    tdt = DataType.get_smallest_possible(-tdt_max - 1)
-            else:
-                tdt = DataType.get_smallest_possible(tdt_max)
-            assert np.vectorize(tdt.allowed)(
+            acc_min = min(min_threshold, acc_min)
+            acc_max = max(max_threshold, acc_max)
+
+        # if the acc_range is always greater than 0, then acc_max <= 2^P - 1
+        if acc_min >= 0:
+            acc_bit_width = np.log2(acc_max + 1)
+            acc_bit_width = math.ceil(acc_bit_width)
+            adt = DataType[f"UINT{acc_bit_width}"]
+        # if the acc_range is signed, then acc_min >= -2^{P-1} and acc_max <=
+        # 2^{P - 1} - 1, which means 2^{P - 1} >= max(-acc_min, 1 + acc_max)
+        else:
+            _acc_max = max(-acc_min, 1 + acc_max)
+            acc_bit_width = np.log2(_acc_max) + 1
+            acc_bit_width = math.ceil(acc_bit_width)
+            adt = DataType[f"INT{acc_bit_width}"]
+
+        # if activation, assert that the thresholds can be expressed with adt
+        if thresholds is not None:
+            assert np.vectorize(adt.allowed)(
                 threshold_tensor
             ).all(), "Thresholds in %s can't be expressed with type %s" % (
                 self.onnx_node.name,
-                str(tdt),
+                str(adt),
             )
-            adt = tdt  # Set activation datatype to the threshold datatype
-        else:
-            if acc_min < 0:
-                if abs(acc_min) > acc_max:
-                    adt = DataType.get_smallest_possible(acc_min)
-                else:
-                    adt = DataType.get_smallest_possible(-acc_max - 1)
-            else:
-                adt = DataType.get_smallest_possible(acc_max)
-
-        is_last_node = model.find_direct_successors(self.onnx_node) is None
 
         # if no activation, output and accumulator datatypes are the same
         if self.get_nodeattr("noActivation"):
-            # if last node, we need to round the accumulator datatype (adt)
-            # up to the nearest 8 and set the output datatype (odt)
-            if is_last_node:
-                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
-                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-                adt = DataType[new_adt_name]
             self.set_nodeattr("outputDataType", adt.name)
-
-        # if last node has activation, then ensure the output datatype is divisible by 8
-        if not self.get_nodeattr("noActivation") and is_last_node:
-            odt = DataType[self.get_nodeattr("outputDataType")]
-            bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-            # NOTE: keeping previous functionality of converting odt to adt on the last
-            # node, could preserve odt in the future by replacing adt with odt. This
-            # may yield unfavorable functionality for Bipolar and/or Ternary datatypes
-            new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-            if bw != odt.bitwidth():
-                warn_str = "outputDataType changing for %s: %s -> %s " % (
-                    self.onnx_node.name,
-                    odt.name,
-                    new_odt_name,
-                )
-                warnings.warn(warn_str)
-            odt = DataType[new_odt_name]
-            self.set_nodeattr("outputDataType", odt.name)
-
         self.set_nodeattr("accDataType", adt.name)
+
        return DataType[self.get_nodeattr("accDataType")]
 
     def minimize_weight_bit_width(self, model):

From 4dc169463ec0586380d2f19b1ae39e3c7d64955d Mon Sep 17 00:00:00 2001
From: icolbert
Date: Wed, 14 Jun 2023 18:20:48 -0700
Subject: [PATCH 14/18] [VVAU] Updating minimize_accumulator_width logic

---
 .../fpgadataflow/vectorvectoractivation.py | 87 ++++++------------
 1 file changed, 26 insertions(+), 61 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index af2591f703..035b6f28ec 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -121,6 +121,8 @@ def minimize_accumulator_width(self, model):
         else:
             thresholds = None
         idt = self.get_input_datatype()
+
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
         # if runtime-writeable weights, then the values of the weights can
         # change and we need to use the worst-case values from the datatypes
         if self.get_nodeattr("runtime_writeable_weights"):
@@ -131,11 +133,7 @@ def minimize_accumulator_width(self, model):
             upper_range = calculate_matvec_accumulator_range(upper_worst, idt)
             acc_min = min(min(lower_range), min(upper_range))
             acc_max = max(max(upper_range), max(upper_range))
-        # if not runtime-writeable weights, then we can calculate the min
-        # and max values of the accumulation range using knowledge of the
-        # weights and input data types since they are fixed
-        else:
-            (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+
         # if the thresholds can be used to determine range, then adjust the range
         # according to the known values of the thresholds
         if thresholds is not None:
@@ -144,76 +142,43 @@ def minimize_accumulator_width(self, model):
             min_threshold = thresholds.min()
             max_threshold = thresholds.max()
             # clip threshold values
-            clip_upper = None
-            clip_lower = None
-            if max_threshold > acc_max + 1:
-                clip_upper = acc_max + 1
-            if min_threshold < acc_min:
-                clip_lower = acc_min
-            if (clip_lower is not None) or (clip_upper is not None):
+            if max_threshold > acc_max or min_threshold < acc_min:
                 warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name)
-                thresholds = np.clip(thresholds, clip_lower, clip_upper)
+                thresholds = np.clip(thresholds, acc_min, acc_max)
                 model.set_initializer(self.onnx_node.input[2], thresholds)
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                 min_threshold = thresholds.min()
                 max_threshold = thresholds.max()
-            # get range required by threshold values
-            tdt_min = min(acc_min, min_threshold)
-            tdt_max = max(acc_max, max_threshold)
-            if tdt_min < 0:
-                if abs(tdt_min) > tdt_max:
-                    tdt = DataType.get_smallest_possible(tdt_min)
-                else:
-                    tdt = DataType.get_smallest_possible(-tdt_max - 1)
-            else:
-                tdt = DataType.get_smallest_possible(tdt_max)
-            assert np.vectorize(tdt.allowed)(
+            acc_min = min(min_threshold, acc_min)
+            acc_max = max(max_threshold, acc_max)
+
+        # if the acc_range is always greater than 0, then acc_max <= 2^P - 1
+        if acc_min >= 0:
+            acc_bit_width = np.log2(acc_max + 1)
+            acc_bit_width = math.ceil(acc_bit_width)
+            adt = DataType[f"UINT{acc_bit_width}"]
+        # if the acc_range is signed, then acc_min >= -2^{P-1} and acc_max <=
+        # 2^{P - 1} - 1, which means 2^{P - 1} >= max(-acc_min, 1 + acc_max)
+        else:
+            _acc_max = max(-acc_min, 1 + acc_max)
+            acc_bit_width = np.log2(_acc_max) + 1
+            acc_bit_width = math.ceil(acc_bit_width)
+            adt = DataType[f"INT{acc_bit_width}"]
+
+        # if activation, assert that the thresholds can be expressed with adt
+        if thresholds is not None:
+            assert np.vectorize(adt.allowed)(
                 threshold_tensor
             ).all(), "Thresholds in %s can't be expressed with type %s" % (
                 self.onnx_node.name,
-                str(tdt),
+                str(adt),
             )
-            adt = tdt  # Set activation datatype to the threshold datatype
-        else:
-            if acc_min < 0:
-                if abs(acc_min) > acc_max:
-                    adt = DataType.get_smallest_possible(acc_min)
-                else:
-                    adt = DataType.get_smallest_possible(-acc_max - 1)
-            else:
-                adt = DataType.get_smallest_possible(acc_max)
-
-        is_last_node = model.find_direct_successors(self.onnx_node) is None
 
         # if no activation, output and accumulator datatypes are the same
        if self.get_nodeattr("noActivation"):
-            # if last node, we need to round the accumulator datatype (adt)
-            # up to the nearest 8 and set the output datatype (odt)
-            if is_last_node:
-                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
-                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-                adt = DataType[new_adt_name]
             self.set_nodeattr("outputDataType", adt.name)
-
-        # if last node has activation, then ensure the output datatype is divisible by 8
-        if not self.get_nodeattr("noActivation") and is_last_node:
-            odt = DataType[self.get_nodeattr("outputDataType")]
-            bw = roundup_to_integer_multiple(odt.bitwidth(), 8)
-            # NOTE: keeping previous functionality of converting odt to adt on the last
-            # node, could preserve odt in the future by replacing adt with odt. This
-            # may yield unfavorable functionality for Bipolar and/or Ternary datatypes
-            new_odt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
-            if bw != odt.bitwidth():
-                warn_str = "outputDataType changing for %s: %s -> %s " % (
-                    self.onnx_node.name,
-                    odt.name,
-                    new_odt_name,
-                )
-                warnings.warn(warn_str)
-            odt = DataType[new_odt_name]
-            self.set_nodeattr("outputDataType", odt.name)
-
         self.set_nodeattr("accDataType", adt.name)
+
        return DataType[self.get_nodeattr("accDataType")]
 
     def minimize_weight_bit_width(self, model):

From 7d5a8b6b2103b6ebd931a0b9d0479808007d5c4d Mon Sep 17 00:00:00 2001
From: icolbert
Date: Thu, 3 Aug 2023 07:53:34 -0700
Subject: [PATCH 15/18] Pre-commit fixes

---
 tests/fpgadataflow/test_minimize_bit_width.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/fpgadataflow/test_minimize_bit_width.py b/tests/fpgadataflow/test_minimize_bit_width.py
index 0427bbd4d8..4be0a260b7 100644
--- a/tests/fpgadataflow/test_minimize_bit_width.py
+++ b/tests/fpgadataflow/test_minimize_bit_width.py
@@ -294,9 +294,7 @@ def test_minimize_accumulator_width(wdt: DataType, idt: DataType, tdt: DataType,
     # bit width minimization logic in the MVAU and VVAU is exact and should be
     # less than or equal to this calculation
     exp_adt = calculate_accumulator_bit_width(inst, model)
-    assert (
-        cur_adt.bitwidth() <= exp_adt.bitwidth()
-    ), "Mismatched accumulation data types"
+    assert cur_adt.bitwidth() <= exp_adt.bitwidth(), "Mismatched accumulation data types"
 
     # if there is no activation, outputDataType = accDataType
     if inst.get_nodeattr("noActivation"):

From f11856f3c5db5825722bfd977f644c01f5ad6139 Mon Sep 17 00:00:00 2001
From: aziz bahri
Date: Fri, 4 Aug 2023 10:27:30 +0100
Subject: [PATCH 16/18] [Deps] Update qonnx version to include qcdq2qonnx changes

Signed-off-by: aziz bahri
---
 fetch-repos.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fetch-repos.sh b/fetch-repos.sh
index 9e3ee3ef99..5b07d11273 100755
--- a/fetch-repos.sh
+++ b/fetch-repos.sh
@@ -27,7 +27,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-QONNX_COMMIT="8755423377e9c01dd2d2358c320484399b5d6625"
+QONNX_COMMIT="04e24583fb5c1895744801480db3ced8a5b6a914"
 FINN_EXP_COMMIT="0aa7e1c44b20cf085b6fe42cff360f0a832afd2c"
 BREVITAS_COMMIT="9bb26bf2798de210a267d1e4aed4c20087e0e8a5"
 PYVERILATOR_COMMIT="766e457465f5c0dd315490d7b9cc5d74f9a76f4f"

From d07655968644a0c7a19a04986fca3984a2ea43ab Mon Sep 17 00:00:00 2001
From: auphelia
Date: Fri, 4 Aug 2023 14:14:48 +0100
Subject: [PATCH 17/18] [MVAU/VVAU] DataType divisibility by 8 for last node if no activation

---
 src/finn/custom_op/fpgadataflow/matrixvectoractivation.py | 7 +++++++
 src/finn/custom_op/fpgadataflow/vectorvectoractivation.py | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index ccf5b00918..7eb56db382 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -650,6 +650,13 @@ def minimize_accumulator_width(self, model):
 
         # if no activation, output and accumulator datatypes are the same
         if self.get_nodeattr("noActivation"):
+            # if this is the last node in the graph, then ensure the datatype is
+            # divisible by 8 bits
+            if model.find_direct_successors(self.onnx_node) is None:
+                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
+                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+                adt = DataType[new_adt_name]
+            # for no-activation nodes, output dt = acc dt
             self.set_nodeattr("outputDataType", adt.name)
         self.set_nodeattr("accDataType", adt.name)
 
diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index 035b6f28ec..bd5bb75f1d 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -176,6 +176,13 @@ def minimize_accumulator_width(self, model):
 
         # if no activation, output and accumulator datatypes are the same
         if self.get_nodeattr("noActivation"):
+            # if this is the last node in the graph, then ensure the datatype is
+            # divisible by 8 bits
+            if model.find_direct_successors(self.onnx_node) is None:
+                bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
+                new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+                adt = DataType[new_adt_name]
+            # for no-activation nodes, output dt = acc dt
             self.set_nodeattr("outputDataType", adt.name)
         self.set_nodeattr("accDataType", adt.name)
 

From f713ab09794a66e9cddae605055666064b855caf Mon Sep 17 00:00:00 2001
From: auphelia
Date: Fri, 4 Aug 2023 16:39:16 +0100
Subject: [PATCH 18/18] [Tests] Include divisibility by 8 in minimize bit width testing

---
 tests/fpgadataflow/test_minimize_bit_width.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/fpgadataflow/test_minimize_bit_width.py b/tests/fpgadataflow/test_minimize_bit_width.py
index 4be0a260b7..0e704230e7 100644
--- a/tests/fpgadataflow/test_minimize_bit_width.py
+++ b/tests/fpgadataflow/test_minimize_bit_width.py
@@ -296,8 +296,13 @@ def test_minimize_accumulator_width(wdt: DataType, idt: DataType, tdt: DataType,
     exp_adt = calculate_accumulator_bit_width(inst, model)
     assert cur_adt.bitwidth() <= exp_adt.bitwidth(), "Mismatched accumulation data types"
 
-    # if there is no activation, outputDataType = accDataType
+    # if there is no activation, outputDataType = accDataType and if it is the last node
+    # it needs to be divisible by 8
     if inst.get_nodeattr("noActivation"):
         assert (
             cur_adt.bitwidth() == cur_odt.bitwidth()
         ), "outputDataType and accDataType should be equal"
+        if model.find_direct_successors(inst.onnx_node) is None:
+            assert (
+                cur_adt.bitwidth() % 8
+            ) == 0, "bit width of last node needs to be divisible by 8"
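
Taken together with the range analysis from patches 13 and 14, the series ends at this rule: pick the smallest signed or unsigned width that covers the accumulator range, then pad last-node, no-activation datatypes to a byte multiple, which the assertions above lock in. A self-contained sketch of that arithmetic, with made-up ranges:

    import math

    def minimized_acc_bits(acc_min, acc_max, no_activation=True, is_last_node=False):
        # unsigned: acc_max <= 2**P - 1; signed: 2**(P-1) >= max(-acc_min, 1 + acc_max)
        if acc_min >= 0:
            bits = math.ceil(math.log2(acc_max + 1))
        else:
            bits = math.ceil(math.log2(max(-acc_min, 1 + acc_max)) + 1)
        # byte-align the last node's type when there is no activation
        if no_activation and is_last_node:
            bits = 8 * math.ceil(bits / 8)
        return bits

    assert minimized_acc_bits(0, 255) == 8                      # UINT8
    assert minimized_acc_bits(-10, 10) == 5                     # INT5, since 2**4 >= 11
    assert minimized_acc_bits(-10, 10, is_last_node=True) == 8  # padded to INT8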