diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 18c7fbd8..6a9d5554 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -74,7 +74,7 @@ jobs: ICCT_ITA_8 miniMobileNet miniMobileNetv2 - CCT/CCT_16_16_8 + CCT/CCT_1_16_16_8 ### CortexM Tests ### @@ -249,6 +249,7 @@ jobs: MLPerf/KeywordSpotting MLPerf/ImageClassification MLPerf/AnomalyDetection + CCT/CCT_1_16_16_8 num-cores: 8 siracusa-kernels-tiled-singlebuffer-L2: @@ -429,7 +430,7 @@ jobs: L1: [64000] - name: "MLPerf/AnomalyDetection" L1: [64000] - - name: "CCT/CCT_16_16_8" + - name: "CCT/CCT_1_16_16_8" L1: [64000] num-cores: - 8 @@ -456,6 +457,8 @@ jobs: L1: [60000, 30000, 15000] - name: "microLlama/microLlama1" L1: [60000, 10000, 5000] + - name: "CCT/CCT_1_32_32_8" + L1: [64000] num-cores: - 8 default-memory-level: @@ -488,6 +491,8 @@ jobs: L1: [60000, 20000, 10000] - name: "microLlama/microLlama8_parallel" L1: [60000, 20000, 10000] + - name: "CCT/CCT_1_32_32_8" + L1: [64000] num-cores: - 8 double-buffer: diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c0e03ec..ed985830 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -136,8 +136,23 @@ Change main.c to use OUTPUTTYPE instead of float - CCT onnx tests with img size of 16 and 32 ### Fixed -- CycleMeasure Pass for Siracusa Untiling Profilling +- CycleMeasure Pass for Siracusa Untiled Profiling - GEMM Tiling Constraints transA and `transB' not supported - MatMul layer Multi-Dimensional Input Issue - Add Layer for Broadcasted Bias - Resolved an issue where concatenation of float32 with f caused inf errors during code generation + +## Fix Float CCT Bugs on L3 + +### Added +- Added multiple CCT settings for testing. +- Added CCT L3 test to CI to ensure correctness for img size of 16 and 32. +- Added NaN check for deeploytest diff to improve result validation. + +### Changed +- Regenerated CCT ONNX files without "output" & "input" in their names to avoid triggering the dumphex parser bug. 
+- Regenerated CCT ONNX file with 3 branches for attention, transforming the attention computation graph into three branches. +- Changed code generation for Hex output to properly handle float values. + +### Fixed +- Updated printinput nodetemplate for float handling. diff --git a/Deeploy/CommonExtensions/CodeTransformationPasses/PrintInputs.py b/Deeploy/CommonExtensions/CodeTransformationPasses/PrintInputs.py index 1f4f8fb2..1b057ca2 100644 --- a/Deeploy/CommonExtensions/CodeTransformationPasses/PrintInputs.py +++ b/Deeploy/CommonExtensions/CodeTransformationPasses/PrintInputs.py @@ -40,13 +40,16 @@ accessStr += "[" + f"print_iter_{idx}" + "]" if idx > 0: dimStr += "[" + f"{dim}" + "]" +formatSpecifier = "%*i" +if "float" in bufferType.referencedType.typeName or "double" in bufferType.referencedType.typeName: + formatSpecifier = "%*.6f" %> printf("${nodeName} ${bufferName}: ${bufferType.referencedType.typeName}, ${bufferShape}, %p\\n", ${bufferName}); % for idx, dim in enumerate(bufferShape): printf("["); for (int print_iter_${idx}=0; print_iter_${idx} < ${dim}; print_iter_${idx}++){ % endfor -printf("%*i,", 4, ((${bufferType.referencedType.typeName} (*)${dimStr})${bufferName})${accessStr}); +printf("${formatSpecifier},", 4, ((${bufferType.referencedType.typeName} (*)${dimStr})${bufferName})${accessStr}); % for dim in bufferShape: } printf("], \\n"); @@ -214,8 +217,11 @@ def apply(self, ctxt: NetworkContext, executionBlock: ExecutionBlock, class MemoryAwarePrintConstantGeneration(MemoryAwareGeneration, PrintConstantGeneration): - def apply(self, ctxt: NetworkContext, executionBlock: ExecutionBlock, - name: str) -> Tuple[NetworkContext, ExecutionBlock]: + def apply(self, + ctxt: NetworkContext, + executionBlock: ExecutionBlock, + name: str, + verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]: references = self.extractDynamicReferences(ctxt, executionBlock, True) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 
9ce1d162..cc685303 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -70,7 +70,8 @@ class CodeGenVerbosity: """ tilingProfiling: Optional[str] #: str: Specifies the name of the memory level on which to profile tiling - untiledProfiling: Optional[bool] = None #: str: Specifies the name of the memory level on which to profile untiling + untiledProfiling: Optional[ + bool] = None #: bool: Whether to profile the untiled code _NoVerbosity = CodeGenVerbosity(None) diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 7349fa26..c99931b9 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -38,19 +38,20 @@ from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGELUTemplate, FloatGemmTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \ - GatherTemplate, RQSiGELUTemplate, iHardswishTemplate + FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, GatherTemplate, \ + RQSiGELUTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, ConvChecker, GatherChecker, GELUChecker, GEMMChecker, \ HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, ReluChecker, RQAddChecker, \ RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling +from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPProfileUntiled import 
PULPProfileUntiled from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture -from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, FloatConvTemplate, FloatMaxPoolTemplate, GEMMTemplate, \ - MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, RQAddTemplate, \ - RQSiHardswishTemplate, SliceTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \ - iRMSNormTemplate, iSoftmaxTemplate +from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, FloatConvTemplate, FloatMaxPoolTemplate, \ + FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, \ + RequantShiftTemplate, RQAddTemplate, RQSiHardswishTemplate, SliceTemplate, TallGEMMTemplate, TransposeTemplate, \ + UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \ PULPRequantShiftChecker from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement @@ -118,6 +119,7 @@ MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), TilingVariableReplacement("L2"), PULPL3Tiling("L2"), + PULPProfileUntiled(), ArgumentStructGeneration(), L3MemoryAwareFunctionCallClosure(writeback = False), MemoryManagementGeneration("L3.*"), @@ -134,6 +136,7 @@ MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), TilingVariableReplacement("L2"), PULPL3Tiling("L2"), + PULPProfileUntiled(), ArgumentStructGeneration(), L3MemoryAwareFunctionCallClosure(writeback = False), MemoryManagementGeneration("L2"), diff --git a/Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py b/Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py index e2f097f2..c3723201 100644 --- a/Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py +++ 
b/Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py @@ -25,7 +25,6 @@ from typing import Tuple -from Deeploy.CommonExtensions.CodeTransformationPasses.CycleMeasurement import ProfilingCodeGeneration from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, _NoVerbosity from .PULPClusterTilingDB import ProfilingPULPClusterTilingGenerationDB, PULPClusterTilingGenerationDB @@ -39,7 +38,6 @@ def __init__(self, targetMemLevel: str): self.profilingSB = ProfilingPULPClusterTilingGenerationSB(targetMemLevel) self.DB = PULPClusterTilingGenerationDB(targetMemLevel) self.profilingDB = ProfilingPULPClusterTilingGenerationDB(targetMemLevel) - self.profiluntiling = ProfilingCodeGeneration() def apply(self, ctxt: NetworkContext, @@ -54,7 +52,4 @@ def apply(self, ctxt, executionBlock = self.SB.apply(ctxt, executionBlock, name) ctxt, executionBlock = self.DB.apply(ctxt, executionBlock, name) - if verbose.untilingProfiling: - ctxt, executionBlock = self.profiluntiling.apply(ctxt, executionBlock, name) - return ctxt, executionBlock diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatSoftmaxTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatSoftmaxTemplate.py new file mode 100644 index 00000000..8f721b2b --- /dev/null +++ b/Deeploy/Targets/PULPOpen/Templates/FloatSoftmaxTemplate.py @@ -0,0 +1,34 @@ +# ---------------------------------------------------------------------- +# +# File: FloatSoftmaxTemplate.py +# +# Last edited: 23.1.2025 +# +# Copyright (C) 2021, ETH Zurich and University of Bologna. +# +# Author: Run Wang, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Softmax (Name: ${nodeName}, Op: ${nodeOp}) +int8_t ${nodeName}_core_id = pi_core_id(); +if (${nodeName}_core_id == 0) { + Softmax_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength}); +} +""") diff --git a/DeeployTest/Platforms/Siracusa/src/deeploytest.c b/DeeployTest/Platforms/Siracusa/src/deeploytest.c index 4803de50..00a63fc6 100644 --- a/DeeployTest/Platforms/Siracusa/src/deeploytest.c +++ b/DeeployTest/Platforms/Siracusa/src/deeploytest.c @@ -114,7 +114,7 @@ void main(void) { diff = expected - actual; if (ISOUTPUTFLOAT) { - if ((diff < -1e-4) || (diff > 1e-4)) + if ((diff < -1e-4) || (diff > 1e-4) || (isnan(diff))) { tot_err += 1; printf("Expected: %10.6f ", expected); diff --git a/DeeployTest/Tests/CCT/CCT_16_16_8/inputs.npz b/DeeployTest/Tests/CCT/CCT_16_16_8/inputs.npz deleted file mode 100644 index 8f9731d5..00000000 Binary files a/DeeployTest/Tests/CCT/CCT_16_16_8/inputs.npz and /dev/null differ diff --git a/DeeployTest/Tests/CCT/CCT_16_16_8/network.onnx b/DeeployTest/Tests/CCT/CCT_16_16_8/network.onnx deleted file mode 100644 index cac7f237..00000000 Binary files a/DeeployTest/Tests/CCT/CCT_16_16_8/network.onnx and /dev/null differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz new file mode 100644 index 00000000..bd39fa0e Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz differ diff 
--git a/DeeployTest/Tests/CCT/CCT_1_16_16_128/network.onnx b/DeeployTest/Tests/CCT/CCT_1_16_16_128/network.onnx new file mode 100644 index 00000000..46f45fbc Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_128/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_32_32_8/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_128/outputs.npz similarity index 70% rename from DeeployTest/Tests/CCT/CCT_32_32_8/outputs.npz rename to DeeployTest/Tests/CCT/CCT_1_16_16_128/outputs.npz index 5a8e99bd..888c45fa 100644 Binary files a/DeeployTest/Tests/CCT/CCT_32_32_8/outputs.npz and b/DeeployTest/Tests/CCT/CCT_1_16_16_128/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz new file mode 100644 index 00000000..f8455694 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx b/DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx new file mode 100644 index 00000000..0aeb8940 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_16_16_8/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_32/outputs.npz similarity index 70% rename from DeeployTest/Tests/CCT/CCT_16_16_8/outputs.npz rename to DeeployTest/Tests/CCT/CCT_1_16_16_32/outputs.npz index 9ed24fd0..1208927b 100644 Binary files a/DeeployTest/Tests/CCT/CCT_16_16_8/outputs.npz and b/DeeployTest/Tests/CCT/CCT_1_16_16_32/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz new file mode 100644 index 00000000..cb995e53 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/network.onnx b/DeeployTest/Tests/CCT/CCT_1_16_16_64/network.onnx new file mode 100644 index 00000000..1301de83 Binary files /dev/null and 
b/DeeployTest/Tests/CCT/CCT_1_16_16_64/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz new file mode 100644 index 00000000..d311d0d3 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz new file mode 100644 index 00000000..97469eb9 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx b/DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx new file mode 100644 index 00000000..93e4147f Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz new file mode 100644 index 00000000..ec6759c1 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz new file mode 100644 index 00000000..1772ff74 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/network.onnx b/DeeployTest/Tests/CCT/CCT_1_32_32_32/network.onnx new file mode 100644 index 00000000..83b0a16c Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_32_32_32/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz new file mode 100644 index 00000000..39bba709 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz b/DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz new file mode 100644 index 00000000..77525d20 Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz differ 
diff --git a/DeeployTest/Tests/CCT/CCT_32_32_8/network.onnx b/DeeployTest/Tests/CCT/CCT_1_32_32_8/network.onnx similarity index 58% rename from DeeployTest/Tests/CCT/CCT_32_32_8/network.onnx rename to DeeployTest/Tests/CCT/CCT_1_32_32_8/network.onnx index 88f296ed..c07322d8 100644 Binary files a/DeeployTest/Tests/CCT/CCT_32_32_8/network.onnx and b/DeeployTest/Tests/CCT/CCT_1_32_32_8/network.onnx differ diff --git a/DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz b/DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz new file mode 100644 index 00000000..9297b77a Binary files /dev/null and b/DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz differ diff --git a/DeeployTest/Tests/CCT/CCT_32_32_8/inputs.npz b/DeeployTest/Tests/CCT/CCT_32_32_8/inputs.npz deleted file mode 100644 index 68720f4e..00000000 Binary files a/DeeployTest/Tests/CCT/CCT_32_32_8/inputs.npz and /dev/null differ diff --git a/DeeployTest/generateNetwork.py b/DeeployTest/generateNetwork.py index 48a396b5..2278627a 100644 --- a/DeeployTest/generateNetwork.py +++ b/DeeployTest/generateNetwork.py @@ -55,6 +55,11 @@ parser.add_argument('--overwriteRecentState', action = 'store_true', help = 'Copy the recent deeply state to the ./deeployStates folder\n') + parser.add_argument('--profileUntiled', + action = 'store_true', + dest = 'profileUntiled', + default = False, + help = 'Profile Untiled for L2\n') args = parser.parse_args() @@ -105,6 +110,10 @@ ) and not "simpleCNN" in args.dir and not "testRQMatMul" in args.dir and not "testRQGEMM" in args.dir: deployer.loweringOptimizer.passes.insert(0, EmulateCMSISRequantPass()) + verbosityCfg = _NoVerbosity + if isinstance(platform, PULPPlatform): + verbosityCfg.untiledProfiling = args.profileUntiled + # Parse graph and infer output levels and signedness _ = deployer.generateFunction(verbose = verbosityCfg) @@ -149,4 +158,4 @@ print("=" * 80) print() print(f"{'Number of Ops:' :<{_TEXT_ALIGN}} {num_ops}") - print(f"{'Model Parameters: ' :<{_TEXT_ALIGN}} 
{deployer.getParameterSize()}") + print(f"{'Model Parameters: ' :<{_TEXT_ALIGN}} {deployer.getParameterSize()}") \ No newline at end of file diff --git a/DeeployTest/testUtils/codeGenerate.py b/DeeployTest/testUtils/codeGenerate.py index d34716f4..b5f9f6e8 100644 --- a/DeeployTest/testUtils/codeGenerate.py +++ b/DeeployTest/testUtils/codeGenerate.py @@ -249,17 +249,21 @@ def generateTestNetworkImplementation(deployer: NetworkDeployer, def generateL3HexDump(deployer: NetworkDeployer, path: str, test_inputs: List, test_outputs: List): def type2TypeStr(dataType) -> Tuple[str, int]: - width = dataType.referencedType.typeWidth - signed = (dataType.referencedType.typeMin < 0) + if dataType.referencedType.typeName == "float32_t": + retStr = "float32" + width = 32 + else: + width = dataType.referencedType.typeWidth + signed = (dataType.referencedType.typeMin < 0) - retStr = "" + retStr = "" - if signed: - retStr += "int" - else: - retStr += "uint" + if signed: + retStr += "int" + else: + retStr += "uint" - retStr += str(width) + retStr += str(width) return retStr, width diff --git a/DeeployTest/testUtils/testRunner.py b/DeeployTest/testUtils/testRunner.py index 0ca2c0f8..123feb5a 100644 --- a/DeeployTest/testUtils/testRunner.py +++ b/DeeployTest/testUtils/testRunner.py @@ -305,7 +305,6 @@ def generate_test(self): command = f"python {generation_script} -d {self._dir_gen} -t {self._dir_test} -p {self._platform} {self.gen_args}" command += self._argument_parser.generate_cmd_args() - print(command) if self._args.verbose >= 2: prBlue(f"[TestRunner] Generation Command: {command}")