Skip to content

Commit

Permalink
Fixed CCT on L3 Bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
runwangdl committed Feb 23, 2025
1 parent 026210f commit 3df3245
Show file tree
Hide file tree
Showing 32 changed files with 100 additions and 29 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ jobs:
ICCT_ITA_8
miniMobileNet
miniMobileNetv2
CCT/CCT_16_16_8
CCT/CCT_1_16_16_8

### CortexM Tests ###
Expand Down Expand Up @@ -249,6 +249,7 @@ jobs:
MLPerf/KeywordSpotting
MLPerf/ImageClassification
MLPerf/AnomalyDetection
CCT/CCT_1_16_16_8
num-cores: 8

siracusa-kernels-tiled-singlebuffer-L2:
Expand Down Expand Up @@ -429,7 +430,7 @@ jobs:
L1: [64000]
- name: "MLPerf/AnomalyDetection"
L1: [64000]
- name: "CCT/CCT_16_16_8"
- name: "CCT/CCT_1_16_16_8"
L1: [64000]
num-cores:
- 8
Expand All @@ -456,6 +457,8 @@ jobs:
L1: [60000, 30000, 15000]
- name: "microLlama/microLlama1"
L1: [60000, 10000, 5000]
- name: "CCT/CCT_1_32_32_8"
L1: [64000]
num-cores:
- 8
default-memory-level:
Expand Down Expand Up @@ -488,6 +491,8 @@ jobs:
L1: [60000, 20000, 10000]
- name: "microLlama/microLlama8_parallel"
L1: [60000, 20000, 10000]
- name: "CCT/CCT_1_32_32_8"
L1: [64000]
num-cores:
- 8
double-buffer:
Expand Down
17 changes: 16 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,23 @@ Change main.c to use OUTPUTTYPE instead of float
- CCT onnx tests with img size of 16 and 32

### Fixed
- CycleMeasure Pass for Siracusa Untiling Profilling
- CycleMeasure Pass for Siracusa Untiled Profiling
- GEMM Tiling Constraints: `transA` and `transB` not supported
- MatMul layer Multi-Dimensional Input Issue
- Add Layer for Broadcasted Bias
- Resolved an issue where concatenation of float32 with f caused inf errors during code generation

## Fix Float CCT Bugs on L3

### Added
- Added multiple CCT settings for testing.
- Added CCT L3 test to CI to ensure correctness for img size of 16 and 32.
- Added NaN check for deeploytest diff to improve result validation.

### Changed
- Regenerated CCT ONNX files without "output" & "input" in their names to avoid triggering the dumphex parser bug.
- Regenerated CCT ONNX file so that the attention computation graph is split into three branches.
- Changed code generation for Hex output to properly handle float values.

### Fixed
- Updated the PrintInput node template to handle float values.
12 changes: 9 additions & 3 deletions Deeploy/CommonExtensions/CodeTransformationPasses/PrintInputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@
accessStr += "[" + f"print_iter_{idx}" + "]"
if idx > 0:
dimStr += "[" + f"{dim}" + "]"
formatSpecifier = "%*i"
if "float" in bufferType.referencedType.typeName or "double" in bufferType.referencedType.typeName:
formatSpecifier = "%*.6f"
%>
printf("${nodeName} ${bufferName}: ${bufferType.referencedType.typeName}, ${bufferShape}, %p\\n", ${bufferName});
% for idx, dim in enumerate(bufferShape):
printf("[");
for (int print_iter_${idx}=0; print_iter_${idx} < ${dim}; print_iter_${idx}++){
% endfor
printf("%*i,", 4, ((${bufferType.referencedType.typeName} (*)${dimStr})${bufferName})${accessStr});
printf("${formatSpecifier},", 4, ((${bufferType.referencedType.typeName} (*)${dimStr})${bufferName})${accessStr});
% for dim in bufferShape:
}
printf("], \\n");
Expand Down Expand Up @@ -214,8 +217,11 @@ def apply(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,

class MemoryAwarePrintConstantGeneration(MemoryAwareGeneration, PrintConstantGeneration):

def apply(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
name: str) -> Tuple[NetworkContext, ExecutionBlock]:
def apply(self,
ctxt: NetworkContext,
executionBlock: ExecutionBlock,
name: str,
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:

references = self.extractDynamicReferences(ctxt, executionBlock, True)

Expand Down
3 changes: 2 additions & 1 deletion Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ class CodeGenVerbosity:
"""

tilingProfiling: Optional[str] #: str: Specifies the name of the memory level on which to profile tiling
untiledProfiling: Optional[bool] = None #: str: Specifies the name of the memory level on which to profile untiling
untiledProfiling: Optional[
bool] = None #: bool: Flag that enables profiling of untiled code; unlike tilingProfiling this is a boolean, not a memory-level name


_NoVerbosity = CodeGenVerbosity(None)
Expand Down
15 changes: 9 additions & 6 deletions Deeploy/Targets/PULPOpen/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,20 @@
from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \
GatherTemplate, RQSiGELUTemplate, iHardswishTemplate
FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, GatherTemplate, \
RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, ConvChecker, GatherChecker, GELUChecker, GEMMChecker, \
HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, ReluChecker, RQAddChecker, \
RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPProfileUntiled import PULPProfileUntiled
from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, FloatConvTemplate, FloatMaxPoolTemplate, GEMMTemplate, \
MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, RQAddTemplate, \
RQSiHardswishTemplate, SliceTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \
iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, FloatConvTemplate, FloatMaxPoolTemplate, \
FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPool2DTemplate, MulTemplate, ReduceMeanTemplate, \
RequantShiftTemplate, RQAddTemplate, RQSiHardswishTemplate, SliceTemplate, TallGEMMTemplate, TransposeTemplate, \
UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \
PULPRequantShiftChecker
from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement
Expand Down Expand Up @@ -118,6 +119,7 @@
MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True),
TilingVariableReplacement("L2"),
PULPL3Tiling("L2"),
PULPProfileUntiled(),
ArgumentStructGeneration(),
L3MemoryAwareFunctionCallClosure(writeback = False),
MemoryManagementGeneration("L3.*"),
Expand All @@ -134,6 +136,7 @@
MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True),
TilingVariableReplacement("L2"),
PULPL3Tiling("L2"),
PULPProfileUntiled(),
ArgumentStructGeneration(),
L3MemoryAwareFunctionCallClosure(writeback = False),
MemoryManagementGeneration("L2"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from typing import Tuple

from Deeploy.CommonExtensions.CodeTransformationPasses.CycleMeasurement import ProfilingCodeGeneration
from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, _NoVerbosity

from .PULPClusterTilingDB import ProfilingPULPClusterTilingGenerationDB, PULPClusterTilingGenerationDB
Expand All @@ -39,7 +38,6 @@ def __init__(self, targetMemLevel: str):
self.profilingSB = ProfilingPULPClusterTilingGenerationSB(targetMemLevel)
self.DB = PULPClusterTilingGenerationDB(targetMemLevel)
self.profilingDB = ProfilingPULPClusterTilingGenerationDB(targetMemLevel)
self.profiluntiling = ProfilingCodeGeneration()

def apply(self,
ctxt: NetworkContext,
Expand All @@ -54,7 +52,4 @@ def apply(self,
ctxt, executionBlock = self.SB.apply(ctxt, executionBlock, name)
ctxt, executionBlock = self.DB.apply(ctxt, executionBlock, name)

if verbose.untilingProfiling:
ctxt, executionBlock = self.profiluntiling.apply(ctxt, executionBlock, name)

return ctxt, executionBlock
34 changes: 34 additions & 0 deletions Deeploy/Targets/PULPOpen/Templates/FloatSoftmaxTemplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ----------------------------------------------------------------------
#
# File: FloatSoftmaxTemplate.py
#
# Last edited: 23.1.2025
#
# Copyright (C) 2021, ETH Zurich and University of Bologna.
#
# Author: Run Wang, ETH Zurich
#
# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from Deeploy.DeeployTypes import NodeTemplate

# Code template for the float Softmax node. The emitted C snippet reads the
# calling core's id via pi_core_id() and invokes the
# Softmax_fp<input width>_fp<output width> kernel only when that id is 0;
# every other core skips the call. The kernel receives the input buffer, the
# output buffer, ${size} and ${lastDimLength}.
referenceTemplate = NodeTemplate("""
// Softmax (Name: ${nodeName}, Op: ${nodeOp})
int8_t ${nodeName}_core_id = pi_core_id();
if (${nodeName}_core_id == 0) {
Softmax_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength});
}
""")
2 changes: 1 addition & 1 deletion DeeployTest/Platforms/Siracusa/src/deeploytest.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ void main(void) {
diff = expected - actual;
if (ISOUTPUTFLOAT)
{
if ((diff < -1e-4) || (diff > 1e-4))
if ((diff < -1e-4) || (diff > 1e-4) || (isnan(diff)))
{
tot_err += 1;
printf("Expected: %10.6f ", expected);
Expand Down
Binary file removed DeeployTest/Tests/CCT/CCT_16_16_8/inputs.npz
Binary file not shown.
Binary file removed DeeployTest/Tests/CCT/CCT_16_16_8/network.onnx
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_128/inputs.npz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_32/inputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_32/network.onnx
Binary file not shown.
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_64/inputs.npz
Binary file not shown.
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_64/outputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_8/inputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_8/network.onnx
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_16_16_8/outputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_32_32_32/inputs.npz
Binary file not shown.
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_32_32_32/outputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_32_32_8/inputs.npz
Binary file not shown.
Binary file not shown.
Binary file added DeeployTest/Tests/CCT/CCT_1_32_32_8/outputs.npz
Binary file not shown.
Binary file removed DeeployTest/Tests/CCT/CCT_32_32_8/inputs.npz
Binary file not shown.
11 changes: 10 additions & 1 deletion DeeployTest/generateNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
# Boolean switch; the help text previously read "deeply state", a typo for
# "deeploy state" (compare the ./deeployStates folder it refers to).
parser.add_argument('--overwriteRecentState',
                    action = 'store_true',
                    help = 'Copy the recent deeploy state to the ./deeployStates folder\n')
# Opt-in boolean switch. With action='store_true' argparse already stores the
# value under 'profileUntiled' and defaults it to False, so neither needs to be
# spelled out.
parser.add_argument('--profileUntiled', action = 'store_true', help = 'Profile Untiled for L2\n')

args = parser.parse_args()

Expand Down Expand Up @@ -105,6 +110,10 @@
) and not "simpleCNN" in args.dir and not "testRQMatMul" in args.dir and not "testRQGEMM" in args.dir:
deployer.loweringOptimizer.passes.insert(0, EmulateCMSISRequantPass())

import copy

# Configure a private copy instead of the module-level _NoVerbosity object:
# that object is the shared default value of the passes' `verbose` parameter,
# so mutating it in place would change the default for every pass that falls
# back to it.
verbosityCfg = copy.copy(_NoVerbosity)
if isinstance(platform, PULPPlatform):
    # Only PULP platforms receive the untiled-profiling flag from the CLI.
    verbosityCfg.untiledProfiling = args.profileUntiled

# Parse graph and infer output levels and signedness
_ = deployer.generateFunction(verbose = verbosityCfg)

Expand Down Expand Up @@ -149,4 +158,4 @@
print("=" * 80)
print()
print(f"{'Number of Ops:' :<{_TEXT_ALIGN}} {num_ops}")
print(f"{'Model Parameters: ' :<{_TEXT_ALIGN}} {deployer.getParameterSize()}")
print(f"{'Model Parameters: ' :<{_TEXT_ALIGN}} {deployer.getParameterSize()}")
20 changes: 12 additions & 8 deletions DeeployTest/testUtils/codeGenerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,21 @@ def generateTestNetworkImplementation(deployer: NetworkDeployer,
def generateL3HexDump(deployer: NetworkDeployer, path: str, test_inputs: List, test_outputs: List):

def type2TypeStr(dataType) -> Tuple[str, int]:
width = dataType.referencedType.typeWidth
signed = (dataType.referencedType.typeMin < 0)
if dataType.referencedType.typeName == "float32_t":
retStr = "float32"
width = 32
else:
width = dataType.referencedType.typeWidth
signed = (dataType.referencedType.typeMin < 0)

retStr = ""
retStr = ""

if signed:
retStr += "int"
else:
retStr += "uint"
if signed:
retStr += "int"
else:
retStr += "uint"

retStr += str(width)
retStr += str(width)

return retStr, width

Expand Down
1 change: 0 additions & 1 deletion DeeployTest/testUtils/testRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ def generate_test(self):

command = f"python {generation_script} -d {self._dir_gen} -t {self._dir_test} -p {self._platform} {self.gen_args}"
command += self._argument_parser.generate_cmd_args()
print(command)

if self._args.verbose >= 2:
prBlue(f"[TestRunner] Generation Command: {command}")
Expand Down

0 comments on commit 3df3245

Please sign in to comment.