diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp index 4fb4703ba2742b..7bf7a47c3a7b39 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp index eb8af61898754d..cfb94b8bafd2bd 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp @@ -4,19 +4,16 @@ #include "acl_lowp_fullyconnected.hpp" +#include "acl_weights.hpp" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" - +#include "memory_desc/cpu_memory_desc_utils.h" +#include "nodes/common/cpu_convert.h" #include "nodes/executors/acl/acl_utils.hpp" #include "nodes/executors/executor.hpp" #include "nodes/executors/memory_arguments.hpp" -#include "utils/debug_capabilities.h" #include "nodes/executors/debug_messages.hpp" #include "nodes/executors/implementation_utils.hpp" -#include "acl_weights.hpp" -#include "acl_utils.hpp" - -#include "nodes/common/cpu_convert.h" -#include "memory_desc/cpu_memory_desc_utils.h" +#include "utils/debug_capabilities.h" namespace ov { namespace intel_cpu { @@ -41,9 +38,7 @@ static void initFCAttrs(const FCAttrs &attrs, arm_compute::GEMMInfo& fullyConnectedLayerInfo, const PostOps &postOps) { aclTensorAttrs.hasLayoutTypeNHWC = memory.at(ARG_SRC)->getDescPtr()->hasLayoutType(LayoutType::nspc); - //fullyConnectedLayerInfo.weights_trained_layout = getAclDataLayoutByMemoryDesc(memory.at(ARG_WEI)->getDescPtr()); aclfcAttrs.inputPrecision = memory.at(ARG_SRC)->getDescPtr()->getPrecision(); - //fullyConnectedLayerInfo.transpose_weights = false; aclfcAttrs.weightsNonTransposed = attrs.weightsNonTransposed; if (!postOps.empty()) { @@ -86,11 +81,6 @@ void ACLLowpFullyConnectedExecutor::updateTensorsShapes(ACLShapes& aclMemoryShap } arm_compute::Status ACLLowpFullyConnectedExecutor::validateTensorsInfo(const ACLInfos & aclMemoryInfos) { - // TODO: debug only - //const auto src0 = aclMemoryInfos[ACLArgs::ACL_SRC_0].get(); - //const auto src1 = aclMemoryInfos[ACLArgs::ACL_WEI].get(); - //const auto dst = aclMemoryInfos[ACLArgs::ACL_DST].get(); - auto &tensor_info = aclMemoryInfos[ACLArgs::ACL_SRC_0]; if (dequantizationScales.empty()) { tensor_info->set_quantization_info(arm_compute::QuantizationInfo(1.f)); @@ -104,7 +94,7 @@ arm_compute::Status ACLLowpFullyConnectedExecutor::validateTensorsInfo(const ACL const auto matMulValid = arm_compute::NEGEMMLowpMatrixMultiplyCore::validate( aclMemoryInfos[ACLArgs::ACL_SRC_0].get(), aclMemoryInfos[ACLArgs::ACL_WEI].get(), - nullptr, //aclMemoryInfos[ACLArgs::ACL_BIAS].get(), + aclMemoryInfos[ACLArgs::ACL_BIAS].get(), aclMemoryInfos[ACLArgs::ACL_DST].get(), gemmInfo); return matMulValid; @@ -126,7 +116,6 @@ ACLFunction ACLLowpFullyConnectedExecutor::configureFunction(const ACLTensors & return gemm; } -// TODO: move to ACLLowpExecutor std::shared_ptr ACLLowpFullyConnectedExecutor::initTensorInfo( const arm_compute::TensorShape& tensorShape, const arm_compute::DataType& dataType, diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_weights.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_weights.cpp index 819be7ef057dce..27e1ba2194f4de 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_weights.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_weights.cpp @@ -4,13 +4,11 @@ #include "acl_fullyconnected.hpp" #include "acl_utils.hpp" +#include "nodes/common/cpu_convert.h" #include "nodes/executors/executor.hpp" #include "nodes/executors/memory_arguments.hpp" -#include "utils/debug_capabilities.h" -#include "nodes/executors/debug_messages.hpp" -#include "nodes/executors/implementation_utils.hpp" -#include "nodes/common/cpu_convert.h" #include "memory_desc/cpu_memory_desc_utils.h" +#include "utils/debug_capabilities.h" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 92b721726a9e2f..3280321ea722b0 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -90,7 +90,7 @@ static const TypeMapping aclFCTypeMapping { static const TypeMapping aclLowpFCTypeMapping { // {src, wei, bia, dst} pt - {{_i8, _i8, _any, _f32}, pt(just(), just(), just(), just())} + {{_i8, _i8, _any, _f32}, pt(just(), just(), just(), just())} }; static const MappingNotation dnnlConvolutionMappingNotation { @@ -396,7 +396,6 @@ const std::vector>& getImplementations() { }, // acceptsShapes [](const MemoryArgs& memory) -> bool { - // @todo create syntactic sugar (functor) for shape agnostic lambda return true; }, // create diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 9215906925b46e..38649b2906e9e3 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -725,11 +725,7 @@ void Transformations::Lpt(const std::vector& defaultPrecision }), PrecisionsRestriction::create({ {{0}, {ov::element::u8, ov::element::i8}}, -#if defined(OPENVINO_ARCH_ARM64) - {{1}, {ov::element::u8, ov::element::i8}} -#else {{1}, {ov::element::i8}} -#endif }), PrecisionsRestriction::create({ {{0, 1}, {ov::element::u8}} diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp index 74dd7450130360..3d394c1e45674c 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp @@ -19,10 +19,8 @@ class MatMulTransformationTestValues { ov::builder::subgraph::FakeQuantizeOnData fqOnData1; ov::Shape inputShape2; ov::builder::subgraph::FakeQuantizeOnData fqOnData2; - // TODO: remove, not used std::string expectedKernelName; std::string expectedRuntimePrecision; - bool requantization; }; typedef std::tuple< diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp index d1d4ea94c6b01b..bc1ce628deb245 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp @@ -27,11 +27,10 @@ std::string MatMulTransformation::getTestCaseName(const testing::TestParamInfo(GetParam()); - EXPECT_EQ(expected.expectedRuntimePrecision, actualType); + const auto params = std::get<3>(GetParam()); + const auto actualType = get_runtime_precision(params.expectedKernelName); - const auto& actualPrimitiveType = get_property_by_type("MatMul", "primitiveType"); - const auto expectedPrimitiveType = "gemm_acl_i8"; - EXPECT_EQ(expectedPrimitiveType, actualPrimitiveType); + EXPECT_EQ(actualType, params.expectedRuntimePrecision); } TEST_P(MatMulTransformation, CompareWithRefImpl) {