[CPU] [ARM] INT8 FullyConnected
eshoguli committed Jul 31, 2024
1 parent 3b4ac7e commit d089473
Showing 7 changed files with 272 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_gemm.cpp
@@ -75,6 +75,7 @@ ACLFunction ACLGEMMExecutor::configureFunction(const ACLMemoryTensors& aclMemoryTensors) {
    return gemm;
}

// TODO: move to ACLLowpExecutor
ACLInfo ACLGEMMExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
                                        const arm_compute::DataType& dataType,
                                        const arm_compute::DataLayout& dataLayout) {
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/executors/acl/acl_gemm.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

133 changes: 133 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp
@@ -0,0 +1,133 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_lowp_fullyconnected.hpp"

#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"

#include "nodes/executors/executor.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"
#include "nodes/executors/debug_messages.hpp"
#include "nodes/executors/implementation_utils.hpp"
#include "acl_weights.hpp"
#include "acl_utils.hpp"

namespace ov {
namespace intel_cpu {

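// Translates framework-level FC attributes, memory descriptors and post-ops into
// the ACL tensor attributes and GEMMInfo consumed by this executor.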
static void initFCAttrs(const FCAttrs& attrs,
                        ACLTensorAttrs& aclTensorAttrs,
                        ACLFCAttrs& aclfcAttrs,
                        const MemoryArgs& memory,
                        arm_compute::GEMMInfo& gemmInfo,
                        const PostOps& postOps) {
    aclTensorAttrs.hasLayoutTypeNHWC = memory.at(ARG_SRC)->getDescPtr()->hasLayoutType(LayoutType::nspc);
    aclfcAttrs.inputPrecision = memory.at(ARG_SRC)->getDescPtr()->getPrecision();
    // TODO: confirm whether pretranspose of B should depend on the weights layout
    gemmInfo.set_pretranspose_B(false);
    aclfcAttrs.weightsNonTransposed = attrs.weightsNonTransposed;

    // Fuse a single activation post-op into the GEMM, if present
    if (postOps.size() == 1) {
        if (const auto activation = std::dynamic_pointer_cast<ActivationPostOp>(postOps[0])) {
            gemmInfo.set_activation_info(getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
                                                                activation->alpha(),
                                                                activation->beta(),
                                                                activation->gamma()));
        }
    }

    if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) {
        aclfcAttrs.isConvertedWeights = true;
    }
}

ACLLowpFullyConnectedExecutor::ACLLowpFullyConnectedExecutor(const FCAttrs& attrs,
                                                             const PostOps& postOps,
                                                             const MemoryArgs& memory,
                                                             const ExecutorContext::CPtr context) {
    initFCAttrs(attrs, aclTensorAttrs, aclfcAttrs, memory, gemmInfo, postOps);
    packedWeights = acl_fc_executor::prepareWeightMemory(memory, context, attrs, aclTensorAttrs, aclfcAttrs, postOps);
}

bool ACLLowpFullyConnectedExecutor::supports(const FCConfig& config) {
    // TODO: check weights layout
    const auto& attrs = config.attrs;
    if (std::any_of(attrs.dequantizationScales.begin(),
                    attrs.dequantizationScales.end(),
                    [](float value) { return value != 1.f; })) {
        return false;
    }

    const auto precision = srcType(config);
    VERIFY(one_of(precision, ov::element::i8, ov::element::u8), UNSUPPORTED_SRC_PRECISIONS);
    VERIFY(postOpsNumbers(config) == 0, UNSUPPORTED_NUMBER_OF_POSTOPS);
    VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
    VERIFY(one_of(weiRank(config), 2U, 3U, 4U), UNSUPPORTED_WEI_RANK);
    VERIFY(attrs.dequantizationScales.size() <= 1, UNSUPPORTED_PER_CHANNEL_QUANTIZATION);
    return true;
}

void ACLLowpFullyConnectedExecutor::updateTensorsShapes(ACLMemoryShapes& aclMemoryShapes) {
    acl_fc_executor::updateFCTensorsShapes(aclMemoryShapes);
}

arm_compute::Status ACLLowpFullyConnectedExecutor::validateTensorsInfo(const ACLMemoryInfo& aclMemoryInfos) {
    const auto matMulValid = arm_compute::NEGEMMLowpMatrixMultiplyCore::validate(
        aclMemoryInfos[ACLArgs::ACL_SRC_0].get(),
        aclMemoryInfos[ACLArgs::ACL_WEI].get(),
        nullptr,  // aclMemoryInfos[ACLArgs::ACL_BIAS].get()
        aclMemoryInfos[ACLArgs::ACL_DST].get(),
        gemmInfo);
    return matMulValid;
}

ACLFunction ACLLowpFullyConnectedExecutor::configureFunction(const ACLMemoryTensors& aclMemoryTensors) {
    auto gemm = std::make_unique<arm_compute::NEGEMMLowpMatrixMultiplyCore>();
    gemm->configure(
        aclMemoryTensors[ACLArgs::ACL_SRC_0].get(),
        aclMemoryTensors[ACLArgs::ACL_WEI].get(),
        nullptr,  // aclMemoryTensors[ACLArgs::ACL_BIAS].get()
        aclMemoryTensors.at(ACLArgs::ACL_DST).get(),
        gemmInfo);

    if (aclfcAttrs.isConvertedWeights || !aclfcAttrs.weightsNonTransposed) {
        aclTensorAttrs.memoryUsageIndicator[ACLArgs::ACL_WEI] = false;
        aclMemoryTensors[ACLArgs::ACL_WEI]->allocator()->import_memory(packedWeights->getData());
    }
    return gemm;
}

// TODO: move to ACLLowpExecutor
ACLInfo ACLLowpFullyConnectedExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
                                                      const arm_compute::DataType& dataType,
                                                      const arm_compute::DataLayout& dataLayout) {
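    // ACL's low-precision kernels operate on quantized data types, so plain 8-bit
    // integer tensors are remapped to their asymmetric quantized counterparts.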
    arm_compute::DataType result;
    switch (dataType) {
        case arm_compute::DataType::S8: {
            result = arm_compute::DataType::QASYMM8_SIGNED;
            break;
        }
        case arm_compute::DataType::U8: {
            result = arm_compute::DataType::QASYMM8;
            break;
        }
        default: {
            result = dataType;
            break;
        }
    }

    return ACLCommonExecutor::initTensorInfo(tensorShape, result, dataLayout);
}

} // namespace intel_cpu
} // namespace ov
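For context, a minimal standalone sketch (not part of this commit) of the ACL low-precision GEMM flow this executor wraps: validate the TensorInfo descriptors first, then configure and run the function. Shapes and quantization parameters below are illustrative assumptions.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"

int main() {
    using namespace arm_compute;

    // A: 1x16, B: 16x8, C: 1x8; ACL TensorShape is ordered (width, height).
    TensorInfo srcInfo(TensorShape(16U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 0));
    TensorInfo weiInfo(TensorShape(8U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0));
    TensorInfo dstInfo(TensorShape(8U, 1U), 1, DataType::S32);  // i32 accumulator output

    GEMMInfo gemmInfo;
    gemmInfo.set_pretranspose_B(false);

    // Mirrors validateTensorsInfo(): reject unsupported configurations up front.
    const Status status = NEGEMMLowpMatrixMultiplyCore::validate(&srcInfo, &weiInfo, nullptr, &dstInfo, gemmInfo);
    if (status.error_code() != ErrorCode::OK) {
        return 1;
    }

    // Mirrors configureFunction(): bind tensors, allocate, run.
    Tensor src, wei, dst;
    src.allocator()->init(srcInfo);
    wei.allocator()->init(weiInfo);
    dst.allocator()->init(dstInfo);

    NEGEMMLowpMatrixMultiplyCore gemm;
    gemm.configure(&src, &wei, nullptr, &dst, gemmInfo);

    src.allocator()->allocate();
    wei.allocator()->allocate();
    dst.allocator()->allocate();
    gemm.run();
    return 0;
}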
49 changes: 49 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp
@@ -0,0 +1,49 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "acl_common_executor.hpp"
#include "nodes/executors/fullyconnected_config.hpp"
#include "acl_weights.hpp"

namespace ov {
namespace intel_cpu {

class ACLLowpFullyConnectedExecutor : public ACLCommonExecutor {
public:
    ACLLowpFullyConnectedExecutor(const FCAttrs& attrs,
                                  const PostOps& postOps,
                                  const MemoryArgs& memory,
                                  const ExecutorContext::CPtr context);

    static bool supports(const FCConfig& config);

    void updateTensorsShapes(ACLMemoryShapes& aclMemoryShapes) override;

    arm_compute::Status validateTensorsInfo(const ACLMemoryInfo& aclMemoryInfos) override;

    ACLFunction configureFunction(const ACLMemoryTensors& aclMemoryTensors) override;

    impl_desc_type implType() const override {
        return impl_desc_type::gemm_acl;
    }

protected:
    ACLInfo initTensorInfo(const arm_compute::TensorShape& tensorShape,
                           const arm_compute::DataType& dataType,
                           const arm_compute::DataLayout& dataLayout) override;

private:
    arm_compute::GEMMInfo gemmInfo;
    arm_compute::WeightsInfo weightsInfo;

    MemoryCPtr packedWeights;
    ACLFCAttrs aclfcAttrs;
};

using ACLLowpFullyConnectedExecutorPtr = std::shared_ptr<ACLLowpFullyConnectedExecutor>;

} // namespace intel_cpu
} // namespace ov
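A note on the quantization scheme assumed by the QASYMM8 types above: ACL uses real = scale * (quantized - zero_point). A tiny self-contained check with illustrative values (assumes ACL's dequantize helper in QuantizationInfo.h):

#include "arm_compute/core/QuantizationInfo.h"
#include <cassert>

int main() {
    // scale = 0.5, zero-point = 10 (illustrative values, not from this commit)
    arm_compute::QuantizationInfo qinfo(0.5f, 10);
    float real = arm_compute::dequantize_qasymm8_signed(static_cast<int8_t>(14), qinfo);
    assert(real == 2.0f);  // 0.5f * (14 - 10)
    return 0;
}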
@@ -28,6 +28,7 @@

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_fullyconnected.hpp"
#include "nodes/executors/acl/acl_lowp_fullyconnected.hpp"
#endif

#if defined(OV_CPU_WITH_SHL)
@@ -82,6 +83,11 @@ static const TypeMapping dnnlFCTypeMapping {
static const TypeMapping aclFCTypeMapping {
    // {src, wei, bia, dst}                  pt<src, wei, bias, dst>
    {{_f32 | _f16, _f32 | _f16, _any, _any}, pt(bypass(), bypass(), use<0>(), use<0>())},
    {{_any, _any, _any, _any},               pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
};
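// Note: in these mapping tables bypass() keeps the incoming precision, just<T>()
// forces precision T, and use<N>() reuses the precision resolved for argument N.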

static const TypeMapping aclLowpFCTypeMapping {
    // {src, wei, bia, dst}    pt<src, wei, bias, dst>
    {{_i8, _i8, _any, _any},   pt(just<i8>(), just<i8>(), just<i32>(), just<i32>())},
    {{_any, _any, _any, _any}, pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
};
@@ -352,6 +358,36 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
           const ExecutorContext::CPtr context) {
            return std::make_shared<ACLFullyConnectedExecutor>(attrs, postOps, memory, context);
        })
    OV_CPU_INSTANCE_ACL(
        "fullyconnected_acl_lowp",
        ExecutorType::Acl,
        OperationType::FullyConnected,
        ShapeTolerance::Agnostic,
        // supports
        [](const FCConfig& config) -> bool {
            VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
            VERIFY(noWeightsDecompression(config), UNSUPPORTED_WEIGHTS_DECOMPRESSION);
            return ACLLowpFullyConnectedExecutor::supports(config);
        },
        // requiresFallback
        [](const FCConfig& config) -> ov::optional<executor::Config<FCAttrs>> {
            return requiresFallbackCommon(config,
                                          aclLowpFCTypeMapping,
                                          aclFCLayoutConfig,
                                          aclFullyConnectedMappingNotation);
        },
        // acceptsShapes
        [](const MemoryArgs& memory) -> bool {
            // @todo create syntactic sugar (functor) for shape agnostic lambda
            return true;
        },
        // create
        [](const FCAttrs& attrs,
           const PostOps& postOps,
           const MemoryArgs& memory,
           const ExecutorContext::CPtr context) {
            return std::make_shared<ACLLowpFullyConnectedExecutor>(attrs, postOps, memory, context);
        })
    OV_CPU_INSTANCE_SHL(
        "fullyconnected_shl",
        ExecutorType::Shl,
@@ -5,7 +5,6 @@

#include "snippets/pass/tokenization.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/utils.hpp"

#include "transformations/utils/utils.hpp"
#include "transformations/utils.hpp"
52 changes: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "low_precision_transformations/fully_connected_transformation.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
const std::vector<ov::element::Type> netPrecisions = {
    ov::element::f32
};

const std::vector<MatMulShapes> shapes = {
    {
        ov::PartialShape{ 1, 16 },
        ov::PartialShape{ 16, 8 },
        false,
        false
    },
    // {
    //     ov::PartialShape{ 1, 16 },
    //     ov::PartialShape{ 8, 16 },
    //     false,
    //     true
    // },
    // {
    //     ov::PartialShape{ 16, 1 },
    //     ov::PartialShape{ 16, 8 },
    //     true,
    //     false
    // },
};

const std::vector<ov::pass::low_precision::LayerTransformation::Params> transformationParamValues = {
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams()
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(shapes),
        ::testing::Values(ov::test::utils::DEVICE_CPU),
        ::testing::ValuesIn(transformationParamValues),
        ::testing::ValuesIn({ov::element::i8 /*, ov::element::u8*/}),
        ::testing::ValuesIn({/*true,*/ false}),
        ::testing::Values("gemm_acl_i8")),
    FullyConnectedTransformation::getTestCaseName);
} // namespace
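The new suite can be run on its own with a standard gtest filter; the binary name below assumes the usual CPU functional-test target and is not stated in this commit:

./ov_cpu_func_tests --gtest_filter='smoke_LPT/FullyConnectedTransformation.*'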
