Skip to content

Commit

Permalink
[CPU] [ARM] FullyConnected: int8 support
Browse files Browse the repository at this point in the history
  • Loading branch information
eshoguli committed Jun 26, 2024
1 parent 3a13983 commit c2d4099
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 22 deletions.
27 changes: 23 additions & 4 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
//

#include "acl_executor.hpp"
#include "acl_utils.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"

namespace ov {
namespace intel_cpu {

ACLMemoryInfo ACLCommonExecutor::initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs) {
auto acl_tensor_type = precisionToAclDataType(memoryPtr->getPrecision());
ACLMemoryInfo ACLCommonExecutor::initTensorInfo(
const MemoryPtr& memoryPtr,
const ACLTensorAttrs attrs,
const QuantizedDataType quantized) {
auto acl_tensor_type = precisionToAclDataType(memoryPtr->getPrecision(), quantized);
auto acl_tensor_layout = getAclDataLayoutByMemoryDesc(memoryPtr->getDescPtr());

ACLMemoryInfo aclMemoryInfo = nullptr;
Expand Down Expand Up @@ -40,8 +42,25 @@ ACLMemory ACLCommonExecutor::initTensor(const ACLMemoryInfo& aclMemoryInfo) {

bool ACLCommonExecutor::update(const MemoryArgs &memory) {
for (auto& cpu_mem_ptr : memory) {
auto aclPrecision = precisionToAclDataType(cpu_mem_ptr.second->getPrecision());
QuantizedDataType quantized;
switch (aclPrecision) {
case arm_compute::DataType::S8: {
quantized = QuantizedDataType::QASYMM;
break;
}
case arm_compute::DataType::U8: {
quantized = QuantizedDataType::QSYMM;
break;
}
default: {
quantized = QuantizedDataType::NONE;
break;
}
}

// Initialize arm_compute::TensorInfo object
auto aclTensorInfo = initTensorInfo(cpu_mem_ptr.second, aclTensorAttrs);
auto aclTensorInfo = initTensorInfo(cpu_mem_ptr.second, aclTensorAttrs, quantized);
// Initialize arm_compute::Tensor object
aclMemoryMap[cpu_mem_ptr.first] = initTensor(aclTensorInfo);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include "acl_utils.hpp"
#include "cpu_memory.h"
#include "nodes/executors/executor.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
Expand Down Expand Up @@ -42,7 +43,10 @@ class ACLCommonExecutor : public Executor {
private:
ACLMemoryMap aclMemoryMap;
ACLFunction iFunction = nullptr;
static ACLMemoryInfo initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs);
static ACLMemoryInfo initTensorInfo(
const MemoryPtr& memoryPtr,
const ACLTensorAttrs attrs,
const QuantizedDataType quantized);
static ACLMemory initTensor(const ACLMemoryInfo& aclMemoryInfo);
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ACLFullyConnectedExecutor::ACLFullyConnectedExecutor(const FCAttrs &attrs, const
}

bool ACLFullyConnectedExecutor::supports(const FCConfig &config) {
VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32), UNSUPPORTED_SRC_PRECISIONS);
VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32, ov::element::i8), UNSUPPORTED_SRC_PRECISIONS);
VERIFY(postOpsNumbers(config) < 2, UNSUPPORTED_NUMBER_OF_POSTOPS);
VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
VERIFY(one_of(weiRank(config), 2U, 3U), UNSUPPORTED_SRC_RANK);
Expand Down
41 changes: 38 additions & 3 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,50 @@ inline int axisCast(const std::size_t axis, const std::size_t shapeSize, ACLAxis
}
}

// Quantization selector used to refine an ov::element integer precision into a
// concrete ACL DataType in precisionToAclDataType(): with NONE the plain
// integer type is kept (i8 -> S8, u8 -> U8); with QASYMM an asymmetric
// quantized type is chosen (i8 -> QASYMM8_SIGNED, u8 -> QASYMM8); with QSYMM a
// symmetric quantized type is chosen (u8 -> QSYMM8). Combinations without a
// mapping (e.g. i8 + QSYMM) resolve to arm_compute::DataType::UNKNOWN.
enum class QuantizedDataType {
NONE, // not quantized: keep the plain integer DataType
QSYMM, // quantized, symmetric (no zero-point offset)
QASYMM // quantized, asymmetric (with zero-point offset)
};

/**
* @brief Return ComputeLibrary DataType that corresponds to the given precision
* @param precision precision to be converted
* @return ComputeLibrary DataType or UNKNOWN if precision is not mapped to DataType
*/
inline arm_compute::DataType precisionToAclDataType(ov::element::Type precision) {
inline arm_compute::DataType precisionToAclDataType(
const ov::element::Type& precision,
const QuantizedDataType quantized = QuantizedDataType::NONE) {
switch (precision) {
case ov::element::i8: return arm_compute::DataType::S8;
case ov::element::u8: return arm_compute::DataType::U8;
case ov::element::i8: {
switch (quantized) {
case QuantizedDataType::QASYMM: {
return arm_compute::DataType::QASYMM8_SIGNED;
}
case QuantizedDataType::NONE: {
return arm_compute::DataType::S8;
}
default: {
return arm_compute::DataType::UNKNOWN;
}
}
}
case ov::element::u8: {
switch (quantized) {
case QuantizedDataType::QSYMM: {
return arm_compute::DataType::QSYMM8;
}
case QuantizedDataType::QASYMM: {
return arm_compute::DataType::QASYMM8;
}
case QuantizedDataType::NONE: {
return arm_compute::DataType::U8;
}
default: {
return arm_compute::DataType::UNKNOWN;
}
}
}
case ov::element::i16: return arm_compute::DataType::S16;
case ov::element::u16: return arm_compute::DataType::U16;
case ov::element::i32: return arm_compute::DataType::S32;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ static const TypeMapping dnnlFCTypeMapping {
static const TypeMapping aclFCTypeMapping {
// {src, wei, bia, dst} pt<src, wei, bias, dst>
{{_f32 | _f16, _any, _any, _any}, pt(bypass(), use<0>(), use<0>(), use<0>())},
{{_i8, _i8, _any, _any}, pt(just<i8>(), just<i8>(), bypass(), just<i32>())},
{{_any, _any, _any, _any}, pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes),
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues)),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn({ov::element::i8, ov::element::u8})),
FullyConnectedTransformation::getTestCaseName);
} // namespace
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ typedef std::tuple<
ov::element::Type,
MatMulShapes,
std::string,
ov::pass::low_precision::LayerTransformation::Params> FullyConnectedTransformationParams;
ov::pass::low_precision::LayerTransformation::Params,
ov::element::Type> FullyConnectedTransformationParams;

namespace LayerTestsDefinitions {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ std::string FullyConnectedTransformation::getTestCaseName(const testing::TestPar
MatMulShapes shapes;
std::string targetDevice;
ov::pass::low_precision::LayerTransformation::Params params;
std::tie(precision, shapes, targetDevice, params) = obj.param;
ov::element::Type weightsType;
std::tie(precision, shapes, targetDevice, params, weightsType) = obj.param;

std::ostringstream result;
result <<
get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) <<
shapes.inputB << "_" <<
get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) <<
shapes.inputB << "_" <<
shapes.transposeA << "_" <<
shapes.transposeB;
shapes.transposeB << "_" <<
weightsType;

return result.str();
}
Expand All @@ -36,7 +38,8 @@ void FullyConnectedTransformation::SetUp() {
ov::element::Type precision;
MatMulShapes shapes;
ov::pass::low_precision::LayerTransformation::Params params;
std::tie(precision, shapes, targetDevice, params) = this->GetParam();
ov::element::Type weightsType;
std::tie(precision, shapes, targetDevice, params, weightsType) = this->GetParam();

init_input_shapes({ shapes.inputA, shapes.inputB });

Expand All @@ -45,12 +48,17 @@ void FullyConnectedTransformation::SetUp() {
shapes.inputA,
shapes.inputB,
shapes.transposeA,
shapes.transposeB);
shapes.transposeB,
weightsType == ov::element::i8);
}

TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();

const auto actualPrecision = get_runtime_precision_by_type("FullyConnected");
const auto weightsType = std::get<4>(GetParam());
EXPECT_EQ(actualPrecision, weightsType.to_string());
};

} // namespace LayerTestsDefinitions
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class MatMulFunction {
const ov::PartialShape inputShape1,
const ov::PartialShape inputShape2,
const bool transpose1,
const bool transpose2);
const bool transpose2,
const bool signedOnWeights = false);

static std::shared_ptr<ov::Model> getOriginal(
const ov::element::Type precision,
Expand Down
13 changes: 9 additions & 4 deletions src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,17 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
const ov::PartialShape inputShape1,
const ov::PartialShape inputShape2,
const bool transpose1,
const bool transpose2) {
const bool transpose2,
const bool signedOnWeights) {
const auto paramNode = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
const std::vector<size_t> constShapes(inputShape1.rank().get_length(), 1ul);
const auto fakeQuantizeOnAcitvations = ov::test::utils::make_fake_quantize(
paramNode, precision, 256ul, constShapes,
{ 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
const auto fakeQuantizeOnAcitvations = signedOnWeights ?
ov::test::utils::make_fake_quantize(
paramNode, precision, 256ul, constShapes,
{ -128.f / 4.f }, { 127.f / 4.f }, { -128.f / 4.f }, { 127.f / 4.f }) :
ov::test::utils::make_fake_quantize(
paramNode, precision, 256ul, constShapes,
{ 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
fakeQuantizeOnAcitvations->set_friendly_name("fakeQuantizeOnAcitvations");

auto weightsConst = std::make_shared<ov::op::v0::Constant>(
Expand Down

0 comments on commit c2d4099

Please sign in to comment.