[CPU] [ARM] [INT8] FullyConnected #25171

Open · wants to merge 6 commits into base: master
src/common/low_precision_transformations/include/low_precision/mat_mul.hpp
@@ -26,6 +26,9 @@ class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
     bool transform(TransformationContext &context, ov::pass::pattern::Matcher &m) override;
     bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
     bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
+
+protected:
+    virtual void handleDequantization(const std::shared_ptr<ov::opset1::Multiply>& dequantization) const {}
 };
 
 }  // namespace low_precision
src/common/low_precision_transformations/include/low_precision/mat_mul_with_dequantization.hpp (new file)
@@ -0,0 +1,33 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include "mat_mul.hpp"
+
+namespace ov {
+namespace pass {
+namespace low_precision {
+
+/**
+ * @ingroup ov_transformation_common_api
+ * @brief MatMulWithDequantizationTransformation propagates dequantization operations through the MatMul operation and keeps the dequantization as is.
+ *
+ * For more details about the transformation, refer to
+ * [MatMulWithDequantizationTransformation](@ref openvino_docs_OV_UG_lpt_MatMulWithDequantizationTransformation) page
+ * in the OpenVINO Developer Guide.
+ */
+class LP_TRANSFORMATIONS_API MatMulWithDequantizationTransformation : public MatMulTransformation {
+public:
+    OPENVINO_RTTI("MatMulWithDequantizationTransformation", "0");
+    MatMulWithDequantizationTransformation(const Params& params = Params());
+
+protected:
+    void handleDequantization(const std::shared_ptr<ov::opset1::Multiply>& dequantization) const override;
+};
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ov
src/common/low_precision_transformations/src/low_precision.cpp
@@ -52,7 +52,11 @@
 #include "low_precision/fake_quantize.hpp"
 #include "low_precision/group_convolution.hpp"
 #include "low_precision/interpolate.hpp"
+#ifdef OPENVINO_ARCH_ARM64
+#include "low_precision/mat_mul_with_dequantization.hpp"
+#else
 #include "low_precision/mat_mul.hpp"
+#endif
 #include "low_precision/max_pool.hpp"
 #include "low_precision/multiply_partial.hpp"
 #include "low_precision/mvn.hpp"
@@ -252,7 +256,11 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr<ov::Model>& m) {
 ADD_MATCHER(common, FakeQuantizeTransformation, params)
 ADD_MATCHER(common, InterpolateTransformation, params)
 ADD_MATCHER(common, GroupConvolutionTransformation, params)
+#ifdef OPENVINO_ARCH_ARM64
+ADD_MATCHER(common, MatMulWithDequantizationTransformation, params)
+#else
 ADD_MATCHER(common, MatMulTransformation, params)
+#endif
 ADD_MATCHER(common, MaxPoolTransformation, params)
 ADD_MATCHER(common, MultiplyPartialTransformation, params)
 ADD_MATCHER(common, MVNTransformation, params)

Comment on lines +259 to +263 (Contributor):
I believe such reconfiguration should be on the plugin's side, not in common LPT code. I'd suggest doing the following in the CPU plugin's transformation pipeline instead of the current implementation (for the ARM platform): we can keep the MatMul transformation as is (and not introduce an inheritor of it), but add an ARM-specific transformation that matches on a quantized MatMul + Multiply and marks the Multiply with the needed attribute.

We already have additional_main_passes, which allows adding a custom matcher to the LPT pipeline.
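A rough sketch of what the reviewer's suggestion could look like, for illustration only: an ARM-specific matcher pass on the CPU plugin side that annotates the dequantization Multiply after a quantized MatMul. The class name MarkDequantizationAfterMatMul is hypothetical, and mark_as_bias is used only as a stand-in for whatever attribute is eventually agreed on (see the BiasAttribute discussion below); this is not code from the PR.

// Hypothetical plugin-side pass; not part of this PR.
#include <memory>
#include "low_precision/rt_info/bias_attribute.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/matcher_pass.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

class MarkDequantizationAfterMatMul : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("MarkDequantizationAfterMatMul", "0");
    MarkDequantizationAfterMatMul() {
        using namespace ov::pass::pattern;
        // Match a quantized MatMul followed by a Multiply with a constant scale.
        const auto matmul = wrap_type<ov::opset1::MatMul>();
        const auto scale = wrap_type<ov::opset1::Constant>();
        const auto multiply = wrap_type<ov::opset1::Multiply>({matmul, scale});

        ov::matcher_pass_callback callback = [](Matcher& m) {
            const auto mul = m.get_match_root();
            // Only a per-tensor (scalar) scale can be fused into the ARM kernel.
            const auto constant = ov::as_type_ptr<ov::opset1::Constant>(mul->get_input_node_shared_ptr(1));
            if (constant == nullptr || ov::shape_size(constant->get_shape()) != 1) {
                return false;
            }
            ov::mark_as_bias(mul);  // stand-in for "the needed attribute"
            return false;           // the graph is only annotated, never rewritten
        };
        register_matcher(std::make_shared<Matcher>(multiply, "MarkDequantizationAfterMatMul"), callback);
    }
};

Such a pass could then be appended to the LPT pipeline through additional_main_passes from the CPU plugin, leaving the common MatMulTransformation untouched.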
3 changes: 3 additions & 0 deletions src/common/low_precision_transformations/src/mat_mul.cpp
@@ -12,6 +12,7 @@
 #include "openvino/pass/pattern/op/or.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 
+#include "low_precision/rt_info/bias_attribute.hpp"
 #include "low_precision/network_helper.hpp"
 #include "openvino/util/log.hpp"
 #include "itt.hpp"
@@ -176,6 +177,8 @@ bool MatMulTransformation::transform(TransformationContext &context, ov::pass::pattern::Matcher &m) {
 
     updateOutput(context, newMultiply, newMatMul);
 
+    handleDequantization(newMultiply);
+
     OPENVINO_DEBUG("LPT: done: ", newMatMul);
     return true;
 }
src/common/low_precision_transformations/src/mat_mul_with_dequantization.cpp (new file)
@@ -0,0 +1,26 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/mat_mul_with_dequantization.hpp"
+
+#include <memory>
+#include "low_precision/rt_info/bias_attribute.hpp"
+
+using namespace ov;
+using namespace ov::pass;
+using namespace ov::pass::low_precision;
+
+MatMulWithDequantizationTransformation::MatMulWithDequantizationTransformation(const Params& params) : MatMulTransformation(params) {
+}
+
+void MatMulWithDequantizationTransformation::handleDequantization(const std::shared_ptr<ov::opset1::Multiply>& dequantization) const {
+    const auto& dequantization_constant = is_type<opset1::Constant>(dequantization->get_input_node_shared_ptr(1)) ?
+        as_type<opset1::Constant>(dequantization->get_input_node_ptr(1)) :
+        as_type<opset1::Constant>(dequantization->get_input_node_ptr(0));
+    if ((dequantization_constant == nullptr) || (ov::shape_size(dequantization_constant->get_shape()) != 1ull)) {
+        return;
+    }
+
+    ov::mark_as_bias(dequantization);
+}

Comment (Contributor):
BiasAttribute is Add-specific (please take a look at the BiasAttribute::is_copyable implementation); I think using it for Multiply ops is not a good idea. Let's discuss alternative options offline.
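For reference, one possible shape of the alternative hinted at here (all names hypothetical, not part of this PR or the OpenVINO API): a dedicated, non-copyable runtime attribute for the dequantization Multiply, mirroring the structure of BiasAttribute without its Add-specific is_copyable logic.

// Hypothetical dedicated attribute; illustrative sketch only.
#include <memory>
#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"

namespace ov {

class DequantizationMultiplyAttribute : public RuntimeAttribute {
public:
    OPENVINO_RTTI("dequantization_multiply", "0", RuntimeAttribute);
    // Keep the mark on the node it was set on; do not propagate it to copies.
    bool is_copyable() const override {
        return false;
    }
};

inline void mark_as_dequantization_multiply(const std::shared_ptr<Node>& node) {
    node->get_rt_info()[DequantizationMultiplyAttribute::get_type_info_static()] =
        DequantizationMultiplyAttribute{};
}

inline bool marked_as_dequantization_multiply(const std::shared_ptr<const Node>& node) {
    const auto& rt_info = node->get_rt_info();
    return rt_info.find(DequantizationMultiplyAttribute::get_type_info_static()) != rt_info.end();
}

}  // namespace ov

The NetworkHelper::getDequantization checks below could then test such an attribute instead of marked_as_bias.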
src/common/low_precision_transformations/src/network_helper.cpp
@@ -17,6 +17,7 @@
 #include "low_precision/common/ie_lpt_exception.hpp"
 #include "low_precision/layer_transformation.hpp"
 #include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/bias_attribute.hpp"
 #include "low_precision/rt_info/intervals_alignment_attribute.hpp"
 #include "low_precision/rt_info/precision_preserved_attribute.hpp"
 #include "low_precision/rt_info/quantization_alignment_attribute.hpp"
@@ -1192,7 +1193,7 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(…
     const std::shared_ptr<ov::opset1::Multiply> multiply = ov::as_type_ptr<ov::opset1::Multiply>(dataNode.get_node_shared_ptr());
     std::shared_ptr<ov::opset1::Constant> multiplyConstant;
     if (multiply != nullptr) {
-        if (!FakeQuantizeDequantization::checkShape(multiply)) {
+        if (!FakeQuantizeDequantization::checkShape(multiply) || ov::marked_as_bias(multiply)) {
             return FakeQuantizeDequantization();
         }
 
@@ -1207,6 +1208,9 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(…
     std::shared_ptr<ov::opset1::Convert> subtractConvert;
     std::shared_ptr<ov::opset1::Constant> subtractConstant;
     if (subtract != nullptr) {
+        if (ov::marked_as_bias(subtract)) {
+            return FakeQuantizeDequantization();
+        }
         if (!FakeQuantizeDequantization::checkShape(subtract)) {
             return FakeQuantizeDequantization(dataNode, nullptr, nullptr, nullptr, nullptr, multiply, multiplyConstant);
         }
@@ -1220,6 +1224,9 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(…
 
     const std::shared_ptr<ov::opset1::Convert> convert = ov::as_type_ptr<ov::opset1::Convert>(dataNode.get_node_shared_ptr());
     if (convert != nullptr) {
+        if (ov::marked_as_bias(convert)) {
+            return FakeQuantizeDequantization();
+        }
         auto el_type = convert->input(0).get_element_type();
         auto foundIt = std::find(defaultPrecisions.begin(), defaultPrecisions.end(), el_type);
         if (foundIt == defaultPrecisions.end() &&
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -403,6 +403,7 @@ void DnnlMemoryBlock::notifyUpdate() {
 
 StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) :
     m_eng(eng), m_pMemDesc(desc) {
+    OPENVINO_ASSERT(!desc->empty() || (desc->empty() && (data == nullptr)));
     if (desc->getPrecision() == element::string) {
         OPENVINO_THROW("[CPU] StaticMemory object cannot be created for string data.");
     }
@@ -412,7 +413,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) :
 
     m_size = m_pMemDesc->getCurrentMemSize();
 
-    if (data) {
+    if (data || desc->empty()) {
         m_pMemBlock = std::make_shared<StaticMemoryBlock>(const_cast<void*>(data), m_size);
     } else {
         m_pMemBlock = std::make_shared<StaticMemoryBlock>(m_size);
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp
@@ -38,9 +38,9 @@ static void initACLTensorParams(const MemoryPtr& memoryPtr,
     }
 }
 
-static std::shared_ptr<arm_compute::TensorInfo> initTensorInfo(const arm_compute::TensorShape& tensorShape,
-                                                               const arm_compute::DataType& dataType,
-                                                               const arm_compute::DataLayout& dataLayout) {
+std::shared_ptr<arm_compute::TensorInfo> ACLCommonExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                                                           const arm_compute::DataType& dataType,
+                                                                           const arm_compute::DataLayout& dataLayout) {
     std::shared_ptr<arm_compute::TensorInfo> aclMemoryInfo = nullptr;
     if (dataType != arm_compute::DataType::UNKNOWN) {
         aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(
@@ -72,6 +72,9 @@ bool ACLCommonExecutor::update(const MemoryArgs &memory) {
     ACLTypes aclDataType{};
     ACLLayouts aclDataLayout{};
     for (auto& cpu_mem_ptr : memory) {
+        if (cpu_mem_ptr.second->getSize() == 0) {
+            continue;
+        }
         const ACLArgs index = argConvert.at(cpu_mem_ptr.first);
         initACLTensorParams(cpu_mem_ptr.second, aclTensorAttrs,
                             aclMemoryShapes[index],
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp
@@ -50,6 +50,11 @@ class ACLCommonExecutor : public Executor {
 
 protected:
     ACLTensorAttrs aclTensorAttrs;
+
+    virtual std::shared_ptr<arm_compute::TensorInfo> initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                                                    const arm_compute::DataType& dataType,
+                                                                    const arm_compute::DataLayout& dataLayout);
+
 private:
     ACLTensors aclMemoryTensors;
     ACLInfos aclMemoryInfos;