[CPU] [ARM] FullyConnected: int8 support
eshoguli committed Jul 2, 2024
1 parent a58e4a5 commit 743281f
Showing 80 changed files with 120 additions and 23 deletions.
@@ -38,9 +38,9 @@ static void initACLTensorParams(const MemoryPtr& memoryPtr,
     }
 }
 
-static ACLInfo initTensorInfo(const arm_compute::TensorShape& tensorShape,
-                              const arm_compute::DataType& dataType,
-                              const arm_compute::DataLayout& dataLayout) {
+ACLInfo ACLCommonExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                          const arm_compute::DataType& dataType,
+                                          const arm_compute::DataLayout& dataLayout) {
     ACLInfo aclMemoryInfo = nullptr;
     if (dataType != arm_compute::DataType::UNKNOWN) {
         aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(
@@ -52,6 +52,10 @@ class ACLCommonExecutor : public Executor {
 protected:
     ACLTensorAttrs aclTensorAttrs;
 
+    virtual ACLInfo initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                   const arm_compute::DataType& dataType,
+                                   const arm_compute::DataLayout& dataLayout);
+
 private:
     ACLMemoryTensors aclMemoryTensors;
     ACLFunction iFunction = nullptr;
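Note (reviewer addition, not part of the commit): promoting initTensorInfo from a file-local static function to a protected virtual member is a template-method refactor: ACLCommonExecutor keeps ownership of TensorInfo construction, while a subclass may remap the data type first. A minimal, self-contained sketch of that dispatch, using hypothetical class names:

    #include <iostream>

    // Hypothetical stand-ins for ACLCommonExecutor and a quantized subclass.
    struct Base {
        virtual ~Base() = default;
        void build() { std::cout << initType() << '\n'; }  // shared construction path
    protected:
        virtual const char* initType() { return "S8"; }    // default type mapping
    };

    struct Quantized : Base {
    protected:
        const char* initType() override { return "QASYMM8_SIGNED"; }  // subclass remap
    };

    int main() {
        Quantized q;
        q.build();  // prints QASYMM8_SIGNED: base drives the flow, subclass picks the type
    }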
@@ -32,7 +32,15 @@ ACLFullyConnectedExecutor::ACLFullyConnectedExecutor(const FCAttrs &attrs, const
 }
 
 bool ACLFullyConnectedExecutor::supports(const FCConfig &config) {
-    VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32), UNSUPPORTED_SRC_PRECISIONS);
+    const auto attrs = static_cast<FCAttrs>(config.attrs);
+    if (std::any_of(
+            attrs.dequantizationScales.begin(),
+            attrs.dequantizationScales.end(),
+            [](float value) { return value != 1.f; })) {
+        return false;
+    }
+
+    VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32, ov::element::i8), UNSUPPORTED_SRC_PRECISIONS);
     VERIFY(postOpsNumbers(config) < 2, UNSUPPORTED_NUMBER_OF_POSTOPS);
     VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
     VERIFY(one_of(weiRank(config), 2U, 3U), UNSUPPORTED_WEI_RANK);
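Note (reviewer addition, not part of the commit): the new early return bails out of supports() whenever any dequantization scale differs from exactly 1.0f; the int8 path wired here apparently does not fold per-channel rescaling into the ACL kernel. Under that predicate:

    // Illustration of the std::any_of guard above (hypothetical values):
    const std::vector<float> trivial{1.f, 1.f, 1.f};    // all ones: supports() continues
    const std::vector<float> perChannel{0.5f, 0.25f};   // any non-1.0 value: supports() returns false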
@@ -85,5 +93,27 @@ ACLFunction ACLFullyConnectedExecutor::configureFunction(const ACLMemoryTensors
     return neFC;
 }
 
+ACLInfo ACLFullyConnectedExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                                  const arm_compute::DataType& dataType,
+                                                  const arm_compute::DataLayout& dataLayout) {
+    arm_compute::DataType fcDataType;
+    switch (dataType) {
+        case arm_compute::DataType::S8: {
+            fcDataType = arm_compute::DataType::QASYMM8_SIGNED;
+            break;
+        }
+        case arm_compute::DataType::U8: {
+            fcDataType = arm_compute::DataType::QASYMM8;
+            break;
+        }
+        default: {
+            fcDataType = dataType;
+            break;
+        }
+    }
+
+    return ACLCommonExecutor::initTensorInfo(tensorShape, fcDataType, dataLayout);
+}
+
 } // namespace intel_cpu
 } // namespace ov
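Note (reviewer addition): the override matters because Compute Library treats plain integer types (S8, U8) and quantized types (QASYMM8_SIGNED, QASYMM8) as distinct; the quantized GEMM kernels expect the latter, which can carry a QuantizationInfo. A sketch against the stock ACL API (scale and zero point are placeholder values):

    #include <arm_compute/core/TensorInfo.h>
    #include <arm_compute/core/QuantizationInfo.h>

    // Sketch: describe an int8 tensor as QASYMM8_SIGNED rather than plain S8.
    arm_compute::TensorInfo makeInt8Info(const arm_compute::TensorShape& shape) {
        arm_compute::TensorInfo info(shape, 1, arm_compute::DataType::QASYMM8_SIGNED);
        info.set_quantization_info(arm_compute::QuantizationInfo(1.f, 0));  // placeholder scale/offset
        return info;
    }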
@@ -28,6 +28,12 @@ class ACLFullyConnectedExecutor : public ACLCommonExecutor {
     impl_desc_type implType() const override {
         return impl_desc_type::gemm_acl;
     }
+
+protected:
+    ACLInfo initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                           const arm_compute::DataType& dataType,
+                           const arm_compute::DataLayout& dataLayout) override;
+
 private:
     arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo;
     arm_compute::WeightsInfo weightsInfo;
41 changes: 38 additions & 3 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp
@@ -96,15 +96,50 @@ inline int axisCast(const std::size_t axis, const std::size_t shapeSize, ACLAxis
     }
 }
 
+enum class QuantizedDataType {
+    NONE,    // not quantized
+    QSYMM,   // quantized, symmetric
+    QASYMM   // quantized, asymmetric
+};
+
 /**
  * @brief Return ComputeLibrary DataType that corresponds to the given precision
  * @param precision precision to be converted
  * @return ComputeLibrary DataType or UNKNOWN if precision is not mapped to DataType
  */
-inline arm_compute::DataType precisionToAclDataType(ov::element::Type precision) {
+inline arm_compute::DataType precisionToAclDataType(
+    const ov::element::Type& precision,
+    const QuantizedDataType quantized = QuantizedDataType::NONE) {
     switch (precision) {
-        case ov::element::i8: return arm_compute::DataType::S8;
-        case ov::element::u8: return arm_compute::DataType::U8;
+        case ov::element::i8: {
+            switch (quantized) {
+                case QuantizedDataType::QASYMM: {
+                    return arm_compute::DataType::QASYMM8_SIGNED;
+                }
+                case QuantizedDataType::NONE: {
+                    return arm_compute::DataType::S8;
+                }
+                default: {
+                    return arm_compute::DataType::UNKNOWN;
+                }
+            }
+        }
+        case ov::element::u8: {
+            switch (quantized) {
+                case QuantizedDataType::QSYMM: {
+                    return arm_compute::DataType::QSYMM8;
+                }
+                case QuantizedDataType::QASYMM: {
+                    return arm_compute::DataType::QASYMM8;
+                }
+                case QuantizedDataType::NONE: {
+                    return arm_compute::DataType::U8;
+                }
+                default: {
+                    return arm_compute::DataType::UNKNOWN;
+                }
+            }
+        }
         case ov::element::i16: return arm_compute::DataType::S16;
         case ov::element::u16: return arm_compute::DataType::U16;
         case ov::element::i32: return arm_compute::DataType::S32;
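Usage sketch (reviewer addition); the results follow directly from the switch above:

    const auto plain = precisionToAclDataType(ov::element::i8);
    // plain == arm_compute::DataType::S8 (default QuantizedDataType::NONE)
    const auto asymm = precisionToAclDataType(ov::element::i8, QuantizedDataType::QASYMM);
    // asymm == arm_compute::DataType::QASYMM8_SIGNED
    const auto unknown = precisionToAclDataType(ov::element::i8, QuantizedDataType::QSYMM);
    // unknown == arm_compute::DataType::UNKNOWN (no symmetric mapping for i8 here)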
@@ -78,6 +78,7 @@ static const TypeMapping dnnlFCTypeMapping {
 static const TypeMapping aclFCTypeMapping {
     // {src, wei, bia, dst}              pt<src, wei, bias, dst>
     {{_f32 | _f16, _any, _any, _any},    pt(bypass(), use<0>(), use<0>(), use<0>())},
+    {{_i8, _i8, _any, _any},             pt(just<i8>(), just<i8>(), bypass(), just<i32>())},
     {{_any, _any, _any, _any},           pt(just<f32>(), just<f32>(), just<f32>(), just<f32>())}
 };
 
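Reading the new row (reviewer interpretation): for an i8 src with i8 weights the mapping keeps both inputs in i8 (just<i8>()), leaves the bias precision as-is (bypass()), and pins the destination to i32, consistent with an integer GEMM that accumulates into 32 bits.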
4 changes: 3 additions & 1 deletion src/plugins/intel_cpu/tests/functional/CMakeLists.txt
@@ -41,6 +41,7 @@ if(NOT (ARM OR AARCH64))
     list(APPEND EXCLUDED_SOURCE_PATHS
         ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/arm
         ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/arm
+        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/arm
         ${CMAKE_CURRENT_SOURCE_DIR}/utils/arm)
 else()
     list(APPEND EXCLUDED_SOURCE_PATHS
@@ -67,7 +68,8 @@ endif()
 if(NOT X86_64)
     list(APPEND EXCLUDED_SOURCE_PATHS
         ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/x64
-        ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64)
+        ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64
+        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/x64)
 endif()
 
 ov_add_test_target(
@@ -44,6 +44,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
         ::testing::ValuesIn(netPrecisions),
         ::testing::ValuesIn(shapes),
         ::testing::Values(ov::test::utils::DEVICE_CPU),
-        ::testing::ValuesIn(trasformationParamValues)),
+        ::testing::ValuesIn(trasformationParamValues),
+        ::testing::ValuesIn({ov::element::i8, ov::element::u8})),
     FullyConnectedTransformation::getTestCaseName);
 } // namespace
@@ -396,8 +396,10 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.Inference.*)");
     // Issue 88764, 91647, 108802: accuracy issue
     retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)");
+#if !defined(OPENVINO_ARCH_ARM64)
     // int8 / code-generation specific
     retVector.emplace_back(R"(smoke_LPT.*)");
+#endif
     // Compressed weights are not supported
     retVector.emplace_back(R"(smoke_MatMulCompressedWeights.*)");
     retVector.emplace_back(R"(smoke_MatMulSharedCompressedWeights.*)");
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/thirdparty/ComputeLibrary
@@ -45,6 +45,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
         ::testing::ValuesIn(netPrecisions),
         ::testing::ValuesIn(shapes),
         ::testing::Values(ov::test::utils::DEVICE_GPU),
-        ::testing::ValuesIn(trasformationParamValues)),
+        ::testing::ValuesIn(trasformationParamValues),
+        ::testing::ValuesIn({ov::element::i8, ov::element::u8})),
     FullyConnectedTransformation::getTestCaseName);
 } // namespace
@@ -20,7 +20,8 @@ typedef std::tuple<
     ov::element::Type,
     MatMulShapes,
     std::string,
-    ov::pass::low_precision::LayerTransformation::Params> FullyConnectedTransformationParams;
+    ov::pass::low_precision::LayerTransformation::Params,
+    ov::element::Type> FullyConnectedTransformationParams;
 
 namespace LayerTestsDefinitions {
 
@@ -20,14 +20,16 @@ std::string FullyConnectedTransformation::getTestCaseName(const testing::TestPar
     MatMulShapes shapes;
     std::string targetDevice;
     ov::pass::low_precision::LayerTransformation::Params params;
-    std::tie(precision, shapes, targetDevice, params) = obj.param;
+    ov::element::Type weightsType;
+    std::tie(precision, shapes, targetDevice, params, weightsType) = obj.param;
 
     std::ostringstream result;
     result <<
         get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) <<
         shapes.inputB << "_" <<
         shapes.transposeA << "_" <<
-        shapes.transposeB;
+        shapes.transposeB << "_" <<
+        weightsType;
 
     return result.str();
 }
@@ -36,7 +38,8 @@ void FullyConnectedTransformation::SetUp() {
     ov::element::Type precision;
     MatMulShapes shapes;
     ov::pass::low_precision::LayerTransformation::Params params;
-    std::tie(precision, shapes, targetDevice, params) = this->GetParam();
+    ov::element::Type weightsType;
+    std::tie(precision, shapes, targetDevice, params, weightsType) = this->GetParam();
 
     init_input_shapes({ shapes.inputA, shapes.inputB });
 
@@ -45,12 +48,17 @@
         shapes.inputA,
         shapes.inputB,
         shapes.transposeA,
-        shapes.transposeB);
+        shapes.transposeB,
+        weightsType == ov::element::i8);
 }
 
 TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
     SKIP_IF_CURRENT_TEST_IS_DISABLED();
     run();
+
+    const auto actualPrecision = get_runtime_precision_by_type("FullyConnected");
+    const auto weightsType = std::get<4>(GetParam());
+    EXPECT_EQ(actualPrecision, weightsType.to_string());
 };
 
 } // namespace LayerTestsDefinitions
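Note (reviewer addition): the test now checks execution precision, not just accuracy. std::get<4>(GetParam()) reads the newly added weightsType tuple element, and get_runtime_precision_by_type("FullyConnected") must report the same type, confirming the FullyConnected node actually ran in int8 rather than falling back to f32.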
@@ -27,7 +27,8 @@ class MatMulFunction {
         const ov::PartialShape inputShape1,
         const ov::PartialShape inputShape2,
         const bool transpose1,
-        const bool transpose2);
+        const bool transpose2,
+        const bool signedOnWeights = false);
 
     static std::shared_ptr<ov::Model> getOriginal(
         const ov::element::Type precision,
13 changes: 9 additions & 4 deletions src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp
@@ -54,12 +54,17 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
     const ov::PartialShape inputShape1,
     const ov::PartialShape inputShape2,
     const bool transpose1,
-    const bool transpose2) {
+    const bool transpose2,
+    const bool signedOnWeights) {
     const auto paramNode = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
     const std::vector<size_t> constShapes(inputShape1.rank().get_length(), 1ul);
-    const auto fakeQuantizeOnAcitvations = ov::test::utils::make_fake_quantize(
-        paramNode, precision, 256ul, constShapes,
-        { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+    const auto fakeQuantizeOnAcitvations = signedOnWeights ?
+        ov::test::utils::make_fake_quantize(
+            paramNode, precision, 256ul, constShapes,
+            { -128.f / 4.f }, { 127.f / 4.f }, { -128.f / 4.f }, { 127.f / 4.f }) :
+        ov::test::utils::make_fake_quantize(
+            paramNode, precision, 256ul, constShapes,
+            { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
     fakeQuantizeOnAcitvations->set_friendly_name("fakeQuantizeOnAcitvations");
 
     auto weightsConst = std::make_shared<ov::op::v0::Constant>(
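Worked check (reviewer addition): both FakeQuantize variants spread 256 levels over a span of 63.75, so the effective quantization scale is identical and only the placement of the range changes:

    // signed branch:   low = -128.f / 4 = -32.0,  high = 127.f / 4 = 31.75
    // unsigned branch: low = 0.0,                 high = 255.f / 4 = 63.75
    const float signedScale   = (127.f / 4.f - (-128.f / 4.f)) / 255.f;  // 63.75 / 255 = 0.25
    const float unsignedScale = (255.f / 4.f - 0.f) / 255.f;             // 63.75 / 255 = 0.25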
