Commit

Merge branch 'master' into sw_plugins

sbalandi authored Mar 19, 2024
2 parents afceb9d + 9f08e40 commit c73e944
Showing 24 changed files with 148 additions and 1,880 deletions.
1 change: 0 additions & 1 deletion docs/dev/build_mac_arm.md
@@ -40,7 +40,6 @@ The software was validated on:
% # update pip and setuptools to newer versions
% python3 -m pip install -U pip
% python3 -m pip install -r <openvino source tree>/src/bindings/python/requirements.txt
- % python3 -m pip install -r <openvino source tree>/src/bindings/python/src/compatibility/openvino/requirements-dev.txt
```
Additional requirements to install (after cloning the OpenVINO repo) in order to build the OpenVINO Python API and development tools as wheel packages:
```sh
1 change: 0 additions & 1 deletion docs/dev/build_mac_intel_cpu.md
@@ -37,7 +37,6 @@ The software was validated on:
% # update pip and setuptools to newer versions
% python3 -m pip install -U pip
% python3 -m pip install -r <openvino source tree>/src/bindings/python/requirements.txt
- % python3 -m pip install -r <openvino source tree>/src/bindings/python/src/compatibility/openvino/requirements-dev.txt
```
Additional requirements to install (after cloning the OpenVINO repo) in order to build the OpenVINO Python API and development tools as wheel packages:
```sh
@@ -37,19 +37,23 @@ bool relax_hc_reshape_followed_by_matmul(const ov::pass::pattern::PatternValueMap&
return false;

const auto idx = reshape_is_A_input ? (matmul->get_transpose_b() ? -1 : -2) : (matmul->get_transpose_a() ? -2 : -1);
-    const auto C = std::make_shared<ov::op::v8::Gather>(std::make_shared<ov::op::v3::ShapeOf>(shape_source),
-                                                        ov::op::v0::Constant::create(ov::element::i64, {1}, {idx}),
-                                                        ov::op::v0::Constant::create(ov::element::i64, {}, {0}));
+    const auto in_C_0 = std::make_shared<ov::op::v3::ShapeOf>(shape_source);
+    const auto in_C_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {idx});
+    const auto in_C_2 = ov::op::v0::Constant::create(ov::element::i64, {}, {0});
+    const auto C = std::make_shared<ov::op::v8::Gather>(in_C_0, in_C_1, in_C_2);
const auto N = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
const auto pattern_vector = reshape_is_A_input
? (matmul->get_transpose_a() ? ov::OutputVector({C, N}) : ov::OutputVector({N, C}))
: (matmul->get_transpose_b() ? ov::OutputVector({N, C}) : ov::OutputVector({C, N}));
const auto new_reshape_pattern = std::make_shared<ov::op::v0::Concat>(pattern_vector, 0);

auto reshape_pattern = pattern_to_output.at(reshape_pattern_label).get_node_shared_ptr();
new_reshape_pattern->set_friendly_name(reshape_pattern->get_friendly_name());
-    copy_runtime_info(reshape_pattern, new_reshape_pattern);
-    replace_node(reshape_pattern, new_reshape_pattern);
+    ov::NodeVector nodes_to_copy_rt_info{new_reshape_pattern, C, N, in_C_0, in_C_1, in_C_2};
+    copy_runtime_info(reshape_pattern, nodes_to_copy_rt_info);
+
+    auto reshape_input = pattern_to_output.at(reshape_label).get_node_shared_ptr()->input(1);
+    reshape_input.replace_source_output(new_reshape_pattern);

return true;
}

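Two behavioral changes land in this hunk: the pass now names every node it creates (in_C_0, in_C_1, in_C_2) so runtime info can be copied onto all of them via a NodeVector, and the global replace_node call is swapped for a targeted replace_source_output on the Reshape's shape input, so a pattern producer that also feeds nodes outside the matched subgraph is left untouched. Below is a minimal sketch of that distinction, assuming current OpenVINO core APIs; the function and variable names are illustrative, not from the commit:

```cpp
#include <memory>

#include "openvino/core/graph_util.hpp"  // ov::replace_node
#include "openvino/core/rt_info.hpp"     // ov::copy_runtime_info
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reshape.hpp"

// Illustrative only: a producer with two consumers, then a targeted rewire.
void rewire_only_one_consumer() {
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::Shape{2, 5});
    // `shared_const` feeds the Reshape's shape input *and* an unrelated Add.
    auto shared_const = ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 10});
    auto reshape = std::make_shared<ov::op::v1::Reshape>(data, shared_const, false);
    auto other_consumer = std::make_shared<ov::op::v1::Add>(shared_const, shared_const);

    auto new_pattern = ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 10});

    // ov::replace_node(shared_const, new_pattern) would rewire *both* consumers,
    // silently changing `other_consumer` too. Rewiring one input leaves it alone:
    reshape->input(1).replace_source_output(new_pattern->output(0));

    // Copy runtime info from the replaced node onto everything newly created,
    // mirroring the NodeVector-based copy_runtime_info call in the hunk above:
    ov::copy_runtime_info(shared_const, ov::NodeVector{new_pattern});
}
```

This shared-consumer situation is exactly what the new SmartReshape_ReshapeAMatMul_ReshapeInputSeveralConsumers test added below exercises.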
34 changes: 33 additions & 1 deletion src/frontends/tensorflow_common/src/op/reciprocal.cpp
@@ -3,6 +3,12 @@
//

#include "common_op_table.hpp"
#include "helper_ops/complex_type_mark.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/negative.hpp"
#include "openvino/op/power.hpp"
#include "utils.hpp"

@@ -16,8 +22,34 @@ namespace op {

OutputVector translate_reciprocal_op(const NodeContext& node) {
    // computes element-wise 1/x, where x is the input
default_op_checks(node, 1, {"Reciprocal"});
default_op_checks(node, 1, {"Reciprocal"}, true);
auto x = node.get_input(0);
+    auto complex_type_mark_x = as_type_ptr<ComplexTypeMark>(x.get_node_shared_ptr());
+    if (complex_type_mark_x) {
+        x = complex_type_mark_x->input_value(0);
+        auto minus_one = make_shared<v0::Constant>(element::i32, Shape{1}, -1);
+        auto two = create_same_type_const_scalar<int32_t>(x, 2);
+        auto gather_index_real = make_shared<v0::Constant>(element::i32, Shape{1}, 0);
+        auto gather_index_imag = make_shared<v0::Constant>(element::i32, Shape{1}, 1);
+        auto x_real = make_shared<v8::Gather>(x, gather_index_real, minus_one)->output(0);
+        auto x_imag = make_shared<v8::Gather>(x, gather_index_imag, minus_one)->output(0);
+
+        // compute (a^2+b^2)
+        auto real_squared_norm = make_shared<v1::Power>(x_real, two);
+        auto img_squared_norm = make_shared<v1::Power>(x_imag, two);
+        auto squared_norm = make_shared<v1::Add>(real_squared_norm, img_squared_norm);
+
+        // compute 1/(a+bi) = (a-bi)/(a^2+b^2)
+        auto complex_reciprocal = make_shared<v1::Divide>(
+            make_shared<v0::Concat>(OutputVector{x_real, make_shared<ov::op::v0::Negative>(x_imag)}, -1),
+            squared_norm);
+        auto complex_result =
+            make_shared<ComplexTypeMark>(complex_reciprocal, complex_type_mark_x->get_complex_part_type());
+        set_node_name(node.get_name(), complex_reciprocal);
+        return {complex_result};
+    }

+    // For real numbers, computes element-wise 1/x, where x is the input
    auto minus_one_const = create_same_type_const_scalar<int32_t>(x, -1);
    auto reciprocal = make_shared<v1::Power>(x, minus_one_const);
    set_node_name(node.get_name(), reciprocal);
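For reference, the identity the new complex branch implements: the reciprocal of a complex number is its conjugate divided by its squared magnitude, which is why the code concatenates the pair (a, -b) along the last axis and divides by squared_norm:

```math
\frac{1}{a+bi} \;=\; \frac{a-bi}{(a+bi)(a-bi)} \;=\; \frac{a-bi}{a^2+b^2}
```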
53 changes: 53 additions & 0 deletions src/inference/tests/functional/matmul_sr_tests.cpp
@@ -12,11 +12,14 @@
#include "common_test_utils/ov_test_utils.hpp"
#include "common_test_utils/test_common.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reduce_max.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/transpose.hpp"
#include "openvino/op/variadic_split.hpp"
#include "openvino/pass/manager.hpp"
@@ -390,3 +393,53 @@ TEST_F(TransformationTestsF, SmartReshapeReshapeBMatMulSeveralConsumers) {
model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{data_A, data_B});
manager.register_pass<ov::pass::ReshapeBMatMul>();
}

+TEST_F(TransformationTestsF, SmartReshape_ReshapeAMatMul_ReshapeInputSeveralConsumers) {
+    // The const is reused as a shared input for the Reshape and Add operations
+    //  param     param    const
+    //    |         |       |  |
+    //    |         +-------+  |
+    //    |             |      |
+    //    |          Reshape   |
+    //    |             |      |
+    //    +------+------+      |
+    //           |             |
+    //         MatMul          |
+    //           |             |
+    //           +------+------+
+    //                  |
+    //                 Add
+    //                  |
+    //                Result
+    std::shared_ptr<ov::Model> f(nullptr), f_ref(nullptr);
+    {
+        auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, {2}, {1, 10});
+        auto data_reshape = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{2, 5});
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(data_reshape, reshape_const, false);
+        auto data_matmul = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{10, 1});
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(reshape, data_matmul);
+        auto add = std::make_shared<ov::op::v1::Add>(matmul, reshape_const);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{data_matmul, data_reshape});
+        manager.register_pass<ov::pass::ReshapeAMatMul>();
+    }
+    {
+        auto reshape_param = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{2, 5});
+        auto shape_of_param = std::make_shared<ov::op::v0::Parameter>(ov::element::i32, ov::Shape{10, 1});
+        auto shape_of = std::make_shared<ov::op::v3::ShapeOf>(shape_of_param, ov::element::i64);
+        auto const_gather_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {-2});
+        auto const_gather_2 = ov::op::v0::Constant::create(ov::element::i64, {}, {0});
+        auto gather = std::make_shared<ov::op::v8::Gather>(shape_of, const_gather_1, const_gather_2);
+        auto const_concat_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto concat = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{const_concat_1, gather}, 0);
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(reshape_param, concat, false);
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(reshape, shape_of_param);
+        auto const_add_1 = ov::op::v0::Constant::create(ov::element::i32, {2}, {1, 10});
+        auto add = std::make_shared<ov::op::v1::Add>(matmul, const_add_1);
+
+        model_ref =
+            std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{reshape_param, shape_of_param});
+        ov::pass::Manager m;
+        m.run_passes(model_ref);
+    }
+}
5 changes: 1 addition & 4 deletions src/plugins/intel_cpu/src/config.cpp
@@ -369,10 +369,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
inferencePrecision = ov::element::f32;
#if defined(OV_CPU_ARM_ENABLE_FP16)
-        // fp16 precision is used as default precision on ARM for non-convolution networks
-        // fp16 ACL convolution is slower than fp32
-        if (modelType != ModelType::CNN)
-            inferencePrecision = ov::element::f16;
+        inferencePrecision = ov::element::f16;
#else
if (mayiuse(avx512_core_bf16))
inferencePrecision = ov::element::bf16;
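With the modelType guard gone, f16 becomes the default inference precision for every model type on ARM builds compiled with OV_CPU_ARM_ENABLE_FP16 under PERFORMANCE mode. Callers that need full f32 can still pin it through the standard precision hint; a minimal sketch, assuming the usual OpenVINO 2.0 runtime API ("model.xml" is a placeholder path):

```cpp
#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // "model.xml" is a placeholder; any readable IR/ONNX model path works.
    auto model = core.read_model("model.xml");

    // Explicitly request f32 execution, overriding the f16 default that this
    // commit makes unconditional on ARM under PERFORMANCE execution mode.
    auto compiled = core.compile_model(model, "CPU", ov::hint::inference_precision(ov::element::f32));
    return 0;
}
```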
77 changes: 0 additions & 77 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp
@@ -139,26 +139,16 @@ struct format {
oyix,
oxiy,
os_iyx_osv16, ///< format used only for convolution weights
o_is_yx_isv2, ///< format used only for convolution weights
o_is_yx_isv4, ///< format used only for convolution weights
o_is_yx_isv16, ///< format used only for convolution weights
o_is_zyx_isv16, ///< format used only for convolution weights
os_yxi_osv16, ///< format used only for convolution weights
os_is_yx_osv16_isv2, ///< format used only for convolution weights
os_is_yx_osv16_isv16, ///< format used for convolution i8 weights
os_is_zyx_osv32_isv16,
os_is_zyx_osv64_isv16,
os_zyxi_osv16, ///< format used for weights for 3D convolution
os_is_yx_isv16_osv16, ///< format used for blocked convolution
os_is_zyx_isv16_osv16, ///< format used for weights for blocked 3D convolution
is_os_zyx_isv16_osv16, ///< format used for weights for blocked 3D deconvolution
is_os_yx_osv8_isv4, ///< format used for weights for blocked deconvolution
is_os_yx_isv16_osv16, ///< format used for weights for blocked deconvolution
is_os_yx_isv16_osv8, ///< format used for weights for blocked deconvolution
is_os_yx_isv16_osv4, ///< format used for weights for blocked deconvolution
is_os_yx_isv16_osv2, ///< format used for weights for blocked deconvolution
os_is_yx_isa8_osv16_isv2, ///< format used for weights for blocked 2D onednn convolution
os_is_zyx_isa8_osv16_isv2, ///< format used for weights for blocked 3D onednn convolution
os_is_yx_isv8_osv16_isv2, ///< format used for weights for blocked 2D convolution
os_is_zyx_isv8_osv16_isv2, ///< format used for weights for blocked 3D convolution
///< os - output feature maps slice, i - input feature maps,
@@ -187,66 +177,24 @@ struct format {
os_is_zyx_isa8_osv8_isv4, ///< format for weights for MMAD convolution
os_is_yx_isa8_osv16_isv4, ///< format for weights for fully connected MMAD
os_is_zyx_isa8_osv16_isv4, ///< format for weights for fully connected MMAD
os_is_yx_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD convolution
os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD fsv32 convolution
os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD fsv32 convolution
os_is_yx_osa4_isa8_osv8_isv2, ///< format for weights for MMAD fsv32 convolution
os_is_zyx_osa4_isa8_osv8_isv2, ///< format for weights for MMAD fsv32 convolution
os_is_zyx_osa4_isa8_osv8_isv4, ///< format for weights for MMAD fsv32 convolution
os_is_yx_osa4_isa8_osv8_isv4, ///< format for weights for MMAD fsv32 convolution
os_is_yx_osa2_isa8_osv8_isv2,
os_is_zyx_osa2_isa8_osv8_isv2,
os_is_yx_osa2_isa8_osv16_isv2,
os_is_yx_osa2_isa8_osv16_isv4,
os_is_yx_isa8_osv8_isv2,
is_os_yx_isa8_osv8_isv2,
is_os_yx_isa8_osv8_isv4,
is_os_yx_osa8_isv16_osv4,
os_is_zyx_isa8_osv8_isv2,
is_os_zyx_isa8_osv8_isv2,
is_os_zyx_isa8_osv8_isv4,
is_os_yx_isa2_osa8_isv8_osv2,
is_os_yx_isa4_osa8_isv8_osv4,
is_os_yx_osa4_isa8_osv8_isv4,
is_o_yx_isv32, ///< format for weights for 1x1 MMAD convolutions
is_o32_yx_isv32_swizzled_by_4, ///< format for weights for 1x1 MMAD convolutions
os_is_y_x8_osv8_isv4, ///< format for weights for 1x1 MMAD convolutions
os_is_y_x8_osv8_isv4_swizzled_by_4, ///< format for weights for 1x1 MMAD convolutions
os_is_yx_osv16_isv4, ///< format for weights for IMAD convolutions
os_is_yx_osv8_isv4, ///< format used for convolution i8 weights
os_is_zyx_osv8_isv4, ///< format used for convolution i8 weights
os_is_yx_osv8_isv2, ///< format used for convolution fp16 weights
os_is_zyx_osv8_isv2, ///< format used for convolution fp16 weights
os_is_zyx_osv16_isv16, ///< format for weights for IMAD convolutions
os_is_yx_osv32_isv4_swizzled_by_2, ///< format for weights for IMAD convolutions
os_is_yx_osv32_isv4, ///< format for weights for IMAD convolutions
os_is_zyx_osv32_isv4, ///< format for weights for IMAD convolutions
os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution
os_iyx_osv8,
os_iyx_osv32__ai32,
iy_xs_os_xsv2_osv8__ao32,
iy_xs_os_xsv2_osv16__ao32,
i_yxs_os_yxsv2_osv16,
os_i_yxs_osv4_yxsv4,
os_i_osv16, ///< format used only for fully connected weights
os_i_osv16__ai8, ///< format used only for fully connected weights
os_i_osv8__ai8, ///< format used only for fully connected weights
os_y_is_x_osv8_isv2,
os_y_is_x_osv8_isv4,
os_y_is_x_osv16_isv4,
os_yx_is_osv8_isv2,
os_yx_is_osv8_isv4,
os_yx_is_osv16_isv2,
os_zyx_is_osv8_isv2,
os_zyx_is_osv8_isv4,
os_zy_is_x_osv8_isv2,
os_zy_is_x_osv8_isv4,
os_is_yx_osv4_isv16,
os_is_yx_osv4_isv2,
os_is_yx_osv8_isv16,
os_is_yx_osv2_isv4,
os_is_yx_osv2_isv16,
os_is_yx_osv2_isv32,

goiyx, ///< format used for weights for 2D convolution
gioyx, ///< format used for weights for 2D deconvolution
@@ -256,54 +204,29 @@ struct format {
g_os_iyx_osv8, ///< format used for weights for 2D convolution
g_os_iyx_osv16, ///< format used for weights for 2D convolution
g_os_iyx_osv32, ///< format used for weights for 2D convolution
gs_oiyx_gsv8, ///< format used for weights for 2D convolution
gs_oiyx_gsv16, ///< format used for weights for 2D convolution
gs_oizyx_gsv8, ///< format used for weights for 3D convolution
gs_oizyx_gsv16, ///< format used for weights for 3D convolution
gs_oiyx_gsv32, ///< format used for weights for 2D convolution
gs_oizyx_gsv32, ///< format used for weights for 3D convolution
g_is_os_zyx_isv16_osv16, ///< format used for grouped weights for blocked 3D deconvolution
g_os_is_yx_osv16_isv4,
g_os_is_zyx_osv16_isv16,
g_is_os_yx_isv16_osv16,
g_os_is_yx_isa8_osv8_isv2,
g_os_is_yx_isa8_osv8_isv4,
g_os_is_zyx_isv8_osv16_isv2,
g_os_is_yx_isv8_osv16_isv2,
g_os_is_zyx_isv16_osv16,
g_os_zy_is_x_osv8_isv2,
g_os_zy_is_x_osv8_isv4,
g_os_zyx_is_osv8_isv2,
g_os_zyx_is_osv8_isv4,
g_os_zyx_is_osv16_isv4, ///< format for imad deconvolution
g_os_zyx_is_osv16_isv16, ///< format for imad deconvolution
g_os_zyx_is_osv16_isv32, ///< format for imad deconvolution
g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution
g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution
g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution
g_os_is_yx_isv16_osv16,
g_os_is_yx_osv8_isv2,
g_os_is_yx_osv8_isv4,
gs_oi_yxs_gsv4_yxsv4,
gs_oi_yxs_gsv16_yxsv4,
gs_oi_yxs_gsv32_yxsv4,
gi_yxs_os_yxsv2_osv16,
giy_xs_os_xsv2_osv8__ao32,
giy_xs_os_xsv2_osv16__ao32,
g_os_is_yx_osa2_isa8_osv8_isv2,
g_os_is_yx_osa4_isa8_osv8_isv4,
g_os_is_yx_osa4_isa8_osv8_isv2,
g_os_is_yx_osa2_isa8_osv16_isv2,
g_os_is_yx_osa2_isa8_osv16_isv4,
g_os_is_zyx_osa4_isa8_osv8_isv2,
g_os_is_zyx_osa4_isa8_osv8_isv4,
g_os_is_zyx_isa8_osv8_isv2,
g_os_is_zyx_isa8_osv8_isv4,
g_os_yx_is_osv8_isv2,
g_os_yx_is_osv8_isv4,
g_os_yx_is_osv16_isv2,
g_os_y_is_x_osv8_isv2,
g_os_y_is_x_osv8_isv4,

format_num, ///< number of format types
custom, ///< means that this format is created based on custom format traits and may have no corresponding label
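As a reading aid (an editor's gloss, not from the header): in these weight-format names the letter groups give the dimension order — o = output feature maps, i = input feature maps, z/y/x = spatial dimensions, g = groups — and suffixes such as osv16 or isv16 mean that dimension is blocked into sub-vectors of the stated size. For example, os_is_yx_isv16_osv16 is an o-i-y-x layout blocked by 16 input features and 16 output features, which is why the deleted entries above are all specialized blockings for particular convolution kernels.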