Merge branch 'master' into swFix
sbalandi authored Mar 4, 2024
2 parents b89d3d2 + e6dc086 commit 29166d5
Show file tree
Hide file tree
Showing 38 changed files with 1,338 additions and 674 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_doc.yml
@@ -21,7 +21,7 @@ jobs:
          lfs: 'true'

      - name: Install apt-get dependencies
-       uses: awalsh128/cache-apt-pkgs-action@v1.4.1
+       uses: awalsh128/cache-apt-pkgs-action@v1.4.2
        with:
          packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9
          version: 3.0
2 changes: 1 addition & 1 deletion .github/workflows/code_snippets.yml
@@ -30,7 +30,7 @@ jobs:
          submodules: 'true'

      - name: Install OpenCL
-       uses: awalsh128/cache-apt-pkgs-action@v1.4.1
+       uses: awalsh128/cache-apt-pkgs-action@v1.4.2
        if: runner.os == 'Linux'
        with:
          packages: ocl-icd-opencl-dev opencl-headers
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
@@ -318,7 +318,7 @@ jobs:

  Conformance:
    needs: [ Build, Smart_CI ]
-   timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 30 }}
+   timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 20 }}
    defaults:
      run:
        shell: bash
2 changes: 1 addition & 1 deletion src/bindings/js/node/lib/addon.ts
@@ -36,6 +36,7 @@ interface Core {
    modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Promise<Model>;
  readModelSync(modelPath: string, weightsPath?: string): Model;
  readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model;
+ getAvailableDevices(): string[];
}
interface CoreConstructor {
new(): Core;
@@ -81,7 +82,6 @@ interface InferRequest {
  inferAsync(inputData: { [inputName: string]: Tensor}
    | Tensor[] ): Promise<{ [outputName: string] : Tensor}>;
  getCompiledModel(): CompiledModel;
- getAvailableDevices(): string[];
}

type Dimension = number | [number, number];
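The two hunks above move getAvailableDevices() from the InferRequest interface to Core, which is where device discovery lives in the C++ runtime. For comparison, a minimal sketch of the corresponding C++ call (standard ov::Core API; the surrounding main() is illustrative only):

#include <iostream>
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    // Device discovery is a Core-level capability, not a per-request one.
    for (const auto& device : core.get_available_devices()) {
        std::cout << device << '\n';  // e.g. "CPU", "GPU"
    }
    return 0;
}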
11 changes: 3 additions & 8 deletions src/core/reference/include/openvino/reference/concat.hpp
@@ -7,6 +7,7 @@
#include <vector>

#include "openvino/core/shape.hpp"
+#include "openvino/core/type/element_type.hpp"

namespace ov {
namespace reference {
@@ -15,14 +16,8 @@ void concat(const std::vector<const char*>& args,
            const std::vector<Shape>& in_shapes,
            const Shape& out_shape,
            int64_t concatenation_axis,
-           size_t elem_size);
-
-void concat(const std::vector<const std::string*>& args,
-           std::string* out,
-           const std::vector<Shape>& in_shapes,
-           const Shape& out_shape,
-           int64_t concatenation_axis,
-           size_t);
+           size_t elem_size,
+           const ov::element::Type& elem_type = ov::element::Type_t::undefined);

} // namespace reference
} // namespace ov
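With the std::string overload removed, every element type now routes through the single declaration above: callers pass raw bytes plus the element type, and string tensors are selected via the new defaulted elem_type parameter. A minimal test-style sketch of a call under that reading (hypothetical usage, not code from this PR):

#include <vector>
#include "openvino/core/shape.hpp"
#include "openvino/reference/concat.hpp"

void concat_two_float_rows() {
    const std::vector<float> a{1.f, 2.f}, b{3.f, 4.f};
    std::vector<float> out(4);
    ov::reference::concat({reinterpret_cast<const char*>(a.data()),
                           reinterpret_cast<const char*>(b.data())},
                          reinterpret_cast<char*>(out.data()),
                          {ov::Shape{2}, ov::Shape{2}},  // input shapes
                          ov::Shape{4},                  // output shape
                          0,                             // concatenation axis
                          sizeof(float));                // elem_type stays at its 'undefined' default
}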
4 changes: 2 additions & 2 deletions src/core/reference/include/openvino/reference/grid_sample.hpp
@@ -141,8 +141,8 @@ DATA_ET bilinear(const DATA_ET* data,
    const auto x_d = denormalize(x_n, data_shape[3]);
    const auto y_topleft = std::floor(y_d);
    const auto x_topleft = std::floor(x_d);
-   const auto dy = y_d - y_topleft;
-   const auto dx = x_d - x_topleft;
+   const auto dy = static_cast<DATA_ET>(y_d - y_topleft);
+   const auto dx = static_cast<DATA_ET>(x_d - x_topleft);
    const auto v00 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft), static_cast<long>(x_topleft));
    const auto v01 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft), static_cast<long>(x_topleft + 1));
    const auto v10 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft + 1), static_cast<long>(x_topleft));
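The only change here pins dy and dx back to DATA_ET. A standalone illustration of the likely motivation (assuming DATA_ET can be a narrow element type, e.g. a float16 wrapper, whose arithmetic promotes to a wider type; fractional_part is a hypothetical helper):

#include <cmath>

template <typename DATA_ET>
DATA_ET fractional_part(DATA_ET v) {
    // v - std::floor(v) may be evaluated in a promoted (wider) type; the
    // explicit cast keeps the result at DATA_ET, so the interpolation
    // arithmetic that follows stays in a single type.
    return static_cast<DATA_ET>(v - std::floor(v));
}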
59 changes: 25 additions & 34 deletions src/core/reference/src/op/concat.cpp
@@ -17,60 +17,51 @@ std::vector<size_t> calculate_shape_sizes(const std::vector<Shape>& in_shapes) {
    });
    return sizes;
}
+
+void copy_elements(const char* arg,
+                   char* out,
+                   size_t in_offset,
+                   size_t out_offset,
+                   size_t num_of_elements,
+                   size_t elem_size) {
+    std::memcpy(out + (out_offset * elem_size), arg + (in_offset * elem_size), num_of_elements * elem_size);
+}
+
+void copy_string_elements(const char* arg,
+                          char* out,
+                          size_t in_offset,
+                          size_t out_offset,
+                          size_t num_of_elements,
+                          size_t) {
+    const auto src_begin = std::next(reinterpret_cast<const std::string*>(arg), in_offset);
+    const auto out_ptr = std::next(reinterpret_cast<std::string*>(out), out_offset);
+    std::copy_n(src_begin, num_of_elements, out_ptr);
+}
}  // namespace

void concat(const std::vector<const char*>& args,
            char* out,
            const std::vector<Shape>& in_shapes,
            const Shape& out_shape,
            int64_t concatenation_axis,
-           size_t elem_size) {
-    size_t steps = 1;
-    for (int i = 0; i < concatenation_axis; ++i) {
-        steps *= out_shape[i];
-    }
-
+           size_t elem_size,
+           const ov::element::Type& elem_type) {
+    const auto steps = shape_size(out_shape.begin(), out_shape.begin() + concatenation_axis);
    const auto& shape_sizes = calculate_shape_sizes(in_shapes);
+    const auto copy_func = elem_type == ov::element::string ? copy_string_elements : copy_elements;

    size_t out_offset = 0;
    for (size_t step = 0; step < steps; ++step) {
        for (size_t in_index = 0; in_index < args.size(); ++in_index) {
            const size_t size = shape_sizes[in_index] / steps;
            const size_t in_offset = step * size;

-           std::memcpy(&out[out_offset * elem_size], &args[in_index][in_offset * elem_size], size * elem_size);
+           copy_func(args[in_index], out, in_offset, out_offset, size, elem_size);

            out_offset += size;
        }
    }
}
-
-void concat(const std::vector<const std::string*>& args,
-           std::string* out,
-           const std::vector<Shape>& in_shapes,
-           const Shape& out_shape,
-           int64_t concatenation_axis,
-           size_t) {
-    size_t steps = 1;
-    for (int i = 0; i < concatenation_axis; ++i) {
-        steps *= out_shape[i];
-    }
-    const auto& shape_sizes = calculate_shape_sizes(in_shapes);
-
-    size_t out_offset = 0;
-    for (size_t step = 0; step < steps; ++step) {
-        for (size_t in_index = 0; in_index < args.size(); ++in_index) {
-            const size_t size = shape_sizes[in_index] / steps;
-            const size_t in_offset = step * size;
-
-            const auto src_begin = std::next(args[in_index], in_offset);
-            const auto out_ptr = std::next(out, out_offset);
-            std::copy_n(src_begin, size, out_ptr);
-
-            out_offset += size;
-        }
-    }
-}

} // namespace reference
} // namespace ov
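The rewrite collapses two near-duplicate loops into one loop plus a copy callback selected once up front: because both helpers share a signature, the ternary on elem_type yields a plain function pointer and the hot loop needs no per-element branch. A reduced sketch of that pattern (illustrative names, not code from the PR):

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <string>

using CopyFn = void (*)(const char* src, char* dst, std::size_t count, std::size_t elem_size);

void copy_raw(const char* src, char* dst, std::size_t count, std::size_t elem_size) {
    std::memcpy(dst, src, count * elem_size);  // valid only for trivially copyable elements
}

void copy_strings(const char* src, char* dst, std::size_t count, std::size_t) {
    // std::string owns heap storage, so elements must be assigned, not byte-copied.
    std::copy_n(reinterpret_cast<const std::string*>(src), count, reinterpret_cast<std::string*>(dst));
}

void copy_block(bool is_string, const char* src, char* dst, std::size_t count, std::size_t elem_size) {
    const CopyFn copy_func = is_string ? copy_strings : copy_raw;  // chosen once
    copy_func(src, dst, count, elem_size);                        // uniform call site
}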
34 changes: 13 additions & 21 deletions src/core/src/op/concat.cpp
@@ -52,43 +52,35 @@ std::shared_ptr<Node> Concat::clone_with_new_inputs(const OutputVector& new_args
    return std::make_shared<Concat>(new_args, m_axis);
}

-template <typename T>
-void evaluate_concat(const Concat* node, TensorVector& outputs, const TensorVector& inputs) {
+bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
+    OV_OP_SCOPE(v0_Concat_evaluate);
+    OPENVINO_ASSERT(outputs.size() == 1);
+
    const auto inputs_count = inputs.size();
    std::vector<Shape> arg_shapes;
    std::vector<PartialShape> input_shapes;
+    std::vector<const char*> arg_bufs;
    arg_shapes.reserve(inputs_count);
    input_shapes.reserve(inputs_count);
+    arg_bufs.reserve(inputs_count);

-    std::vector<const T*> arg_bufs(inputs_count);
-    auto arg_buf = arg_bufs.begin();
    for (auto& input : inputs) {
-        *arg_buf = static_cast<const T*>(input.data());
-        ++arg_buf;
        const auto& input_shape = input.get_shape();
        arg_shapes.emplace_back(input_shape);
        input_shapes.emplace_back(input_shape);
+        arg_bufs.emplace_back(static_cast<const char*>(input.data()));
    }

-    const auto& out_shape = shape_infer(node, input_shapes).front().to_shape();
+    const auto& out_shape = shape_infer(this, input_shapes).front().to_shape();
    outputs.front().set_shape(out_shape);
+    const auto elem_type = outputs.front().get_element_type();
    reference::concat(arg_bufs,
-                      static_cast<T*>(outputs.front().data()),
+                      static_cast<char*>(outputs.front().data()),
                      arg_shapes,
                      out_shape,
-                      ov::util::normalize(node->get_axis(), out_shape.size()),
-                      outputs.front().get_element_type().size());
-}
-
-bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
-    OV_OP_SCOPE(v0_Concat_evaluate);
-    OPENVINO_ASSERT(outputs.size() == 1);
-
-    if (outputs.front().get_element_type() == ov::element::string) {
-        evaluate_concat<std::string>(this, outputs, inputs);
-    } else {
-        evaluate_concat<char>(this, outputs, inputs);
-    }
+                      ov::util::normalize(this->get_axis(), out_shape.size()),
+                      elem_type.size(),
+                      elem_type);

    return true;
}
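Folding the evaluate_concat<T> template away works because the buffers are only ever handled as char* here, and the real distinction (memcpy versus element-wise assignment) is made downstream from elem_type. The reason raw byte copies are never an option for string tensors is a general C++ fact, checkable in isolation:

#include <string>
#include <type_traits>

// std::string manages heap storage; copying its bytes would duplicate internal
// pointers rather than characters, so copies must go through operator=.
static_assert(!std::is_trivially_copyable<std::string>::value,
              "string tensors cannot be concatenated with memcpy");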
40 changes: 38 additions & 2 deletions src/frontends/tensorflow_common/src/op/inv.cpp
@@ -1,10 +1,17 @@
-// Copyright (C) 2018-2023 Intel Corporation
+// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "common_op_table.hpp"
+#include "helper_ops/complex_type_mark.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/divide.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/negative.hpp"
+#include "openvino/op/unsqueeze.hpp"

using namespace std;
using namespace ov::op;
@@ -14,9 +21,38 @@ namespace frontend {
namespace tensorflow {
namespace op {
OutputVector translate_inv_op(const NodeContext& node) {
-    default_op_checks(node, 1, {"Inv"});
+    default_op_checks(node, 1, {"Inv"}, true);
    auto x = node.get_input(0);

+    auto complex_type_mark = as_type_ptr<ComplexTypeMark>(x.get_node_shared_ptr());
+    if (complex_type_mark) {
+        x = complex_type_mark->input_value(0);
+        element::Type complex_part_type = complex_type_mark->get_complex_part_type();
+
+        auto gather_index_real = make_shared<v0::Constant>(element::i32, Shape{}, 0);
+        auto gather_index_imag = make_shared<v0::Constant>(element::i32, Shape{}, 1);
+
+        auto minus_one = make_shared<v0::Constant>(element::i32, Shape{1}, -1);
+
+        auto x_real = make_shared<v8::Gather>(x, gather_index_real, minus_one)->output(0);
+        auto x_imag = make_shared<v8::Gather>(x, gather_index_imag, minus_one)->output(0);
+
+        auto scale =
+            make_shared<v1::Add>(make_shared<v1::Multiply>(x_real, x_real), make_shared<v1::Multiply>(x_imag, x_imag));
+
+        auto y_real = make_shared<v1::Divide>(x_real, scale);
+        auto y_imag = make_shared<v1::Divide>(make_shared<v0::Negative>(x_imag), scale);
+
+        auto real_unsqueeze = make_shared<v0::Unsqueeze>(y_real, minus_one);
+        auto imag_unsqueeze = make_shared<v0::Unsqueeze>(y_imag, minus_one);
+
+        auto concat_result = make_shared<v0::Concat>(OutputVector{real_unsqueeze, imag_unsqueeze}, -1);
+        set_node_name(node.get_name(), concat_result);
+
+        auto complex_result = make_shared<ComplexTypeMark>(concat_result->output(0), complex_part_type);
+        return {complex_result};
+    }
+
    // prepare auxiliary one constants of the same type as the inputs
    auto one = create_same_type_const_scalar<int32_t>(x, 1);

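The complex branch implements the standard reciprocal identity. Writing the input as x = a + bi, the two Multiply nodes feeding the Add form the squared magnitude and the two Divide nodes produce the result's parts:

\[ \frac{1}{a + bi} = \frac{a - bi}{(a + bi)(a - bi)} = \frac{a}{a^2 + b^2} - \frac{b}{a^2 + b^2}\,i \]

Here scale = a^2 + b^2, y_real = a / scale, and y_imag = -b / scale; the Unsqueeze/Concat pair then re-packs the two parts into the trailing dimension that ComplexTypeMark uses to represent complex tensors.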
@@ -125,15 +125,15 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor {
           PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY,
           std::vector<std::vector<int>> streamsInfoTable = {},
           bool cpuReservation = false)
-        : _name{name},
+        : _name{std::move(name)},
          _streams{streams},
          _threads_per_stream{threadsPerStream},
          _threadBindingType{threadBindingType},
          _threadBindingStep{threadBindingStep},
          _threadBindingOffset{threadBindingOffset},
          _threads{threads},
          _thread_preferred_core_type(threadPreferredCoreType),
-          _streams_info_table{streamsInfoTable},
+          _streams_info_table{std::move(streamsInfoTable)},
          _cpu_reservation{cpuReservation} {
        update_executor_config();
    }
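Both changed initializers apply the usual sink-parameter idiom: a by-value parameter that is only stored should be moved into the member, so a caller passing a temporary pays one move instead of an extra copy. A minimal sketch of the idiom (general C++, not OpenVINO-specific):

#include <string>
#include <utility>

struct Named {
    std::string _name;
    // 'name' is a sink parameter: the caller's copy (or move) arrives here,
    // and std::move transfers the buffer into the member without a second copy.
    explicit Named(std::string name) : _name{std::move(name)} {}
};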
1 change: 1 addition & 0 deletions src/inference/src/dev/make_tensor.cpp
@@ -9,6 +9,7 @@

#include "openvino/runtime/iremote_tensor.hpp"
#include "openvino/runtime/properties.hpp"
+#include "openvino/runtime/tensor.hpp"
#ifdef PROXY_PLUGIN_ENABLED
#    include "openvino/proxy/plugin.hpp"
#endif
24 changes: 3 additions & 21 deletions src/inference/src/dev/threading/istreams_executor.cpp
@@ -32,7 +32,7 @@ void IStreamsExecutor::Config::set_property(const ov::AnyMap& property) {
    if (key == ov::num_streams) {
        auto streams = value.as<ov::streams::Num>();
        if (streams == ov::streams::NUMA) {
-            _streams = 1;
+            _streams = get_num_numa_nodes();
        } else if (streams == ov::streams::AUTO) {
            // bare minimum of streams (that evenly divides available number of cores)
            _streams = get_default_num_streams();
@@ -114,29 +114,11 @@ IStreamsExecutor::Config IStreamsExecutor::Config::make_default_multi_threaded(
        return streamConfig;
    }

-    const auto numa_nodes = proc_type_table.size() > 1 ? proc_type_table.size() - 1 : proc_type_table.size();
-    const bool latency_case = static_cast<size_t>(streamConfig._streams) <= numa_nodes;
-
-    // by default, do not use the hyper-threading (to minimize threads synch overheads)
-    int num_cores = !latency_case && numa_nodes == 1
-                        ? proc_type_table[0][ALL_PROC]
-                        : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC];
+    int num_cores = proc_type_table[0][ALL_PROC];

-    // additional latency-case logic for hybrid processors:
    if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) {
        if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::ANY) {
-            // by default the latency case uses (faster) Big cores only, depending on the compute ratio
-            const bool big_only = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / 2);
-            // selecting the preferred core type
-            if (big_only) {
-                streamConfig._thread_preferred_core_type = IStreamsExecutor::Config::PreferredCoreType::BIG;
-                const int hyper_threading_threshold =
-                    2;  // min #cores, for which the hyper-threading becomes useful for the latency case
-                // additionally selecting the #cores to use in the "Big-only" case
-                num_cores = (proc_type_table[0][MAIN_CORE_PROC] <= hyper_threading_threshold)
-                                ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC]
-                                : proc_type_table[0][MAIN_CORE_PROC];
-            }
+            num_cores = proc_type_table[0][ALL_PROC];
        } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::BIG) {
            num_cores = proc_type_table[0][MAIN_CORE_PROC];
        } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::LITTLE) {
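The first hunk changes the ov::streams::NUMA request from a hard-coded single stream to one stream per NUMA node, via the get_num_numa_nodes() helper shown in the diff. A hedged sketch of how that request looks from user code, assuming the standard property API:

#include <memory>
#include "openvino/runtime/core.hpp"

void compile_with_numa_streams(const std::shared_ptr<ov::Model>& model) {
    ov::Core core;
    // With this change, NUMA yields one stream per NUMA node
    // rather than a single stream overall.
    auto compiled = core.compile_model(model, "CPU", ov::num_streams(ov::streams::NUMA));
}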