From 7b46de73866698ab1f15476d05e39348640f626b Mon Sep 17 00:00:00 2001
From: Roman Kazantsev <roman.kazantsev@intel.com>
Date: Sun, 3 Mar 2024 00:08:25 +0400
Subject: [PATCH 01/14] [TF FE] Switch on layer test for LookupTableFindV2 with
 string key (#23197)

**Details:** Switch on layer test for LookupTableFindV2 with string key.
Merge after
https://github.com/openvinotoolkit/openvino_tokenizers/pull/50

**Ticket:** 132669

---------

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
---
 .../test_tf_LookupTableFind.py                | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py
index 6ff6daeda99045..3c585ff65cec88 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py
@@ -1,6 +1,8 @@
 # Copyright (C) 2018-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import platform
+
 import numpy as np
 import pytest
 import tensorflow as tf
@@ -35,6 +37,8 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v
         self.keys_type = keys_type
         self.all_keys = all_keys
         self.invalid_key = invalid_key
+        if keys_type == str:
+            keys_type = tf.string
         tf.compat.v1.reset_default_graph()
         # Create the graph and model
         with tf.compat.v1.Session() as sess:
@@ -67,11 +71,10 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v
         dict(keys_type=np.int32, values_type=tf.string, all_keys=[20, 10, 33, -22, 44, 11],
              all_values=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'],
              default_value='UNKNOWN', invalid_key=1000),
-        pytest.param(dict(keys_type=str, values_type=np.int64,
-                          all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'],
-                          all_values=[200, 100, 0, -3, 10, 1],
-                          default_value=0, invalid_key='AbraCadabra'),
-                     marks=pytest.mark.xfail(reason="132669 - Support LookupTableFindV2 with string key")),
+        dict(keys_type=str, values_type=np.int64,
+             all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'],
+             all_values=[200, 100, 0, -3, 10, 1],
+             default_value=0, invalid_key='AbraCadabra'),
     ]
 
     @pytest.mark.parametrize("hash_table_type", [0, 1])
@@ -81,6 +84,12 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v
     @pytest.mark.nightly
     def test_lookup_table_find(self, hash_table_type, keys_shape, params, ie_device, precision, ir_version, temp_dir,
                                use_legacy_frontend):
+        if params['keys_type'] == str and params['values_type'] == np.int64:
+            if platform.system() in ('Darwin') or platform.machine() in ['arm', 'armv7l',
+                                                                         'aarch64',
+                                                                         'arm64',
+                                                                         'ARM64']:
+                pytest.xfail(reason='126314, 132699: Build tokenizers for ARM and MacOS')
         self._test(*self.create_lookup_table_find_net(hash_table_type=hash_table_type,
                                                       keys_shape=keys_shape, **params),
                    ie_device, precision, ir_version, temp_dir=temp_dir,

From 54ac9947aca3697513daf8318f8da8fac9b8b60d Mon Sep 17 00:00:00 2001
From: Oleg Pipikin <oleg.pipikin@intel.com>
Date: Sun, 3 Mar 2024 18:57:29 +0100
Subject: [PATCH 02/14] Fix includes in dev api for conan integration (#23175)

### Details:
 - Fix includes in dev api for conan integration

### Tickets:
 - *ticket-id*
---
 src/inference/src/dev/make_tensor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp
index f28c90ccf4856a..e457b81fc0e850 100644
--- a/src/inference/src/dev/make_tensor.cpp
+++ b/src/inference/src/dev/make_tensor.cpp
@@ -9,6 +9,7 @@
 
 #include "openvino/runtime/iremote_tensor.hpp"
 #include "openvino/runtime/properties.hpp"
+#include "openvino/runtime/tensor.hpp"
 #ifdef PROXY_PLUGIN_ENABLED
 #    include "openvino/proxy/plugin.hpp"
 #endif

From 608a9981c52b4714453ad5ae5683ecf17442c6e9 Mon Sep 17 00:00:00 2001
From: Wilson Seok <wilson.seok@intel.com>
Date: Mon, 4 Mar 2024 09:29:07 +0900
Subject: [PATCH 03/14] [GPU] update layout.compatible() to check false
 conditions first (#23101)

### Details:
- Update layout.compatible() to check properly for the case where o(or
b) axis is blocked
 - Add unit test case

### Tickets:
 - 132367
---
 src/plugins/intel_gpu/src/runtime/layout.cpp                  | 3 ++-
 src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp
index 70e760bca41557..19e75275997ee6 100644
--- a/src/plugins/intel_gpu/src/runtime/layout.cpp
+++ b/src/plugins/intel_gpu/src/runtime/layout.cpp
@@ -585,7 +585,8 @@ ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format
 // Check a reorder is 1d along feature axis. Or feature size fits to inner block size of feature axis
 static inline bool check_redundant_1d_along_feature(layout const& l1, layout const& l2) {
     // No padding, double blocked format and different data_type
-    if (!l1.data_padding && !l2.data_padding && !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) &&
+    if ((l1.get_linear_size() == l2.get_linear_size()) && !l1.data_padding && !l2.data_padding &&
+        !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) &&
         l2.data_type == l1.data_type && l2.count() == l1.count()) {
         auto l1_inner_blk = format::is_single_blocked(l1.format) ? l1.format.traits().block_sizes.at(0).second : 1;
         auto l2_inner_blk = format::is_single_blocked(l2.format) ? l2.format.traits().block_sizes.at(0).second : 1;
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp
index 1eeb065d3d2b7b..6a2c8c5a80e991 100644
--- a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp
@@ -223,6 +223,8 @@ INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test,
          layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)}, true, true},
         {layout{ov::PartialShape{10, 20}, data_types::f16, format::bfyx},
          layout{ov::PartialShape{10, 20}, data_types::f16, format::os_iyx_osv16}, false, false},
+        {layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::os_iyx_osv16}, false, false},
         {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
          layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::oiyx}, false, true},
         {layout{ov::PartialShape{128, 10}, data_types::f16, format::bfyx},

From 9e2accb57282142716510f09d1a27767d72ff6c1 Mon Sep 17 00:00:00 2001
From: Vitaliy Urusovskij <vitaliy.urusovskij@intel.com>
Date: Mon, 4 Mar 2024 09:20:45 +0400
Subject: [PATCH 04/14] Enable skipped tests (#23168)

### Tickets:
- [CVS-55937](https://jira.devtools.intel.com/browse/CVS-55937)
- [CVS-123427](https://jira.devtools.intel.com/browse/CVS-123427)
---
 .../functional/shared_tests_instances/skip_tests_config.cpp    | 2 --
 src/plugins/template/tests/functional/skip_tests_config.cpp    | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 8d5e432d7b87de..1e8852e31e0562 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -184,8 +184,6 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
         // Issue: 123321
         R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)",
-        // Issue: 123427
-        R"(.*RDFTLayerTest.*SignalSize=().*)",
         // Issue: 123815 (Tests are sensintive to available thread count on testing machines)
         R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)",
         // Issue: 122356
diff --git a/src/plugins/template/tests/functional/skip_tests_config.cpp b/src/plugins/template/tests/functional/skip_tests_config.cpp
index eac640ebfe40f3..297cba2660d11d 100644
--- a/src/plugins/template/tests/functional/skip_tests_config.cpp
+++ b/src/plugins/template/tests/functional/skip_tests_config.cpp
@@ -21,9 +21,6 @@ std::vector<std::string> disabledTestPatterns() {
         // unsupported metrics
         R"(.*smoke_OVGetMetricPropsTest.*OVGetMetricPropsTest.*(RANGE_FOR_STREAMS|MAX_BATCH_SIZE).*)",
 
-        // CVS-55937
-        R"(.*SplitLayerTest.*numSplits=30.*)",
-
         // CVS-64094
         R"(.*ReferenceLogSoftmaxLayerTest.*4.*iType=f16.*axis=.*1.*)",
         // CVS-64012

From 5d74236bcaa21246823dadcac87c7f8cc3f14a63 Mon Sep 17 00:00:00 2001
From: linzs148 <56420840+linzs148@users.noreply.github.com>
Date: Mon, 4 Mar 2024 13:58:18 +0800
Subject: [PATCH 05/14] [TF FE] Support complex type for Inv (#23085)

**Ticket:** https://github.com/openvinotoolkit/openvino/issues/22952

---------

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
---
 .../tensorflow_common/src/op/inv.cpp          | 40 +++++++++++++++-
 .../tensorflow_tests/test_tf_Inv.py           | 46 ++++++++++++++++++-
 2 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/src/frontends/tensorflow_common/src/op/inv.cpp b/src/frontends/tensorflow_common/src/op/inv.cpp
index ec2196219f5033..5af62e2a1764e4 100644
--- a/src/frontends/tensorflow_common/src/op/inv.cpp
+++ b/src/frontends/tensorflow_common/src/op/inv.cpp
@@ -1,10 +1,17 @@
-// Copyright (C) 2018-2023 Intel Corporation
+// Copyright (C) 2018-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "common_op_table.hpp"
+#include "helper_ops/complex_type_mark.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/concat.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/divide.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/negative.hpp"
+#include "openvino/op/unsqueeze.hpp"
 
 using namespace std;
 using namespace ov::op;
@@ -14,9 +21,38 @@ namespace frontend {
 namespace tensorflow {
 namespace op {
 OutputVector translate_inv_op(const NodeContext& node) {
-    default_op_checks(node, 1, {"Inv"});
+    default_op_checks(node, 1, {"Inv"}, true);
     auto x = node.get_input(0);
 
+    auto complex_type_mark = as_type_ptr<ComplexTypeMark>(x.get_node_shared_ptr());
+    if (complex_type_mark) {
+        x = complex_type_mark->input_value(0);
+        element::Type complex_part_type = complex_type_mark->get_complex_part_type();
+
+        auto gather_index_real = make_shared<v0::Constant>(element::i32, Shape{}, 0);
+        auto gather_index_imag = make_shared<v0::Constant>(element::i32, Shape{}, 1);
+
+        auto minus_one = make_shared<v0::Constant>(element::i32, Shape{1}, -1);
+
+        auto x_real = make_shared<v8::Gather>(x, gather_index_real, minus_one)->output(0);
+        auto x_imag = make_shared<v8::Gather>(x, gather_index_imag, minus_one)->output(0);
+
+        auto scale =
+            make_shared<v1::Add>(make_shared<v1::Multiply>(x_real, x_real), make_shared<v1::Multiply>(x_imag, x_imag));
+
+        auto y_real = make_shared<v1::Divide>(x_real, scale);
+        auto y_imag = make_shared<v1::Divide>(make_shared<v0::Negative>(x_imag), scale);
+
+        auto real_unsqueeze = make_shared<v0::Unsqueeze>(y_real, minus_one);
+        auto imag_unsqueeze = make_shared<v0::Unsqueeze>(y_imag, minus_one);
+
+        auto concat_result = make_shared<v0::Concat>(OutputVector{real_unsqueeze, imag_unsqueeze}, -1);
+        set_node_name(node.get_name(), concat_result);
+
+        auto complex_result = make_shared<ComplexTypeMark>(concat_result->output(0), complex_part_type);
+        return {complex_result};
+    }
+
     // prepare auxiliary one constants of the same type as the inputs
     auto one = create_same_type_const_scalar<int32_t>(x, 1);
 
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
index d2350f9841fb6c..875ab763ab81ce 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
@@ -41,4 +41,48 @@ def test_inv_basic(self, params, ie_device, precision, ir_version, temp_dir,
                                          use_legacy_frontend):
         self._test(*self.create_inv_net(**params),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
-                   use_legacy_frontend=use_legacy_frontend)
\ No newline at end of file
+                   use_legacy_frontend=use_legacy_frontend)
+
+class TestComplexInv(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        rng = np.random.default_rng()
+        assert 'param_real:0' in inputs_info
+        assert 'param_imag:0' in inputs_info
+        param_real_shape_1 = inputs_info['param_real:0']
+        param_imag_shape_1 = inputs_info['param_imag:0']
+        inputs_data = {}
+        inputs_data['param_real:0'] = 4 * rng.random(param_real_shape_1).astype(np.float32) - 2
+        inputs_data['param_imag:0'] = 4 * rng.random(param_imag_shape_1).astype(np.float32) - 2
+        return inputs_data
+
+    def create_complex_inv_net(self, input_shape):
+        tf.compat.v1.reset_default_graph()
+        # Create the graph and model
+        with tf.compat.v1.Session() as sess:
+            param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real')
+            param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag')
+            complex = tf.raw_ops.Complex(real=param_real, imag=param_imag)
+            inv = tf.raw_ops.Inv(x=complex, name="complex_inv")
+            real = tf.raw_ops.Real(input=inv)
+            img = tf.raw_ops.Imag(input=inv)
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        return tf_net, None
+
+    test_data_basic = [
+        dict(input_shape=[]),
+        dict(input_shape=[2]),
+        dict(input_shape=[1, 3]),
+        dict(input_shape=[2, 3, 4]),
+        dict(input_shape=[3, 4, 5, 6]),
+    ]
+    @pytest.mark.parametrize("params", test_data_basic)
+    @pytest.mark.precommit_tf_fe
+    @pytest.mark.nightly
+    def test_complex_inv(self, params, ie_device, precision, ir_version, temp_dir,
+                         use_legacy_frontend):
+        self._test(
+            *self.create_complex_inv_net(**params),
+            ie_device, precision, ir_version, temp_dir=temp_dir,
+            use_legacy_frontend=use_legacy_frontend)

From 5c7a4bc51f3a2897b72642fd534416e97241d3cb Mon Sep 17 00:00:00 2001
From: Aleksandr Voron <aleksandr.voron@intel.com>
Date: Mon, 4 Mar 2024 07:31:33 +0100
Subject: [PATCH 06/14] [CPU][ARM] Enable f16 eltwise fusing in ACL (#23117)

oneDNN PR: https://github.com/openvinotoolkit/oneDNN/pull/234
---
 .../intel_cpu/src/dnnl_extension_utils.cpp    |   3 +-
 src/plugins/intel_cpu/src/graph_optimizer.cpp |  28 ++--
 src/plugins/intel_cpu/src/nodes/conv.cpp      |   4 +
 .../classes/convolution.cpp                   |  13 ++
 .../classes/convolution.hpp                   |   1 +
 .../instances/common/convolution.cpp          | 149 +-----------------
 .../instances/x64/convolution.cpp             | 133 ++++++++++++++++
 src/plugins/intel_cpu/thirdparty/onednn       |   2 +-
 8 files changed, 167 insertions(+), 166 deletions(-)

diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index f09b9c5ab2f101..6eced71f2b83fd 100644
--- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -274,7 +274,8 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) {
                        Algorithm::EltwiseAbs,
                        Algorithm::EltwiseSqrt,
                        Algorithm::EltwiseSoftRelu,
-                       Algorithm::EltwiseSigmoid);
+                       Algorithm::EltwiseSigmoid,
+                       Algorithm::EltwiseClamp);
 #elif defined(OPENVINO_ARCH_X86_64)
     return one_of(alg, Algorithm::EltwiseRelu,
                        Algorithm::EltwiseGeluErf,
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index d85c7fcead4001..5d52cfdfb0155f 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -1496,19 +1496,19 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &grap
             parent++;
             continue;
         }
-//Disable ACL post-ops in fp16 to avoid performance degradation
-#if defined(OPENVINO_ARCH_ARM64)
-        if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) {
+
+        auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild();
+        if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) {
             parent++;
             continue;
         }
-#endif
 
-        auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild();
-        if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) {
+#if defined(OV_CPU_WITH_ACL)
+        if (!parentNode->getFusedWith().empty()) {
             parent++;
             continue;
         }
+#endif
 
         if (!DnnlExtensionUtils::isUnarySupportedAsPostOp(fuseCandidate->getAlgorithm())) {
             parent++;
@@ -1552,13 +1552,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) {
             parent++;
             continue;
         }
-//Disable ACL post-ops in fp16 to avoid performance degradation
-#if defined(OPENVINO_ARCH_ARM64)
-        if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) {
-            parent++;
-            continue;
-        }
-#endif
 
         childNode->fuseInto(parentNode);
 
@@ -1686,6 +1679,10 @@ static bool is_data_dependency(const std::shared_ptr<Node> &parent,
  */
 
 void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) {
+#if !defined(OPENVINO_ARCH_X86) && !defined(OPENVINO_ARCH_X86_64)
+    return;
+#endif
+
     auto &graphNodes = graph.GetNodes();
 
     auto isFusingSupported = [&](NodePtr conv, NodePtr child) {
@@ -1817,11 +1814,6 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph)
         if (mergedConv->isConstant() && !sum->isConstant())
             continue;
 
-//Disable ACL post-ops in fp16 to avoid performance degradation
-#if defined(OPENVINO_ARCH_ARM64)
-        if (mergedConv->getOriginalInputPrecisionAtPort(0) == ov::element::f16)
-            continue;
-#endif
         // Disable fusing for Add with broadcasing in case of known data ranges. Add with brodcasting triggers
         // non-optimal code path inside Convolution node, so better to avoid fusing at all.
         const auto& shape1 = sum->getInputShapeAtPort(0);
diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index 59721c5df76c4d..b89d3a1e21d61a 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -1110,6 +1110,10 @@ std::shared_ptr<MemoryDesc> Convolution::getSrcMemDesc(const dnnl::primitive_des
 }
 
 bool Convolution::canFuse(const NodePtr& node) const {
+#if defined(OV_CPU_WITH_ACL)
+    if (!fusedWith.empty())
+        return false;
+#endif
     return canFuseSimpleOperation(node);
 }
 
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp
index 71d8f0b3e2fa14..856fa9cd151f26 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp
@@ -352,6 +352,19 @@ const std::vector<CPUSpecificParams>& CPUParams_2D() {
     return CPUParams_2D;
 }
 
+const std::vector<CPUSpecificParams>& CPUParams_3D() {
+    static const std::vector<CPUSpecificParams> CPUParams_3D = {
+        //conv_sse42_3D, // not supported jit_sse42 for 3d
+        conv_avx2_3D,
+        conv_avx512_3D,
+        conv_avx2_3D_nspc,
+        conv_avx2_3D_nspc_brgconv,
+        conv_avx512_3D_nspc,
+        conv_avx512_3D_nspc_brgconv
+    };
+    return CPUParams_3D;
+}
+
 const std::vector<CPUSpecificParams>& CPUParams_GEMM_1D() {
     static const std::vector<CPUSpecificParams> CPUParams_GEMM_1D = {
             conv_gemm_1D,
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp
index a6e5faee3e909d..db8b6ca8f943b1 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp
@@ -72,6 +72,7 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface<convLayerCPUT
     const std::vector<CPUSpecificParams>& CPUParams_1x1_1D();
     const std::vector<CPUSpecificParams>& CPUParams_1x1_2D();
     const std::vector<CPUSpecificParams>& CPUParams_2D();
+    const std::vector<CPUSpecificParams>& CPUParams_3D();
     const std::vector<CPUSpecificParams>& CPUParams_GEMM_1D();
     const std::vector<CPUSpecificParams>& CPUParams_GEMM_2D();
     const std::vector<CPUSpecificParams>& CPUParams_GEMM_3D();
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp
index b6518f8e8f48f1..09f8dc14660392 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp
@@ -62,20 +62,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_FP32, ConvolutionLayerCPUTest,
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_GEMM_2D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inShapesGemm2D()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -90,20 +76,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLaye
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_GEMM_2D_dilated(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inShapesGemm2D()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 /* ============= Convolution (2D) ============= */
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTest,
                          ::testing::Combine(
@@ -119,41 +91,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTes
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-const std::vector<fusingSpecificParams> fusingParamsSet_dynBatch{
-        emptyFusingSpec,
-        fusingSum,
-        fusingAddPerChannel,
-        fusingReluScaleShift
-};
-
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_2D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes2d_dynBatch()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
-                                 ::testing::ValuesIn(fusingParamsSet_dynBatch),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_2D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes2d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -168,20 +105,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUT
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_2D_dilated(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes2d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 const std::vector<CPUSpecificParams> CPUParams_2D_plain_to_blocked = {
         conv_sse42_plain_to_blocked_2D,
         conv_avx2_plain_to_blocked_2D,
@@ -262,16 +185,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_reorder_Conv_2D, ConvolutionLayerCPUTest,
                          ConvolutionLayerCPUTest::getTestCaseName);
 
 /* ============= Convolution (3D) ============= */
-const std::vector<CPUSpecificParams> CPUParams_3D = {
-        //conv_sse42_3D, // not supported jit_sse42 for 3d
-        conv_avx2_3D,
-        conv_avx512_3D,
-        conv_avx2_3D_nspc,
-        conv_avx2_3D_nspc_brgconv,
-        conv_avx512_3D_nspc,
-        conv_avx512_3D_nspc_brgconv
-};
-
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -281,7 +194,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest,
                                          ::testing::Values(ElementType::undefined),
                                          ::testing::ValuesIn(inputShapes3d()),
                                          ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())),
                                  ::testing::Values(emptyFusingSpec),
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
@@ -295,25 +208,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32_fusingScaleShiftAndFakeQuantizePerCh
                                          ::testing::Values(ElementType::undefined),
                                          ::testing::ValuesIn(inputShapes3d()),
                                          ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())),
                                  ::testing::Values(fusingScaleShiftAndFakeQuantizePerChannel),
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_3D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes3d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -323,25 +222,11 @@ INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest,
                                          ::testing::Values(ElementType::undefined),
                                          ::testing::ValuesIn(inputShapes3d()),
                                          ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())),
                                  ::testing::Values(emptyFusingSpec),
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_3D_dilated(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes3d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 const std::vector<CPUSpecificParams> CPUParams_3D_plain_to_blocked = {
         conv_avx2_plain_to_blocked_3D,
         conv_avx512_plain_to_blocked_3D,
@@ -419,20 +304,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_empty_fusing, ConvolutionLayerCP
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_1x1_1D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes1d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 /* ============= Kernel_1x1 (2D) ============= */
 
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCPUTest,
@@ -449,20 +320,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCP
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
-                         ::testing::Combine(
-                                 ::testing::Combine(
-                                         convParams_ExplicitPadding_1x1_2D(),
-                                         ::testing::Values(ElementType::f32),
-                                         ::testing::Values(ElementType::i8),
-                                         ::testing::Values(ElementType::undefined),
-                                         ::testing::ValuesIn(inputShapes2d()),
-                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
-                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())),
-                                 ::testing::Values(fusingSum),
-                                 ::testing::Values(empty_plugin_config)),
-                         ConvolutionLayerCPUTest::getTestCaseName);
-
 /* ============= Convolution auto padding tests ============= */
 
 const auto convParams_AutoPadding_2D = ::testing::Combine(
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp
index 8073b67b726eb8..2d1638856386f3 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp
@@ -113,6 +113,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_I8, ConvolutionLayerCPUTest,
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_GEMM_2D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inShapesGemm2D()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_GEMM_2D_dilated(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inShapesGemm2D()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_BF16, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -169,6 +197,27 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_fusing, ConvolutionLayerCPUTe
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
+const std::vector<fusingSpecificParams> fusingParamsSet_dynBatch{
+        emptyFusingSpec,
+        fusingSum,
+        fusingAddPerChannel,
+        fusingReluScaleShift
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_2D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes2d_dynBatch()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
+                                 ::testing::ValuesIn(fusingParamsSet_dynBatch),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_fusing, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
@@ -197,6 +246,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_fusing, ConvolutionLayerCPUTest,
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_2D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes2d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_2D_dilated(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes2d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
 /* ============= Kernel_1x1 (1D) ============= */
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_fusing, ConvolutionLayerCPUTest,
                          ::testing::Combine(
@@ -226,6 +303,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_fusing, ConvolutionLayerCPUTest,
                                  ::testing::Values(empty_plugin_config)),
                          ConvolutionLayerCPUTest::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_1x1_1D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes1d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_1x1_2D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes2d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
 /* ============= Convolution (1D) ============= */
 const auto convParams_ExplicitPadding_1D = ::testing::Combine(
         ::testing::ValuesIn(kernels1d()),
@@ -503,6 +608,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTes
                          ConvolutionLayerCPUTest::getTestCaseName);
 
 /* ============= Convolution (GEMM 3D) ============= */
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_3D(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes3d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::Combine(
+                                         convParams_ExplicitPadding_3D_dilated(),
+                                         ::testing::Values(ElementType::f32),
+                                         ::testing::Values(ElementType::i8),
+                                         ::testing::Values(ElementType::undefined),
+                                         ::testing::ValuesIn(inputShapes3d()),
+                                         ::testing::Values(ov::test::utils::DEVICE_CPU)),
+                                 ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())),
+                                 ::testing::Values(fusingSum),
+                                 ::testing::Values(empty_plugin_config)),
+                         ConvolutionLayerCPUTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_GEMM_FP32, ConvolutionLayerCPUTest,
                          ::testing::Combine(
                                  ::testing::Combine(
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index b2cdc2cfdec616..0f94c0e7b94f64 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit b2cdc2cfdec61638f941ccdfb0b9dbcc27a7c333
+Subproject commit 0f94c0e7b94f64df2b94929279bbeb4f576a6a36

From 74570b748e803c04319c258a26534236c8cb02a5 Mon Sep 17 00:00:00 2001
From: Pawel Raasz <pawel.raasz@intel.com>
Date: Mon, 4 Mar 2024 07:47:35 +0100
Subject: [PATCH 07/14] [Coverity] In IStreamsExecutor::Config use move instead
 copy (#23106)

### Details:
- Fix Coverity reported issues `COPY_INSTEAD_OF_MOVE` in
`ov::threading::IStreamsExecutor::Config` constructor.

### Tickets:
 - CID 1518201
 - CID 1518243
---
 .../dev_api/openvino/runtime/threading/istreams_executor.hpp  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
index 7f5693f24ce4fb..b3b8be765e630a 100644
--- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
+++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp
@@ -125,7 +125,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor {
                PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY,
                std::vector<std::vector<int>> streamsInfoTable = {},
                bool cpuReservation = false)
-            : _name{name},
+            : _name{std::move(name)},
               _streams{streams},
               _threads_per_stream{threadsPerStream},
               _threadBindingType{threadBindingType},
@@ -133,7 +133,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor {
               _threadBindingOffset{threadBindingOffset},
               _threads{threads},
               _thread_preferred_core_type(threadPreferredCoreType),
-              _streams_info_table{streamsInfoTable},
+              _streams_info_table{std::move(streamsInfoTable)},
               _cpu_reservation{cpuReservation} {
             update_executor_config();
         }

From a605edbd776ca4c14f4efb120403a74911ba9c27 Mon Sep 17 00:00:00 2001
From: Sun Xiaoxia <xiaoxia.sun@intel.com>
Date: Mon, 4 Mar 2024 15:44:36 +0800
Subject: [PATCH 08/14] Fix performance regression of conformance tests
 (#22938)

### Details:

fix performance regression of conformance tests

Root cause is `make_default_multi_threaded()` API which was refactored
in PR22414. This API is used to calculate the number of threads.
**Master version**: When the test machine has both Pcore and Ecore, it
recognized the machine type and will select proper cores (Pcores are
selected in general machine which pcores >Ecores / 2) to create
executor.
**Old version**: the input parameter `_threadBindingType` of config was
used to judge the type of current machine, and `_threadBindingType` is
always set to default value `None` in template plugin. So hybrid core
machine is regarded to core machine and physical cores (include Pcores
and Ecores) are used. `_threadBindingType` is deprecated in master now.
For example: ADL i9-12900K, 8 Pcore, 8Ecore.
Old version: threads=16 in any cores.
Master: threads=8 in Pcores.
Actually, the threads calculated from `make_default_multi_threaded` of
master version is the best option in normal situation which not create
multi threads in app side. But in conformance test, 24 threads are
created to seize CPU resource. Of cause, all cores are must be used to
achieved the best performance.


### Tickets:
 - *CVS-131820*

---------

Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
---
 .github/workflows/linux.yml                   |  2 +-
 .../src/dev/threading/istreams_executor.cpp   | 24 +++--------------
 .../unit/make_default_multi_threaded_test.cpp | 26 ++++++++++++++-----
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 46493c55bc3ab3..0e2ee84264d6a6 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -318,7 +318,7 @@ jobs:
 
   Conformance:
     needs: [ Build, Smart_CI ]
-    timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 30 }}
+    timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 20 }}
     defaults:
       run:
         shell: bash
diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp
index b7151e15b74e5e..fd75cf7d8a8a5d 100644
--- a/src/inference/src/dev/threading/istreams_executor.cpp
+++ b/src/inference/src/dev/threading/istreams_executor.cpp
@@ -32,7 +32,7 @@ void IStreamsExecutor::Config::set_property(const ov::AnyMap& property) {
         if (key == ov::num_streams) {
             auto streams = value.as<ov::streams::Num>();
             if (streams == ov::streams::NUMA) {
-                _streams = 1;
+                _streams = get_num_numa_nodes();
             } else if (streams == ov::streams::AUTO) {
                 // bare minimum of streams (that evenly divides available number of cores)
                 _streams = get_default_num_streams();
@@ -114,29 +114,11 @@ IStreamsExecutor::Config IStreamsExecutor::Config::make_default_multi_threaded(
         return streamConfig;
     }
 
-    const auto numa_nodes = proc_type_table.size() > 1 ? proc_type_table.size() - 1 : proc_type_table.size();
-    const bool latency_case = static_cast<size_t>(streamConfig._streams) <= numa_nodes;
+    int num_cores = proc_type_table[0][ALL_PROC];
 
-    // by default, do not use the hyper-threading (to minimize threads synch overheads)
-    int num_cores = !latency_case && numa_nodes == 1
-                        ? proc_type_table[0][ALL_PROC]
-                        : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC];
-
-    // additional latency-case logic for hybrid processors:
     if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) {
         if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::ANY) {
-            // by default the latency case uses (faster) Big cores only, depending on the compute ratio
-            const bool big_only = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / 2);
-            // selecting the preferred core type
-            if (big_only) {
-                streamConfig._thread_preferred_core_type = IStreamsExecutor::Config::PreferredCoreType::BIG;
-                const int hyper_threading_threshold =
-                    2;  // min #cores, for which the hyper-threading becomes useful for the latency case
-                // additionally selecting the #cores to use in the "Big-only" case
-                num_cores = (proc_type_table[0][MAIN_CORE_PROC] <= hyper_threading_threshold)
-                                ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC]
-                                : proc_type_table[0][MAIN_CORE_PROC];
-            }
+            num_cores = proc_type_table[0][ALL_PROC];
         } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::BIG) {
             num_cores = proc_type_table[0][MAIN_CORE_PROC];
         } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::LITTLE) {
diff --git a/src/inference/tests/unit/make_default_multi_threaded_test.cpp b/src/inference/tests/unit/make_default_multi_threaded_test.cpp
index e1917394507045..40dafe9911b69b 100644
--- a/src/inference/tests/unit/make_default_multi_threaded_test.cpp
+++ b/src/inference/tests/unit/make_default_multi_threaded_test.cpp
@@ -51,7 +51,9 @@ MakeDefaultMultiThreadsTestCase _1sockets_streams_1 = {
     1,  // param[in]: the number of streams
     // param[out]: streams info table
     {
-        {1, 1, 6, 0, 0},
+        {1, 0, 12, 0, 0},
+        {0, 1, 6, 0, 0},
+        {0, 3, 6, 0, 0},
     },
 };
 
@@ -74,9 +76,11 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_1 = {
     },
     1,
     {
-        {1, 0, 36, -1, -1},
+        {1, 0, 72, -1, -1},
         {0, 1, 18, 0, 0},
         {0, 1, 18, 1, 1},
+        {0, 3, 18, 0, 0},
+        {0, 3, 18, 1, 1},
     },
 };
 
@@ -88,8 +92,10 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_4 = {
     },
     4,
     {
-        {2, 1, 9, 0, 0},
-        {2, 1, 9, 1, 1},
+        {1, 1, 18, 0, 0},
+        {1, 1, 18, 1, 1},
+        {1, 3, 18, 0, 0},
+        {1, 3, 18, 1, 1},
     },
 };
 
@@ -99,7 +105,10 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_1 = {
     },
     1,
     {
-        {1, 1, 8, 0, 0},
+        {1, 0, 24, 0, 0},
+        {0, 1, 8, 0, 0},
+        {0, 2, 8, 0, 0},
+        {0, 3, 8, 0, 0},
     },
 };
 
@@ -109,7 +118,9 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_3 = {
     },
     3,
     {
-        {3, 1, 2, 0, 0},
+        {1, 1, 8, 0, 0},
+        {1, 2, 8, 0, 0},
+        {1, 3, 8, 0, 0},
     },
 };
 
@@ -119,9 +130,10 @@ MakeDefaultMultiThreadsTestCase _pecore32_streams_1 = {
     },
     1,
     {
-        {1, 0, 24, 0, 0},
+        {1, 0, 32, 0, 0},
         {0, 1, 8, 0, 0},
         {0, 2, 16, 0, 0},
+        {0, 3, 8, 0, 0},
     },
 };
 

From b1a19fc62c3a485c32dd0e52a2983ca022e59da9 Mon Sep 17 00:00:00 2001
From: Wanglei Shen <wanglei.shen@intel.com>
Date: Mon, 4 Mar 2024 15:44:58 +0800
Subject: [PATCH 09/14] add get_streams_info_table() test cases for 4 cores and
 8 cores CPUs (#23156)

### Details:
- *add get_streams_info_table() test cases for 4 cores and 8 cores CPUs*
 - *...*

### Tickets:
 - *ticket-id*
---
 .../streams_info/streams_info_table_test.cpp  | 70 ++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp
index 1fce6e02e96f8f..204501b6046ec0 100644
--- a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp
@@ -1821,6 +1821,32 @@ StreamsCalculationTestCase _1sockets_6cores_tput_4 = {
     {{6, MAIN_CORE_PROC, 1, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}},
 };
 
+StreamsCalculationTestCase _1sockets_4cores_latency_1 = {
+    1,
+    false,
+    0,
+    0,
+    0,
+    0,
+    "LATENCY",
+    ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM,
+    {{4, 4, 0, 0, 0, 0}},
+    {{1, MAIN_CORE_PROC, 4, 0, 0}},
+};
+
+StreamsCalculationTestCase _1sockets_4cores_tput_1 = {
+    1,
+    false,
+    0,
+    0,
+    0,
+    0,
+    "THROUGHPUT",
+    ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM,
+    {{4, 4, 0, 0, 0, 0}},
+    {{2, MAIN_CORE_PROC, 2, 0, 0}},
+};
+
 StreamsCalculationTestCase _1sockets_ecores_latency_1 = {
     1,
     false,
@@ -1976,6 +2002,20 @@ StreamsCalculationTestCase _1sockets_mock_tput_3 = {
     {{19, 19, 0, 0, -1, -1}, {11, 11, 0, 0, 0, 0}, {8, 8, 0, 0, 1, 1}},
     {{5, MAIN_CORE_PROC, 2, 0, 0}, {4, MAIN_CORE_PROC, 2, 1, 1}},
 };
+
+StreamsCalculationTestCase _1sockets_mock_tput_4 = {
+    1,
+    false,
+    0,
+    0,
+    0,
+    0,
+    "THROUGHPUT",
+    ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM,
+    {{8, 8, 0, 0, 0, 0}},
+    {{4, MAIN_CORE_PROC, 2, 0, 0}},
+};
+
 StreamsCalculationTestCase _2sockets_mock_latency_1 = {
     1,
     false,
@@ -2286,6 +2326,30 @@ StreamsCalculationTestCase _2sockets_mock_latency_21 = {
      {0, HYPER_THREADING_PROC, 20, 6, 6},
      {0, MAIN_CORE_PROC, 10, 0, 0}},
 };
+StreamsCalculationTestCase _2sockets_mock_latency_22 = {
+    1,
+    false,
+    200,
+    0,
+    0,
+    3,
+    "LATENCY",
+    ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET,
+    {{200, 100, 0, 100, -1, -1},
+     {80, 40, 0, 40, 0, 0},
+     {60, 30, 0, 30, 1, 1},
+     {40, 20, 0, 20, 2, 2},
+     {20, 10, 0, 10, 3, 3}},
+    {{1, ALL_PROC, 200, -1, -1},
+     {0, MAIN_CORE_PROC, 10, 3, 3},
+     {0, HYPER_THREADING_PROC, 10, 3, 3},
+     {0, MAIN_CORE_PROC, 40, 0, 0},
+     {0, MAIN_CORE_PROC, 30, 1, 1},
+     {0, MAIN_CORE_PROC, 20, 2, 2},
+     {0, HYPER_THREADING_PROC, 40, 0, 0},
+     {0, HYPER_THREADING_PROC, 30, 1, 1},
+     {0, HYPER_THREADING_PROC, 20, 2, 2}},
+};
 
 TEST_P(StreamsCalculationTests, StreamsCalculation) {}
 
@@ -2419,6 +2483,8 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _1sockets_6cores_tput_2,
                                          _1sockets_6cores_tput_3,
                                          _1sockets_6cores_tput_4,
+                                         _1sockets_4cores_latency_1,
+                                         _1sockets_4cores_tput_1,
                                          _1sockets_ecores_latency_1,
                                          _1sockets_ecores_latency_2,
                                          _1sockets_ecores_latency_3,
@@ -2431,6 +2497,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _1sockets_mock_tput_1,
                                          _1sockets_mock_tput_2,
                                          _1sockets_mock_tput_3,
+                                         _1sockets_mock_tput_4,
                                          _2sockets_mock_latency_1,
                                          _2sockets_mock_latency_2,
                                          _2sockets_mock_latency_3,
@@ -2451,6 +2518,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable,
                                          _2sockets_mock_latency_18,
                                          _2sockets_mock_latency_19,
                                          _2sockets_mock_latency_20,
-                                         _2sockets_mock_latency_21));
+                                         _2sockets_mock_latency_21,
+                                         _2sockets_mock_latency_22));
 
 }  // namespace
\ No newline at end of file

From df32562523349c325d35afcb056574c8e368dd3e Mon Sep 17 00:00:00 2001
From: Piotr Kowalczyk <piotr.kowalczyk@intel.com>
Date: Mon, 4 Mar 2024 08:58:11 +0100
Subject: [PATCH 10/14] [GPU]: Slice op supports dynamic shapes (#22935)

### Details:
 - Slice op on GPU supports dynamic shapes

### Tickets:
 - *CVS-129985*
 - *CVS-100235*
---
 .../intel_gpu/src/graph/impls/ocl/slice.cpp   | 205 +++++++++++------
 .../intel_gpu/src/graph/include/slice_inst.h  |  60 +++++
 src/plugins/intel_gpu/src/graph/slice.cpp     |  74 +++++-
 .../kernel_selector/cl_kernels/slice_ref.cl   | 103 +++++++--
 .../kernels/slice/slice_kernel_ref.cpp        | 102 +++++++--
 .../kernels/slice/slice_kernel_ref.h          |  10 +-
 .../single_layer_tests/slice.cpp              |  15 ++
 .../tests/unit/test_cases/slice_gpu_test.cpp  | 216 +++++++++++++++---
 8 files changed, 623 insertions(+), 162 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp
index 7573acd46d3153..cd778650b12a3c 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp
@@ -2,33 +2,32 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "primitive_base.hpp"
+#include <algorithm>
+#include <cstddef>
 
-#include "slice_inst.h"
 #include "data_inst.h"
-#include "slice/slice_kernel_selector.h"
+#include "primitive_base.hpp"
 #include "slice/slice_kernel_ref.h"
-
-#include <algorithm>
-#include <cstddef>
+#include "slice/slice_kernel_selector.h"
+#include "slice_inst.h"
 
 namespace cldnn {
 namespace ocl {
 
 namespace {
-template<typename T, class = typename std::enable_if<std::is_integral<T>::value>::type>
-std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
+template <typename T, class = typename std::enable_if<std::is_integral<T>::value>::type>
+std::vector<std::int64_t> extractIntegerData(const data_node& node, const stream& stream) {
     mem_lock<T> lock{node.get_attached_memory_ptr(), stream};
     T* data = lock.data();
-    std::vector<std::int32_t> integer_data;
+    std::vector<std::int64_t> integer_data;
     integer_data.reserve(node.get_output_layout().count());
     for (size_t i = 0; i < node.get_output_layout().count(); i++) {
-        integer_data.emplace_back(static_cast<std::int32_t>(data[i]));
+        integer_data.emplace_back(static_cast<std::int64_t>(data[i]));
     }
     return integer_data;
 }
 
-std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
+std::vector<std::int64_t> extractIntegerData(const data_node& node, const stream& stream) {
     auto dt = node.get_output_layout().data_type;
     switch (dt) {
     case data_types::u8:
@@ -40,22 +39,16 @@ std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream
     case data_types::i64:
         return extractIntegerData<std::int64_t>(node, stream);
     default:
-        OPENVINO_ASSERT(false, "[GPU] Slice parameters should be of integral type for node ", node.id(), " while got ", dt);
+        OPENVINO_ASSERT(false,
+                        "[GPU] Slice parameters should be of integral type for node ",
+                        node.id(),
+                        " while got ",
+                        dt);
     }
     return {};
 }
 
-std::vector<std::int32_t> extractShape(kernel_selector::Tensor::DataTensor& tensor) {
-    auto logical_dims = tensor.LogicalDims();
-    // LogicalDims method returns dims in reversed order
-    std::vector<int32_t> reverse_logical_dims;
-    for (auto it = logical_dims.rbegin(); it != logical_dims.rend(); ++it) {
-        reverse_logical_dims.push_back(static_cast<int32_t>(*it));
-    }
-    return reverse_logical_dims;
-}
-
-} // namespace
+}  // namespace
 
 struct slice_impl : typed_primitive_impl_ocl<slice> {
     using parent = typed_primitive_impl_ocl<slice>;
@@ -63,81 +56,141 @@ struct slice_impl : typed_primitive_impl_ocl<slice> {
     using kernel_selector_t = kernel_selector::slice_kernel_selector;
     using kernel_params_t = kernel_selector::slice_params;
 
-    enum InputIndices {
-        kData,
-        kStart,
-        kEnd,
-        kStep,
-        kAxes,
-        kInputsNum
-    };
-
     DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::slice_impl)
 
     std::unique_ptr<primitive_impl> clone() const override {
         return make_unique<slice_impl>(*this);
     }
 
+    void load(BinaryInputBuffer& ib) override {
+        parent::load(ib);
+        if (is_dynamic()) {
+            auto& kernel_selector = kernel_selector_t::Instance();
+            auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName);
+            kernel_impl->GetUpdateDispatchDataFunc(_kernel_data);
+        }
+    }
+
+    kernel_arguments_data get_arguments(const slice_inst& instance) const override {
+        kernel_arguments_data args;
+
+        const SliceKernelRefNeededInputs inputs = SliceKernelRefNeededInputs::Create(*instance.node);
+
+        for (auto idx : inputs.GetNeededInputIndexes()) {
+            args.inputs.push_back(instance.input_memory_ptr(idx));
+        }
+
+        for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
+            args.outputs.push_back(instance.output_memory_ptr(i));
+        }
+
+        args.shape_info = instance.shape_info_memory_ptr();
+        return args;
+    }
+
     static std::unique_ptr<primitive_impl> create(const slice_node& arg, const kernel_impl_params& impl_param) {
-        auto params = get_default_params<kernel_selector::slice_params>(impl_param);
-        const auto& inputs = arg.get_dependencies();
-        const stream& stream = arg.get_program().get_stream();
-        auto start_elts = extractIntegerData(inputs[InputIndices::kStart].first->as<data>(), stream);
-        auto end_elts = extractIntegerData(inputs[InputIndices::kEnd].first->as<data>(), stream);
-        auto step_elts = extractIntegerData(inputs[InputIndices::kStep].first->as<data>(), stream);
-        auto data_shape = extractShape(params.inputs[0]);
-        std::vector<std::int32_t> axes(data_shape.size());
-        if (inputs.size() == InputIndices::kInputsNum)
-            axes = extractIntegerData(inputs[InputIndices::kAxes].first->as<data>(), stream);
-        else
-            std::iota(axes.begin(), axes.end(), 0);
-        std::vector<std::int32_t> selected_start(data_shape.size(), 0);
-        std::vector<std::int32_t> selected_step(data_shape.size(), 1);
-        std::vector<std::int32_t> selected_end(data_shape);
-        for (size_t axis = 0; axis < axes.size(); axis++) {
-            auto transformed_axe = axes[axis] < 0 ? data_shape.size() + axes[axis] : axes[axis];
-            auto start = start_elts[axis];
-            auto end = end_elts[axis];
-            auto dim_size = data_shape[transformed_axe];
-            selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0);
-            selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0);
-            selected_step[transformed_axe] = step_elts[axis];
+        auto params = get_default_params<kernel_selector::slice_params>(impl_param, impl_param.is_dynamic());
+        const auto input_rank = params.inputs[0].Dimentions();
+
+        if (!PrepareInput(arg,
+                          SliceKernelRefNeededInputs::kStart,
+                          params.compile_time_start,
+                          params.start_data_type,
+                          params.inputs)) {
+            // No kStart input - set it to default:
+            params.axes_data_type = kernel_selector::Datatype::INT64;
+            params.compile_time_start = std::vector<int64_t>(input_rank, 0);
+        }
+
+        // NOTE: Stop input is not used by the slice kernel, as this information
+        // is implicitely passed with output shape.
+
+        if (!PrepareInput(arg,
+                          SliceKernelRefNeededInputs::kStep,
+                          params.compile_time_step,
+                          params.step_data_type,
+                          params.inputs)) {
+            // No kStep input - set it to default:
+            params.axes_data_type = kernel_selector::Datatype::INT64;
+            params.compile_time_step = std::vector<int64_t>(input_rank, 1);
+        }
+
+        if (!PrepareInput(arg,
+                          SliceKernelRefNeededInputs::kAxes,
+                          params.compile_time_axes,
+                          params.axes_data_type,
+                          params.inputs)) {
+            // No kAxes input - set it to default:
+            params.axes_data_type = kernel_selector::Datatype::INT64;
+            params.compile_time_axes.resize(input_rank);
+            std::iota(params.compile_time_axes.begin(), params.compile_time_axes.end(), 0);
         }
-        params.start = std::move(selected_start);
-        params.end = std::move(selected_end);
-        params.step = std::move(selected_step);
+
+        // Transform compile time axes:
+        for (size_t axis = 0; axis < params.compile_time_axes.size(); ++axis) {
+            const int64_t transformed_axe = params.compile_time_axes[axis] < 0
+                                             ? input_rank + params.compile_time_axes[axis]
+                                             : params.compile_time_axes[axis];
+            params.compile_time_axes[axis] = transformed_axe;
+        }
+
         params.set_dynamic_shape_offsets();
-        auto &kernel_selector =
-                kernel_selector::slice_kernel_selector::Instance();
+        auto& kernel_selector = kernel_selector::slice_kernel_selector::Instance();
         auto best_kernel = kernel_selector.get_best_kernel(params);
 
         return make_unique<slice_impl>(best_kernel);
     }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        auto kernel_params = get_default_params<kernel_selector::slice_params>(impl_param, true);
+        (_kernel_data.update_dispatch_data_func)(kernel_params, _kernel_data);
+    }
+
+private:
+    // Returns true if input was prepared(was avaiable in node def), false otherwise.
+    static bool PrepareInput(const slice_node& arg,
+                             SliceKernelRefNeededInputs::InputIndices idx,
+                             std::vector<std::int64_t>& out_compile_time_buff,
+                             kernel_selector::Datatype& out_buff_data_type,
+                             kernel_selector::MultiDataTensor& out_runtime_inputs) {
+        const stream& stream = arg.get_program().get_stream();
+        const auto& inputs = arg.get_dependencies();
+
+        if (inputs.size() <= idx)
+            return false;
+
+        const SliceKernelRefNeededInputs kernel_needed_inputs = SliceKernelRefNeededInputs::Create(arg);
+        if (kernel_needed_inputs.IsInputNeededInRuntime(idx)) {
+            const auto layout = inputs[idx].first->get_output_layout(0);
+            out_buff_data_type = to_data_type(layout.data_type);
+            out_compile_time_buff.clear();
+            out_runtime_inputs.push_back(convert_data_tensor(layout));
+        } else {
+            out_buff_data_type = kernel_selector::Datatype::INT64;
+            out_compile_time_buff = extractIntegerData(inputs[idx].first->as<data>(), stream);
+        }
+
+        return true;
+    }
 };
 
 namespace detail {
 
 attach_slice_impl::attach_slice_impl() {
-    implementation_map<slice>::add(impl_types::ocl, slice_impl::create, {
-        std::make_tuple(data_types::f16, format::bfyx),
-        std::make_tuple(data_types::f32, format::bfyx),
-        std::make_tuple(data_types::u8, format::bfyx),
-        std::make_tuple(data_types::i8, format::bfyx),
-        std::make_tuple(data_types::i32, format::bfyx),
-        std::make_tuple(data_types::i64, format::bfyx),
-        std::make_tuple(data_types::f16, format::bfzyx),
-        std::make_tuple(data_types::f32, format::bfzyx),
-        std::make_tuple(data_types::u8, format::bfyx),
-        std::make_tuple(data_types::i8, format::bfyx),
-        std::make_tuple(data_types::i32, format::bfzyx),
-        std::make_tuple(data_types::i64, format::bfzyx),
-    });
+    auto types = {data_types::f32, data_types::f16, data_types::i8, data_types::u8, data_types::i32, data_types::i64};
+
+    auto formats = {
+        format::bfyx,
+        format::bfzyx,
+    };
+
+    implementation_map<slice>::add(impl_types::ocl, shape_types::any, slice_impl::create, types, formats);
 }
 
 }  // namespace detail
 
-} // namespace ocl
-} // namespace cldnn
+}  // namespace ocl
+}  // namespace cldnn
 
 BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::slice_impl)
 BIND_BINARY_BUFFER_WITH_TYPE(cldnn::slice)
diff --git a/src/plugins/intel_gpu/src/graph/include/slice_inst.h b/src/plugins/intel_gpu/src/graph/include/slice_inst.h
index 189a2a6096fea6..09425f20f5dd47 100644
--- a/src/plugins/intel_gpu/src/graph/include/slice_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/slice_inst.h
@@ -10,6 +10,45 @@ namespace cldnn {
 
 using slice_node = typed_program_node<slice>;
 
+// This class is needed to have one place where decision
+// is made which Slice inputs are used by the kernel on GPU.
+// Unfortnately, the same decison needs to be made
+// in multiple places, including:
+// - slice_inst::update_shape_info_tensor
+// - slice_impl::get_arguments
+// - slice_impl::create
+// This class was created to encapsulate that logic in single place.
+// NOTE: the placement of this class is the 'lesser evil'. Normally such logic
+// should be a part of codegen/jitter, which should output some struct with information
+// about which data is needed by the kernel, how it should be provided, bindings, etc.
+// Currently it is scattered in mutiple places, where basically similar logic has to be applied.
+// NOTE: This class implicietly depends on logic inside SliceKernelRef and the kernel
+// itself. If you make any changes of how params are provided to kernel,
+// likely you will needed to update this one too.
+class SliceKernelRefNeededInputs {
+public:
+    enum InputIndices {
+        kData,
+        kStart,
+        kEnd,
+        kStep,
+        kAxes,
+        kInputsNum
+    };
+
+    // Creates instance of SliceKernelRefNeededInputs.
+    static SliceKernelRefNeededInputs Create(const slice_node& node);
+
+    // Retruns needed indexes in runtime.
+    const std::vector<size_t>& GetNeededInputIndexes() const;
+
+    // Returns true if given input is needed in runtime.
+    bool IsInputNeededInRuntime(InputIndices type) const;
+
+private:
+    std::vector<size_t> neededIndexes;
+};
+
 template <>
 class typed_primitive_inst<slice> : public typed_primitive_inst_base<slice> {
     using parent = typed_primitive_inst_base<slice>;
@@ -22,8 +61,29 @@ class typed_primitive_inst<slice> : public typed_primitive_inst_base<slice> {
     static std::string to_string(slice_node const& node);
 
     typed_primitive_inst(network& network, slice_node const& desc);
+    void update_shape_info_tensor(const kernel_impl_params& params) override;
 };
 
 using slice_inst = typed_primitive_inst<slice>;
 
+///////////////////////////////////////////////////////////////////
+//
+// INLINES:
+//
+///////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////
+inline const std::vector<size_t>& SliceKernelRefNeededInputs::GetNeededInputIndexes() const {
+    return neededIndexes;
+}
+
+///////////////////////////////////////////////////////////////////
+inline bool SliceKernelRefNeededInputs::IsInputNeededInRuntime(InputIndices type) const {
+    for (auto idx : neededIndexes) {
+        if (idx == type)
+            return true;
+    }
+    return false;
+}
+
 } // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/slice.cpp b/src/plugins/intel_gpu/src/graph/slice.cpp
index 4ac47b63c2e66f..84e57ea10ca0d0 100644
--- a/src/plugins/intel_gpu/src/graph/slice.cpp
+++ b/src/plugins/intel_gpu/src/graph/slice.cpp
@@ -10,6 +10,30 @@
 #include <json_object.h>
 
 namespace cldnn {
+
+SliceKernelRefNeededInputs SliceKernelRefNeededInputs::Create(const slice_node& node) {
+    SliceKernelRefNeededInputs inputs;
+
+    const auto& node_inputs = node.get_dependencies();
+
+    const bool axes_in_runtime =
+        ((node_inputs.size() == InputIndices::kInputsNum) && !node_inputs[InputIndices::kAxes].first->is_constant());
+    const bool start_in_runtime = !node_inputs[InputIndices::kStart].first->is_constant();
+    const bool step_in_runtime = !node_inputs[InputIndices::kStep].first->is_constant();
+
+    inputs.neededIndexes.push_back(InputIndices::kData);
+    if (start_in_runtime)
+        inputs.neededIndexes.push_back(InputIndices::kStart);
+    if (step_in_runtime)
+        inputs.neededIndexes.push_back(InputIndices::kStep);
+    if (axes_in_runtime)
+        inputs.neededIndexes.push_back(InputIndices::kAxes);
+
+    // NOTE: stop is never needed as it is passed implicitely via output shape.
+
+    return inputs;
+}
+
 GPU_DEFINE_PRIMITIVE_TYPE_ID(slice)
 
 slice_inst::typed_primitive_inst(network& network, slice_node const& node)
@@ -19,18 +43,33 @@ layout slice_inst::calc_output_layout(slice_node const& node, kernel_impl_params
     return calc_output_layouts<ov::PartialShape>(node, impl_param)[0];
 }
 
-template<typename ShapeType>
+template <typename ShapeType>
 inline std::vector<layout> slice_inst::calc_output_layouts(const slice_node&, const kernel_impl_params& impl_param) {
     std::vector<ShapeType> input_shapes{impl_param.input_layouts[0].get<ShapeType>()};
     std::unordered_map<size_t, ov::Tensor> const_data;
     for (std::size_t i = 1; i < impl_param.input_layouts.size(); i++) {
-        const auto shape_len = shape_size(impl_param.input_layouts[i].get<ShapeType>().to_shape());
-        const ov::PartialShape input_shape{static_cast<ov::Dimension::value_type>(shape_len)};
+        // NOTE: This code effectively makes a reshape operation on tensors start,
+        // stop, step and axes. The specification of Slice operator clearly says
+        // that those tensors are 1D tensors - and this is what is expected
+        // in shape_infer(). However, people in tests and other places,
+        // put 4D tensors instead of 1D(e.g. [4,1,1,1] instead of [4]).
+        // At the time of writing this comment - the hack for such situation
+        // was already there, so adding an ASSERT will effectively make
+        // some tests and graph transformations fail.
+        // There should be some kind of warning to the user about it, but AFAIK
+        // we don't have warning logs that could be enabled/disabled without
+        // affecting performance...
+        ov::PartialShape input_shape = ov::PartialShape::dynamic(1);
+        if (impl_param.memory_deps.find(i) != impl_param.memory_deps.end()) {
+            auto gpu_mem = impl_param.memory_deps.at(i);
+            input_shape = {static_cast<ov::Dimension::value_type>(gpu_mem->count())};
+            cldnn::mem_lock<uint8_t, mem_lock_type::read> gpu_mem_lock(gpu_mem, impl_param.get_stream());
+            const_data.emplace(
+                i,
+                make_tensor(layout{input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format},
+                            gpu_mem_lock.data()));
+        }
         input_shapes.push_back(input_shape);
-        auto gpu_mem = impl_param.memory_deps.at(i);
-        cldnn::mem_lock<uint8_t, mem_lock_type::read> gpu_mem_lock(gpu_mem, impl_param.get_stream());
-        const_data.emplace(i, make_tensor(layout {input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format },
-            gpu_mem_lock.data()));
     }
     ov::op::v8::Slice op;
     auto output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
@@ -42,7 +81,6 @@ inline std::vector<layout> slice_inst::calc_output_layouts(const slice_node&, co
     return output_layouts;
 }
 
-
 std::string slice_inst::to_string(slice_node const& node) {
     auto node_info = node.desc_to_json();
     json_composite slice_info;
@@ -57,4 +95,24 @@ std::string slice_inst::to_string(slice_node const& node) {
     return primitive_description.str();
 }
 
+void slice_inst::update_shape_info_tensor(const kernel_impl_params& params) {
+    mem_lock<int32_t> lock(_shape_info_memory, _network.get_stream());
+    auto shape_info_ptr = lock.data();
+    size_t offset = 0;
+    const SliceKernelRefNeededInputs inputs = SliceKernelRefNeededInputs::Create(*_node);
+
+    for (auto idx : inputs.GetNeededInputIndexes()) {
+        GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for input[" << idx << "]" << std::endl;
+        const auto& node_in_lay = _node->get_input_layout(idx);
+        const auto& runtime_in_lay = params.input_layouts[idx];
+        fill_shape_info_data(runtime_in_lay, node_in_lay, shape_info_ptr, offset);
+    }
+    for (size_t i = 0; i < _node->get_output_layouts().size(); i++) {
+        GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for output[" << i << "]" << std::endl;
+        const auto& node_out_lay = _node->get_output_layout(i);
+        const auto& runtime_out_lay = params.output_layouts[i];
+        fill_shape_info_data(runtime_out_lay, node_out_lay, shape_info_ptr, offset);
+    }
+}
+
 } // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl
index f90254ce89f0e4..a67aa925060c73 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl
@@ -4,33 +4,88 @@
 
 #include "include/batch_headers/fetch_data.cl"
 
-KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
+#define BRING_INTO_RANGE(VAL, MAX) \
+    clamp((long)VAL < 0l ? (long)VAL + (long)MAX : (long)VAL, 0l, (long)MAX-1l);
+
+#if INPUT0_DIMS < 5
+#define LOAD_BUFFER(in_prefix, out_name)  \
+    long out_name[INPUT0_DIMS];           \
+    out_name[0] = in_prefix##_VAL0;       \
+    out_name[1] = in_prefix##_VAL1;       \
+    out_name[2] = in_prefix##_VAL2;       \
+    out_name[3] = in_prefix##_VAL3;
+#else
+#define LOAD_BUFFER(in_prefix, out_name)  \
+    long out_name[INPUT0_DIMS];           \
+    out_name[0] = in_prefix##_VAL0;       \
+    out_name[1] = in_prefix##_VAL1;       \
+    out_name[2] = in_prefix##_VAL2;       \
+    out_name[3] = in_prefix##_VAL3;       \
+    out_name[4] = in_prefix##_VAL4;
+#endif
+
+KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG 
+                  const __global INPUT0_TYPE* restrict input,
+                  START_BUFFER
+                  STEP_BUFFER
+                  AXES_BUFFER
+                  __global OUTPUT_TYPE* restrict output)
 {
-    const uint batch = get_global_id(0);
-    const uint feature = get_global_id(1);
+    LOAD_BUFFER(START, start_buff);
+    LOAD_BUFFER(STEP, step_buff);
+    LOAD_BUFFER(AXES, axes_buff);
+
+    long slice_step[INPUT0_DIMS];
+    long slice_start[INPUT0_DIMS];
+
+    unroll_for(int i = 0; i < INPUT0_DIMS; ++i) {
+        slice_step[i] = 1;
+        slice_start[i] = 0;
+    }
+
+    unroll_for(int i = 0; i < AXES_BUFFER_SIZE; ++i) {
+        const long axis = axes_buff[i];
+        slice_step[axis] = step_buff[i];
+        slice_start[axis] = start_buff[i];
+    }
+
+    const long output_dim0 = get_global_id(0);
+    const long output_dim1 = get_global_id(1);
+    const long slice_begin_dim0 = BRING_INTO_RANGE(slice_start[0], INPUT0_BATCH_NUM);
+    const long slice_begin_dim1 = BRING_INTO_RANGE(slice_start[1], INPUT0_FEATURE_NUM);
+
 #if INPUT0_DIMS <= 4
-    const uint xy = get_global_id(2);
-    const uint y = xy / OUTPUT_SIZE_X;
-    const uint x = xy % OUTPUT_SIZE_X;
-    const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x);
-    const uint input_index = INPUT0_GET_INDEX(
-        SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
-        SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
-        SLICE_BEGIN_Y + y * SLICE_STEP_Y,
-        SLICE_BEGIN_X + x * SLICE_STEP_X);
+    const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Y);
+    const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_X);
+    const long output_dim23 = get_global_id(2);
+    const long output_dim2 = output_dim23 / OUTPUT_SIZE_X;
+    const long output_dim3 = output_dim23 % OUTPUT_SIZE_X;
+    const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3);
+    const long input_index = INPUT0_GET_INDEX(
+        slice_begin_dim0 + output_dim0 * slice_step[0],
+        slice_begin_dim1 + output_dim1 * slice_step[1],
+        slice_begin_dim2 + output_dim2 * slice_step[2],
+        slice_begin_dim3 + output_dim3 * slice_step[3]);
 #elif INPUT0_DIMS == 5
-    const uint xyz = get_global_id(2);
-    const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
-    const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
-    const uint y = yx / OUTPUT_SIZE_X;
-    const uint x = yx % OUTPUT_SIZE_X;
-    const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x);
-    const uint input_index = INPUT0_GET_INDEX(
-        SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
-        SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
-        SLICE_BEGIN_Z + z * SLICE_STEP_Z,
-        SLICE_BEGIN_Y + y * SLICE_STEP_Y,
-        SLICE_BEGIN_X + x * SLICE_STEP_X);
+    const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Z);
+    const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_Y);
+    const long slice_begin_dim4 = BRING_INTO_RANGE(slice_start[4], INPUT0_SIZE_X);
+    const long output_dim234 = get_global_id(2);
+    const long output_dim34 = output_dim234 % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+    const long output_dim2 = output_dim234 / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+    const long output_dim3 = output_dim34 / OUTPUT_SIZE_X;
+    const long output_dim4 = output_dim34 % OUTPUT_SIZE_X;
+    const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3, output_dim4);
+    const long input_index = INPUT0_GET_INDEX(
+        slice_begin_dim0 + output_dim0 * slice_step[0],
+        slice_begin_dim1 + output_dim1 * slice_step[1],
+        slice_begin_dim2 + output_dim2 * slice_step[2],
+        slice_begin_dim3 + output_dim3 * slice_step[3],
+        slice_begin_dim4 + output_dim4 * slice_step[4]);
 #endif
+
     output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
 }
+
+#undef LOAD_BUFFER;
+#undef BRING_INTO_RANGE;
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp
index 1952afa5378bb3..312cd0fa2f7c2c 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp
@@ -7,23 +7,49 @@
 #include <vector>
 
 namespace {
-
-void addJitConstantsForAttribute(kernel_selector::JitConstants &jit,
-        const std::string &name, const std::vector<std::int32_t> &attribute) {
+static constexpr size_t MAX_SUPPORTED_DIM = 5;
+static constexpr char JIT_AXES_BUFF_SIZE_NAME[] = "AXES_BUFFER_SIZE";
+
+// Generates macros:
+// - name_BUFFER
+// - name_VAL0, name_VAL1 ...
+void addJitConstantsForParam(kernel_selector::JitConstants& jit,
+                             const std::string& name,
+                             const std::vector<std::int64_t>& compile_time_param,
+                             kernel_selector::Datatype type,
+                             const std::function<std::string(std::string, size_t)>& dynamic_access_decorator) {
     using namespace kernel_selector;
-    jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0]));
-    jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1]));
-    if (attribute.size() == 5) {  // BFZYX
-        jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2]));
-        jit.AddConstant(MakeJitConstant(name + "_Y", attribute[3]));
-        jit.AddConstant(MakeJitConstant(name + "_X", attribute[4]));
-    } else {  // BFYX
-        jit.AddConstant(MakeJitConstant(name + "_Y", attribute[2]));
-        jit.AddConstant(MakeJitConstant(name + "_X", attribute[3]));
+    const std::string BUFF_CONST_NAME = name + "_BUFFER";
+    const std::string BUFF_PTR_NAME = name + "_buffer_ptr";
+    const auto jit_name_decorator = [](std::string name, size_t i) {
+        return name + "_VAL" + std::to_string(i);
+    };
+
+    if (compile_time_param.empty()) {
+        // Dynamic param:
+        const std::string type_str = toCLType(type);
+        jit.AddConstant(
+            MakeJitConstant(BUFF_CONST_NAME, "__global const " + type_str + "* restrict " + BUFF_PTR_NAME + ","));
+
+        for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) {
+            const std::string i_str = std::to_string(i);
+            const std::string jit_name = jit_name_decorator(name, i);
+            const std::string access_str = dynamic_access_decorator(BUFF_PTR_NAME, i);
+            jit.AddConstant(
+                MakeJitConstant(jit_name, i_str + " < " + JIT_AXES_BUFF_SIZE_NAME + " ? (" + access_str + ") : -1"));
+        }
+    } else {
+        // Static param:
+        jit.AddConstant(MakeJitConstant(BUFF_CONST_NAME, ""));
+        for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) {
+            const std::string jit_name = jit_name_decorator(name, i);
+            const int64_t val = i < compile_time_param.size() ? compile_time_param[i] : -1;
+            jit.AddConstant(MakeJitConstant(jit_name, val));
+        }
     }
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 namespace kernel_selector {
 
@@ -39,8 +65,11 @@ KernelsData SliceKernelRef::GetKernelsData(const Params &params) const {
     auto slice_specific_jit = GetJitConstants(new_params);
     auto jit = CreateJit(kernelName, slice_specific_jit, entry_point);
 
-    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo,
-            kernelName, jit, entry_point);
+    GetUpdateDispatchDataFunc(kernel_data);
+
+    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, kernelName, jit, entry_point,
+                     "", false, false, static_cast<int>(new_params.inputs.size()),
+                     0, 1, new_params.has_dynamic_tensors());
 
     return {kernel_data};
 }
@@ -68,6 +97,8 @@ ParamsKey SliceKernelRef::GetSupportedKey() const {
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
+    k.EnableDynamicShapesSupport();
+    k.EnableDifferentTypes();
     return k;
 }
 
@@ -80,17 +111,37 @@ bool SliceKernelRef::Validate(const Params &p) const {
     if (params.inputs.empty())
         return false;
 
-    if (params.outputs[0].Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
+    if (params.outputs[0].Dimentions() > MAX_SUPPORTED_DIM || params.inputs[0].Dimentions() > MAX_SUPPORTED_DIM)
         return false;
 
     return true;
 }
 
-JitConstants SliceKernelRef::GetJitConstants(const slice_params &params) const {
+JitConstants SliceKernelRef::GetJitConstants(const slice_params& params) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
-    addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start);
-    addJitConstantsForAttribute(jit, "SLICE_END", params.end);
-    addJitConstantsForAttribute(jit, "SLICE_STEP", params.step);
+
+    // Define axes size as constant:
+    if (params.compile_time_axes.empty()) {
+        kernel_selector::DimensionAccessHelper dims(params.inputs.back());
+        jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME,
+                                        toVectorMulString({dims.b(), dims.f(), dims.x(), dims.y(), dims.z()})));
+    } else {
+        jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME, params.compile_time_axes.size()));
+    }
+
+    // Prepare axes, start and step params:
+    const auto axes_decorator = [](std::string name, size_t i) {
+        const std::string i_str = std::to_string(i);
+        return name + "[" + i_str + "] < 0 ? INPUT0_DIMS + " + name + "[" + i_str + "] : " + name + "[" + i_str + "]";
+    };
+    addJitConstantsForParam(jit, "AXES", params.compile_time_axes, params.axes_data_type, axes_decorator);
+
+    const auto default_decorator = [](std::string name, size_t i) {
+        return name + "[" + std::to_string(i) + "]";
+    };
+    addJitConstantsForParam(jit, "START", params.compile_time_start, params.start_data_type, default_decorator);
+    addJitConstantsForParam(jit, "STEP", params.compile_time_step, params.step_data_type, default_decorator);
+
     return jit;
 }
 
@@ -105,4 +156,15 @@ CommonDispatchData SliceKernelRef::SetDefault(const slice_params &params) const
     return dispatchData;
 }
 
+void SliceKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) const {
+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const slice_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+        kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
+    };
+}
+
 } // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
index 67449aed56b22b..b1d331cc94921e 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
@@ -12,9 +12,12 @@ namespace kernel_selector {
 struct slice_params: public base_params {
     slice_params() : base_params(KernelType::SLICE) {}
 
-    std::vector<std::int32_t> start;
-    std::vector<std::int32_t> end;
-    std::vector<std::int32_t> step;
+    std::vector<std::int64_t> compile_time_start;
+    std::vector<std::int64_t> compile_time_step;
+    std::vector<std::int64_t> compile_time_axes;
+    kernel_selector::Datatype start_data_type;
+    kernel_selector::Datatype step_data_type;
+    kernel_selector::Datatype axes_data_type;
 };
 
 class SliceKernelRef: public KernelBaseOpenCL {
@@ -30,6 +33,7 @@ class SliceKernelRef: public KernelBaseOpenCL {
 private:
     JitConstants GetJitConstants(const slice_params &params) const;
     CommonDispatchData SetDefault(const slice_params &params) const;
+    void GetUpdateDispatchDataFunc(KernelData& kd) const override;
 };
 
 } // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
index 19d386e8a8351b..1f0381ef95100e 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
@@ -46,5 +46,20 @@ INSTANTIATE_TEST_SUITE_P(
             ::testing::Values(ov::test::utils::DEVICE_GPU)),
         Slice8LayerTest::getTestCaseName);
 
+std::vector<Slice8SpecificParams> dynamic_params = {
+        Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
+        Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
+        Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
+        Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
+        Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
+            {{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } }
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_GPU_dynamic, Slice8LayerTest,
+        ::testing::Combine(
+            ::testing::ValuesIn(dynamic_params),
+            ::testing::ValuesIn(types),
+            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+        Slice8LayerTest::getTestCaseName);
 
 }  // namespace
\ No newline at end of file
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp
index c33a287d2e4ddb..3cff74940daf57 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp
@@ -46,7 +46,14 @@ struct SliceTestParams {
     memory::ptr start;
     memory::ptr stop;
     memory::ptr step;
+    memory::ptr axes;
     memory::ptr wanted_output;
+    bool is_input_dynamic = false;
+    bool is_start_dynamic = false;
+    bool is_stop_dynamic = false;
+    bool is_step_dynamic = false;
+    bool is_axes_dynamic = false;
+    bool is_caching_test = false;
 };
 
 template<typename T>
@@ -54,8 +61,7 @@ class SliceTest : public ::testing::Test {
 public:
     // Runs all test cases for given params.
     void RunAllTestCasesForParams(const SliceTestParams& params) {
-        RunTestCase(params, false);
-        RunTestCase(params, true);
+        RunTestCase(params);
     }
 
     // Allocates tensoer with given shape and data.
@@ -69,25 +75,83 @@ class SliceTest : public ::testing::Test {
         return tensor;
     }
 
+    template<typename TypeParam>
+    void FillWithBasicBfyxPositiveStepAxesLessThanRankData(SliceTestParams& params) {
+        const ov::PartialShape input_shape{ 1, 2, 12, 100 };
+        params.input = this->template AllocateTensor<TypeParam>(
+            input_shape, format::bfyx, helpers::GenInput<TypeParam>(input_shape));
+        params.start = this->template AllocateTensor<int64_t>(
+            ov::PartialShape{ 3 }, format::bfyx, { 1, 1, 0 });
+        params.stop = this->template AllocateTensor<int64_t>(
+            ov::PartialShape{ 3 }, format::bfyx, { 2, 120, 5 });
+        params.step = this->template AllocateTensor<int64_t>(
+            ov::PartialShape{ 3 }, format::bfyx, { 1, 10, 1 });
+        params.axes = this->template AllocateTensor<int64_t>(
+            ov::PartialShape{ 3 }, format::bfyx, { 1, 3, 2 });
+        params.wanted_output = this->template AllocateTensor<TypeParam>(
+            ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { 
+                1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291,
+                1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391,
+                1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491,
+                1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591,
+                1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691,
+            });
+    }
+
 private:
+    void SetParameterInput(const std::string& name, topology& topology, const memory::ptr& data_ptr, bool is_dynamic ) {
+        if(is_dynamic) {
+            auto dynamic_shape = data_ptr->get_layout();
+            dynamic_shape.set_partial_shape(ov::PartialShape::dynamic(dynamic_shape.get_rank()));
+            topology.add(input_layout(name, dynamic_shape));
+        } else {
+            topology.add(data(name, data_ptr));
+        }
+    }
+
     // Runs single tests case for given params.
-    void RunTestCase(const SliceTestParams& params, bool is_caching_test) {
+    void RunTestCase(const SliceTestParams& params) {
+
+        auto dynamic_input = params.input->get_layout();
+        dynamic_input.set_partial_shape(ov::PartialShape::dynamic(dynamic_input.get_rank()));
         topology topology;
-        topology.add(input_layout("input", params.input->get_layout()));
-        topology.add(data("start", params.start));
-        topology.add(data("stop", params.stop));
-        topology.add(data("step", params.step));
+        topology.add(input_layout("input", params.is_input_dynamic ? dynamic_input : params.input->get_layout()));
+
+        SetParameterInput("start", topology, params.start, params.is_start_dynamic);
+        SetParameterInput("stop", topology, params.stop, params.is_stop_dynamic);
+        SetParameterInput("step", topology, params.step, params.is_step_dynamic);
+
+        if(params.axes) {
+            SetParameterInput("axes", topology, params.axes, params.is_axes_dynamic);
+        }
+
         std::vector<input_info> inputs{input_info("input"),
                                        input_info("start"),
                                        input_info("stop"),
                                        input_info("step")};
+        if (params.axes) {
+            inputs.push_back(input_info("axes"));
+        }
         topology.add(slice("slice", inputs));
 
         ExecutionConfig config = get_test_default_config(engine_);
+        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
         cldnn::network::ptr network =
-            get_network(engine_, topology, config, get_test_stream_ptr(), is_caching_test);
+            get_network(engine_, topology, config, get_test_stream_ptr(), params.is_caching_test);
 
         network->set_input_data("input", params.input);
+
+        if (params.is_start_dynamic)
+            network->set_input_data("start", params.start);
+        if (params.is_stop_dynamic)
+            network->set_input_data("stop", params.stop);
+        if (params.is_step_dynamic)
+            network->set_input_data("step", params.step);
+        if(params.axes && params.is_axes_dynamic) {
+             network->set_input_data("axes", params.axes);
+        }
+
         auto outputs = network->execute();
 
         ASSERT_EQ(outputs.size(), size_t(1));
@@ -113,24 +177,105 @@ TYPED_TEST_SUITE(SliceTest, DataTypes);
 
 TYPED_TEST(SliceTest, bfyx_positive_step) {
     SliceTestParams params;
-    const ov::PartialShape input_shape{ 1, 2, 12, 100 };
-    params.input = this->template AllocateTensor<TypeParam>(
-        input_shape, format::bfyx, helpers::GenInput<TypeParam>(input_shape));
-    params.start = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 0, 1 });
-    params.stop = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 2, 5, 100 });
-    params.step = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 1, 1, 10 });
-    params.wanted_output = this->template AllocateTensor<TypeParam>(
-        ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { 
-            1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291,
-            1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391,
-            1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491,
-            1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591,
-            1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691,
-        });
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    this->RunAllTestCasesForParams(params);
+}
 
+TYPED_TEST(SliceTest, bfyx_positive_step_all_static_caching) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_caching_test = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, bfyx_positive_step_all_dynamic_caching) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    params.is_start_dynamic = true;
+    params.is_step_dynamic = true;
+    params.is_stop_dynamic = true;
+    params.is_axes_dynamic = true;
+    params.is_caching_test = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, input_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, stop_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_stop_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, step_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_step_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, stop_step_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_step_dynamic = true;
+    params.is_stop_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, input_stop_step_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    params.is_step_dynamic = true;
+    params.is_stop_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, start_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_start_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, input_start_stop_step_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    params.is_start_dynamic = true;
+    params.is_step_dynamic = true;
+    params.is_stop_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, axes_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_axes_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, input_axes_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    params.is_axes_dynamic = true;
+    this->RunAllTestCasesForParams(params);
+}
+
+TYPED_TEST(SliceTest, input_step_axes_dynamic) {
+    SliceTestParams params;
+    this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData<TypeParam>(params);
+    params.is_input_dynamic = true;
+    params.is_step_dynamic = true;
+    params.is_axes_dynamic = true;
     this->RunAllTestCasesForParams(params);
 }
 
@@ -140,11 +285,11 @@ TYPED_TEST(SliceTest, bfyx_negative_step) {
     params.input = this->template AllocateTensor<TypeParam>(
         input_shape, format::bfyx, helpers::GenInput<TypeParam>(input_shape));
     params.start = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 5, 90 });
+        ov::PartialShape{ 4 }, format::bfyx, { 0, 1, 5, 90 });
     params.stop = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 0, 0, 10 });
+        ov::PartialShape{ 4 }, format::bfyx, { 1, 0, 0, 10 });
     params.step = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, -1, -1, -10 });
+        ov::PartialShape{ 4 }, format::bfyx, { 1, -1, -1, -10 });
     params.wanted_output = this->template AllocateTensor<TypeParam>(
         ov::PartialShape{ 1, 1, 5, 8 }, format::bfyx, { 
             1789, 1779, 1769, 1759, 1749, 1739, 1729, 1719,
@@ -163,17 +308,26 @@ TYPED_TEST(SliceTest, bfzyx) {
     params.input = this->template AllocateTensor<TypeParam>(
         input_shape, format::bfzyx, helpers::GenInput<TypeParam>(input_shape));
     params.start = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 0, 0, 0, 0, 0 });
+        ov::PartialShape{ 5 }, format::bfzyx, { 0, 0, 0, 0, 0 });
     params.stop = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 2, 2, 2, 2 });
+        ov::PartialShape{ 5 }, format::bfzyx, { 1, 2, 2, 2, 2 });
     params.step = this->template AllocateTensor<int64_t>(
-        ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 1, 1, 1, 1 });
+        ov::PartialShape{ 5 }, format::bfzyx, { 1, 1, 1, 1, 1 });
     params.wanted_output = this->template AllocateTensor<TypeParam>(
         ov::PartialShape{ 1, 2, 2, 2, 2 }, format::bfzyx, { 
             0,   1,   5,   6,   60,  61,  65,  66,
             600, 601, 605, 606, 660, 661, 665, 666
         });
 
+    params.is_caching_test = true;
+
+    this->RunAllTestCasesForParams(params);
+    params.is_input_dynamic = true;
+    params.is_start_dynamic = true;
+    params.is_step_dynamic = true;
+    params.is_stop_dynamic = true;
+    params.is_axes_dynamic = true;
+
     this->RunAllTestCasesForParams(params);
 }
 

From 50e503892755fcb00d1e48b6be82dd863fa718f0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Mar 2024 08:55:51 +0000
Subject: [PATCH 11/14] Bump awalsh128/cache-apt-pkgs-action from 1.4.1 to
 1.4.2 (#23225)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[awalsh128/cache-apt-pkgs-action](https://github.com/awalsh128/cache-apt-pkgs-action)
from 1.4.1 to 1.4.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/awalsh128/cache-apt-pkgs-action/releases">awalsh128/cache-apt-pkgs-action's
releases</a>.</em></p>
<blockquote>
<h2>v1.4.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Add arm64 binary by <a
href="https://github.com/koppor"><code>@​koppor</code></a> in <a
href="https://redirect.github.com/awalsh128/cache-apt-pkgs-action/pull/127">awalsh128/cache-apt-pkgs-action#127</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/koppor"><code>@​koppor</code></a> made
their first contribution in <a
href="https://redirect.github.com/awalsh128/cache-apt-pkgs-action/pull/127">awalsh128/cache-apt-pkgs-action#127</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1...v1.4.2">https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1...v1.4.2</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/awalsh128/cache-apt-pkgs-action/commit/a6c3917cc929dd0345bfb2d3feaf9101823370ad"><code>a6c3917</code></a>
Add arm64 binary (<a
href="https://redirect.github.com/awalsh128/cache-apt-pkgs-action/issues/127">#127</a>)</li>
<li>See full diff in <a
href="https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1.4.1...v1.4.2">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=awalsh128/cache-apt-pkgs-action&package-manager=github_actions&previous-version=1.4.1&new-version=1.4.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_doc.yml     | 2 +-
 .github/workflows/code_snippets.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml
index 5fc223d9746bdf..fe52646914a40b 100644
--- a/.github/workflows/build_doc.yml
+++ b/.github/workflows/build_doc.yml
@@ -21,7 +21,7 @@ jobs:
           lfs: 'true'
 
       - name: Install apt-get dependencies
-        uses: awalsh128/cache-apt-pkgs-action@v1.4.1
+        uses: awalsh128/cache-apt-pkgs-action@v1.4.2
         with:
           packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9
           version: 3.0
diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml
index 5181e26f378da0..856f85afa29961 100644
--- a/.github/workflows/code_snippets.yml
+++ b/.github/workflows/code_snippets.yml
@@ -30,7 +30,7 @@ jobs:
           submodules: 'true'
 
       - name: Install OpenCL
-        uses: awalsh128/cache-apt-pkgs-action@v1.4.1
+        uses: awalsh128/cache-apt-pkgs-action@v1.4.2
         if: runner.os == 'Linux'
         with:
           packages: ocl-icd-opencl-dev opencl-headers

From caf9148bc8c3530cd8b6d4711f79f1c329644815 Mon Sep 17 00:00:00 2001
From: Pawel Raasz <pawel.raasz@intel.com>
Date: Mon, 4 Mar 2024 10:50:54 +0100
Subject: [PATCH 12/14] [TEMPLATE] GridSample in template plugin supports more
 precisions (#23052)

### Details:
 - Add more precisions support for `GridSample` in Template plugin.

### Tickets:
 - [CVS-133057](https://jira.devtools.intel.com/browse/CVS-133057)
---
 .../openvino/reference/grid_sample.hpp        |   4 +-
 .../template/backend/ops/grid_sample.cpp      |  60 ++-
 .../functional/op_reference/grid_sample.cpp   | 478 ++++++++++--------
 3 files changed, 308 insertions(+), 234 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/grid_sample.hpp b/src/core/reference/include/openvino/reference/grid_sample.hpp
index 6c765881e536db..7daffa1011f373 100644
--- a/src/core/reference/include/openvino/reference/grid_sample.hpp
+++ b/src/core/reference/include/openvino/reference/grid_sample.hpp
@@ -141,8 +141,8 @@ DATA_ET bilinear(const DATA_ET* data,
     const auto x_d = denormalize(x_n, data_shape[3]);
     const auto y_topleft = std::floor(y_d);
     const auto x_topleft = std::floor(x_d);
-    const auto dy = y_d - y_topleft;
-    const auto dx = x_d - x_topleft;
+    const auto dy = static_cast<DATA_ET>(y_d - y_topleft);
+    const auto dx = static_cast<DATA_ET>(x_d - x_topleft);
     const auto v00 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft), static_cast<long>(x_topleft));
     const auto v01 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft), static_cast<long>(x_topleft + 1));
     const auto v10 = get_padded(data, data_shape, n, c, static_cast<long>(y_topleft + 1), static_cast<long>(x_topleft));
diff --git a/src/plugins/template/backend/ops/grid_sample.cpp b/src/plugins/template/backend/ops/grid_sample.cpp
index f47dba333f3c4e..1ea86f9c0ec03b 100644
--- a/src/plugins/template/backend/ops/grid_sample.cpp
+++ b/src/plugins/template/backend/ops/grid_sample.cpp
@@ -6,18 +6,48 @@
 
 #include "evaluate_node.hpp"
 
-template <ov::element::Type_t T>
+template <ov::element::Type_t DATA_ET>
 bool evaluate(const std::shared_ptr<ov::op::v9::GridSample>& op,
               ov::TensorVector& outputs,
               const ov::TensorVector& inputs) {
-    using ET = typename ov::element_type_traits<T>::value_type;
+    using DT = ov::fundamental_type_for<DATA_ET>;
     const auto& attributes = op->get_attributes();
-    ov::element::Type grid_et = op->get_input_element_type(1);
-    switch (grid_et) {
+
+    switch (op->get_input_element_type(1)) {
+    case ov::element::f16:
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<const DT>(),
+                                   inputs[1].data<const ov::fundamental_type_for<ov::element::f16>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
+    case ov::element::bf16:
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<const DT>(),
+                                   inputs[1].data<const ov::fundamental_type_for<ov::element::bf16>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
     case ov::element::f32:
-        ov::reference::grid_sample(outputs[0].data<ET>(),
-                                   inputs[0].data<ET>(),
-                                   inputs[1].data<float>(),
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<const DT>(),
+                                   inputs[1].data<const ov::fundamental_type_for<ov::element::f32>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
+    case ov::element::f64:
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<const DT>(),
+                                   inputs[1].data<const ov::fundamental_type_for<ov::element::f64>>(),
                                    inputs[0].get_shape(),
                                    inputs[1].get_shape(),
                                    attributes.align_corners,
@@ -34,23 +64,17 @@ template <>
 bool evaluate_node<ov::op::v9::GridSample>(std::shared_ptr<ov::Node> node,
                                            ov::TensorVector& outputs,
                                            const ov::TensorVector& inputs) {
-    auto element_type = node->get_output_element_type(0);
-    if (ov::is_type<ov::op::v1::Select>(node) || ov::is_type<ov::op::util::BinaryElementwiseComparison>(node))
-        element_type = node->get_input_element_type(1);
-
-    switch (element_type) {
+    switch (node->get_output_element_type(0)) {
     case ov::element::boolean:
         return evaluate<ov::element::boolean>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::bf16:
         return evaluate<ov::element::bf16>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::f16:
         return evaluate<ov::element::f16>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::f64:
-        return evaluate<ov::element::f64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::f32:
         return evaluate<ov::element::f32>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::i4:
-        return evaluate<ov::element::i4>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
+    case ov::element::f64:
+        return evaluate<ov::element::f64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i8:
         return evaluate<ov::element::i8>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i16:
@@ -59,10 +83,6 @@ bool evaluate_node<ov::op::v9::GridSample>(std::shared_ptr<ov::Node> node,
         return evaluate<ov::element::i32>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i64:
         return evaluate<ov::element::i64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::u1:
-        return evaluate<ov::element::u1>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::u4:
-        return evaluate<ov::element::u4>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::u8:
         return evaluate<ov::element::u8>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::u16:
diff --git a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
index 1923fe011a9f6e..df4f7e5a34845b 100644
--- a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
+++ b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
@@ -65,24 +65,33 @@ constexpr auto GS_ZEROS{op::v9::GridSample::PaddingMode::ZEROS};
 constexpr std::array<op::v9::GridSample::PaddingMode, 3> padding_modes{GS_ZEROS, GS_BORDER, GS_REFLECTION};
 constexpr std::array<bool, 2> align_corners_modes{false, true};
 
+std::string param_types_str(const element::Type& data_et, const element::Type& grid_et) {
+    std::stringstream types;
+    types << "_data_et_" << data_et << "_grid_et_" << grid_et;
+    return types.str();
+}
+
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateNearestParamsOddDimensionsInnerGrids() {
     std::vector<GridSampleParams> params;
 
     reference_tests::Tensor data_odd_dims{{1, 1, 3, 5},
-                                          element::f32,
-                                          std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
-    reference_tests::Tensor grid_inner{
-        {1, 3, 4, 2},
-        element::f32,
-        std::vector<float>{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5,
-                           0.5,  -0.5, 0.5,  0.5, -1., -1.,  -1., 1.,  1.,   -1.,  1.,   1.}};
-    reference_tests::Tensor output{{1, 1, 3, 4},
-                                   element::f32,
-                                   std::vector<float>{8, 8, 8, 8, 2, 12, 4, 14, 1, 11, 5, 15}};
+                                          DATA_ET,
+                                          std::vector<DT>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
+    reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector<GT>{-0.1, -0.1, -0.1, 0.1,  0.1,  -0.1,
+                                                                              0.1,  0.1,  -0.5, -0.5, -0.5, 0.5,
+                                                                              0.5,  -0.5, 0.5,  0.5,  -1.,  -1.,
+                                                                              -1.,  1.,   1.,   -1.,  1.,   1.}};
+    reference_tests::Tensor output{{1, 1, 3, 4}, DATA_ET, std::vector<DT>{8, 8, 8, 8, 2, 12, 4, 14, 1, 11, 5, 15}};
+
     for (const auto& padding : padding_modes) {
         for (const auto align : align_corners_modes) {
             std::stringstream name;
             name << "nearest_" << padding << (align ? "_align" : "_noalign") << "_odd_dims_inner";
+            name << param_types_str(DATA_ET, GRID_ET);
             params.emplace_back(data_odd_dims,
                                 grid_inner,
                                 op::v9::GridSample::Attributes{align, GS_NEAREST, padding},
@@ -93,78 +102,83 @@ std::vector<GridSampleParams> generateNearestParamsOddDimensionsInnerGrids() {
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateNearestParamsOddDimensionsOuterGrids() {
     std::vector<GridSampleParams> params;
     reference_tests::Tensor data_odd_dims{{1, 1, 3, 5},
-                                          element::f32,
-                                          std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
+                                          DATA_ET,
+                                          std::vector<DT>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
     reference_tests::Tensor grid_outer{
         {1, 1, 7, 2},
-        element::f32,
-        std::vector<float>{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}};
+        GRID_ET,
+        std::vector<GT>{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}};
+
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
 
     params.emplace_back(data_odd_dims,
                         grid_outer,
                         op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS},
-                        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{0, 0, 0, 0, 0, 0, 0}},
-                        "nearest_zeros_noalign_odd_dims_outer");
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{0, 0, 0, 0, 0, 0, 0}},
+                        "nearest_zeros_noalign_odd_dims_outer" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_outer,
                         op::v9::GridSample::Attributes{true, GS_NEAREST, GS_ZEROS},
-                        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{0, 0, 0, 0, 0, 0, 0}},
-                        "nearest_zeros_align_odd_dims_outer");
+                        reference_tests::Tensor{{1, 1, 1, 7}, GRID_ET, std::vector<GT>{0, 0, 0, 0, 0, 0, 0}},
+                        "nearest_zeros_align_odd_dims_outer" + types_str);
 
-    params.emplace_back(
-        data_odd_dims,
-        grid_outer,
-        op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{1, 11, 11, 14, 15, 10, 5}},
-        "nearest_border_noalign_odd_dims_outer");
+    params.emplace_back(data_odd_dims,
+                        grid_outer,
+                        op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{1, 11, 11, 14, 15, 10, 5}},
+                        "nearest_border_noalign_odd_dims_outer" + types_str);
 
-    params.emplace_back(
-        data_odd_dims,
-        grid_outer,
-        op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{1, 6, 11, 14, 15, 10, 5}},
-        "nearest_border_align_odd_dims_outer");
+    params.emplace_back(data_odd_dims,
+                        grid_outer,
+                        op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{1, 6, 11, 14, 15, 10, 5}},
+                        "nearest_border_align_odd_dims_outer" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_outer,
                         op::v9::GridSample::Attributes{false, GS_NEAREST, GS_REFLECTION},
-                        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{8, 14, 1, 4, 14, 6, 5}},
-                        "nearest_reflection_noalign_odd_dims_outer");
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{8, 14, 1, 4, 14, 6, 5}},
+                        "nearest_reflection_noalign_odd_dims_outer" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_outer,
                         op::v9::GridSample::Attributes{true, GS_NEAREST, GS_REFLECTION},
-                        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{8, 9, 6, 4, 14, 6, 4}},
-                        "nearest_reflection_align_odd_dims_outer");
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{8, 9, 6, 4, 14, 6, 4}},
+                        "nearest_reflection_align_odd_dims_outer" + types_str);
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateNearestParamsEvenDimensions() {
     std::vector<GridSampleParams> params;
-    reference_tests::Tensor data_even_dims{
-        {1, 1, 4, 6},
-        element::f32,
-        std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}};
+    reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector<DT>{1,  2,  3,  4,  5,  6,  7,  8,
+                                                                                  9,  10, 11, 12, 13, 14, 15, 16,
+                                                                                  17, 18, 19, 20, 21, 22, 23, 24}};
     reference_tests::Tensor grid_inner{
         {1, 1, 8, 2},
-        element::f32,
-        std::vector<float>{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}};
+        GRID_ET,
+        std::vector<GT>{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}};
 
-    reference_tests::Tensor output_align{{1, 1, 1, 8}, element::f32, std::vector<float>{8, 14, 11, 17, 19, 6, 9, 16}};
-    reference_tests::Tensor output_noalign{{1, 1, 1, 8}, element::f32, std::vector<float>{2, 14, 5, 17, 19, 6, 9, 16}};
-    reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8},
-                                                 element::f32,
-                                                 std::vector<float>{2, 14, 5, 17, 0, 0, 9, 16}};
+    reference_tests::Tensor output_align{{1, 1, 1, 8}, DATA_ET, std::vector<DT>{8, 14, 11, 17, 19, 6, 9, 16}};
+    reference_tests::Tensor output_noalign{{1, 1, 1, 8}, DATA_ET, std::vector<DT>{2, 14, 5, 17, 19, 6, 9, 16}};
+    reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8}, DATA_ET, std::vector<DT>{2, 14, 5, 17, 0, 0, 9, 16}};
 
     for (const auto& padding : padding_modes) {
         std::stringstream name1, name2;
         name1 << "nearest_" << padding << "_noalign"
-              << "_even_dims_inner";
+              << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET);
         params.emplace_back(data_even_dims,
                             grid_inner,
                             op::v9::GridSample::Attributes{false, GS_NEAREST, padding},
@@ -172,7 +186,7 @@ std::vector<GridSampleParams> generateNearestParamsEvenDimensions() {
                             name1.str());
 
         name2 << "nearest_" << padding << "_align"
-              << "_even_dims_inner";
+              << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET);
         params.emplace_back(data_even_dims,
                             grid_inner,
                             op::v9::GridSample::Attributes{true, GS_NEAREST, padding},
@@ -183,199 +197,209 @@ std::vector<GridSampleParams> generateNearestParamsEvenDimensions() {
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateBilinearParamsOddDimensionsInnerGrids() {
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
     reference_tests::Tensor data_odd_dims{{1, 1, 3, 5},
-                                          element::f32,
-                                          std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
-    reference_tests::Tensor grid_inner{
-        {1, 3, 4, 2},
-        element::f32,
-        std::vector<float>{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5,
-                           0.5,  -0.5, 0.5,  0.5, -1., -1.,  -1., 1.,  1.,   -1.,  1.,   1.}};
+                                          DATA_ET,
+                                          std::vector<DT>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
+    reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector<GT>{-0.1, -0.1, -0.1, 0.1,  0.1,  -0.1,
+                                                                              0.1,  0.1,  -0.5, -0.5, -0.5, 0.5,
+                                                                              0.5,  -0.5, 0.5,  0.5,  -1.,  -1.,
+                                                                              -1.,  1.,   1.,   -1.,  1.,   1.}};
 
     reference_tests::Tensor output_align{{1, 1, 3, 4},
-                                         element::f32,
-                                         std::vector<float>{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}};
+                                         DATA_ET,
+                                         std::vector<DT>{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}};
     reference_tests::Tensor output_noalign{{1, 1, 3, 4},
-                                           element::f32,
-                                           std::vector<float>{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}};
+                                           DATA_ET,
+                                           std::vector<DT>{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}};
     reference_tests::Tensor output_zeros_noalign{
         {1, 1, 3, 4},
-        element::f32,
-        std::vector<float>{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}};
+        DATA_ET,
+        std::vector<DT>{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}};
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS},
                         output_zeros_noalign,
-                        "bilinear_zeros_noalign_odd_dims_inner");
+                        "bilinear_zeros_noalign_odd_dims_inner" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS},
                         output_align,
-                        "bilinear_zeros_align_odd_dims_inner");
+                        "bilinear_zeros_align_odd_dims_inner" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER},
                         output_noalign,
-                        "bilinear_border_noalign_odd_dims_inner");
+                        "bilinear_border_noalign_odd_dims_inner" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER},
                         output_align,
-                        "bilinear_border_align_odd_dims_inner");
+                        "bilinear_border_align_odd_dims_inner" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION},
                         output_noalign,
-                        "bilinear_reflection_noalign_odd_dims_inner");
+                        "bilinear_reflection_noalign_odd_dims_inner" + types_str);
 
     params.emplace_back(data_odd_dims,
                         grid_inner,
                         op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION},
                         output_align,
-                        "bilinear_reflection_align_odd_dims_inner");
+                        "bilinear_reflection_align_odd_dims_inner" + types_str);
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateBilinearParamsOddDimensionsOuterGrids() {
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
 
     reference_tests::Tensor data_odd_dims{{1, 1, 3, 5},
-                                          element::f32,
-                                          std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
+                                          DATA_ET,
+                                          std::vector<DT>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
 
     reference_tests::Tensor grid_outer{
         {1, 1, 7, 2},
-        element::f32,
-        std::vector<float>{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}};
+        GRID_ET,
+        std::vector<GT>{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}};
 
     params.emplace_back(data_odd_dims,
                         grid_outer,
                         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS},
-                        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{0, 0, 0, 0, 0, 0, 0}},
-                        "bilinear_zeros_noalign_odd_dims_outer");
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{0, 0, 0, 0, 0, 0, 0}},
+                        "bilinear_zeros_noalign_odd_dims_outer" + types_str);
 
-    params.emplace_back(
-        data_odd_dims,
-        grid_outer,
-        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{0, 0, 0, 0, 0, 0, 1.9880099}},
-        "bilinear_zeros_align_odd_dims_outer");
+    params.emplace_back(data_odd_dims,
+                        grid_outer,
+                        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{0, 0, 0, 0, 0, 0, 1.9880099}},
+                        "bilinear_zeros_align_odd_dims_outer" + types_str);
 
     params.emplace_back(
         data_odd_dims,
         grid_outer,
         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{1, 8.775, 11, 14.25, 15, 8.725, 5}},
-        "bilinear_border_noalign_odd_dims_outer");
+        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{1, 8.775, 11, 14.25, 15, 8.725, 5}},
+        "bilinear_border_noalign_odd_dims_outer" + types_str);
 
-    params.emplace_back(
-        data_odd_dims,
-        grid_outer,
-        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{1, 7.85, 11, 14, 15, 9.15, 5}},
-        "bilinear_border_align_odd_dims_outer");
+    params.emplace_back(data_odd_dims,
+                        grid_outer,
+                        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{1, 7.85, 11, 14, 15, 9.15, 5}},
+                        "bilinear_border_align_odd_dims_outer" + types_str);
 
     params.emplace_back(
         data_odd_dims,
         grid_outer,
         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION},
         reference_tests::Tensor{{1, 1, 1, 7},
-                                element::f32,
-                                std::vector<float>{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}},
-        "bilinear_reflection_noalign_odd_dims_outer");
+                                DATA_ET,
+                                std::vector<DT>{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}},
+        "bilinear_reflection_noalign_odd_dims_outer" + types_str);
 
     params.emplace_back(
         data_odd_dims,
         grid_outer,
         op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION},
         reference_tests::Tensor{{1, 1, 1, 7},
-                                element::f32,
-                                std::vector<float>{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}},
-        "bilinear_reflection_align_odd_dims_outer");
+                                DATA_ET,
+                                std::vector<DT>{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}},
+        "bilinear_reflection_align_odd_dims_outer" + types_str);
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateBilinearParamsEvenDimensions() {
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
-    reference_tests::Tensor data_even_dims{
-        {1, 1, 4, 6},
-        element::f32,
-        std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}};
+    reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector<DT>{1,  2,  3,  4,  5,  6,  7,  8,
+                                                                                  9,  10, 11, 12, 13, 14, 15, 16,
+                                                                                  17, 18, 19, 20, 21, 22, 23, 24}};
     reference_tests::Tensor grid_inner{
         {1, 1, 8, 2},
-        element::f32,
-        std::vector<float>{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}};
+        GRID_ET,
+        std::vector<GT>{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}};
 
     params.emplace_back(
         data_even_dims,
         grid_inner,
         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{5, 17, 8, 20, 4.75, 1.5, 11, 14}},
-        "bilinear_zeros_noalign_even_dims_inner");
-
-    params.emplace_back(data_even_dims,
-                        grid_inner,
-                        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS},
-                        reference_tests::Tensor{{1, 1, 1, 7},
-                                                element::f32,
-                                                std::vector<float>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
-                        "bilinear_zeros_align_even_dims_inner");
+        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{5, 17, 8, 20, 4.75, 1.5, 11, 14}},
+        "bilinear_zeros_noalign_even_dims_inner" + types_str);
 
     params.emplace_back(
         data_even_dims,
         grid_inner,
-        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{5, 17, 8, 20, 19, 6, 11, 14}},
-        "bilinear_border_noalign_even_dims_inner");
+        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS},
+        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
+        "bilinear_zeros_align_even_dims_inner" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid_inner,
-                        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER},
-                        reference_tests::Tensor{{1, 1, 1, 7},
-                                                element::f32,
-                                                std::vector<float>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
-                        "bilinear_border_align_even_dims_inner");
+                        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{5, 17, 8, 20, 19, 6, 11, 14}},
+                        "bilinear_border_noalign_even_dims_inner" + types_str);
 
     params.emplace_back(
         data_even_dims,
         grid_inner,
-        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION},
-        reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector<float>{5, 17, 8, 20, 19, 6, 11, 14}},
-        "bilinear_reflection_noalign_even_dims_inner");
+        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER},
+        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
+        "bilinear_border_align_even_dims_inner" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid_inner,
-                        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION},
-                        reference_tests::Tensor{{1, 1, 1, 7},
-                                                element::f32,
-                                                std::vector<float>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
-                        "bilinear_reflection_align_even_dims_inner");
+                        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION},
+                        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{5, 17, 8, 20, 19, 6, 11, 14}},
+                        "bilinear_reflection_noalign_even_dims_inner" + types_str);
+
+    params.emplace_back(
+        data_even_dims,
+        grid_inner,
+        op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION},
+        reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector<DT>{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}},
+        "bilinear_reflection_align_even_dims_inner" + types_str);
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET, class DT = ov::fundamental_type_for<DATA_ET>>
 std::vector<GridSampleParams> generateBicubicParams() {
+    constexpr auto GRID_ET = ov::element::Type_t::f32;
+    using GT = ov::fundamental_type_for<GRID_ET>;
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
 
     // clang-format off
-    reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, element::f32,
-        std::vector<float>{ 1, 1, 1, 1, 1, 1, 1,
+    reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, DATA_ET,
+            std::vector<DT>{1, 1, 1, 1, 1, 1, 1,
                             1, 2, 2, 2, 2, 2, 1,
                             1, 2, 3, 5, 3, 2, 1,
                             1, 2, 5, 9, 5, 2, 1}};
     reference_tests::Tensor grid{
         {1, 4, 4, 2},
-        element::f32,
-        std::vector<float>{ -0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1,
+        GRID_ET,
+            std::vector<GT>{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1,
                             -0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5,
                             -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9,
                             -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}};
@@ -384,85 +408,90 @@ std::vector<GridSampleParams> generateBicubicParams() {
                         grid,
                         op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{2.6663566, 3.527928,   2.6663566,  3.527928,
-                                                                   1.6318359, 2.7156982,  1.6318359,  2.7156982,
-                                                                   0.6378987, 0.57033366, 0.6378987,  0.57033366,
-                                                                   0,        -0.01507522, 0.25528803, 0 }},
-                        "bicubic_zeros_noalign");
+                                                DATA_ET,
+                                                std::vector<DT>{2.6663566, 3.527928,   2.6663566,  3.527928,
+                                                                1.6318359, 2.7156982,  1.6318359,  2.7156982,
+                                                                0.6378987, 0.57033366, 0.6378987,  0.57033366,
+                                                                0,        -0.01507522, 0.25528803, 0 }},
+                        "bicubic_zeros_noalign" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid,
                         op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{ 2.7887204, 3.4506166,  2.7887204, 3.4506166,
-                                                                    1.8481445, 2.7364502,  1.8481445, 2.7364502,
-                                                                    1.2367951, 1.3602872,  1.2367951, 1.3602872,
-                                                                    0,         0.00650583, 1.1182348, 0 }},
-                        "bicubic_zeros_align");
+                                                DATA_ET,
+                                                std::vector<DT>{2.7887204, 3.4506166,  2.7887204, 3.4506166,
+                                                                1.8481445, 2.7364502,  1.8481445, 2.7364502,
+                                                                1.2367951, 1.3602872,  1.2367951, 1.3602872,
+                                                                0,         0.00650583, 1.1182348, 0 }},
+                        "bicubic_zeros_align" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid,
                         op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_BORDER},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{ 2.6663566, 3.527928,   2.6663566, 3.527928,
-                                                                    1.5380859, 2.4677734,  1.5380859, 2.4677734,
-                                                                    1.0089612, 0.91871876, 1.0089612, 0.91871876,
-                                                                    1,         1,          0.8902873, 1 }},
-                        "bicubic_border_noalign");
+                                                DATA_ET,
+                                                std::vector<DT>{2.6663566, 3.527928,   2.6663566, 3.527928,
+                                                                1.5380859, 2.4677734,  1.5380859, 2.4677734,
+                                                                1.0089612, 0.91871876, 1.0089612, 0.91871876,
+                                                                1,         1,          0.8902873, 1 }},
+                        "bicubic_border_noalign" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid,
                         op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{ 2.7887204, 3.4506166, 2.7887204, 3.4506166,
-                                                                    1.8129883, 2.623291,  1.8129883, 2.623291,
-                                                                    1.0363026, 1.1486388, 1.0363026, 1.1486388,
-                                                                    1,         1.0000064, 1.0641243, 1 }},
-                        "bicubic_border_align");
+                                                DATA_ET,
+                                                std::vector<DT>{2.7887204, 3.4506166, 2.7887204, 3.4506166,
+                                                                1.8129883, 2.623291,  1.8129883, 2.623291,
+                                                                1.0363026, 1.1486388, 1.0363026, 1.1486388,
+                                                                1,         1.0000064, 1.0641243, 1 }},
+                        "bicubic_border_align" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid,
                         op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{ 2.6663566, 3.527928,  2.6663566, 3.527928,
-                                                                    1.5380859, 2.4677734, 1.5380859, 2.4677734,
-                                                                    1.0150609, 0.904375,  1.0150609, 0.904375,
-                                                                    5.48851,   0.898316,  0.8237547, 0.8125 }},
-                        "bicubic_reflection_noalign");
+                                                DATA_ET,
+                                                std::vector<DT>{2.6663566, 3.527928,  2.6663566, 3.527928,
+                                                                1.5380859, 2.4677734, 1.5380859, 2.4677734,
+                                                                1.0150609, 0.904375,  1.0150609, 0.904375,
+                                                                5.48851,   0.898316,  0.8237547, 0.8125 }},
+                        "bicubic_reflection_noalign" + types_str);
 
     params.emplace_back(data_even_dims,
                         grid,
                         op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_REFLECTION},
                         reference_tests::Tensor{{1, 1, 4, 4},
-                                                element::f32,
-                                                std::vector<float>{ 2.7887204, 3.4506166, 2.7887204, 3.4506166,
-                                                                    1.7745361, 2.6518555, 1.7745361, 2.6518555,
-                                                                    1.0085088, 1.0307077, 1.0085088, 1.0307077,
-                                                                    5.5649586, 1.0553409, 1.0011607, 1 }},
-                        "bicubic_reflection_align");
+                                                DATA_ET,
+                                                std::vector<DT>{2.7887204, 3.4506166, 2.7887204, 3.4506166,
+                                                                1.7745361, 2.6518555, 1.7745361, 2.6518555,
+                                                                1.0085088, 1.0307077, 1.0085088, 1.0307077,
+                                                                5.5649586, 1.0553409, 1.0011607, 1 }},
+                        "bicubic_reflection_align" + types_str);
     // clang-format on
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateBicubicBatchesParams() {
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
 
     reference_tests::Tensor data{{2, 2, 4, 3},
-                                 element::f32,
-                                 std::vector<float>{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
-                                                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                                                    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}};
-    reference_tests::Tensor grid{{2, 2, 4, 2},
-                                 element::f32,
-                                 std::vector<float>{-0.1, -0.1, -0.1,  0.1, 0.1,  -0.1,  0.1,   0.1,   -0.5, -0.5, -0.5,
-                                                    0.5,  0.5,  -0.5,  0.5, 0.5,  -0.9,  -0.9,  -0.9,  0.9,  0.9,  -0.9,
-                                                    0.9,  0.9,  -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21,   37}};
+                                 DATA_ET,
+                                 std::vector<DT>{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
+                                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                                                 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}};
+    reference_tests::Tensor grid{
+        {2, 2, 4, 2},
+        GRID_ET,
+        std::vector<GT>{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5,  -0.5, -0.5, 0.5,   0.5,   -0.5,  0.5, 0.5,
+                        -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9, -1.75, 0.7,  1.33, -1.11, 0.965, 1.007, 21,  37}};
 
     params.emplace_back(
         data,
@@ -470,12 +499,12 @@ std::vector<GridSampleParams> generateBicubicBatchesParams() {
         op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER},
         reference_tests::Tensor{
             {2, 2, 2, 4},
-            element::f32,
-            std::vector<float>{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344,  4.5976562, 9.589844,
-                               18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844,
-                               25.415281, 33.735218, 27.26478,  35.58472,  32.884,    26.852259, 35.996872, 36.,
-                               37.41528,  45.735218, 39.264782, 47.58472,  44.884,    38.852257, 47.996872, 48.}},
-        "bicubic_border_align_batches");
+            DATA_ET,
+            std::vector<DT>{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344,  4.5976562, 9.589844,
+                            18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844,
+                            25.415281, 33.735218, 27.26478,  35.58472,  32.884,    26.852259, 35.996872, 36.,
+                            37.41528,  45.735218, 39.264782, 47.58472,  44.884,    38.852257, 47.996872, 48.}},
+        "bicubic_border_align_batches" + types_str);
 
     params.emplace_back(
         data,
@@ -483,76 +512,101 @@ std::vector<GridSampleParams> generateBicubicBatchesParams() {
         op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION},
         reference_tests::Tensor{
             {2, 2, 2, 4},
-            element::f32,
-            std::vector<float>{5.8170314, 6.7650313, 6.2349687, 7.182969,  2.4101562, 8.972656,  4.0273438, 10.589844,
-                               17.81703,  18.765032, 18.234968, 19.18297,  14.410156, 20.972656, 16.027344, 22.589844,
-                               24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013,  36.74749,  36.75,
-                               36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013,  48.74749,  48.75}},
-        "bicubic_reflection_noalign_batches");
+            DATA_ET,
+            std::vector<DT>{5.8170314, 6.7650313, 6.2349687, 7.182969,  2.4101562, 8.972656,  4.0273438, 10.589844,
+                            17.81703,  18.765032, 18.234968, 19.18297,  14.410156, 20.972656, 16.027344, 22.589844,
+                            24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013,  36.74749,  36.75,
+                            36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013,  48.74749,  48.75}},
+        "bicubic_reflection_noalign_batches" + types_str);
 
     return params;
 }
 
+template <ov::element::Type_t DATA_ET,
+          ov::element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateCornerCaseData1x1Params() {
+    const auto types_str = param_types_str(DATA_ET, GRID_ET);
     std::vector<GridSampleParams> params;
 
-    const reference_tests::Tensor data{{1, 1, 1, 1}, element::f32, std::vector<float>{7}};
-    const reference_tests::Tensor grid{{1, 1, 5, 2},
-                                       element::f32,
-                                       std::vector<float>{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}};
-    const reference_tests::Tensor sevens{{1, 1, 1, 5}, element::f32, std::vector<float>{7, 7, 7, 7, 7}};
+    const reference_tests::Tensor data{{1, 1, 1, 1}, DATA_ET, std::vector<DT>{7}};
+    const reference_tests::Tensor grid{{1, 1, 5, 2}, GRID_ET, std::vector<GT>{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}};
+    const reference_tests::Tensor sevens{{1, 1, 1, 5}, DATA_ET, std::vector<DT>{7, 7, 7, 7, 7}};
 
-    params.emplace_back(
-        data,
-        grid,
-        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS},
-        reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector<float>{1.75, 7, 3.5, 3.9375, 0}},
-        "bilinear_zeros_no_align_data1x1");
+    params.emplace_back(data,
+                        grid,
+                        op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS},
+                        reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector<DT>{1.75, 7, 3.5, 3.9375, 0}},
+                        "bilinear_zeros_no_align_data1x1" + types_str);
 
     params.emplace_back(data,
                         grid,
                         op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS},
-                        reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector<float>{7, 7, 7, 7, 0}},
-                        "nearest_zeros_no_align_data1x1");
+                        reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector<DT>{7, 7, 7, 7, 0}},
+                        "nearest_zeros_no_align_data1x1" + types_str);
 
     params.emplace_back(
         data,
         grid,
         op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS},
-        reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector<float>{2.4677734, 7, 4.15625, 5.4073334, 0}},
-        "bicubic_zeros_no_align_data1x1");
+        reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector<DT>{2.4677734, 7, 4.15625, 5.4073334, 0}},
+        "bicubic_zeros_no_align_data1x1" + types_str);
 
     params.emplace_back(data,
                         grid,
                         op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS},
                         sevens,
-                        "bicubic_zeros_align_data1x1");
+                        "bicubic_zeros_align_data1x1" + types_str);
 
     params.emplace_back(data,
                         grid,
                         op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION},
                         sevens,
-                        "bilinear_reflection_noalign_data1x1");
+                        "bilinear_reflection_noalign_data1x1" + types_str);
 
     params.emplace_back(data,
                         grid,
                         op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER},
                         sevens,
-                        "nearest_border_align_data1x1");
+                        "nearest_border_align_data1x1" + types_str);
 
     return params;
 }
 
 std::vector<GridSampleParams> generateGridSampleParams() {
-    std::vector<std::vector<GridSampleParams>> combo_params{generateNearestParamsOddDimensionsInnerGrids(),
-                                                            generateNearestParamsOddDimensionsOuterGrids(),
-                                                            generateNearestParamsEvenDimensions(),
-                                                            generateBilinearParamsOddDimensionsInnerGrids(),
-                                                            generateBilinearParamsOddDimensionsOuterGrids(),
-                                                            generateBilinearParamsEvenDimensions(),
-                                                            generateBicubicParams(),
-                                                            generateBicubicBatchesParams(),
-                                                            generateCornerCaseData1x1Params()};
+    using namespace ov::element;
+    std::vector<std::vector<GridSampleParams>> combo_params{generateNearestParamsOddDimensionsInnerGrids<f32, f32>(),
+                                                            generateNearestParamsOddDimensionsInnerGrids<f32, f16>(),
+                                                            generateNearestParamsOddDimensionsInnerGrids<f16, f32>(),
+
+                                                            generateNearestParamsEvenDimensions<f32, f32>(),
+                                                            generateNearestParamsEvenDimensions<f32, f16>(),
+                                                            generateNearestParamsEvenDimensions<f16, f32>(),
+
+                                                            generateBilinearParamsOddDimensionsInnerGrids<f32, f32>(),
+                                                            generateBilinearParamsOddDimensionsInnerGrids<f32, f16>(),
+                                                            generateBilinearParamsOddDimensionsInnerGrids<f16, f32>(),
+
+                                                            generateBilinearParamsOddDimensionsOuterGrids<f32, f32>(),
+                                                            generateBilinearParamsOddDimensionsOuterGrids<f32, f16>(),
+                                                            generateBilinearParamsOddDimensionsOuterGrids<f16, f32>(),
+
+                                                            generateBilinearParamsEvenDimensions<f32, f32>(),
+                                                            generateBilinearParamsEvenDimensions<f32, f16>(),
+                                                            generateBilinearParamsEvenDimensions<f16, f32>(),
+
+                                                            generateBicubicParams<f32>(),
+                                                            generateBicubicParams<f64>(),
+                                                            generateBicubicParams<f16>(),
+
+                                                            generateBicubicBatchesParams<f32, f32>(),
+                                                            generateBicubicBatchesParams<f32, f16>(),
+                                                            generateBicubicBatchesParams<f16, f32>(),
+
+                                                            generateCornerCaseData1x1Params<f32, f32>(),
+                                                            generateCornerCaseData1x1Params<f32, bf16>(),
+                                                            generateCornerCaseData1x1Params<f32, f16>()};
     std::vector<GridSampleParams> test_params;
     for (auto& params : combo_params)
         std::move(params.begin(), params.end(), std::back_inserter(test_params));

From e2a74956c0d136fcfee4d07488d437dfc3184390 Mon Sep 17 00:00:00 2001
From: Katarzyna Mitrus <katarzyna.mitrus@intel.com>
Date: Mon, 4 Mar 2024 10:53:09 +0100
Subject: [PATCH 13/14] [Ref] Concat op reference implementation code
 improvements (#23048)

### Details:
- Concat op reference implementation code improvements, removal of code
duplication
- Leftovers from PR:
https://github.com/openvinotoolkit/openvino/pull/22686
- Reduce Concat template code, pass element_type to distinguish copy
method for string
 (element::Type_t::undefined as default to keep compatibility)
- ~Corresponding update of places where reference::concat is used
(including one gpu file)~ (reverted)

(*First approach
(https://github.com/openvinotoolkit/openvino/pull/23048/commits/6308f9d1cfe7e8de5848f7e8e04fb2907b129066)
was to simply introduce common template to reuse the Concat reference
code, but it still results in generation of string and char
specializations of the whole template function*
*Current approach is to use the same function and choose the copy method
inside, based on the element type*)

### Tickets:
 -Related to 131838
---
 .../include/openvino/reference/concat.hpp     | 11 +---
 src/core/reference/src/op/concat.cpp          | 59 ++++++++-----------
 src/core/src/op/concat.cpp                    | 34 ++++-------
 3 files changed, 41 insertions(+), 63 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/concat.hpp b/src/core/reference/include/openvino/reference/concat.hpp
index b7a18ecef5d650..751226d83251f8 100644
--- a/src/core/reference/include/openvino/reference/concat.hpp
+++ b/src/core/reference/include/openvino/reference/concat.hpp
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "openvino/core/shape.hpp"
+#include "openvino/core/type/element_type.hpp"
 
 namespace ov {
 namespace reference {
@@ -15,14 +16,8 @@ void concat(const std::vector<const char*>& args,
             const std::vector<Shape>& in_shapes,
             const Shape& out_shape,
             int64_t concatenation_axis,
-            size_t elem_size);
-
-void concat(const std::vector<const std::string*>& args,
-            std::string* out,
-            const std::vector<Shape>& in_shapes,
-            const Shape& out_shape,
-            int64_t concatenation_axis,
-            size_t);
+            size_t elem_size,
+            const ov::element::Type& elem_type = ov::element::Type_t::undefined);
 
 }  // namespace reference
 }  // namespace ov
diff --git a/src/core/reference/src/op/concat.cpp b/src/core/reference/src/op/concat.cpp
index 81cdbc25e5b805..815925274ea6a5 100644
--- a/src/core/reference/src/op/concat.cpp
+++ b/src/core/reference/src/op/concat.cpp
@@ -17,6 +17,26 @@ std::vector<size_t> calculate_shape_sizes(const std::vector<Shape>& in_shapes) {
     });
     return sizes;
 }
+
+void copy_elements(const char* arg,
+                   char* out,
+                   size_t in_offset,
+                   size_t out_offset,
+                   size_t num_of_elements,
+                   size_t elem_size) {
+    std::memcpy(out + (out_offset * elem_size), arg + (in_offset * elem_size), num_of_elements * elem_size);
+}
+
+void copy_string_elements(const char* arg,
+                          char* out,
+                          size_t in_offset,
+                          size_t out_offset,
+                          size_t num_of_elements,
+                          size_t) {
+    const auto src_begin = std::next(reinterpret_cast<const std::string*>(arg), in_offset);
+    const auto out_ptr = std::next(reinterpret_cast<std::string*>(out), out_offset);
+    std::copy_n(src_begin, num_of_elements, out_ptr);
+}
 }  // namespace
 
 void concat(const std::vector<const char*>& args,
@@ -24,38 +44,12 @@ void concat(const std::vector<const char*>& args,
             const std::vector<Shape>& in_shapes,
             const Shape& out_shape,
             int64_t concatenation_axis,
-            size_t elem_size) {
-    size_t steps = 1;
-    for (int i = 0; i < concatenation_axis; ++i) {
-        steps *= out_shape[i];
-    }
-
+            size_t elem_size,
+            const ov::element::Type& elem_type) {
+    const auto steps = shape_size(out_shape.begin(), out_shape.begin() + concatenation_axis);
     const auto& shape_sizes = calculate_shape_sizes(in_shapes);
 
-    size_t out_offset = 0;
-    for (size_t step = 0; step < steps; ++step) {
-        for (size_t in_index = 0; in_index < args.size(); ++in_index) {
-            const size_t size = shape_sizes[in_index] / steps;
-            const size_t in_offset = step * size;
-
-            std::memcpy(&out[out_offset * elem_size], &args[in_index][in_offset * elem_size], size * elem_size);
-
-            out_offset += size;
-        }
-    }
-}
-
-void concat(const std::vector<const std::string*>& args,
-            std::string* out,
-            const std::vector<Shape>& in_shapes,
-            const Shape& out_shape,
-            int64_t concatenation_axis,
-            size_t) {
-    size_t steps = 1;
-    for (int i = 0; i < concatenation_axis; ++i) {
-        steps *= out_shape[i];
-    }
-    const auto& shape_sizes = calculate_shape_sizes(in_shapes);
+    const auto copy_func = elem_type == ov::element::string ? copy_string_elements : copy_elements;
 
     size_t out_offset = 0;
     for (size_t step = 0; step < steps; ++step) {
@@ -63,14 +57,11 @@ void concat(const std::vector<const std::string*>& args,
             const size_t size = shape_sizes[in_index] / steps;
             const size_t in_offset = step * size;
 
-            const auto src_begin = std::next(args[in_index], in_offset);
-            const auto out_ptr = std::next(out, out_offset);
-            std::copy_n(src_begin, size, out_ptr);
+            copy_func(args[in_index], out, in_offset, out_offset, size, elem_size);
 
             out_offset += size;
         }
     }
 }
-
 }  // namespace reference
 }  // namespace ov
diff --git a/src/core/src/op/concat.cpp b/src/core/src/op/concat.cpp
index 4ec7743d64167e..b670af7d4e03ea 100644
--- a/src/core/src/op/concat.cpp
+++ b/src/core/src/op/concat.cpp
@@ -52,43 +52,35 @@ std::shared_ptr<Node> Concat::clone_with_new_inputs(const OutputVector& new_args
     return std::make_shared<Concat>(new_args, m_axis);
 }
 
-template <typename T>
-void evaluate_concat(const Concat* node, TensorVector& outputs, const TensorVector& inputs) {
+bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
+    OV_OP_SCOPE(v0_Concat_evaluate);
+    OPENVINO_ASSERT(outputs.size() == 1);
+
     const auto inputs_count = inputs.size();
     std::vector<Shape> arg_shapes;
     std::vector<PartialShape> input_shapes;
+    std::vector<const char*> arg_bufs;
     arg_shapes.reserve(inputs_count);
     input_shapes.reserve(inputs_count);
+    arg_bufs.reserve(inputs_count);
 
-    std::vector<const T*> arg_bufs(inputs_count);
-    auto arg_buf = arg_bufs.begin();
     for (auto& input : inputs) {
-        *arg_buf = static_cast<const T*>(input.data());
-        ++arg_buf;
         const auto& input_shape = input.get_shape();
         arg_shapes.emplace_back(input_shape);
         input_shapes.emplace_back(input_shape);
+        arg_bufs.emplace_back(static_cast<const char*>(input.data()));
     }
 
-    const auto& out_shape = shape_infer(node, input_shapes).front().to_shape();
+    const auto& out_shape = shape_infer(this, input_shapes).front().to_shape();
     outputs.front().set_shape(out_shape);
+    const auto elem_type = outputs.front().get_element_type();
     reference::concat(arg_bufs,
-                      static_cast<T*>(outputs.front().data()),
+                      static_cast<char*>(outputs.front().data()),
                       arg_shapes,
                       out_shape,
-                      ov::util::normalize(node->get_axis(), out_shape.size()),
-                      outputs.front().get_element_type().size());
-}
-
-bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
-    OV_OP_SCOPE(v0_Concat_evaluate);
-    OPENVINO_ASSERT(outputs.size() == 1);
-
-    if (outputs.front().get_element_type() == ov::element::string) {
-        evaluate_concat<std::string>(this, outputs, inputs);
-    } else {
-        evaluate_concat<char>(this, outputs, inputs);
-    }
+                      ov::util::normalize(this->get_axis(), out_shape.size()),
+                      elem_type.size(),
+                      elem_type);
 
     return true;
 }

From e6dc08651284e9798bd5bb3383136563e3c401ed Mon Sep 17 00:00:00 2001
From: Alicja Miloszewska <alicja.miloszewska@intel.com>
Date: Mon, 4 Mar 2024 11:06:19 +0100
Subject: [PATCH 14/14] [OV JS] Fix typescript method definition (#23185)

### Details:
 - Move `getAvailableDevices` to `Core` definition
---
 src/bindings/js/node/lib/addon.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts
index ae75f479e655e1..7f4f9fd917cd13 100644
--- a/src/bindings/js/node/lib/addon.ts
+++ b/src/bindings/js/node/lib/addon.ts
@@ -36,6 +36,7 @@ interface Core {
     modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Promise<Model>;
   readModelSync(modelPath: string, weightsPath?: string): Model;
   readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model;
+  getAvailableDevices(): string[];
 }
 interface CoreConstructor {
   new(): Core;
@@ -81,7 +82,6 @@ interface InferRequest {
   inferAsync(inputData: { [inputName: string]: Tensor}
     | Tensor[] ): Promise<{ [outputName: string] : Tensor}>;
   getCompiledModel(): CompiledModel;
-  getAvailableDevices(): string[];
 }
 
 type Dimension = number | [number, number];