From 7b46de73866698ab1f15476d05e39348640f626b Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Sun, 3 Mar 2024 00:08:25 +0400 Subject: [PATCH 01/14] [TF FE] Switch on layer test for LookupTableFindV2 with string key (#23197) **Details:** Switch on layer test for LookupTableFindV2 with string key. Merge after https://github.com/openvinotoolkit/openvino_tokenizers/pull/50 **Ticket:** 132669 --------- Signed-off-by: Kazantsev, Roman --- .../test_tf_LookupTableFind.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py index 6ff6daeda99045..3c585ff65cec88 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -35,6 +37,8 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v self.keys_type = keys_type self.all_keys = all_keys self.invalid_key = invalid_key + if keys_type == str: + keys_type = tf.string tf.compat.v1.reset_default_graph() # Create the graph and model with tf.compat.v1.Session() as sess: @@ -67,11 +71,10 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v dict(keys_type=np.int32, values_type=tf.string, all_keys=[20, 10, 33, -22, 44, 11], all_values=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], default_value='UNKNOWN', invalid_key=1000), - pytest.param(dict(keys_type=str, values_type=np.int64, - all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], - all_values=[200, 100, 0, -3, 10, 1], - default_value=0, invalid_key='AbraCadabra'), - marks=pytest.mark.xfail(reason="132669 - Support LookupTableFindV2 with string key")), + dict(keys_type=str, values_type=np.int64, + all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], + all_values=[200, 100, 0, -3, 10, 1], + default_value=0, invalid_key='AbraCadabra'), ] @pytest.mark.parametrize("hash_table_type", [0, 1]) @@ -81,6 +84,12 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v @pytest.mark.nightly def test_lookup_table_find(self, hash_table_type, keys_shape, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if params['keys_type'] == str and params['values_type'] == np.int64: + if platform.system() in ('Darwin') or platform.machine() in ['arm', 'armv7l', + 'aarch64', + 'arm64', + 'ARM64']: + pytest.xfail(reason='126314, 132699: Build tokenizers for ARM and MacOS') self._test(*self.create_lookup_table_find_net(hash_table_type=hash_table_type, keys_shape=keys_shape, **params), ie_device, precision, ir_version, temp_dir=temp_dir, From 54ac9947aca3697513daf8318f8da8fac9b8b60d Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Sun, 3 Mar 2024 18:57:29 +0100 Subject: [PATCH 02/14] Fix includes in dev api for conan integration (#23175) ### Details: - Fix includes in dev api for conan integration ### Tickets: - *ticket-id* --- src/inference/src/dev/make_tensor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index f28c90ccf4856a..e457b81fc0e850 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -9,6 +9,7 @@ #include "openvino/runtime/iremote_tensor.hpp" #include "openvino/runtime/properties.hpp" +#include "openvino/runtime/tensor.hpp" #ifdef PROXY_PLUGIN_ENABLED # include "openvino/proxy/plugin.hpp" #endif From 608a9981c52b4714453ad5ae5683ecf17442c6e9 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Mon, 4 Mar 2024 09:29:07 +0900 Subject: [PATCH 03/14] [GPU] update layout.compatible() to check false conditions first (#23101) ### Details: - Update layout.compatible() to check properly for the case where o(or b) axis is blocked - Add unit test case ### Tickets: - 132367 --- src/plugins/intel_gpu/src/runtime/layout.cpp | 3 ++- src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index 70e760bca41557..19e75275997ee6 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -585,7 +585,8 @@ ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format // Check a reorder is 1d along feature axis. Or feature size fits to inner block size of feature axis static inline bool check_redundant_1d_along_feature(layout const& l1, layout const& l2) { // No padding, double blocked format and different data_type - if (!l1.data_padding && !l2.data_padding && !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) && + if ((l1.get_linear_size() == l2.get_linear_size()) && !l1.data_padding && !l2.data_padding && + !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) && l2.data_type == l1.data_type && l2.count() == l1.count()) { auto l1_inner_blk = format::is_single_blocked(l1.format) ? l1.format.traits().block_sizes.at(0).second : 1; auto l2_inner_blk = format::is_single_blocked(l2.format) ? l2.format.traits().block_sizes.at(0).second : 1; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp index 1eeb065d3d2b7b..6a2c8c5a80e991 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp @@ -223,6 +223,8 @@ INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test, layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)}, true, true}, {layout{ov::PartialShape{10, 20}, data_types::f16, format::bfyx}, layout{ov::PartialShape{10, 20}, data_types::f16, format::os_iyx_osv16}, false, false}, + {layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::bfyx}, + layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::os_iyx_osv16}, false, false}, {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx}, layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::oiyx}, false, true}, {layout{ov::PartialShape{128, 10}, data_types::f16, format::bfyx}, From 9e2accb57282142716510f09d1a27767d72ff6c1 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 4 Mar 2024 09:20:45 +0400 Subject: [PATCH 04/14] Enable skipped tests (#23168) ### Tickets: - [CVS-55937](https://jira.devtools.intel.com/browse/CVS-55937) - [CVS-123427](https://jira.devtools.intel.com/browse/CVS-123427) --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 -- src/plugins/template/tests/functional/skip_tests_config.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 8d5e432d7b87de..1e8852e31e0562 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -184,8 +184,6 @@ std::vector disabledTestPatterns() { R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", // Issue: 123321 R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)", - // Issue: 123427 - R"(.*RDFTLayerTest.*SignalSize=().*)", // Issue: 123815 (Tests are sensintive to available thread count on testing machines) R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)", // Issue: 122356 diff --git a/src/plugins/template/tests/functional/skip_tests_config.cpp b/src/plugins/template/tests/functional/skip_tests_config.cpp index eac640ebfe40f3..297cba2660d11d 100644 --- a/src/plugins/template/tests/functional/skip_tests_config.cpp +++ b/src/plugins/template/tests/functional/skip_tests_config.cpp @@ -21,9 +21,6 @@ std::vector disabledTestPatterns() { // unsupported metrics R"(.*smoke_OVGetMetricPropsTest.*OVGetMetricPropsTest.*(RANGE_FOR_STREAMS|MAX_BATCH_SIZE).*)", - // CVS-55937 - R"(.*SplitLayerTest.*numSplits=30.*)", - // CVS-64094 R"(.*ReferenceLogSoftmaxLayerTest.*4.*iType=f16.*axis=.*1.*)", // CVS-64012 From 5d74236bcaa21246823dadcac87c7f8cc3f14a63 Mon Sep 17 00:00:00 2001 From: linzs148 <56420840+linzs148@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:58:18 +0800 Subject: [PATCH 05/14] [TF FE] Support complex type for Inv (#23085) **Ticket:** https://github.com/openvinotoolkit/openvino/issues/22952 --------- Co-authored-by: Roman Kazantsev --- .../tensorflow_common/src/op/inv.cpp | 40 +++++++++++++++- .../tensorflow_tests/test_tf_Inv.py | 46 ++++++++++++++++++- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/inv.cpp b/src/frontends/tensorflow_common/src/op/inv.cpp index ec2196219f5033..5af62e2a1764e4 100644 --- a/src/frontends/tensorflow_common/src/op/inv.cpp +++ b/src/frontends/tensorflow_common/src/op/inv.cpp @@ -1,10 +1,17 @@ -// Copyright (C) 2018-2023 Intel Corporation +// Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/divide.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/negative.hpp" +#include "openvino/op/unsqueeze.hpp" using namespace std; using namespace ov::op; @@ -14,9 +21,38 @@ namespace frontend { namespace tensorflow { namespace op { OutputVector translate_inv_op(const NodeContext& node) { - default_op_checks(node, 1, {"Inv"}); + default_op_checks(node, 1, {"Inv"}, true); auto x = node.get_input(0); + auto complex_type_mark = as_type_ptr(x.get_node_shared_ptr()); + if (complex_type_mark) { + x = complex_type_mark->input_value(0); + element::Type complex_part_type = complex_type_mark->get_complex_part_type(); + + auto gather_index_real = make_shared(element::i32, Shape{}, 0); + auto gather_index_imag = make_shared(element::i32, Shape{}, 1); + + auto minus_one = make_shared(element::i32, Shape{1}, -1); + + auto x_real = make_shared(x, gather_index_real, minus_one)->output(0); + auto x_imag = make_shared(x, gather_index_imag, minus_one)->output(0); + + auto scale = + make_shared(make_shared(x_real, x_real), make_shared(x_imag, x_imag)); + + auto y_real = make_shared(x_real, scale); + auto y_imag = make_shared(make_shared(x_imag), scale); + + auto real_unsqueeze = make_shared(y_real, minus_one); + auto imag_unsqueeze = make_shared(y_imag, minus_one); + + auto concat_result = make_shared(OutputVector{real_unsqueeze, imag_unsqueeze}, -1); + set_node_name(node.get_name(), concat_result); + + auto complex_result = make_shared(concat_result->output(0), complex_part_type); + return {complex_result}; + } + // prepare auxiliary one constants of the same type as the inputs auto one = create_same_type_const_scalar(x, 1); diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py index d2350f9841fb6c..875ab763ab81ce 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py @@ -41,4 +41,48 @@ def test_inv_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): self._test(*self.create_inv_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend) \ No newline at end of file + use_legacy_frontend=use_legacy_frontend) + +class TestComplexInv(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'param_real:0' in inputs_info + assert 'param_imag:0' in inputs_info + param_real_shape_1 = inputs_info['param_real:0'] + param_imag_shape_1 = inputs_info['param_imag:0'] + inputs_data = {} + inputs_data['param_real:0'] = 4 * rng.random(param_real_shape_1).astype(np.float32) - 2 + inputs_data['param_imag:0'] = 4 * rng.random(param_imag_shape_1).astype(np.float32) - 2 + return inputs_data + + def create_complex_inv_net(self, input_shape): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real') + param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag') + complex = tf.raw_ops.Complex(real=param_real, imag=param_imag) + inv = tf.raw_ops.Inv(x=complex, name="complex_inv") + real = tf.raw_ops.Real(input=inv) + img = tf.raw_ops.Imag(input=inv) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[]), + dict(input_shape=[2]), + dict(input_shape=[1, 3]), + dict(input_shape=[2, 3, 4]), + dict(input_shape=[3, 4, 5, 6]), + ] + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_inv(self, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + self._test( + *self.create_complex_inv_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) From 5c7a4bc51f3a2897b72642fd534416e97241d3cb Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Mon, 4 Mar 2024 07:31:33 +0100 Subject: [PATCH 06/14] [CPU][ARM] Enable f16 eltwise fusing in ACL (#23117) oneDNN PR: https://github.com/openvinotoolkit/oneDNN/pull/234 --- .../intel_cpu/src/dnnl_extension_utils.cpp | 3 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 28 ++-- src/plugins/intel_cpu/src/nodes/conv.cpp | 4 + .../classes/convolution.cpp | 13 ++ .../classes/convolution.hpp | 1 + .../instances/common/convolution.cpp | 149 +----------------- .../instances/x64/convolution.cpp | 133 ++++++++++++++++ src/plugins/intel_cpu/thirdparty/onednn | 2 +- 8 files changed, 167 insertions(+), 166 deletions(-) diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index f09b9c5ab2f101..6eced71f2b83fd 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -274,7 +274,8 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) { Algorithm::EltwiseAbs, Algorithm::EltwiseSqrt, Algorithm::EltwiseSoftRelu, - Algorithm::EltwiseSigmoid); + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp); #elif defined(OPENVINO_ARCH_X86_64) return one_of(alg, Algorithm::EltwiseRelu, Algorithm::EltwiseGeluErf, diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index d85c7fcead4001..5d52cfdfb0155f 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -1496,19 +1496,19 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &grap parent++; continue; } -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) { + + auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); + if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { parent++; continue; } -#endif - auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); - if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { +#if defined(OV_CPU_WITH_ACL) + if (!parentNode->getFusedWith().empty()) { parent++; continue; } +#endif if (!DnnlExtensionUtils::isUnarySupportedAsPostOp(fuseCandidate->getAlgorithm())) { parent++; @@ -1552,13 +1552,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) { parent++; continue; } -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) { - parent++; - continue; - } -#endif childNode->fuseInto(parentNode); @@ -1686,6 +1679,10 @@ static bool is_data_dependency(const std::shared_ptr &parent, */ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) { +#if !defined(OPENVINO_ARCH_X86) && !defined(OPENVINO_ARCH_X86_64) + return; +#endif + auto &graphNodes = graph.GetNodes(); auto isFusingSupported = [&](NodePtr conv, NodePtr child) { @@ -1817,11 +1814,6 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) if (mergedConv->isConstant() && !sum->isConstant()) continue; -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (mergedConv->getOriginalInputPrecisionAtPort(0) == ov::element::f16) - continue; -#endif // Disable fusing for Add with broadcasing in case of known data ranges. Add with brodcasting triggers // non-optimal code path inside Convolution node, so better to avoid fusing at all. const auto& shape1 = sum->getInputShapeAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 59721c5df76c4d..b89d3a1e21d61a 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1110,6 +1110,10 @@ std::shared_ptr Convolution::getSrcMemDesc(const dnnl::primitive_des } bool Convolution::canFuse(const NodePtr& node) const { +#if defined(OV_CPU_WITH_ACL) + if (!fusedWith.empty()) + return false; +#endif return canFuseSimpleOperation(node); } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp index 71d8f0b3e2fa14..856fa9cd151f26 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp @@ -352,6 +352,19 @@ const std::vector& CPUParams_2D() { return CPUParams_2D; } +const std::vector& CPUParams_3D() { + static const std::vector CPUParams_3D = { + //conv_sse42_3D, // not supported jit_sse42 for 3d + conv_avx2_3D, + conv_avx512_3D, + conv_avx2_3D_nspc, + conv_avx2_3D_nspc_brgconv, + conv_avx512_3D_nspc, + conv_avx512_3D_nspc_brgconv + }; + return CPUParams_3D; +} + const std::vector& CPUParams_GEMM_1D() { static const std::vector CPUParams_GEMM_1D = { conv_gemm_1D, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp index a6e5faee3e909d..db8b6ca8f943b1 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp @@ -72,6 +72,7 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface& CPUParams_1x1_1D(); const std::vector& CPUParams_1x1_2D(); const std::vector& CPUParams_2D(); + const std::vector& CPUParams_3D(); const std::vector& CPUParams_GEMM_1D(); const std::vector& CPUParams_GEMM_2D(); const std::vector& CPUParams_GEMM_3D(); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp index b6518f8e8f48f1..09f8dc14660392 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp @@ -62,20 +62,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_FP32, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_GEMM_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inShapesGemm2D()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -90,20 +76,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLaye ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_GEMM_2D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inShapesGemm2D()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Convolution (2D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( @@ -119,41 +91,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTes ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -const std::vector fusingParamsSet_dynBatch{ - emptyFusingSpec, - fusingSum, - fusingAddPerChannel, - fusingReluScaleShift -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d_dynBatch()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::ValuesIn(fusingParamsSet_dynBatch), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -168,20 +105,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUT ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - const std::vector CPUParams_2D_plain_to_blocked = { conv_sse42_plain_to_blocked_2D, conv_avx2_plain_to_blocked_2D, @@ -262,16 +185,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_reorder_Conv_2D, ConvolutionLayerCPUTest, ConvolutionLayerCPUTest::getTestCaseName); /* ============= Convolution (3D) ============= */ -const std::vector CPUParams_3D = { - //conv_sse42_3D, // not supported jit_sse42 for 3d - conv_avx2_3D, - conv_avx512_3D, - conv_avx2_3D_nspc, - conv_avx2_3D_nspc_brgconv, - conv_avx512_3D_nspc, - conv_avx512_3D_nspc_brgconv -}; - INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -281,7 +194,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest, ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(emptyFusingSpec), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); @@ -295,25 +208,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32_fusingScaleShiftAndFakeQuantizePerCh ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(fusingScaleShiftAndFakeQuantizePerChannel), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_3D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes3d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -323,25 +222,11 @@ INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest, ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(emptyFusingSpec), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_3D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes3d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - const std::vector CPUParams_3D_plain_to_blocked = { conv_avx2_plain_to_blocked_3D, conv_avx512_plain_to_blocked_3D, @@ -419,20 +304,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_empty_fusing, ConvolutionLayerCP ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_1D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes1d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Kernel_1x1 (2D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCPUTest, @@ -449,20 +320,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCP ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Convolution auto padding tests ============= */ const auto convParams_AutoPadding_2D = ::testing::Combine( diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp index 8073b67b726eb8..2d1638856386f3 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp @@ -113,6 +113,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_I8, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_GEMM_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inShapesGemm2D()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_GEMM_2D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inShapesGemm2D()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_BF16, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -169,6 +197,27 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_fusing, ConvolutionLayerCPUTe ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +const std::vector fusingParamsSet_dynBatch{ + emptyFusingSpec, + fusingSum, + fusingAddPerChannel, + fusingReluScaleShift +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d_dynBatch()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::ValuesIn(fusingParamsSet_dynBatch), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -197,6 +246,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_fusing, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + /* ============= Kernel_1x1 (1D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Combine( @@ -226,6 +303,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_1D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes1d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + /* ============= Convolution (1D) ============= */ const auto convParams_ExplicitPadding_1D = ::testing::Combine( ::testing::ValuesIn(kernels1d()), @@ -503,6 +608,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTes ConvolutionLayerCPUTest::getTestCaseName); /* ============= Convolution (GEMM 3D) ============= */ +INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_3D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_3D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_GEMM_FP32, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index b2cdc2cfdec616..0f94c0e7b94f64 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit b2cdc2cfdec61638f941ccdfb0b9dbcc27a7c333 +Subproject commit 0f94c0e7b94f64df2b94929279bbeb4f576a6a36 From 74570b748e803c04319c258a26534236c8cb02a5 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 4 Mar 2024 07:47:35 +0100 Subject: [PATCH 07/14] [Coverity] In IStreamsExecutor::Config use move instead copy (#23106) ### Details: - Fix Coverity reported issues `COPY_INSTEAD_OF_MOVE` in `ov::threading::IStreamsExecutor::Config` constructor. ### Tickets: - CID 1518201 - CID 1518243 --- .../dev_api/openvino/runtime/threading/istreams_executor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp index 7f5693f24ce4fb..b3b8be765e630a 100644 --- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp +++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp @@ -125,7 +125,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY, std::vector> streamsInfoTable = {}, bool cpuReservation = false) - : _name{name}, + : _name{std::move(name)}, _streams{streams}, _threads_per_stream{threadsPerStream}, _threadBindingType{threadBindingType}, @@ -133,7 +133,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { _threadBindingOffset{threadBindingOffset}, _threads{threads}, _thread_preferred_core_type(threadPreferredCoreType), - _streams_info_table{streamsInfoTable}, + _streams_info_table{std::move(streamsInfoTable)}, _cpu_reservation{cpuReservation} { update_executor_config(); } From a605edbd776ca4c14f4efb120403a74911ba9c27 Mon Sep 17 00:00:00 2001 From: Sun Xiaoxia Date: Mon, 4 Mar 2024 15:44:36 +0800 Subject: [PATCH 08/14] Fix performance regression of conformance tests (#22938) ### Details: fix performance regression of conformance tests Root cause is `make_default_multi_threaded()` API which was refactored in PR22414. This API is used to calculate the number of threads. **Master version**: When the test machine has both Pcore and Ecore, it recognized the machine type and will select proper cores (Pcores are selected in general machine which pcores >Ecores / 2) to create executor. **Old version**: the input parameter `_threadBindingType` of config was used to judge the type of current machine, and `_threadBindingType` is always set to default value `None` in template plugin. So hybrid core machine is regarded to core machine and physical cores (include Pcores and Ecores) are used. `_threadBindingType` is deprecated in master now. For example: ADL i9-12900K, 8 Pcore, 8Ecore. Old version: threads=16 in any cores. Master: threads=8 in Pcores. Actually, the threads calculated from `make_default_multi_threaded` of master version is the best option in normal situation which not create multi threads in app side. But in conformance test, 24 threads are created to seize CPU resource. Of cause, all cores are must be used to achieved the best performance. ### Tickets: - *CVS-131820* --------- Co-authored-by: Wanglei Shen --- .github/workflows/linux.yml | 2 +- .../src/dev/threading/istreams_executor.cpp | 24 +++-------------- .../unit/make_default_multi_threaded_test.cpp | 26 ++++++++++++++----- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 46493c55bc3ab3..0e2ee84264d6a6 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -318,7 +318,7 @@ jobs: Conformance: needs: [ Build, Smart_CI ] - timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 30 }} + timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 20 }} defaults: run: shell: bash diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp index b7151e15b74e5e..fd75cf7d8a8a5d 100644 --- a/src/inference/src/dev/threading/istreams_executor.cpp +++ b/src/inference/src/dev/threading/istreams_executor.cpp @@ -32,7 +32,7 @@ void IStreamsExecutor::Config::set_property(const ov::AnyMap& property) { if (key == ov::num_streams) { auto streams = value.as(); if (streams == ov::streams::NUMA) { - _streams = 1; + _streams = get_num_numa_nodes(); } else if (streams == ov::streams::AUTO) { // bare minimum of streams (that evenly divides available number of cores) _streams = get_default_num_streams(); @@ -114,29 +114,11 @@ IStreamsExecutor::Config IStreamsExecutor::Config::make_default_multi_threaded( return streamConfig; } - const auto numa_nodes = proc_type_table.size() > 1 ? proc_type_table.size() - 1 : proc_type_table.size(); - const bool latency_case = static_cast(streamConfig._streams) <= numa_nodes; + int num_cores = proc_type_table[0][ALL_PROC]; - // by default, do not use the hyper-threading (to minimize threads synch overheads) - int num_cores = !latency_case && numa_nodes == 1 - ? proc_type_table[0][ALL_PROC] - : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC]; - - // additional latency-case logic for hybrid processors: if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) { if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::ANY) { - // by default the latency case uses (faster) Big cores only, depending on the compute ratio - const bool big_only = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / 2); - // selecting the preferred core type - if (big_only) { - streamConfig._thread_preferred_core_type = IStreamsExecutor::Config::PreferredCoreType::BIG; - const int hyper_threading_threshold = - 2; // min #cores, for which the hyper-threading becomes useful for the latency case - // additionally selecting the #cores to use in the "Big-only" case - num_cores = (proc_type_table[0][MAIN_CORE_PROC] <= hyper_threading_threshold) - ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC] - : proc_type_table[0][MAIN_CORE_PROC]; - } + num_cores = proc_type_table[0][ALL_PROC]; } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::BIG) { num_cores = proc_type_table[0][MAIN_CORE_PROC]; } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::LITTLE) { diff --git a/src/inference/tests/unit/make_default_multi_threaded_test.cpp b/src/inference/tests/unit/make_default_multi_threaded_test.cpp index e1917394507045..40dafe9911b69b 100644 --- a/src/inference/tests/unit/make_default_multi_threaded_test.cpp +++ b/src/inference/tests/unit/make_default_multi_threaded_test.cpp @@ -51,7 +51,9 @@ MakeDefaultMultiThreadsTestCase _1sockets_streams_1 = { 1, // param[in]: the number of streams // param[out]: streams info table { - {1, 1, 6, 0, 0}, + {1, 0, 12, 0, 0}, + {0, 1, 6, 0, 0}, + {0, 3, 6, 0, 0}, }, }; @@ -74,9 +76,11 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_1 = { }, 1, { - {1, 0, 36, -1, -1}, + {1, 0, 72, -1, -1}, {0, 1, 18, 0, 0}, {0, 1, 18, 1, 1}, + {0, 3, 18, 0, 0}, + {0, 3, 18, 1, 1}, }, }; @@ -88,8 +92,10 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_4 = { }, 4, { - {2, 1, 9, 0, 0}, - {2, 1, 9, 1, 1}, + {1, 1, 18, 0, 0}, + {1, 1, 18, 1, 1}, + {1, 3, 18, 0, 0}, + {1, 3, 18, 1, 1}, }, }; @@ -99,7 +105,10 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_1 = { }, 1, { - {1, 1, 8, 0, 0}, + {1, 0, 24, 0, 0}, + {0, 1, 8, 0, 0}, + {0, 2, 8, 0, 0}, + {0, 3, 8, 0, 0}, }, }; @@ -109,7 +118,9 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_3 = { }, 3, { - {3, 1, 2, 0, 0}, + {1, 1, 8, 0, 0}, + {1, 2, 8, 0, 0}, + {1, 3, 8, 0, 0}, }, }; @@ -119,9 +130,10 @@ MakeDefaultMultiThreadsTestCase _pecore32_streams_1 = { }, 1, { - {1, 0, 24, 0, 0}, + {1, 0, 32, 0, 0}, {0, 1, 8, 0, 0}, {0, 2, 16, 0, 0}, + {0, 3, 8, 0, 0}, }, }; From b1a19fc62c3a485c32dd0e52a2983ca022e59da9 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Mon, 4 Mar 2024 15:44:58 +0800 Subject: [PATCH 09/14] add get_streams_info_table() test cases for 4 cores and 8 cores CPUs (#23156) ### Details: - *add get_streams_info_table() test cases for 4 cores and 8 cores CPUs* - *...* ### Tickets: - *ticket-id* --- .../streams_info/streams_info_table_test.cpp | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp index 1fce6e02e96f8f..204501b6046ec0 100644 --- a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp @@ -1821,6 +1821,32 @@ StreamsCalculationTestCase _1sockets_6cores_tput_4 = { {{6, MAIN_CORE_PROC, 1, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}}, }; +StreamsCalculationTestCase _1sockets_4cores_latency_1 = { + 1, + false, + 0, + 0, + 0, + 0, + "LATENCY", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{4, 4, 0, 0, 0, 0}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}}, +}; + +StreamsCalculationTestCase _1sockets_4cores_tput_1 = { + 1, + false, + 0, + 0, + 0, + 0, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{4, 4, 0, 0, 0, 0}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}}, +}; + StreamsCalculationTestCase _1sockets_ecores_latency_1 = { 1, false, @@ -1976,6 +2002,20 @@ StreamsCalculationTestCase _1sockets_mock_tput_3 = { {{19, 19, 0, 0, -1, -1}, {11, 11, 0, 0, 0, 0}, {8, 8, 0, 0, 1, 1}}, {{5, MAIN_CORE_PROC, 2, 0, 0}, {4, MAIN_CORE_PROC, 2, 1, 1}}, }; + +StreamsCalculationTestCase _1sockets_mock_tput_4 = { + 1, + false, + 0, + 0, + 0, + 0, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{8, 8, 0, 0, 0, 0}}, + {{4, MAIN_CORE_PROC, 2, 0, 0}}, +}; + StreamsCalculationTestCase _2sockets_mock_latency_1 = { 1, false, @@ -2286,6 +2326,30 @@ StreamsCalculationTestCase _2sockets_mock_latency_21 = { {0, HYPER_THREADING_PROC, 20, 6, 6}, {0, MAIN_CORE_PROC, 10, 0, 0}}, }; +StreamsCalculationTestCase _2sockets_mock_latency_22 = { + 1, + false, + 200, + 0, + 0, + 3, + "LATENCY", + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + {{200, 100, 0, 100, -1, -1}, + {80, 40, 0, 40, 0, 0}, + {60, 30, 0, 30, 1, 1}, + {40, 20, 0, 20, 2, 2}, + {20, 10, 0, 10, 3, 3}}, + {{1, ALL_PROC, 200, -1, -1}, + {0, MAIN_CORE_PROC, 10, 3, 3}, + {0, HYPER_THREADING_PROC, 10, 3, 3}, + {0, MAIN_CORE_PROC, 40, 0, 0}, + {0, MAIN_CORE_PROC, 30, 1, 1}, + {0, MAIN_CORE_PROC, 20, 2, 2}, + {0, HYPER_THREADING_PROC, 40, 0, 0}, + {0, HYPER_THREADING_PROC, 30, 1, 1}, + {0, HYPER_THREADING_PROC, 20, 2, 2}}, +}; TEST_P(StreamsCalculationTests, StreamsCalculation) {} @@ -2419,6 +2483,8 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_6cores_tput_2, _1sockets_6cores_tput_3, _1sockets_6cores_tput_4, + _1sockets_4cores_latency_1, + _1sockets_4cores_tput_1, _1sockets_ecores_latency_1, _1sockets_ecores_latency_2, _1sockets_ecores_latency_3, @@ -2431,6 +2497,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_mock_tput_1, _1sockets_mock_tput_2, _1sockets_mock_tput_3, + _1sockets_mock_tput_4, _2sockets_mock_latency_1, _2sockets_mock_latency_2, _2sockets_mock_latency_3, @@ -2451,6 +2518,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _2sockets_mock_latency_18, _2sockets_mock_latency_19, _2sockets_mock_latency_20, - _2sockets_mock_latency_21)); + _2sockets_mock_latency_21, + _2sockets_mock_latency_22)); } // namespace \ No newline at end of file From df32562523349c325d35afcb056574c8e368dd3e Mon Sep 17 00:00:00 2001 From: Piotr Kowalczyk Date: Mon, 4 Mar 2024 08:58:11 +0100 Subject: [PATCH 10/14] [GPU]: Slice op supports dynamic shapes (#22935) ### Details: - Slice op on GPU supports dynamic shapes ### Tickets: - *CVS-129985* - *CVS-100235* --- .../intel_gpu/src/graph/impls/ocl/slice.cpp | 205 +++++++++++------ .../intel_gpu/src/graph/include/slice_inst.h | 60 +++++ src/plugins/intel_gpu/src/graph/slice.cpp | 74 +++++- .../kernel_selector/cl_kernels/slice_ref.cl | 103 +++++++-- .../kernels/slice/slice_kernel_ref.cpp | 102 +++++++-- .../kernels/slice/slice_kernel_ref.h | 10 +- .../single_layer_tests/slice.cpp | 15 ++ .../tests/unit/test_cases/slice_gpu_test.cpp | 216 +++++++++++++++--- 8 files changed, 623 insertions(+), 162 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp index 7573acd46d3153..cd778650b12a3c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp @@ -2,33 +2,32 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "primitive_base.hpp" +#include +#include -#include "slice_inst.h" #include "data_inst.h" -#include "slice/slice_kernel_selector.h" +#include "primitive_base.hpp" #include "slice/slice_kernel_ref.h" - -#include -#include +#include "slice/slice_kernel_selector.h" +#include "slice_inst.h" namespace cldnn { namespace ocl { namespace { -template::value>::type> -std::vector extractIntegerData(const data_node& node, const stream& stream) { +template ::value>::type> +std::vector extractIntegerData(const data_node& node, const stream& stream) { mem_lock lock{node.get_attached_memory_ptr(), stream}; T* data = lock.data(); - std::vector integer_data; + std::vector integer_data; integer_data.reserve(node.get_output_layout().count()); for (size_t i = 0; i < node.get_output_layout().count(); i++) { - integer_data.emplace_back(static_cast(data[i])); + integer_data.emplace_back(static_cast(data[i])); } return integer_data; } -std::vector extractIntegerData(const data_node& node, const stream& stream) { +std::vector extractIntegerData(const data_node& node, const stream& stream) { auto dt = node.get_output_layout().data_type; switch (dt) { case data_types::u8: @@ -40,22 +39,16 @@ std::vector extractIntegerData(const data_node& node, const stream case data_types::i64: return extractIntegerData(node, stream); default: - OPENVINO_ASSERT(false, "[GPU] Slice parameters should be of integral type for node ", node.id(), " while got ", dt); + OPENVINO_ASSERT(false, + "[GPU] Slice parameters should be of integral type for node ", + node.id(), + " while got ", + dt); } return {}; } -std::vector extractShape(kernel_selector::Tensor::DataTensor& tensor) { - auto logical_dims = tensor.LogicalDims(); - // LogicalDims method returns dims in reversed order - std::vector reverse_logical_dims; - for (auto it = logical_dims.rbegin(); it != logical_dims.rend(); ++it) { - reverse_logical_dims.push_back(static_cast(*it)); - } - return reverse_logical_dims; -} - -} // namespace +} // namespace struct slice_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; @@ -63,81 +56,141 @@ struct slice_impl : typed_primitive_impl_ocl { using kernel_selector_t = kernel_selector::slice_kernel_selector; using kernel_params_t = kernel_selector::slice_params; - enum InputIndices { - kData, - kStart, - kEnd, - kStep, - kAxes, - kInputsNum - }; - DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::slice_impl) std::unique_ptr clone() const override { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + + kernel_arguments_data get_arguments(const slice_inst& instance) const override { + kernel_arguments_data args; + + const SliceKernelRefNeededInputs inputs = SliceKernelRefNeededInputs::Create(*instance.node); + + for (auto idx : inputs.GetNeededInputIndexes()) { + args.inputs.push_back(instance.input_memory_ptr(idx)); + } + + for (size_t i = 0; i < instance.outputs_memory_count(); i++) { + args.outputs.push_back(instance.output_memory_ptr(i)); + } + + args.shape_info = instance.shape_info_memory_ptr(); + return args; + } + static std::unique_ptr create(const slice_node& arg, const kernel_impl_params& impl_param) { - auto params = get_default_params(impl_param); - const auto& inputs = arg.get_dependencies(); - const stream& stream = arg.get_program().get_stream(); - auto start_elts = extractIntegerData(inputs[InputIndices::kStart].first->as(), stream); - auto end_elts = extractIntegerData(inputs[InputIndices::kEnd].first->as(), stream); - auto step_elts = extractIntegerData(inputs[InputIndices::kStep].first->as(), stream); - auto data_shape = extractShape(params.inputs[0]); - std::vector axes(data_shape.size()); - if (inputs.size() == InputIndices::kInputsNum) - axes = extractIntegerData(inputs[InputIndices::kAxes].first->as(), stream); - else - std::iota(axes.begin(), axes.end(), 0); - std::vector selected_start(data_shape.size(), 0); - std::vector selected_step(data_shape.size(), 1); - std::vector selected_end(data_shape); - for (size_t axis = 0; axis < axes.size(); axis++) { - auto transformed_axe = axes[axis] < 0 ? data_shape.size() + axes[axis] : axes[axis]; - auto start = start_elts[axis]; - auto end = end_elts[axis]; - auto dim_size = data_shape[transformed_axe]; - selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0); - selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0); - selected_step[transformed_axe] = step_elts[axis]; + auto params = get_default_params(impl_param, impl_param.is_dynamic()); + const auto input_rank = params.inputs[0].Dimentions(); + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kStart, + params.compile_time_start, + params.start_data_type, + params.inputs)) { + // No kStart input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_start = std::vector(input_rank, 0); + } + + // NOTE: Stop input is not used by the slice kernel, as this information + // is implicitely passed with output shape. + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kStep, + params.compile_time_step, + params.step_data_type, + params.inputs)) { + // No kStep input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_step = std::vector(input_rank, 1); + } + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kAxes, + params.compile_time_axes, + params.axes_data_type, + params.inputs)) { + // No kAxes input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_axes.resize(input_rank); + std::iota(params.compile_time_axes.begin(), params.compile_time_axes.end(), 0); } - params.start = std::move(selected_start); - params.end = std::move(selected_end); - params.step = std::move(selected_step); + + // Transform compile time axes: + for (size_t axis = 0; axis < params.compile_time_axes.size(); ++axis) { + const int64_t transformed_axe = params.compile_time_axes[axis] < 0 + ? input_rank + params.compile_time_axes[axis] + : params.compile_time_axes[axis]; + params.compile_time_axes[axis] = transformed_axe; + } + params.set_dynamic_shape_offsets(); - auto &kernel_selector = - kernel_selector::slice_kernel_selector::Instance(); + auto& kernel_selector = kernel_selector::slice_kernel_selector::Instance(); auto best_kernel = kernel_selector.get_best_kernel(params); return make_unique(best_kernel); } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_default_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params, _kernel_data); + } + +private: + // Returns true if input was prepared(was avaiable in node def), false otherwise. + static bool PrepareInput(const slice_node& arg, + SliceKernelRefNeededInputs::InputIndices idx, + std::vector& out_compile_time_buff, + kernel_selector::Datatype& out_buff_data_type, + kernel_selector::MultiDataTensor& out_runtime_inputs) { + const stream& stream = arg.get_program().get_stream(); + const auto& inputs = arg.get_dependencies(); + + if (inputs.size() <= idx) + return false; + + const SliceKernelRefNeededInputs kernel_needed_inputs = SliceKernelRefNeededInputs::Create(arg); + if (kernel_needed_inputs.IsInputNeededInRuntime(idx)) { + const auto layout = inputs[idx].first->get_output_layout(0); + out_buff_data_type = to_data_type(layout.data_type); + out_compile_time_buff.clear(); + out_runtime_inputs.push_back(convert_data_tensor(layout)); + } else { + out_buff_data_type = kernel_selector::Datatype::INT64; + out_compile_time_buff = extractIntegerData(inputs[idx].first->as(), stream); + } + + return true; + } }; namespace detail { attach_slice_impl::attach_slice_impl() { - implementation_map::add(impl_types::ocl, slice_impl::create, { - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::f32, format::bfyx), - std::make_tuple(data_types::u8, format::bfyx), - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::i32, format::bfyx), - std::make_tuple(data_types::i64, format::bfyx), - std::make_tuple(data_types::f16, format::bfzyx), - std::make_tuple(data_types::f32, format::bfzyx), - std::make_tuple(data_types::u8, format::bfyx), - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::i32, format::bfzyx), - std::make_tuple(data_types::i64, format::bfzyx), - }); + auto types = {data_types::f32, data_types::f16, data_types::i8, data_types::u8, data_types::i32, data_types::i64}; + + auto formats = { + format::bfyx, + format::bfzyx, + }; + + implementation_map::add(impl_types::ocl, shape_types::any, slice_impl::create, types, formats); } } // namespace detail -} // namespace ocl -} // namespace cldnn +} // namespace ocl +} // namespace cldnn BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::slice_impl) BIND_BINARY_BUFFER_WITH_TYPE(cldnn::slice) diff --git a/src/plugins/intel_gpu/src/graph/include/slice_inst.h b/src/plugins/intel_gpu/src/graph/include/slice_inst.h index 189a2a6096fea6..09425f20f5dd47 100644 --- a/src/plugins/intel_gpu/src/graph/include/slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/slice_inst.h @@ -10,6 +10,45 @@ namespace cldnn { using slice_node = typed_program_node; +// This class is needed to have one place where decision +// is made which Slice inputs are used by the kernel on GPU. +// Unfortnately, the same decison needs to be made +// in multiple places, including: +// - slice_inst::update_shape_info_tensor +// - slice_impl::get_arguments +// - slice_impl::create +// This class was created to encapsulate that logic in single place. +// NOTE: the placement of this class is the 'lesser evil'. Normally such logic +// should be a part of codegen/jitter, which should output some struct with information +// about which data is needed by the kernel, how it should be provided, bindings, etc. +// Currently it is scattered in mutiple places, where basically similar logic has to be applied. +// NOTE: This class implicietly depends on logic inside SliceKernelRef and the kernel +// itself. If you make any changes of how params are provided to kernel, +// likely you will needed to update this one too. +class SliceKernelRefNeededInputs { +public: + enum InputIndices { + kData, + kStart, + kEnd, + kStep, + kAxes, + kInputsNum + }; + + // Creates instance of SliceKernelRefNeededInputs. + static SliceKernelRefNeededInputs Create(const slice_node& node); + + // Retruns needed indexes in runtime. + const std::vector& GetNeededInputIndexes() const; + + // Returns true if given input is needed in runtime. + bool IsInputNeededInRuntime(InputIndices type) const; + +private: + std::vector neededIndexes; +}; + template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; @@ -22,8 +61,29 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::string to_string(slice_node const& node); typed_primitive_inst(network& network, slice_node const& desc); + void update_shape_info_tensor(const kernel_impl_params& params) override; }; using slice_inst = typed_primitive_inst; +/////////////////////////////////////////////////////////////////// +// +// INLINES: +// +/////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////// +inline const std::vector& SliceKernelRefNeededInputs::GetNeededInputIndexes() const { + return neededIndexes; +} + +/////////////////////////////////////////////////////////////////// +inline bool SliceKernelRefNeededInputs::IsInputNeededInRuntime(InputIndices type) const { + for (auto idx : neededIndexes) { + if (idx == type) + return true; + } + return false; +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/slice.cpp b/src/plugins/intel_gpu/src/graph/slice.cpp index 4ac47b63c2e66f..84e57ea10ca0d0 100644 --- a/src/plugins/intel_gpu/src/graph/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/slice.cpp @@ -10,6 +10,30 @@ #include namespace cldnn { + +SliceKernelRefNeededInputs SliceKernelRefNeededInputs::Create(const slice_node& node) { + SliceKernelRefNeededInputs inputs; + + const auto& node_inputs = node.get_dependencies(); + + const bool axes_in_runtime = + ((node_inputs.size() == InputIndices::kInputsNum) && !node_inputs[InputIndices::kAxes].first->is_constant()); + const bool start_in_runtime = !node_inputs[InputIndices::kStart].first->is_constant(); + const bool step_in_runtime = !node_inputs[InputIndices::kStep].first->is_constant(); + + inputs.neededIndexes.push_back(InputIndices::kData); + if (start_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kStart); + if (step_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kStep); + if (axes_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kAxes); + + // NOTE: stop is never needed as it is passed implicitely via output shape. + + return inputs; +} + GPU_DEFINE_PRIMITIVE_TYPE_ID(slice) slice_inst::typed_primitive_inst(network& network, slice_node const& node) @@ -19,18 +43,33 @@ layout slice_inst::calc_output_layout(slice_node const& node, kernel_impl_params return calc_output_layouts(node, impl_param)[0]; } -template +template inline std::vector slice_inst::calc_output_layouts(const slice_node&, const kernel_impl_params& impl_param) { std::vector input_shapes{impl_param.input_layouts[0].get()}; std::unordered_map const_data; for (std::size_t i = 1; i < impl_param.input_layouts.size(); i++) { - const auto shape_len = shape_size(impl_param.input_layouts[i].get().to_shape()); - const ov::PartialShape input_shape{static_cast(shape_len)}; + // NOTE: This code effectively makes a reshape operation on tensors start, + // stop, step and axes. The specification of Slice operator clearly says + // that those tensors are 1D tensors - and this is what is expected + // in shape_infer(). However, people in tests and other places, + // put 4D tensors instead of 1D(e.g. [4,1,1,1] instead of [4]). + // At the time of writing this comment - the hack for such situation + // was already there, so adding an ASSERT will effectively make + // some tests and graph transformations fail. + // There should be some kind of warning to the user about it, but AFAIK + // we don't have warning logs that could be enabled/disabled without + // affecting performance... + ov::PartialShape input_shape = ov::PartialShape::dynamic(1); + if (impl_param.memory_deps.find(i) != impl_param.memory_deps.end()) { + auto gpu_mem = impl_param.memory_deps.at(i); + input_shape = {static_cast(gpu_mem->count())}; + cldnn::mem_lock gpu_mem_lock(gpu_mem, impl_param.get_stream()); + const_data.emplace( + i, + make_tensor(layout{input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format}, + gpu_mem_lock.data())); + } input_shapes.push_back(input_shape); - auto gpu_mem = impl_param.memory_deps.at(i); - cldnn::mem_lock gpu_mem_lock(gpu_mem, impl_param.get_stream()); - const_data.emplace(i, make_tensor(layout {input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format }, - gpu_mem_lock.data())); } ov::op::v8::Slice op; auto output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); @@ -42,7 +81,6 @@ inline std::vector slice_inst::calc_output_layouts(const slice_node&, co return output_layouts; } - std::string slice_inst::to_string(slice_node const& node) { auto node_info = node.desc_to_json(); json_composite slice_info; @@ -57,4 +95,24 @@ std::string slice_inst::to_string(slice_node const& node) { return primitive_description.str(); } +void slice_inst::update_shape_info_tensor(const kernel_impl_params& params) { + mem_lock lock(_shape_info_memory, _network.get_stream()); + auto shape_info_ptr = lock.data(); + size_t offset = 0; + const SliceKernelRefNeededInputs inputs = SliceKernelRefNeededInputs::Create(*_node); + + for (auto idx : inputs.GetNeededInputIndexes()) { + GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for input[" << idx << "]" << std::endl; + const auto& node_in_lay = _node->get_input_layout(idx); + const auto& runtime_in_lay = params.input_layouts[idx]; + fill_shape_info_data(runtime_in_lay, node_in_lay, shape_info_ptr, offset); + } + for (size_t i = 0; i < _node->get_output_layouts().size(); i++) { + GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for output[" << i << "]" << std::endl; + const auto& node_out_lay = _node->get_output_layout(i); + const auto& runtime_out_lay = params.output_layouts[i]; + fill_shape_info_data(runtime_out_lay, node_out_lay, shape_info_ptr, offset); + } +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl index f90254ce89f0e4..a67aa925060c73 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl @@ -4,33 +4,88 @@ #include "include/batch_headers/fetch_data.cl" -KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) +#define BRING_INTO_RANGE(VAL, MAX) \ + clamp((long)VAL < 0l ? (long)VAL + (long)MAX : (long)VAL, 0l, (long)MAX-1l); + +#if INPUT0_DIMS < 5 +#define LOAD_BUFFER(in_prefix, out_name) \ + long out_name[INPUT0_DIMS]; \ + out_name[0] = in_prefix##_VAL0; \ + out_name[1] = in_prefix##_VAL1; \ + out_name[2] = in_prefix##_VAL2; \ + out_name[3] = in_prefix##_VAL3; +#else +#define LOAD_BUFFER(in_prefix, out_name) \ + long out_name[INPUT0_DIMS]; \ + out_name[0] = in_prefix##_VAL0; \ + out_name[1] = in_prefix##_VAL1; \ + out_name[2] = in_prefix##_VAL2; \ + out_name[3] = in_prefix##_VAL3; \ + out_name[4] = in_prefix##_VAL4; +#endif + +KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* restrict input, + START_BUFFER + STEP_BUFFER + AXES_BUFFER + __global OUTPUT_TYPE* restrict output) { - const uint batch = get_global_id(0); - const uint feature = get_global_id(1); + LOAD_BUFFER(START, start_buff); + LOAD_BUFFER(STEP, step_buff); + LOAD_BUFFER(AXES, axes_buff); + + long slice_step[INPUT0_DIMS]; + long slice_start[INPUT0_DIMS]; + + unroll_for(int i = 0; i < INPUT0_DIMS; ++i) { + slice_step[i] = 1; + slice_start[i] = 0; + } + + unroll_for(int i = 0; i < AXES_BUFFER_SIZE; ++i) { + const long axis = axes_buff[i]; + slice_step[axis] = step_buff[i]; + slice_start[axis] = start_buff[i]; + } + + const long output_dim0 = get_global_id(0); + const long output_dim1 = get_global_id(1); + const long slice_begin_dim0 = BRING_INTO_RANGE(slice_start[0], INPUT0_BATCH_NUM); + const long slice_begin_dim1 = BRING_INTO_RANGE(slice_start[1], INPUT0_FEATURE_NUM); + #if INPUT0_DIMS <= 4 - const uint xy = get_global_id(2); - const uint y = xy / OUTPUT_SIZE_X; - const uint x = xy % OUTPUT_SIZE_X; - const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x); - const uint input_index = INPUT0_GET_INDEX( - SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH, - SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE, - SLICE_BEGIN_Y + y * SLICE_STEP_Y, - SLICE_BEGIN_X + x * SLICE_STEP_X); + const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Y); + const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_X); + const long output_dim23 = get_global_id(2); + const long output_dim2 = output_dim23 / OUTPUT_SIZE_X; + const long output_dim3 = output_dim23 % OUTPUT_SIZE_X; + const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3); + const long input_index = INPUT0_GET_INDEX( + slice_begin_dim0 + output_dim0 * slice_step[0], + slice_begin_dim1 + output_dim1 * slice_step[1], + slice_begin_dim2 + output_dim2 * slice_step[2], + slice_begin_dim3 + output_dim3 * slice_step[3]); #elif INPUT0_DIMS == 5 - const uint xyz = get_global_id(2); - const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - const uint y = yx / OUTPUT_SIZE_X; - const uint x = yx % OUTPUT_SIZE_X; - const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x); - const uint input_index = INPUT0_GET_INDEX( - SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH, - SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE, - SLICE_BEGIN_Z + z * SLICE_STEP_Z, - SLICE_BEGIN_Y + y * SLICE_STEP_Y, - SLICE_BEGIN_X + x * SLICE_STEP_X); + const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Z); + const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_Y); + const long slice_begin_dim4 = BRING_INTO_RANGE(slice_start[4], INPUT0_SIZE_X); + const long output_dim234 = get_global_id(2); + const long output_dim34 = output_dim234 % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const long output_dim2 = output_dim234 / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const long output_dim3 = output_dim34 / OUTPUT_SIZE_X; + const long output_dim4 = output_dim34 % OUTPUT_SIZE_X; + const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3, output_dim4); + const long input_index = INPUT0_GET_INDEX( + slice_begin_dim0 + output_dim0 * slice_step[0], + slice_begin_dim1 + output_dim1 * slice_step[1], + slice_begin_dim2 + output_dim2 * slice_step[2], + slice_begin_dim3 + output_dim3 * slice_step[3], + slice_begin_dim4 + output_dim4 * slice_step[4]); #endif + output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS); } + +#undef LOAD_BUFFER; +#undef BRING_INTO_RANGE; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp index 1952afa5378bb3..312cd0fa2f7c2c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp @@ -7,23 +7,49 @@ #include namespace { - -void addJitConstantsForAttribute(kernel_selector::JitConstants &jit, - const std::string &name, const std::vector &attribute) { +static constexpr size_t MAX_SUPPORTED_DIM = 5; +static constexpr char JIT_AXES_BUFF_SIZE_NAME[] = "AXES_BUFFER_SIZE"; + +// Generates macros: +// - name_BUFFER +// - name_VAL0, name_VAL1 ... +void addJitConstantsForParam(kernel_selector::JitConstants& jit, + const std::string& name, + const std::vector& compile_time_param, + kernel_selector::Datatype type, + const std::function& dynamic_access_decorator) { using namespace kernel_selector; - jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0])); - jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1])); - if (attribute.size() == 5) { // BFZYX - jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2])); - jit.AddConstant(MakeJitConstant(name + "_Y", attribute[3])); - jit.AddConstant(MakeJitConstant(name + "_X", attribute[4])); - } else { // BFYX - jit.AddConstant(MakeJitConstant(name + "_Y", attribute[2])); - jit.AddConstant(MakeJitConstant(name + "_X", attribute[3])); + const std::string BUFF_CONST_NAME = name + "_BUFFER"; + const std::string BUFF_PTR_NAME = name + "_buffer_ptr"; + const auto jit_name_decorator = [](std::string name, size_t i) { + return name + "_VAL" + std::to_string(i); + }; + + if (compile_time_param.empty()) { + // Dynamic param: + const std::string type_str = toCLType(type); + jit.AddConstant( + MakeJitConstant(BUFF_CONST_NAME, "__global const " + type_str + "* restrict " + BUFF_PTR_NAME + ",")); + + for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) { + const std::string i_str = std::to_string(i); + const std::string jit_name = jit_name_decorator(name, i); + const std::string access_str = dynamic_access_decorator(BUFF_PTR_NAME, i); + jit.AddConstant( + MakeJitConstant(jit_name, i_str + " < " + JIT_AXES_BUFF_SIZE_NAME + " ? (" + access_str + ") : -1")); + } + } else { + // Static param: + jit.AddConstant(MakeJitConstant(BUFF_CONST_NAME, "")); + for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) { + const std::string jit_name = jit_name_decorator(name, i); + const int64_t val = i < compile_time_param.size() ? compile_time_param[i] : -1; + jit.AddConstant(MakeJitConstant(jit_name, val)); + } } } -} // anonymous namespace +} // anonymous namespace namespace kernel_selector { @@ -39,8 +65,11 @@ KernelsData SliceKernelRef::GetKernelsData(const Params ¶ms) const { auto slice_specific_jit = GetJitConstants(new_params); auto jit = CreateJit(kernelName, slice_specific_jit, entry_point); - FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, - kernelName, jit, entry_point); + GetUpdateDispatchDataFunc(kernel_data); + + FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, kernelName, jit, entry_point, + "", false, false, static_cast(new_params.inputs.size()), + 0, 1, new_params.has_dynamic_tensors()); return {kernel_data}; } @@ -68,6 +97,8 @@ ParamsKey SliceKernelRef::GetSupportedKey() const { k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); + k.EnableDynamicShapesSupport(); + k.EnableDifferentTypes(); return k; } @@ -80,17 +111,37 @@ bool SliceKernelRef::Validate(const Params &p) const { if (params.inputs.empty()) return false; - if (params.outputs[0].Dimentions() > 5 || params.inputs[0].Dimentions() > 5) + if (params.outputs[0].Dimentions() > MAX_SUPPORTED_DIM || params.inputs[0].Dimentions() > MAX_SUPPORTED_DIM) return false; return true; } -JitConstants SliceKernelRef::GetJitConstants(const slice_params ¶ms) const { +JitConstants SliceKernelRef::GetJitConstants(const slice_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); - addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start); - addJitConstantsForAttribute(jit, "SLICE_END", params.end); - addJitConstantsForAttribute(jit, "SLICE_STEP", params.step); + + // Define axes size as constant: + if (params.compile_time_axes.empty()) { + kernel_selector::DimensionAccessHelper dims(params.inputs.back()); + jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME, + toVectorMulString({dims.b(), dims.f(), dims.x(), dims.y(), dims.z()}))); + } else { + jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME, params.compile_time_axes.size())); + } + + // Prepare axes, start and step params: + const auto axes_decorator = [](std::string name, size_t i) { + const std::string i_str = std::to_string(i); + return name + "[" + i_str + "] < 0 ? INPUT0_DIMS + " + name + "[" + i_str + "] : " + name + "[" + i_str + "]"; + }; + addJitConstantsForParam(jit, "AXES", params.compile_time_axes, params.axes_data_type, axes_decorator); + + const auto default_decorator = [](std::string name, size_t i) { + return name + "[" + std::to_string(i) + "]"; + }; + addJitConstantsForParam(jit, "START", params.compile_time_start, params.start_data_type, default_decorator); + addJitConstantsForParam(jit, "STEP", params.compile_time_step, params.step_data_type, default_decorator); + return jit; } @@ -105,4 +156,15 @@ CommonDispatchData SliceKernelRef::SetDefault(const slice_params ¶ms) const return dispatchData; } +void SliceKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) const { + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params); + }; +} + } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h index 67449aed56b22b..b1d331cc94921e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h @@ -12,9 +12,12 @@ namespace kernel_selector { struct slice_params: public base_params { slice_params() : base_params(KernelType::SLICE) {} - std::vector start; - std::vector end; - std::vector step; + std::vector compile_time_start; + std::vector compile_time_step; + std::vector compile_time_axes; + kernel_selector::Datatype start_data_type; + kernel_selector::Datatype step_data_type; + kernel_selector::Datatype axes_data_type; }; class SliceKernelRef: public KernelBaseOpenCL { @@ -30,6 +33,7 @@ class SliceKernelRef: public KernelBaseOpenCL { private: JitConstants GetJitConstants(const slice_params ¶ms) const; CommonDispatchData SetDefault(const slice_params ¶ms) const; + void GetUpdateDispatchDataFunc(KernelData& kd) const override; }; } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp index 19d386e8a8351b..1f0381ef95100e 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp @@ -46,5 +46,20 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(ov::test::utils::DEVICE_GPU)), Slice8LayerTest::getTestCaseName); +std::vector dynamic_params = { + Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } }, + Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } }, + Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } }, + Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} }, + Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)}, + {{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_dynamic, Slice8LayerTest, + ::testing::Combine( + ::testing::ValuesIn(dynamic_params), + ::testing::ValuesIn(types), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + Slice8LayerTest::getTestCaseName); } // namespace \ No newline at end of file diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp index c33a287d2e4ddb..3cff74940daf57 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp @@ -46,7 +46,14 @@ struct SliceTestParams { memory::ptr start; memory::ptr stop; memory::ptr step; + memory::ptr axes; memory::ptr wanted_output; + bool is_input_dynamic = false; + bool is_start_dynamic = false; + bool is_stop_dynamic = false; + bool is_step_dynamic = false; + bool is_axes_dynamic = false; + bool is_caching_test = false; }; template @@ -54,8 +61,7 @@ class SliceTest : public ::testing::Test { public: // Runs all test cases for given params. void RunAllTestCasesForParams(const SliceTestParams& params) { - RunTestCase(params, false); - RunTestCase(params, true); + RunTestCase(params); } // Allocates tensoer with given shape and data. @@ -69,25 +75,83 @@ class SliceTest : public ::testing::Test { return tensor; } + template + void FillWithBasicBfyxPositiveStepAxesLessThanRankData(SliceTestParams& params) { + const ov::PartialShape input_shape{ 1, 2, 12, 100 }; + params.input = this->template AllocateTensor( + input_shape, format::bfyx, helpers::GenInput(input_shape)); + params.start = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 1, 0 }); + params.stop = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 2, 120, 5 }); + params.step = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 10, 1 }); + params.axes = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 3, 2 }); + params.wanted_output = this->template AllocateTensor( + ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { + 1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291, + 1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391, + 1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491, + 1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591, + 1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691, + }); + } + private: + void SetParameterInput(const std::string& name, topology& topology, const memory::ptr& data_ptr, bool is_dynamic ) { + if(is_dynamic) { + auto dynamic_shape = data_ptr->get_layout(); + dynamic_shape.set_partial_shape(ov::PartialShape::dynamic(dynamic_shape.get_rank())); + topology.add(input_layout(name, dynamic_shape)); + } else { + topology.add(data(name, data_ptr)); + } + } + // Runs single tests case for given params. - void RunTestCase(const SliceTestParams& params, bool is_caching_test) { + void RunTestCase(const SliceTestParams& params) { + + auto dynamic_input = params.input->get_layout(); + dynamic_input.set_partial_shape(ov::PartialShape::dynamic(dynamic_input.get_rank())); topology topology; - topology.add(input_layout("input", params.input->get_layout())); - topology.add(data("start", params.start)); - topology.add(data("stop", params.stop)); - topology.add(data("step", params.step)); + topology.add(input_layout("input", params.is_input_dynamic ? dynamic_input : params.input->get_layout())); + + SetParameterInput("start", topology, params.start, params.is_start_dynamic); + SetParameterInput("stop", topology, params.stop, params.is_stop_dynamic); + SetParameterInput("step", topology, params.step, params.is_step_dynamic); + + if(params.axes) { + SetParameterInput("axes", topology, params.axes, params.is_axes_dynamic); + } + std::vector inputs{input_info("input"), input_info("start"), input_info("stop"), input_info("step")}; + if (params.axes) { + inputs.push_back(input_info("axes")); + } topology.add(slice("slice", inputs)); ExecutionConfig config = get_test_default_config(engine_); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + cldnn::network::ptr network = - get_network(engine_, topology, config, get_test_stream_ptr(), is_caching_test); + get_network(engine_, topology, config, get_test_stream_ptr(), params.is_caching_test); network->set_input_data("input", params.input); + + if (params.is_start_dynamic) + network->set_input_data("start", params.start); + if (params.is_stop_dynamic) + network->set_input_data("stop", params.stop); + if (params.is_step_dynamic) + network->set_input_data("step", params.step); + if(params.axes && params.is_axes_dynamic) { + network->set_input_data("axes", params.axes); + } + auto outputs = network->execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -113,24 +177,105 @@ TYPED_TEST_SUITE(SliceTest, DataTypes); TYPED_TEST(SliceTest, bfyx_positive_step) { SliceTestParams params; - const ov::PartialShape input_shape{ 1, 2, 12, 100 }; - params.input = this->template AllocateTensor( - input_shape, format::bfyx, helpers::GenInput(input_shape)); - params.start = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 0, 1 }); - params.stop = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 2, 5, 100 }); - params.step = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 1, 1, 10 }); - params.wanted_output = this->template AllocateTensor( - ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { - 1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291, - 1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391, - 1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491, - 1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591, - 1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691, - }); + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + this->RunAllTestCasesForParams(params); +} +TYPED_TEST(SliceTest, bfyx_positive_step_all_static_caching) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_caching_test = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, bfyx_positive_step_all_dynamic_caching) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + params.is_axes_dynamic = true; + params.is_caching_test = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, stop_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_step_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, start_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_start_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_start_stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_step_axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_step_dynamic = true; + params.is_axes_dynamic = true; this->RunAllTestCasesForParams(params); } @@ -140,11 +285,11 @@ TYPED_TEST(SliceTest, bfyx_negative_step) { params.input = this->template AllocateTensor( input_shape, format::bfyx, helpers::GenInput(input_shape)); params.start = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 5, 90 }); + ov::PartialShape{ 4 }, format::bfyx, { 0, 1, 5, 90 }); params.stop = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 0, 0, 10 }); + ov::PartialShape{ 4 }, format::bfyx, { 1, 0, 0, 10 }); params.step = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, -1, -1, -10 }); + ov::PartialShape{ 4 }, format::bfyx, { 1, -1, -1, -10 }); params.wanted_output = this->template AllocateTensor( ov::PartialShape{ 1, 1, 5, 8 }, format::bfyx, { 1789, 1779, 1769, 1759, 1749, 1739, 1729, 1719, @@ -163,17 +308,26 @@ TYPED_TEST(SliceTest, bfzyx) { params.input = this->template AllocateTensor( input_shape, format::bfzyx, helpers::GenInput(input_shape)); params.start = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 0, 0, 0, 0, 0 }); + ov::PartialShape{ 5 }, format::bfzyx, { 0, 0, 0, 0, 0 }); params.stop = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 2, 2, 2, 2 }); + ov::PartialShape{ 5 }, format::bfzyx, { 1, 2, 2, 2, 2 }); params.step = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 1, 1, 1, 1 }); + ov::PartialShape{ 5 }, format::bfzyx, { 1, 1, 1, 1, 1 }); params.wanted_output = this->template AllocateTensor( ov::PartialShape{ 1, 2, 2, 2, 2 }, format::bfzyx, { 0, 1, 5, 6, 60, 61, 65, 66, 600, 601, 605, 606, 660, 661, 665, 666 }); + params.is_caching_test = true; + + this->RunAllTestCasesForParams(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); } From 50e503892755fcb00d1e48b6be82dd863fa718f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:55:51 +0000 Subject: [PATCH 11/14] Bump awalsh128/cache-apt-pkgs-action from 1.4.1 to 1.4.2 (#23225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [awalsh128/cache-apt-pkgs-action](https://github.com/awalsh128/cache-apt-pkgs-action) from 1.4.1 to 1.4.2.
Release notes

Sourced from awalsh128/cache-apt-pkgs-action's releases.

v1.4.2

What's Changed

New Contributors

Full Changelog: https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1...v1.4.2

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=awalsh128/cache-apt-pkgs-action&package-manager=github_actions&previous-version=1.4.1&new-version=1.4.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build_doc.yml | 2 +- .github/workflows/code_snippets.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 5fc223d9746bdf..fe52646914a40b 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -21,7 +21,7 @@ jobs: lfs: 'true' - name: Install apt-get dependencies - uses: awalsh128/cache-apt-pkgs-action@v1.4.1 + uses: awalsh128/cache-apt-pkgs-action@v1.4.2 with: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 5181e26f378da0..856f85afa29961 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -30,7 +30,7 @@ jobs: submodules: 'true' - name: Install OpenCL - uses: awalsh128/cache-apt-pkgs-action@v1.4.1 + uses: awalsh128/cache-apt-pkgs-action@v1.4.2 if: runner.os == 'Linux' with: packages: ocl-icd-opencl-dev opencl-headers From caf9148bc8c3530cd8b6d4711f79f1c329644815 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 4 Mar 2024 10:50:54 +0100 Subject: [PATCH 12/14] [TEMPLATE] GridSample in template plugin supports more precisions (#23052) ### Details: - Add more precisions support for `GridSample` in Template plugin. ### Tickets: - [CVS-133057](https://jira.devtools.intel.com/browse/CVS-133057) --- .../openvino/reference/grid_sample.hpp | 4 +- .../template/backend/ops/grid_sample.cpp | 60 ++- .../functional/op_reference/grid_sample.cpp | 478 ++++++++++-------- 3 files changed, 308 insertions(+), 234 deletions(-) diff --git a/src/core/reference/include/openvino/reference/grid_sample.hpp b/src/core/reference/include/openvino/reference/grid_sample.hpp index 6c765881e536db..7daffa1011f373 100644 --- a/src/core/reference/include/openvino/reference/grid_sample.hpp +++ b/src/core/reference/include/openvino/reference/grid_sample.hpp @@ -141,8 +141,8 @@ DATA_ET bilinear(const DATA_ET* data, const auto x_d = denormalize(x_n, data_shape[3]); const auto y_topleft = std::floor(y_d); const auto x_topleft = std::floor(x_d); - const auto dy = y_d - y_topleft; - const auto dx = x_d - x_topleft; + const auto dy = static_cast(y_d - y_topleft); + const auto dx = static_cast(x_d - x_topleft); const auto v00 = get_padded(data, data_shape, n, c, static_cast(y_topleft), static_cast(x_topleft)); const auto v01 = get_padded(data, data_shape, n, c, static_cast(y_topleft), static_cast(x_topleft + 1)); const auto v10 = get_padded(data, data_shape, n, c, static_cast(y_topleft + 1), static_cast(x_topleft)); diff --git a/src/plugins/template/backend/ops/grid_sample.cpp b/src/plugins/template/backend/ops/grid_sample.cpp index f47dba333f3c4e..1ea86f9c0ec03b 100644 --- a/src/plugins/template/backend/ops/grid_sample.cpp +++ b/src/plugins/template/backend/ops/grid_sample.cpp @@ -6,18 +6,48 @@ #include "evaluate_node.hpp" -template +template bool evaluate(const std::shared_ptr& op, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - using ET = typename ov::element_type_traits::value_type; + using DT = ov::fundamental_type_for; const auto& attributes = op->get_attributes(); - ov::element::Type grid_et = op->get_input_element_type(1); - switch (grid_et) { + + switch (op->get_input_element_type(1)) { + case ov::element::f16: + ov::reference::grid_sample(outputs[0].data
(), + inputs[0].data(), + inputs[1].data>(), + inputs[0].get_shape(), + inputs[1].get_shape(), + attributes.align_corners, + attributes.mode, + attributes.padding_mode); + break; + case ov::element::bf16: + ov::reference::grid_sample(outputs[0].data
(), + inputs[0].data(), + inputs[1].data>(), + inputs[0].get_shape(), + inputs[1].get_shape(), + attributes.align_corners, + attributes.mode, + attributes.padding_mode); + break; case ov::element::f32: - ov::reference::grid_sample(outputs[0].data(), - inputs[0].data(), - inputs[1].data(), + ov::reference::grid_sample(outputs[0].data
(), + inputs[0].data(), + inputs[1].data>(), + inputs[0].get_shape(), + inputs[1].get_shape(), + attributes.align_corners, + attributes.mode, + attributes.padding_mode); + break; + case ov::element::f64: + ov::reference::grid_sample(outputs[0].data
(), + inputs[0].data(), + inputs[1].data>(), inputs[0].get_shape(), inputs[1].get_shape(), attributes.align_corners, @@ -34,23 +64,17 @@ template <> bool evaluate_node(std::shared_ptr node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - auto element_type = node->get_output_element_type(0); - if (ov::is_type(node) || ov::is_type(node)) - element_type = node->get_input_element_type(1); - - switch (element_type) { + switch (node->get_output_element_type(0)) { case ov::element::boolean: return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::bf16: return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::f16: return evaluate(ov::as_type_ptr(node), outputs, inputs); - case ov::element::f64: - return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::f32: return evaluate(ov::as_type_ptr(node), outputs, inputs); - case ov::element::i4: - return evaluate(ov::as_type_ptr(node), outputs, inputs); + case ov::element::f64: + return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::i8: return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::i16: @@ -59,10 +83,6 @@ bool evaluate_node(std::shared_ptr node, return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::i64: return evaluate(ov::as_type_ptr(node), outputs, inputs); - case ov::element::u1: - return evaluate(ov::as_type_ptr(node), outputs, inputs); - case ov::element::u4: - return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::u8: return evaluate(ov::as_type_ptr(node), outputs, inputs); case ov::element::u16: diff --git a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp index 1923fe011a9f6e..df4f7e5a34845b 100644 --- a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp +++ b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp @@ -65,24 +65,33 @@ constexpr auto GS_ZEROS{op::v9::GridSample::PaddingMode::ZEROS}; constexpr std::array padding_modes{GS_ZEROS, GS_BORDER, GS_REFLECTION}; constexpr std::array align_corners_modes{false, true}; +std::string param_types_str(const element::Type& data_et, const element::Type& grid_et) { + std::stringstream types; + types << "_data_et_" << data_et << "_grid_et_" << grid_et; + return types.str(); +} + +template , + class GT = ov::fundamental_type_for> std::vector generateNearestParamsOddDimensionsInnerGrids() { std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; - reference_tests::Tensor grid_inner{ - {1, 3, 4, 2}, - element::f32, - std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, - 0.5, -0.5, 0.5, 0.5, -1., -1., -1., 1., 1., -1., 1., 1.}}; - reference_tests::Tensor output{{1, 1, 3, 4}, - element::f32, - std::vector{8, 8, 8, 8, 2, 12, 4, 14, 1, 11, 5, 15}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, + 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, + 0.5, -0.5, 0.5, 0.5, -1., -1., + -1., 1., 1., -1., 1., 1.}}; + reference_tests::Tensor output{{1, 1, 3, 4}, DATA_ET, std::vector
{8, 8, 8, 8, 2, 12, 4, 14, 1, 11, 5, 15}}; + for (const auto& padding : padding_modes) { for (const auto align : align_corners_modes) { std::stringstream name; name << "nearest_" << padding << (align ? "_align" : "_noalign") << "_odd_dims_inner"; + name << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{align, GS_NEAREST, padding}, @@ -93,78 +102,83 @@ std::vector generateNearestParamsOddDimensionsInnerGrids() { return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateNearestParamsOddDimensionsOuterGrids() { std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; reference_tests::Tensor grid_outer{ {1, 1, 7, 2}, - element::f32, - std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + GRID_ET, + std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + + const auto types_str = param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "nearest_zeros_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 0}}, + "nearest_zeros_noalign_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "nearest_zeros_align_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, GRID_ET, std::vector{0, 0, 0, 0, 0, 0, 0}}, + "nearest_zeros_align_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 11, 11, 14, 15, 10, 5}}, - "nearest_border_noalign_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 11, 11, 14, 15, 10, 5}}, + "nearest_border_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 6, 11, 14, 15, 10, 5}}, - "nearest_border_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 6, 11, 14, 15, 10, 5}}, + "nearest_border_align_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{8, 14, 1, 4, 14, 6, 5}}, - "nearest_reflection_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{8, 14, 1, 4, 14, 6, 5}}, + "nearest_reflection_noalign_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{8, 9, 6, 4, 14, 6, 4}}, - "nearest_reflection_align_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{8, 9, 6, 4, 14, 6, 4}}, + "nearest_reflection_align_odd_dims_outer" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateNearestParamsEvenDimensions() { std::vector params; - reference_tests::Tensor data_even_dims{ - {1, 1, 4, 6}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}; + reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector
{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}}; reference_tests::Tensor grid_inner{ {1, 1, 8, 2}, - element::f32, - std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; + GRID_ET, + std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; - reference_tests::Tensor output_align{{1, 1, 1, 8}, element::f32, std::vector{8, 14, 11, 17, 19, 6, 9, 16}}; - reference_tests::Tensor output_noalign{{1, 1, 1, 8}, element::f32, std::vector{2, 14, 5, 17, 19, 6, 9, 16}}; - reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8}, - element::f32, - std::vector{2, 14, 5, 17, 0, 0, 9, 16}}; + reference_tests::Tensor output_align{{1, 1, 1, 8}, DATA_ET, std::vector
{8, 14, 11, 17, 19, 6, 9, 16}}; + reference_tests::Tensor output_noalign{{1, 1, 1, 8}, DATA_ET, std::vector
{2, 14, 5, 17, 19, 6, 9, 16}}; + reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8}, DATA_ET, std::vector
{2, 14, 5, 17, 0, 0, 9, 16}}; for (const auto& padding : padding_modes) { std::stringstream name1, name2; name1 << "nearest_" << padding << "_noalign" - << "_even_dims_inner"; + << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_even_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_NEAREST, padding}, @@ -172,7 +186,7 @@ std::vector generateNearestParamsEvenDimensions() { name1.str()); name2 << "nearest_" << padding << "_align" - << "_even_dims_inner"; + << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_even_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_NEAREST, padding}, @@ -183,199 +197,209 @@ std::vector generateNearestParamsEvenDimensions() { return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsOddDimensionsInnerGrids() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; - reference_tests::Tensor grid_inner{ - {1, 3, 4, 2}, - element::f32, - std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, - 0.5, -0.5, 0.5, 0.5, -1., -1., -1., 1., 1., -1., 1., 1.}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, + 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, + 0.5, -0.5, 0.5, 0.5, -1., -1., + -1., 1., 1., -1., 1., 1.}}; reference_tests::Tensor output_align{{1, 1, 3, 4}, - element::f32, - std::vector{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}}; + DATA_ET, + std::vector
{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}}; reference_tests::Tensor output_noalign{{1, 1, 3, 4}, - element::f32, - std::vector{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}}; + DATA_ET, + std::vector
{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}}; reference_tests::Tensor output_zeros_noalign{ {1, 1, 3, 4}, - element::f32, - std::vector{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}}; + DATA_ET, + std::vector
{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}}; params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, output_zeros_noalign, - "bilinear_zeros_noalign_odd_dims_inner"); + "bilinear_zeros_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, output_align, - "bilinear_zeros_align_odd_dims_inner"); + "bilinear_zeros_align_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, output_noalign, - "bilinear_border_noalign_odd_dims_inner"); + "bilinear_border_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, output_align, - "bilinear_border_align_odd_dims_inner"); + "bilinear_border_align_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, output_noalign, - "bilinear_reflection_noalign_odd_dims_inner"); + "bilinear_reflection_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, output_align, - "bilinear_reflection_align_odd_dims_inner"); + "bilinear_reflection_align_odd_dims_inner" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsOddDimensionsOuterGrids() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; reference_tests::Tensor grid_outer{ {1, 1, 7, 2}, - element::f32, - std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + GRID_ET, + std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "bilinear_zeros_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 0}}, + "bilinear_zeros_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 1.9880099}}, - "bilinear_zeros_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 1.9880099}}, + "bilinear_zeros_align_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 8.775, 11, 14.25, 15, 8.725, 5}}, - "bilinear_border_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 8.775, 11, 14.25, 15, 8.725, 5}}, + "bilinear_border_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 7.85, 11, 14, 15, 9.15, 5}}, - "bilinear_border_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 7.85, 11, 14, 15, 9.15, 5}}, + "bilinear_border_align_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}}, - "bilinear_reflection_noalign_odd_dims_outer"); + DATA_ET, + std::vector
{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}}, + "bilinear_reflection_noalign_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}}, - "bilinear_reflection_align_odd_dims_outer"); + DATA_ET, + std::vector
{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}}, + "bilinear_reflection_align_odd_dims_outer" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsEvenDimensions() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; - reference_tests::Tensor data_even_dims{ - {1, 1, 4, 6}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}; + reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector
{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}}; reference_tests::Tensor grid_inner{ {1, 1, 8, 2}, - element::f32, - std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; + GRID_ET, + std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; params.emplace_back( data_even_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 4.75, 1.5, 11, 14}}, - "bilinear_zeros_noalign_even_dims_inner"); - - params.emplace_back(data_even_dims, - grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_zeros_align_even_dims_inner"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 4.75, 1.5, 11, 14}}, + "bilinear_zeros_noalign_even_dims_inner" + types_str); params.emplace_back( data_even_dims, grid_inner, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 19, 6, 11, 14}}, - "bilinear_border_noalign_even_dims_inner"); + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_zeros_align_even_dims_inner" + types_str); params.emplace_back(data_even_dims, grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_border_align_even_dims_inner"); + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 19, 6, 11, 14}}, + "bilinear_border_noalign_even_dims_inner" + types_str); params.emplace_back( data_even_dims, grid_inner, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 19, 6, 11, 14}}, - "bilinear_reflection_noalign_even_dims_inner"); + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_border_align_even_dims_inner" + types_str); params.emplace_back(data_even_dims, grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_reflection_align_even_dims_inner"); + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 19, 6, 11, 14}}, + "bilinear_reflection_noalign_even_dims_inner" + types_str); + + params.emplace_back( + data_even_dims, + grid_inner, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_reflection_align_even_dims_inner" + types_str); return params; } +template > std::vector generateBicubicParams() { + constexpr auto GRID_ET = ov::element::Type_t::f32; + using GT = ov::fundamental_type_for; + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; // clang-format off - reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, element::f32, - std::vector{ 1, 1, 1, 1, 1, 1, 1, + reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, DATA_ET, + std::vector
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 3, 5, 3, 2, 1, 1, 2, 5, 9, 5, 2, 1}}; reference_tests::Tensor grid{ {1, 4, 4, 2}, - element::f32, - std::vector{ -0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, + GRID_ET, + std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; @@ -384,85 +408,90 @@ std::vector generateBicubicParams() { grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{2.6663566, 3.527928, 2.6663566, 3.527928, - 1.6318359, 2.7156982, 1.6318359, 2.7156982, - 0.6378987, 0.57033366, 0.6378987, 0.57033366, - 0, -0.01507522, 0.25528803, 0 }}, - "bicubic_zeros_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.6318359, 2.7156982, 1.6318359, 2.7156982, + 0.6378987, 0.57033366, 0.6378987, 0.57033366, + 0, -0.01507522, 0.25528803, 0 }}, + "bicubic_zeros_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.8481445, 2.7364502, 1.8481445, 2.7364502, - 1.2367951, 1.3602872, 1.2367951, 1.3602872, - 0, 0.00650583, 1.1182348, 0 }}, - "bicubic_zeros_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.8481445, 2.7364502, 1.8481445, 2.7364502, + 1.2367951, 1.3602872, 1.2367951, 1.3602872, + 0, 0.00650583, 1.1182348, 0 }}, + "bicubic_zeros_align" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.6663566, 3.527928, 2.6663566, 3.527928, - 1.5380859, 2.4677734, 1.5380859, 2.4677734, - 1.0089612, 0.91871876, 1.0089612, 0.91871876, - 1, 1, 0.8902873, 1 }}, - "bicubic_border_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.5380859, 2.4677734, 1.5380859, 2.4677734, + 1.0089612, 0.91871876, 1.0089612, 0.91871876, + 1, 1, 0.8902873, 1 }}, + "bicubic_border_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.8129883, 2.623291, 1.8129883, 2.623291, - 1.0363026, 1.1486388, 1.0363026, 1.1486388, - 1, 1.0000064, 1.0641243, 1 }}, - "bicubic_border_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.8129883, 2.623291, 1.8129883, 2.623291, + 1.0363026, 1.1486388, 1.0363026, 1.1486388, + 1, 1.0000064, 1.0641243, 1 }}, + "bicubic_border_align" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.6663566, 3.527928, 2.6663566, 3.527928, - 1.5380859, 2.4677734, 1.5380859, 2.4677734, - 1.0150609, 0.904375, 1.0150609, 0.904375, - 5.48851, 0.898316, 0.8237547, 0.8125 }}, - "bicubic_reflection_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.5380859, 2.4677734, 1.5380859, 2.4677734, + 1.0150609, 0.904375, 1.0150609, 0.904375, + 5.48851, 0.898316, 0.8237547, 0.8125 }}, + "bicubic_reflection_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.7745361, 2.6518555, 1.7745361, 2.6518555, - 1.0085088, 1.0307077, 1.0085088, 1.0307077, - 5.5649586, 1.0553409, 1.0011607, 1 }}, - "bicubic_reflection_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.7745361, 2.6518555, 1.7745361, 2.6518555, + 1.0085088, 1.0307077, 1.0085088, 1.0307077, + 5.5649586, 1.0553409, 1.0011607, 1 }}, + "bicubic_reflection_align" + types_str); // clang-format on return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBicubicBatchesParams() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data{{2, 2, 4, 3}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}}; - reference_tests::Tensor grid{{2, 2, 4, 2}, - element::f32, - std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, - 0.5, 0.5, -0.5, 0.5, 0.5, -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, - 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}}; + reference_tests::Tensor grid{ + {2, 2, 4, 2}, + GRID_ET, + std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, + -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; params.emplace_back( data, @@ -470,12 +499,12 @@ std::vector generateBicubicBatchesParams() { op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{ {2, 2, 2, 4}, - element::f32, - std::vector{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344, 4.5976562, 9.589844, - 18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844, - 25.415281, 33.735218, 27.26478, 35.58472, 32.884, 26.852259, 35.996872, 36., - 37.41528, 45.735218, 39.264782, 47.58472, 44.884, 38.852257, 47.996872, 48.}}, - "bicubic_border_align_batches"); + DATA_ET, + std::vector
{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344, 4.5976562, 9.589844, + 18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844, + 25.415281, 33.735218, 27.26478, 35.58472, 32.884, 26.852259, 35.996872, 36., + 37.41528, 45.735218, 39.264782, 47.58472, 44.884, 38.852257, 47.996872, 48.}}, + "bicubic_border_align_batches" + types_str); params.emplace_back( data, @@ -483,76 +512,101 @@ std::vector generateBicubicBatchesParams() { op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{ {2, 2, 2, 4}, - element::f32, - std::vector{5.8170314, 6.7650313, 6.2349687, 7.182969, 2.4101562, 8.972656, 4.0273438, 10.589844, - 17.81703, 18.765032, 18.234968, 19.18297, 14.410156, 20.972656, 16.027344, 22.589844, - 24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013, 36.74749, 36.75, - 36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013, 48.74749, 48.75}}, - "bicubic_reflection_noalign_batches"); + DATA_ET, + std::vector
{5.8170314, 6.7650313, 6.2349687, 7.182969, 2.4101562, 8.972656, 4.0273438, 10.589844, + 17.81703, 18.765032, 18.234968, 19.18297, 14.410156, 20.972656, 16.027344, 22.589844, + 24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013, 36.74749, 36.75, + 36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013, 48.74749, 48.75}}, + "bicubic_reflection_noalign_batches" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateCornerCaseData1x1Params() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; - const reference_tests::Tensor data{{1, 1, 1, 1}, element::f32, std::vector{7}}; - const reference_tests::Tensor grid{{1, 1, 5, 2}, - element::f32, - std::vector{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}}; - const reference_tests::Tensor sevens{{1, 1, 1, 5}, element::f32, std::vector{7, 7, 7, 7, 7}}; + const reference_tests::Tensor data{{1, 1, 1, 1}, DATA_ET, std::vector
{7}}; + const reference_tests::Tensor grid{{1, 1, 5, 2}, GRID_ET, std::vector{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}}; + const reference_tests::Tensor sevens{{1, 1, 1, 5}, DATA_ET, std::vector
{7, 7, 7, 7, 7}}; - params.emplace_back( - data, - grid, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{1.75, 7, 3.5, 3.9375, 0}}, - "bilinear_zeros_no_align_data1x1"); + params.emplace_back(data, + grid, + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{1.75, 7, 3.5, 3.9375, 0}}, + "bilinear_zeros_no_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{7, 7, 7, 7, 0}}, - "nearest_zeros_no_align_data1x1"); + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{7, 7, 7, 7, 0}}, + "nearest_zeros_no_align_data1x1" + types_str); params.emplace_back( data, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{2.4677734, 7, 4.15625, 5.4073334, 0}}, - "bicubic_zeros_no_align_data1x1"); + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{2.4677734, 7, 4.15625, 5.4073334, 0}}, + "bicubic_zeros_no_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS}, sevens, - "bicubic_zeros_align_data1x1"); + "bicubic_zeros_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, sevens, - "bilinear_reflection_noalign_data1x1"); + "bilinear_reflection_noalign_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, sevens, - "nearest_border_align_data1x1"); + "nearest_border_align_data1x1" + types_str); return params; } std::vector generateGridSampleParams() { - std::vector> combo_params{generateNearestParamsOddDimensionsInnerGrids(), - generateNearestParamsOddDimensionsOuterGrids(), - generateNearestParamsEvenDimensions(), - generateBilinearParamsOddDimensionsInnerGrids(), - generateBilinearParamsOddDimensionsOuterGrids(), - generateBilinearParamsEvenDimensions(), - generateBicubicParams(), - generateBicubicBatchesParams(), - generateCornerCaseData1x1Params()}; + using namespace ov::element; + std::vector> combo_params{generateNearestParamsOddDimensionsInnerGrids(), + generateNearestParamsOddDimensionsInnerGrids(), + generateNearestParamsOddDimensionsInnerGrids(), + + generateNearestParamsEvenDimensions(), + generateNearestParamsEvenDimensions(), + generateNearestParamsEvenDimensions(), + + generateBilinearParamsOddDimensionsInnerGrids(), + generateBilinearParamsOddDimensionsInnerGrids(), + generateBilinearParamsOddDimensionsInnerGrids(), + + generateBilinearParamsOddDimensionsOuterGrids(), + generateBilinearParamsOddDimensionsOuterGrids(), + generateBilinearParamsOddDimensionsOuterGrids(), + + generateBilinearParamsEvenDimensions(), + generateBilinearParamsEvenDimensions(), + generateBilinearParamsEvenDimensions(), + + generateBicubicParams(), + generateBicubicParams(), + generateBicubicParams(), + + generateBicubicBatchesParams(), + generateBicubicBatchesParams(), + generateBicubicBatchesParams(), + + generateCornerCaseData1x1Params(), + generateCornerCaseData1x1Params(), + generateCornerCaseData1x1Params()}; std::vector test_params; for (auto& params : combo_params) std::move(params.begin(), params.end(), std::back_inserter(test_params)); From e2a74956c0d136fcfee4d07488d437dfc3184390 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Mon, 4 Mar 2024 10:53:09 +0100 Subject: [PATCH 13/14] [Ref] Concat op reference implementation code improvements (#23048) ### Details: - Concat op reference implementation code improvements, removal of code duplication - Leftovers from PR: https://github.com/openvinotoolkit/openvino/pull/22686 - Reduce Concat template code, pass element_type to distinguish copy method for string (element::Type_t::undefined as default to keep compatibility) - ~Corresponding update of places where reference::concat is used (including one gpu file)~ (reverted) (*First approach (https://github.com/openvinotoolkit/openvino/pull/23048/commits/6308f9d1cfe7e8de5848f7e8e04fb2907b129066) was to simply introduce common template to reuse the Concat reference code, but it still results in generation of string and char specializations of the whole template function* *Current approach is to use the same function and choose the copy method inside, based on the element type*) ### Tickets: -Related to 131838 --- .../include/openvino/reference/concat.hpp | 11 +--- src/core/reference/src/op/concat.cpp | 59 ++++++++----------- src/core/src/op/concat.cpp | 34 ++++------- 3 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/core/reference/include/openvino/reference/concat.hpp b/src/core/reference/include/openvino/reference/concat.hpp index b7a18ecef5d650..751226d83251f8 100644 --- a/src/core/reference/include/openvino/reference/concat.hpp +++ b/src/core/reference/include/openvino/reference/concat.hpp @@ -7,6 +7,7 @@ #include #include "openvino/core/shape.hpp" +#include "openvino/core/type/element_type.hpp" namespace ov { namespace reference { @@ -15,14 +16,8 @@ void concat(const std::vector& args, const std::vector& in_shapes, const Shape& out_shape, int64_t concatenation_axis, - size_t elem_size); - -void concat(const std::vector& args, - std::string* out, - const std::vector& in_shapes, - const Shape& out_shape, - int64_t concatenation_axis, - size_t); + size_t elem_size, + const ov::element::Type& elem_type = ov::element::Type_t::undefined); } // namespace reference } // namespace ov diff --git a/src/core/reference/src/op/concat.cpp b/src/core/reference/src/op/concat.cpp index 81cdbc25e5b805..815925274ea6a5 100644 --- a/src/core/reference/src/op/concat.cpp +++ b/src/core/reference/src/op/concat.cpp @@ -17,6 +17,26 @@ std::vector calculate_shape_sizes(const std::vector& in_shapes) { }); return sizes; } + +void copy_elements(const char* arg, + char* out, + size_t in_offset, + size_t out_offset, + size_t num_of_elements, + size_t elem_size) { + std::memcpy(out + (out_offset * elem_size), arg + (in_offset * elem_size), num_of_elements * elem_size); +} + +void copy_string_elements(const char* arg, + char* out, + size_t in_offset, + size_t out_offset, + size_t num_of_elements, + size_t) { + const auto src_begin = std::next(reinterpret_cast(arg), in_offset); + const auto out_ptr = std::next(reinterpret_cast(out), out_offset); + std::copy_n(src_begin, num_of_elements, out_ptr); +} } // namespace void concat(const std::vector& args, @@ -24,38 +44,12 @@ void concat(const std::vector& args, const std::vector& in_shapes, const Shape& out_shape, int64_t concatenation_axis, - size_t elem_size) { - size_t steps = 1; - for (int i = 0; i < concatenation_axis; ++i) { - steps *= out_shape[i]; - } - + size_t elem_size, + const ov::element::Type& elem_type) { + const auto steps = shape_size(out_shape.begin(), out_shape.begin() + concatenation_axis); const auto& shape_sizes = calculate_shape_sizes(in_shapes); - size_t out_offset = 0; - for (size_t step = 0; step < steps; ++step) { - for (size_t in_index = 0; in_index < args.size(); ++in_index) { - const size_t size = shape_sizes[in_index] / steps; - const size_t in_offset = step * size; - - std::memcpy(&out[out_offset * elem_size], &args[in_index][in_offset * elem_size], size * elem_size); - - out_offset += size; - } - } -} - -void concat(const std::vector& args, - std::string* out, - const std::vector& in_shapes, - const Shape& out_shape, - int64_t concatenation_axis, - size_t) { - size_t steps = 1; - for (int i = 0; i < concatenation_axis; ++i) { - steps *= out_shape[i]; - } - const auto& shape_sizes = calculate_shape_sizes(in_shapes); + const auto copy_func = elem_type == ov::element::string ? copy_string_elements : copy_elements; size_t out_offset = 0; for (size_t step = 0; step < steps; ++step) { @@ -63,14 +57,11 @@ void concat(const std::vector& args, const size_t size = shape_sizes[in_index] / steps; const size_t in_offset = step * size; - const auto src_begin = std::next(args[in_index], in_offset); - const auto out_ptr = std::next(out, out_offset); - std::copy_n(src_begin, size, out_ptr); + copy_func(args[in_index], out, in_offset, out_offset, size, elem_size); out_offset += size; } } } - } // namespace reference } // namespace ov diff --git a/src/core/src/op/concat.cpp b/src/core/src/op/concat.cpp index 4ec7743d64167e..b670af7d4e03ea 100644 --- a/src/core/src/op/concat.cpp +++ b/src/core/src/op/concat.cpp @@ -52,43 +52,35 @@ std::shared_ptr Concat::clone_with_new_inputs(const OutputVector& new_args return std::make_shared(new_args, m_axis); } -template -void evaluate_concat(const Concat* node, TensorVector& outputs, const TensorVector& inputs) { +bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Concat_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + const auto inputs_count = inputs.size(); std::vector arg_shapes; std::vector input_shapes; + std::vector arg_bufs; arg_shapes.reserve(inputs_count); input_shapes.reserve(inputs_count); + arg_bufs.reserve(inputs_count); - std::vector arg_bufs(inputs_count); - auto arg_buf = arg_bufs.begin(); for (auto& input : inputs) { - *arg_buf = static_cast(input.data()); - ++arg_buf; const auto& input_shape = input.get_shape(); arg_shapes.emplace_back(input_shape); input_shapes.emplace_back(input_shape); + arg_bufs.emplace_back(static_cast(input.data())); } - const auto& out_shape = shape_infer(node, input_shapes).front().to_shape(); + const auto& out_shape = shape_infer(this, input_shapes).front().to_shape(); outputs.front().set_shape(out_shape); + const auto elem_type = outputs.front().get_element_type(); reference::concat(arg_bufs, - static_cast(outputs.front().data()), + static_cast(outputs.front().data()), arg_shapes, out_shape, - ov::util::normalize(node->get_axis(), out_shape.size()), - outputs.front().get_element_type().size()); -} - -bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const { - OV_OP_SCOPE(v0_Concat_evaluate); - OPENVINO_ASSERT(outputs.size() == 1); - - if (outputs.front().get_element_type() == ov::element::string) { - evaluate_concat(this, outputs, inputs); - } else { - evaluate_concat(this, outputs, inputs); - } + ov::util::normalize(this->get_axis(), out_shape.size()), + elem_type.size(), + elem_type); return true; } From e6dc08651284e9798bd5bb3383136563e3c401ed Mon Sep 17 00:00:00 2001 From: Alicja Miloszewska Date: Mon, 4 Mar 2024 11:06:19 +0100 Subject: [PATCH 14/14] [OV JS] Fix typescript method definition (#23185) ### Details: - Move `getAvailableDevices` to `Core` definition --- src/bindings/js/node/lib/addon.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index ae75f479e655e1..7f4f9fd917cd13 100644 --- a/src/bindings/js/node/lib/addon.ts +++ b/src/bindings/js/node/lib/addon.ts @@ -36,6 +36,7 @@ interface Core { modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Promise; readModelSync(modelPath: string, weightsPath?: string): Model; readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model; + getAvailableDevices(): string[]; } interface CoreConstructor { new(): Core; @@ -81,7 +82,6 @@ interface InferRequest { inferAsync(inputData: { [inputName: string]: Tensor} | Tensor[] ): Promise<{ [outputName: string] : Tensor}>; getCompiledModel(): CompiledModel; - getAvailableDevices(): string[]; } type Dimension = number | [number, number];