diff --git a/src/core/shape_inference/include/tensor_data_accessor.hpp b/src/core/shape_inference/include/tensor_data_accessor.hpp
index 41c48641ad9ab2..300d859561b6da 100644
--- a/src/core/shape_inference/include/tensor_data_accessor.hpp
+++ b/src/core/shape_inference/include/tensor_data_accessor.hpp
@@ -18,6 +18,9 @@ class ITensorAccessor {
      * @return Tensor to data at port.
      */
     virtual Tensor operator()(size_t port) const = 0;
+
+protected:
+    ~ITensorAccessor() = default;
 };
 
 /**
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
index 2e6ceedbee7105..647a10581b4b65 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
@@ -9,8 +9,6 @@
 #include "event.hpp"
 #include "engine_configuration.hpp"
 
-#include "ngraph/runtime/host_tensor.hpp"
-
 #include <memory>
 
 #ifdef ENABLE_ONEDNN_FOR_GPU
@@ -247,16 +245,4 @@ inline std::vector<T> read_vector(cldnn::memory::ptr mem, const cldnn::stream& s
     return out_vecs;
 }
 
-inline std::shared_ptr<ngraph::runtime::HostTensor> make_host_tensor(layout l, void* memory_pointer) {
-    ov::element::Type et = data_type_to_element_type(l.data_type);
-
-    return std::make_shared<ngraph::runtime::HostTensor>(et, l.get_shape(), memory_pointer);
-}
-
-inline ov::Tensor make_tensor(layout l, void* memory_pointer) {
-    ov::element::Type et = data_type_to_element_type(l.data_type);
-
-    return ov::Tensor(et, l.get_shape(), memory_pointer);
-}
-
 } // namespace cldnn
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp
new file mode 100644
index 00000000000000..1072c1bdf7fe80
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp
@@ -0,0 +1,89 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/runtime/tensor.hpp"
+#include "tensor_data_accessor.hpp"
+
+#include "memory.hpp"
+#include "layout.hpp"
+
+namespace cldnn {
+
+inline ov::Tensor make_tensor(const layout& l, void* memory_pointer) {
+    ov::element::Type et = data_type_to_element_type(l.data_type);
+
+    return ov::Tensor(et, l.get_shape(), memory_pointer);
+}
+
+struct TensorsContainer final {
+    using MemoryMap = std::unordered_map<size_t, cldnn::memory::ptr>;
+    using TensorsMap = std::unordered_map<size_t, ov::Tensor>;
+
+    TensorsContainer(const cldnn::stream* stream, const std::map<size_t, cldnn::memory::ptr>& deps_map = {})
+        : m_stream(stream)
+        , m_memories(deps_map.begin(), deps_map.end()) { }
+
+    ~TensorsContainer() {
+        for (auto& port : m_locked_memories) {
+            m_memories.at(port)->unlock(*m_stream);
+        }
+    }
+
+    void emplace(size_t port, cldnn::memory::ptr mem) {
+        m_memories.emplace(port, mem);
+    }
+
+    void emplace(size_t port, const ov::Tensor& tensor) {
+        auto res = m_tensors.emplace(port, tensor);
+        OPENVINO_ASSERT(res.first != m_tensors.end());
+    }
+
+    template<typename ElementType>
+    void emplace(size_t port, std::vector<ElementType>& vector, data_types dt = data_types::i64) {
+        ov::Shape shape{vector.size()};
+        auto tensor = make_tensor({shape, dt, format::bfyx}, static_cast<void*>(vector.data()));
+        m_tensors.emplace(port, tensor);
+    }
+
+    size_t size() const { return m_tensors.size(); }
+    ov::Tensor operator[](std::size_t port) const {
+        if (m_memories.count(port) > 0) {
+            m_locked_memories.insert(port);
+            auto mem = m_memories.at(port);
+            auto ptr = mem->lock(*m_stream, cldnn::mem_lock_type::read);
+            return make_tensor(mem->get_layout(), ptr);
+        } else if (m_tensors.count(port) > 0) {
+            return m_tensors.at(port);
+        } else {
+            OPENVINO_THROW("[GPU] Can't get tensor for ", port, " port!\n");
+        }
+    }
+
+private:
+    const cldnn::stream* m_stream;
+    MemoryMap m_memories;
+    TensorsMap m_tensors;
+
+    mutable std::set<size_t> m_locked_memories = {};
+};
+
+class TensorAccessor final : public ov::ITensorAccessor {
+public:
+    explicit TensorAccessor(const TensorsContainer& container) : m_container(container) { }
+
+    ov::Tensor operator()(size_t port) const override {
+        return m_container[port];
+    }
+
+private:
+    const TensorsContainer& m_container;
+};
+
+inline cldnn::TensorAccessor make_tensor_accessor(const TensorsContainer& c) {
+    return cldnn::TensorAccessor(c);
+}
+
+} // namespace cldnn
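The new header gives shape inference a single entry point for mixed device/host inputs: GPU memories are locked lazily on first access through operator[] and unlocked when the container is destroyed, while host vectors are wrapped without copying. A minimal usage sketch (hypothetical `stream` and `shape_mem`; illustrative only, not code from this patch):

    #include "intel_gpu/runtime/tensor_accessor.hpp"

    void shape_infer_inputs_sketch(cldnn::stream& stream, cldnn::memory::ptr shape_mem) {
        cldnn::TensorsContainer const_data(&stream);          // unlocks any locked memories on scope exit
        const_data.emplace(1, shape_mem);                     // GPU buffer: locked lazily on first access
        std::vector<int64_t> axes{0, 2};                      // must outlive the accessor uses below
        const_data.emplace(2, axes, cldnn::data_types::i64);  // host vector: wrapped, not copied
        auto ta = cldnn::make_tensor_accessor(const_data);
        ov::Tensor shape_tensor = ta(1);                      // locks shape_mem, returns a non-owning view
        ov::Tensor axes_tensor = ta(2);
    }

The protected non-virtual destructor added to ov::ITensorAccessor in the first hunk fits this pattern: accessors are passed by value or reference into shape_infer and are never deleted through the interface pointer.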
diff --git a/src/plugins/intel_gpu/src/graph/CMakeLists.txt b/src/plugins/intel_gpu/src/graph/CMakeLists.txt
index efefc017cc6a0e..1f46bdfa82bb48 100644
--- a/src/plugins/intel_gpu/src/graph/CMakeLists.txt
+++ b/src/plugins/intel_gpu/src/graph/CMakeLists.txt
@@ -33,10 +33,9 @@ target_include_directories(${TARGET_NAME} PUBLIC
 target_compile_options(${TARGET_NAME} PRIVATE
   $<$<CONFIG:Release>:$<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os>>)
 
-target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL)
+target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL ov_shape_inference)
 target_link_libraries(${TARGET_NAME} PRIVATE openvino_intel_gpu_kernels
                                              openvino_intel_gpu_runtime
-                                             ov_shape_inference
                                              openvino::itt
                                              openvino::runtime::dev
                                              openvino::runtime)
diff --git a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp
index 28e3d41cae04d4..47303ece86cd82 100644
--- a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp
+++ b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp
@@ -87,17 +87,17 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const
     auto& constant_mem = impl_param.memory_deps;
 
     if (desc->top_k > 0) {
-        std::map<size_t, ngraph::HostTensorPtr> const_data;
+        std::unordered_map<size_t, ov::Tensor> const_data;
         auto topk = desc->top_k;
-        auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
+        auto top_k_tensor = ov::Tensor(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
         const_data = { {1, top_k_tensor} };
 
         output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
     } else if (constant_mem.count(1)) {
-        std::map<size_t, ngraph::HostTensorPtr> const_data;
+        std::unordered_map<size_t, ov::Tensor> const_data;
         auto target_shape_mem = constant_mem.at(1);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
+        const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
 
         output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
     } else {
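The same host-constant pattern recurs in most of the files below: wrap a stack value in a non-owning ov::Tensor, key it by port in an std::unordered_map, and hand the map to ov::make_tensor_accessor. A condensed, self-contained sketch using Pad, mirroring the border.cpp change further down (the include paths follow the plugin's internal shape-inference headers; values are hypothetical):

    #include "openvino/op/pad.hpp"
    #include "pad_shape_inference.hpp"

    ov::PartialShape infer_pad_sketch() {
        ov::op::v1::Pad op;
        op.set_pad_mode(ov::op::PadMode::CONSTANT);
        std::vector<int64_t> begin{1, 0}, end{2, 1};
        // Non-owning views: `begin`/`end` must stay alive through shape_infer.
        std::unordered_map<size_t, ov::Tensor> const_data{
            {1, ov::Tensor(ov::element::i64, ov::Shape{2}, begin.data())},
            {2, ov::Tensor(ov::element::i64, ov::Shape{2}, end.data())}};
        std::vector<ov::PartialShape> input_shapes{{10, 20}, {2}, {2}};
        auto out = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
        return out[0];  // {13, 21}: 10 + 1 + 2 and 20 + 0 + 1
    }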
diff --git a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp
index 59f51491e26212..b09f2bab6b9bbd 100644
--- a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp
+++ b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp
@@ -112,7 +112,7 @@ std::vector<layout> batch_to_space_inst::calc_output_layouts(batch_to_space_node
         end_shape
     };
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
     if (desc->shape_constant) {
         auto block_sizes = tensor_to_vec(block_data, input0_format);
         auto begin_sizes = tensor_to_vec(begin_data, input0_format);
@@ -122,9 +122,9 @@ std::vector<layout> batch_to_space_inst::calc_output_layouts(batch_to_space_node
         auto begin_values = static_cast<void*>(begin_sizes.data());
         auto end_values = static_cast<void*>(end_sizes.data());
 
-        auto block_tensor = make_host_tensor({ block_shape, data_types::i32, input0_format }, block_values);
-        auto begin_tensor = make_host_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
-        auto end_tensor = make_host_tensor({ end_shape, data_types::i32, input0_format }, end_values);
+        auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values);
+        auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
+        auto end_tensor = make_tensor({ end_shape, data_types::i32, input0_format }, end_values);
 
         const_data.emplace(1, block_tensor);
         const_data.emplace(2, begin_tensor);
@@ -140,9 +140,9 @@ std::vector<layout> batch_to_space_inst::calc_output_layouts(batch_to_space_node
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock2(begin_mem, impl_param.get_stream());
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock3(end_mem, impl_param.get_stream());
 
-        auto block_tensor = make_host_tensor(block_mem->get_layout(), lock1.data());
-        auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock2.data());
-        auto end_tensor = make_host_tensor(end_mem->get_layout(), lock3.data());
+        auto block_tensor = make_tensor(block_mem->get_layout(), lock1.data());
+        auto begin_tensor = make_tensor(begin_mem->get_layout(), lock2.data());
+        auto end_tensor = make_tensor(end_mem->get_layout(), lock3.data());
 
         const_data.emplace(1, block_tensor);
         const_data.emplace(2, begin_tensor);
diff --git a/src/plugins/intel_gpu/src/graph/border.cpp b/src/plugins/intel_gpu/src/graph/border.cpp
index 2f5eef40093c4d..2e662781b01d4a 100644
--- a/src/plugins/intel_gpu/src/graph/border.cpp
+++ b/src/plugins/intel_gpu/src/graph/border.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "border_inst.h"
+#include "intel_gpu/runtime/tensor_accessor.hpp"
 #include "pad_shape_inference.hpp"
 
 #include "intel_gpu/runtime/error_handler.hpp"
@@ -41,85 +42,55 @@ std::vector<layout> border_inst::calc_output_layouts(border_node const& /*node*/
         output_type = impl_param.get_fused_output_layout().data_type;
     }
 
+    size_t in_rank = input0_layout.get_partial_shape().size();
+
     ov::op::v1::Pad op;
     op.set_pad_mode(desc->pad_mode);
 
     const bool is_begin_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::BEGIN);
     const bool is_end_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::END);
 
-    layout pads_begin_layout, pads_end_layout;
-    if (is_begin_mem) {
-        pads_begin_layout = impl_param.get_input_layout(1);
-    }
-    if (is_end_mem) {
-        pads_end_layout = is_begin_mem ? impl_param.get_input_layout(2) : impl_param.get_input_layout(1);
+    const size_t begin_mem_idx = is_begin_mem ? 1 : 0;
+    const size_t end_mem_idx = is_begin_mem ? 2 : 1;
+
+    auto& memory_deps = impl_param.memory_deps;
+    if ((is_begin_mem && memory_deps.count(begin_mem_idx) == 0) ||
+        (is_end_mem && memory_deps.count(end_mem_idx) == 0)) {
+        return {layout{ShapeType::dynamic(static_cast<int64_t>(in_rank)), input0_layout.data_type, input0_layout.format}};
     }
 
-    ShapeType pads_begin_shape = is_begin_mem ? pads_begin_layout.get<ShapeType>() : ov::Shape{ desc->pads_begin.size() };
-    ShapeType pads_end_shape = is_end_mem ? pads_end_layout.get<ShapeType>() : ov::Shape{ desc->pads_end.size() };
-    std::vector<ShapeType> output_shapes;
+    int64_t begin_size = desc->pads_begin.size();
+    int64_t end_size = desc->pads_end.size();
+
+    layout pads_begin_layout = is_begin_mem ? impl_param.get_input_layout(begin_mem_idx) : layout({ begin_size }, data_types::i64, format::bfyx);
+    layout pads_end_layout = is_end_mem ? impl_param.get_input_layout(end_mem_idx) : layout({ end_size }, data_types::i64, format::bfyx);
+
     std::vector<ShapeType> input_shapes = {
         input0_layout.get<ShapeType>(),
-        pads_begin_shape,
-        pads_end_shape,
+        pads_begin_layout.get<ShapeType>(),
+        pads_end_layout.get<ShapeType>(),
     };
 
-    auto& memory_deps = impl_param.memory_deps;
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
-    auto ta = ov::make_tensor_accessor(const_data);
-
-    if ((is_begin_mem && memory_deps.count(1)) && (is_end_mem && memory_deps.count(2))) {
-        auto pads_begin_mem = memory_deps.at(1);
-        cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_begin_lock(pads_begin_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data()));
-
-        auto pads_end_mem = memory_deps.at(2);
-        cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_end_lock(pads_end_mem, impl_param.get_stream());
-        const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data()));
-
-        output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
-    } else if ((is_begin_mem || is_end_mem) && memory_deps.count(1)) {
-        if (is_begin_mem) {
-            auto pads_begin_mem = memory_deps.at(1);
-            cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_begin_lock(pads_begin_mem, impl_param.get_stream());
-            const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data()));
-
-            auto pads_end_data = desc->pads_end;
-            auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_end_data.data()));
-            const_data.emplace(2, pads_end_tensor);
-
-            output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
-        } else {
-            auto pads_begin_data = desc->pads_begin;
-            auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_begin_data.data()));
-            const_data.emplace(1, pads_begin_tensor);
-
-            auto pads_end_mem = memory_deps.at(1);
-            cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_end_lock(pads_end_mem, impl_param.get_stream());
-            const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data()));
-
-            output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
-        }
-    } else {
-        std::ptrdiff_t val = desc->pad_value;
+    TensorsContainer const_data(&impl_param.get_stream());
 
-        auto pads_begin_data = desc->pads_begin;
-        if (is_begin_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) {
-            pads_begin_data = {val, val, val, val};
-        }
-        auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_begin_data.data()));
-        const_data.emplace(1, pads_begin_tensor);
+    auto pads_begin_data = desc->pads_begin;
+    auto pads_end_data = desc->pads_end;
 
-        auto pads_end_data = desc->pads_end;
-        if (is_end_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) {
-            pads_end_data = {val, val, val, val};
-        }
-        auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_end_data.data()));
-        const_data.emplace(2, pads_end_tensor);
+    if (is_begin_mem) {
+        const_data.emplace(1, memory_deps.at(begin_mem_idx));
+    } else {
+        const_data.emplace(1, make_tensor(pads_begin_layout, static_cast<void*>(pads_begin_data.data())));
+    }
 
-        output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
+    if (is_end_mem) {
+        const_data.emplace(2, memory_deps.at(end_mem_idx));
+    } else {
+        const_data.emplace(2, make_tensor(pads_end_layout, static_cast<void*>(pads_end_data.data())));
     }
 
+    auto ta = cldnn::make_tensor_accessor(const_data);
+    std::vector<ShapeType> output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
+
     format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());
 
     return { layout{output_shapes[0],
output_type, output_format} };
diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp
index bc4eaab28a9816..fdce54766b26dd 100644
--- a/src/plugins/intel_gpu/src/graph/broadcast.cpp
+++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp
@@ -61,11 +61,11 @@ std::vector<layout> broadcast_inst::calc_output_layouts(broadcast_node const& /*
     auto axes_mapping = desc->axes_mapping.to_vector();
     ShapeType axes_mapping_shape = ov::Shape{axes_mapping.size()};
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
     if (third_input_needed) {
         input_shapes.emplace_back(axes_mapping_shape);
 
-        auto axes_mapping_tensor = make_host_tensor({axes_mapping_shape, data_types::i64, format::bfyx},
+        auto axes_mapping_tensor = make_tensor({axes_mapping_shape, data_types::i64, format::bfyx},
                                                static_cast<void*>(axes_mapping.data()));
         const_data.emplace(2, axes_mapping_tensor);
     }
@@ -74,12 +74,11 @@ std::vector<layout> broadcast_inst::calc_output_layouts(broadcast_node const& /*
     if (constant_mem.count(1)) {
         auto target_shape_mem = constant_mem.at(1);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
+        const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
         output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
     } else if (impl_param.input_layouts.size() == 1) {
         // predefined pattern shape
-        auto target_shape_tensor = make_host_tensor({pattern_shape, data_types::i64, format::bfyx},
-                                                    static_cast<void*>(target_shape.data()));
+        auto target_shape_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast<void*>(target_shape.data()));
         const_data.emplace(1, target_shape_tensor);
         output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
     } else if (impl_param.input_layouts.size() >= 2) {
diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp
index 78b993d5d6ed99..f103738baeb864 100644
--- a/src/plugins/intel_gpu/src/graph/crop.cpp
+++ b/src/plugins/intel_gpu/src/graph/crop.cpp
@@ -56,17 +56,17 @@ std::vector<layout> crop_inst::calc_output_layouts(const crop_node& /*node*/, co
     // TODO: calling shape_infer for all cropped outpus is redundant... Need to optimize.
     if (desc->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) {
-        std::map<size_t, ngraph::HostTensorPtr> const_data;
+        std::unordered_map<size_t, ov::Tensor> const_data;
 
         OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph VariadicSplit op mode) axis values memory dependency");
         auto axis_values_mem = impl_param.memory_deps.at(1);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> axis_values_mem_lock(axis_values_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
+        const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
 
         if (impl_param.memory_deps.count(2) > 0) {
             auto split_length_mem = impl_param.memory_deps.at(2);
             cldnn::mem_lock<uint8_t, mem_lock_type::read> split_length_mem_lock(split_length_mem, impl_param.get_stream());
-            const_data.emplace(2, make_host_tensor(split_length_mem->get_layout(), split_length_mem_lock.data()));
+            const_data.emplace(2, make_tensor(split_length_mem->get_layout(), split_length_mem_lock.data()));
 
             ov::op::v1::VariadicSplit op;
             output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
@@ -76,12 +76,12 @@ std::vector<layout> crop_inst::calc_output_layouts(const crop_node& /*node*/, co
             return { layout{out_shape, input0_layout.data_type, input0_layout.format } };
         }
     } else if (desc->op_mode == cldnn::crop_ngraph_op_mode::split) {
-        std::map<size_t, ngraph::HostTensorPtr> const_data;
+        std::unordered_map<size_t, ov::Tensor> const_data;
 
         OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph Split op mode) axis values memory dependency");
         auto axis_values_mem = impl_param.memory_deps.at(1);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> axis_values_mem_lock(axis_values_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
+        const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
 
         ov::op::v1::Split op;
         op.set_num_splits(desc->num_splits);
diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp
index daa3416ad3235d..985ac85131dd05 100644
--- a/src/plugins/intel_gpu/src/graph/gather.cpp
+++ b/src/plugins/intel_gpu/src/graph/gather.cpp
@@ -88,8 +88,8 @@ std::vector<layout> gather_inst::calc_output_layouts(gather_node const& /*node*/
 
     int64_t axis = desc->axis;
 
-    auto axis_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, static_cast<void*>(&axis));
-    std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {{2, axis_tensor}};
+    auto axis_tensor = ov::Tensor(ov::element::i64, ov::Shape{1}, static_cast<void*>(&axis));
+    std::unordered_map<size_t, ov::Tensor> const_data = {{2, axis_tensor}};
     output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
 
     format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp
index a482925dfd8bec..e4318334b17060 100644
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp
@@ -46,8 +46,8 @@ struct range_impl : public typed_primitive_impl<range> {
         auto ev = stream.create_user_event(false);
         auto params = instance.get_impl_params();
 
-        ov::HostTensorVector input_host_tensors;
-        ov::HostTensorVector output_host_tensors;
+        ov::TensorVector input_host_tensors;
+        ov::TensorVector output_host_tensors;
 
         std::vector<memory::ptr> input_mem_ptrs;
         for (size_t i = 0; i < instance.dependencies().size(); i++)
             input_mem_ptrs.push_back(instance.dep_memory_ptr(i));
 
         auto output_mem_ptr = instance.output_memory_ptr();
 
         cldnn::mem_lock<uint8_t, mem_lock_type::write> output_lock(output_mem_ptr, stream);
 
         for (size_t i = 0; i < input_mem_ptrs.size(); i++)
-            input_host_tensors.push_back(make_host_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read)));
+            input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read)));
 
-        output_host_tensors.push_back(make_host_tensor(params->output_layouts[0], output_lock.data()));
+        output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data()));
 
         if (!op) {
             const auto output_dt = params->get_output_layout().data_type;
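With ov::TensorVector, the evaluation below this hunk presumably goes through the Tensor-based Op::evaluate overload rather than the deprecated HostTensor one. A standalone sketch of that call shape (hypothetical scalar values; output preallocated to five elements):

    #include "openvino/op/range.hpp"

    void range_evaluate_sketch() {
        auto op = std::make_shared<ov::op::v4::Range>();
        op->set_output_type(ov::element::f32);
        float start = 0.f, stop = 5.f, step = 1.f;
        ov::TensorVector inputs{ov::Tensor(ov::element::f32, ov::Shape{}, &start),
                                ov::Tensor(ov::element::f32, ov::Shape{}, &stop),
                                ov::Tensor(ov::element::f32, ov::Shape{}, &step)};
        ov::TensorVector outputs{ov::Tensor(ov::element::f32, ov::Shape{5})};
        op->evaluate(outputs, inputs);  // outputs[0] now holds {0, 1, 2, 3, 4}
    }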
diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
index 098b44226aa4dd..3244ad20b49869 100644
--- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
@@ -8,6 +8,7 @@
 #include "intel_gpu/runtime/event.hpp"
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/lru_cache.hpp"
+#include "intel_gpu/runtime/tensor_accessor.hpp"
 #include "intel_gpu/graph/network.hpp"
 #include "intel_gpu/runtime/utils.hpp"
 #include "program_node.h"
diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp
index 0ed005788965d4..bc156ccc75fda4 100644
--- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp
+++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp
@@ -38,12 +38,12 @@ std::vector<layout> non_max_suppression_inst::calc_output_layouts(non_max_suppre
     };
 
     auto& memory_deps = impl_param.memory_deps;
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
 
     if (memory_deps.count(2)) {
         auto max_output_boxes_per_class_mem = memory_deps.at(2);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> max_output_boxes_per_class_lock(max_output_boxes_per_class_mem, impl_param.get_stream());
-        auto max_output_boxes_per_class_tensor = make_host_tensor(max_output_boxes_per_class_mem->get_layout(),
+        auto max_output_boxes_per_class_tensor = make_tensor(max_output_boxes_per_class_mem->get_layout(),
                                                              max_output_boxes_per_class_lock.data());
 
         const_data.emplace(2, max_output_boxes_per_class_tensor);
diff --git a/src/plugins/intel_gpu/src/graph/one_hot.cpp b/src/plugins/intel_gpu/src/graph/one_hot.cpp
index 32dd93633bed0c..01116bd987c263 100644
--- a/src/plugins/intel_gpu/src/graph/one_hot.cpp
+++ b/src/plugins/intel_gpu/src/graph/one_hot.cpp
@@ -67,8 +67,8 @@ std::vector<layout> one_hot_inst::calc_output_layouts(const one_hot_node& /*node
 
     int64_t depth = desc->depth;
 
-    auto depth_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, static_cast<void*>(&depth));
-    std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {
+    auto depth_tensor = ov::Tensor(ov::element::i64, ov::Shape{1}, static_cast<void*>(&depth));
+    std::unordered_map<size_t, ov::Tensor> const_data = {
         {1, depth_tensor}
     };
 
     std::vector<ShapeType> output_shapes =
diff --git a/src/plugins/intel_gpu/src/graph/prior_box.cpp b/src/plugins/intel_gpu/src/graph/prior_box.cpp
index 191d3cc947dba0..899f0db6f2ba4a 100644
--- a/src/plugins/intel_gpu/src/graph/prior_box.cpp
+++ b/src/plugins/intel_gpu/src/graph/prior_box.cpp
@@ -440,7 +440,7 @@ std::vector<layout> prior_box_inst::calc_output_layouts(prior_box_node const& /*
         impl_param.get_input_layout(1).get<ShapeType>()
     };
     std::vector<ShapeType> output_shapes = {ShapeType()};
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
 
     auto& memory_deps = impl_param.memory_deps;
 
@@ -451,7 +451,7 @@ std::vector<layout> prior_box_inst::calc_output_layouts(prior_box_node const& /*
         cldnn::mem_lock<uint8_t, mem_lock_type::read> output_size_lock(output_size_mem, impl_param.get_stream());
         cldnn::mem_lock<uint8_t, mem_lock_type::read> img_size_lock(img_size_mem, impl_param.get_stream());
 
-        const_data.emplace(0, make_host_tensor(output_size_mem->get_layout(), output_size_lock.data()));
+        const_data.emplace(0, make_tensor(output_size_mem->get_layout(), output_size_lock.data()));
 
         auto p_param = const_cast<kernel_impl_params*>(&impl_param);
         if (output_size_mem->get_layout().data_type == cldnn::data_types::i64) {
diff --git a/src/plugins/intel_gpu/src/graph/random_uniform.cpp b/src/plugins/intel_gpu/src/graph/random_uniform.cpp
index 31ab7894ac760f..6d665caa4890c0 100644
--- a/src/plugins/intel_gpu/src/graph/random_uniform.cpp
+++ b/src/plugins/intel_gpu/src/graph/random_uniform.cpp
@@ -33,18 +33,18 @@ std::vector<layout> random_uniform_inst::calc_output_layouts(random_uniform_node
         impl_param.get_input_layout(2).get_partial_shape() };
 
     auto& memory_deps = impl_param.memory_deps;
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
 
     auto run_shape_infer = [&]() {
         ov::op::v8::RandomUniform op;
         if (memory_deps.count(1) > 0 && memory_deps.count(2) > 0) {
             auto min_val = memory_deps.at(1);
             cldnn::mem_lock<uint8_t, mem_lock_type::read> min_val_lock(min_val, impl_param.get_stream());
-            const_data.emplace(1, make_host_tensor(min_val->get_layout(), min_val_lock.data()));
+            const_data.emplace(1, make_tensor(min_val->get_layout(), min_val_lock.data()));
 
             auto max_val = memory_deps.at(2);
             cldnn::mem_lock<uint8_t, mem_lock_type::read> max_val_lock(max_val, impl_param.get_stream());
-            const_data.emplace(2, make_host_tensor(max_val->get_layout(), max_val_lock.data()));
+            const_data.emplace(2, make_tensor(max_val->get_layout(), max_val_lock.data()));
 
             return ov::op::v8::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
         } else {
@@ -55,7 +55,7 @@ std::vector<layout> random_uniform_inst::calc_output_layouts(random_uniform_node
     if (memory_deps.count(0) > 0) {
         auto output_shape = memory_deps.at(0);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> output_shape_lock(output_shape, impl_param.get_stream());
-        const_data.emplace(0, make_host_tensor(output_shape->get_layout(), output_shape_lock.data()));
+        const_data.emplace(0, make_tensor(output_shape->get_layout(), output_shape_lock.data()));
 
         output_shapes = run_shape_infer();
     } else {
diff --git a/src/plugins/intel_gpu/src/graph/range.cpp b/src/plugins/intel_gpu/src/graph/range.cpp
index c1dae775f19ee2..0b57793bb6650a 100644
--- a/src/plugins/intel_gpu/src/graph/range.cpp
+++ b/src/plugins/intel_gpu/src/graph/range.cpp
@@ -34,21 +34,21 @@ std::vector<layout> range_inst::calc_output_layouts(range_node const& /*node*/,
     std::vector<ShapeType> output_shapes = {ShapeType::dynamic(1)};
     std::vector<ShapeType> input_shapes = {ov::Shape(), ov::Shape(), ov::Shape()};
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
 
     auto& memory_deps = impl_param.memory_deps;
     if (memory_deps.count(0) > 0 && memory_deps.count(1) > 0 && memory_deps.count(2) > 0) {
         auto start_mem = memory_deps.at(0);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> start_mem_lock(start_mem, impl_param.get_stream());
-        const_data.emplace(0, make_host_tensor(start_mem->get_layout(), start_mem_lock.data()));
+        const_data.emplace(0, make_tensor(start_mem->get_layout(), start_mem_lock.data()));
 
         auto stop_mem = memory_deps.at(1);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> stop_mem_lock(stop_mem, impl_param.get_stream());
-        const_data.emplace(1, make_host_tensor(stop_mem->get_layout(), stop_mem_lock.data()));
+        const_data.emplace(1, make_tensor(stop_mem->get_layout(), stop_mem_lock.data()));
 
         auto step_mem = memory_deps.at(2);
         cldnn::mem_lock<uint8_t, mem_lock_type::read> step_mem_lock(step_mem, impl_param.get_stream());
-        const_data.emplace(2, make_host_tensor(step_mem->get_layout(), step_mem_lock.data()));
+        const_data.emplace(2, make_tensor(step_mem->get_layout(), step_mem_lock.data()));
 
         output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
     }
diff --git a/src/plugins/intel_gpu/src/graph/reduce.cpp b/src/plugins/intel_gpu/src/graph/reduce.cpp
index d5a66fb0acb2d0..ee57a7984a4173 100644
--- a/src/plugins/intel_gpu/src/graph/reduce.cpp
+++ b/src/plugins/intel_gpu/src/graph/reduce.cpp
@@ -107,8 +107,8 @@ std::vector<layout> reduce_inst::calc_output_layouts(reduce_node const& /*node*/
     std::vector<ShapeType> output_shapes = {ShapeType()};
 
     auto axes = desc->axes;
-    auto axes_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{axes.size()}, axes.data());
-    std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {{1, axes_tensor}};
+    auto axes_tensor = ov::Tensor(ov::element::i64, ov::Shape{axes.size()}, axes.data());
+    std::unordered_map<size_t, ov::Tensor> const_data = {{1, axes_tensor}};
     auto ta = ov::make_tensor_accessor(const_data);
 
     // shape infer by mode
diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp
index 2d04188a3c3a7d..9fc276469fb7c8 100644
--- a/src/plugins/intel_gpu/src/graph/reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/reshape.cpp
@@ -85,7 +85,7 @@ std::vector<layout> reshape_inst::calc_output_layouts(reshape_node const& /*node
         pattern_shape,
     };
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
     const auto ta = ov::make_tensor_accessor(const_data);
 
     auto run_shape_infer = [&](reshape::reshape_mode mode) {
@@ -120,13 +120,13 @@ std::vector<layout> reshape_inst::calc_output_layouts(reshape_node const& /*node
         cldnn::mem_lock<uint8_t, mem_lock_type::read> pattern_lock(pattern_mem, impl_param.get_stream());
         auto pattern_ptr = pattern_lock.data();
 
-        auto pattern_tensor = make_host_tensor(pattern_mem->get_layout(), pattern_ptr);
+        auto pattern_tensor = make_tensor(pattern_mem->get_layout(), pattern_ptr);
         const_data.emplace(1, pattern_tensor);
 
         run_shape_infer(prim->mode);
     } else {
         auto pattern_data = prim->output_pattern;
-        auto pattern_tensor = make_host_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast<void*>(pattern_data.data()));
+        auto pattern_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast<void*>(pattern_data.data()));
         const_data.emplace(1, pattern_tensor);
 
         run_shape_infer(prim->mode);
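Throughout these hunks, make_tensor produces a non-owning view: the ov::Tensor borrows the buffer described by the cldnn::layout, so the backing storage (a locked memory or a local vector) must outlive every accessor call. A freestanding sketch (hypothetical pattern vector):

    std::vector<int64_t> pattern{0, -1};
    cldnn::layout l({2}, cldnn::data_types::i64, cldnn::format::bfyx);
    ov::Tensor view = cldnn::make_tensor(l, static_cast<void*>(pattern.data()));
    // view.get_element_type() == ov::element::i64 and view.get_shape() == ov::Shape{2};
    // no copy is made, so `pattern` must stay alive while `view` is used.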
diff --git a/src/plugins/intel_gpu/src/graph/space_to_batch.cpp b/src/plugins/intel_gpu/src/graph/space_to_batch.cpp
index 61fa6d45ed220a..159778951d8b81 100644
--- a/src/plugins/intel_gpu/src/graph/space_to_batch.cpp
+++ b/src/plugins/intel_gpu/src/graph/space_to_batch.cpp
@@ -101,7 +101,7 @@ std::vector<layout> space_to_batch_inst::calc_output_layouts(space_to_batch_node
         end_shape
     };
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
     if (desc->shape_constant) {
         auto block_sizes = tensor_to_vec(block_data, input0_format);
         auto begin_sizes = tensor_to_vec(begin_data, input0_format);
@@ -111,9 +111,9 @@ std::vector<layout> space_to_batch_inst::calc_output_layouts(space_to_batch_node
         auto begin_values = static_cast<void*>(begin_sizes.data());
         auto end_values = static_cast<void*>(end_sizes.data());
 
-        auto block_tensor = make_host_tensor({ block_shape, data_types::i32, input0_format }, block_values);
-        auto begin_tensor = make_host_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
-        auto end_tensor = make_host_tensor({ end_shape, data_types::i32, input0_format }, end_values);
+        auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values);
+        auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
+        auto end_tensor = make_tensor({ end_shape, data_types::i32, input0_format }, end_values);
 
         const_data.emplace(1, block_tensor);
         const_data.emplace(2, begin_tensor);
@@ -129,9 +129,9 @@ std::vector<layout> space_to_batch_inst::calc_output_layouts(space_to_batch_node
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock2(begin_mem, impl_param.get_stream());
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock3(end_mem, impl_param.get_stream());
 
-        auto block_tensor = make_host_tensor(block_mem->get_layout(), lock1.data());
-        auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock2.data());
-        auto end_tensor = make_host_tensor(end_mem->get_layout(), lock3.data());
+        auto block_tensor = make_tensor(block_mem->get_layout(), lock1.data());
+        auto begin_tensor = make_tensor(begin_mem->get_layout(), lock2.data());
+        auto end_tensor = make_tensor(end_mem->get_layout(), lock3.data());
 
         const_data.emplace(1, block_tensor);
         const_data.emplace(2, begin_tensor);
diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp
index 3f334e58ee8d05..2bd2fcd0c483fb 100644
--- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp
+++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp
@@ -67,12 +67,12 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
     op.set_shrink_axis_mask(desc->shrink_axis_mask);
     op.set_ellipsis_mask_mask(desc->ellipsis_mask);
 
-    std::map<size_t, ngraph::HostTensorPtr> const_data;
+    std::unordered_map<size_t, ov::Tensor> const_data;
     const auto ta = ov::make_tensor_accessor(const_data);
     if (!begin_data.empty() && !end_data.empty() && !strides_data.empty()) {
-        auto begin_tensor = make_host_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast<void*>(begin_data.data()));
-        auto end_tensor = make_host_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast<void*>(end_data.data()));
-        auto strides_tensor = make_host_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast<void*>(strides_data.data()));
+        auto begin_tensor = make_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast<void*>(begin_data.data()));
+        auto end_tensor = make_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast<void*>(end_data.data()));
+        auto strides_tensor = make_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast<void*>(strides_data.data()));
 
         const_data.emplace(1, begin_tensor);
         const_data.emplace(2, end_tensor);
@@ -88,9 +88,9 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock2(end_mem, impl_param.get_stream());
         cldnn::mem_lock<uint8_t, mem_lock_type::read> lock3(strides_mem, impl_param.get_stream());
 
-        auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock1.data());
-        auto end_tensor = make_host_tensor(end_mem->get_layout(), lock2.data());
-        auto strides_tensor = make_host_tensor(strides_mem->get_layout(), lock3.data());
+        auto begin_tensor = make_tensor(begin_mem->get_layout(), lock1.data());
+        auto end_tensor = make_tensor(end_mem->get_layout(), lock2.data());
+        auto strides_tensor = make_tensor(strides_mem->get_layout(), lock3.data());
 
         const_data.emplace(1, begin_tensor);
         const_data.emplace(2, end_tensor);
diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp
index 8a507ee45e1c7e..4d931f6cdc97b0 100644
--- a/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/shape_infer/pad_si_test.cpp
@@ -177,21 +177,28 @@ INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_begin,
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{3, 4, 36, 48}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {1, -1}, {4, -1}, {8, -1}}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
+        },
+        {
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7},
+            ov::op::PadMode::EDGE, 1.f,
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 0},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {1, -1}}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}
         }
     }));
@@ -234,21 +241,21 @@ INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_end,
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{3, 4, 36, 48}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 0, 3, 7},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {1, -1}, {4, -1}, {8, -1}}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 0},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {1, -1}}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}
         }
     }));
@@ -294,21 +301,92 @@ INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_begin_end,
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{3, 5, 34, 42}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {},
-            ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {2, -1}, {2, -1}, {2, -1}}, data_types::f32, format::bfyx}
+            ov::op::PadMode::EDGE, 1.f,
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {},
             ov::op::PadMode::CONSTANT, 1.f,
-            layout{ov::PartialShape{{2, -1}, {2, -1}}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}
+        }
+    }));
+
+class pad_test_non_constant_input_begin_end_with_data : public testing::TestWithParam<pad_si_test_params> { };
+
+TEST_P(pad_test_non_constant_input_begin_end_with_data, shape_infer) {
+    auto p = GetParam();
+
+    auto& engine = get_test_engine();
+
+    auto input0_prim = std::make_shared<input_layout>("input0", p.in_layout);
+    auto input1_prim = std::make_shared<input_layout>("input1", p.pads_begin_layout);
+    auto input2_prim = std::make_shared<input_layout>("input2", p.pads_end_layout);
+
+    auto border_prim = std::make_shared<border>("output",
+                                                std::vector<input_info>({input_info("input0"), input_info("input1"), input_info("input2")}),
+                                                border::PAD_NON_CONST_INPUT::BEGIN | border::PAD_NON_CONST_INPUT::END,
+                                                p.pads_begin_data,
+                                                p.pads_end_data,
+                                                p.pad_mode,
+                                                p.pad_value);
+    cldnn::program prog(engine);
+
+    auto& input0_node = prog.get_or_create(input0_prim);
+    auto& input1_node = prog.get_or_create(input1_prim);
+    auto& input2_node = prog.get_or_create(input2_prim);
+    auto& border_node = prog.get_or_create(border_prim);
+
+    program_wrapper::add_connection(prog, input0_node, border_node);
+    program_wrapper::add_connection(prog, input1_node, border_node);
+    program_wrapper::add_connection(prog, input2_node, border_node);
+
+    auto begin_mem = engine.allocate_memory(p.pads_begin_layout);
+    auto end_mem = engine.allocate_memory(p.pads_end_layout);
+
+    set_values(begin_mem, p.pads_begin_data);
+    set_values(end_mem, p.pads_end_data);
+
+    auto impl_params = border_node.get_kernel_impl_params();
+    impl_params->memory_deps = {
+        {1, begin_mem},
+        {2, end_mem}
+    };
+
+    auto res = border_inst::calc_output_layouts<ov::PartialShape>(border_node, *impl_params);
+
+    ASSERT_EQ(res.size(), 1);
+    ASSERT_EQ(res[0], p.expected_layout);
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke, pad_test_non_constant_input_begin_end_with_data,
+    testing::ValuesIn(std::vector<pad_si_test_params>{
+        {
+            layout{ov::PartialShape{1, 3, 32, 40}, data_types::f32, format::bfyx},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 2, 3, 4},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {3, 2, 1, 0},
+            ov::op::PadMode::CONSTANT, 1.f,
+            layout{ov::PartialShape{5, 7, 36, 44}, data_types::f32, format::bfyx}
+        },
+        {
+            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {1, 2, 3, 4},
+            layout{ov::PartialShape{4}, data_types::i64, format::bfyx}, {3, 2, 1, 0},
+            ov::op::PadMode::EDGE, 1.f,
+            layout{ov::PartialShape{{4, -1}, {4, -1}, {4, -1}, {4, -1}}, data_types::f32, format::bfyx}
+        },
+        {
+            layout{ov::PartialShape{10, 20}, data_types::f32, format::bfyx},
+            layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 2},
+            layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {3, 4},
+            ov::op::PadMode::EDGE, 1.f,
+            layout{ov::PartialShape{14, 26}, data_types::f32, format::bfyx}
+        }
+        }
+    }));