Commit
[GPU] Use ov::Tensor instead of ngraph::HostTensor (openvinotoolkit#18925)
vladimir-paramuzov authored Aug 10, 2023
1 parent 7d192c9 commit cde0893
Showing 22 changed files with 274 additions and 148 deletions.
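The change is largely mechanical across the files below: every place that wrapped raw memory in a heap-allocated ngraph::runtime::HostTensor now builds a lightweight ov::Tensor view over the same pointer. A minimal sketch of the substitution, not taken verbatim from the commit (the scalar and its type are illustrative):

#include "openvino/runtime/tensor.hpp"

int64_t axis = 0;  // illustrative host-side scalar fed to shape inference

// Before this commit:
//   auto t = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, &axis);
// After this commit: ov::Tensor wraps the existing buffer without copying or taking ownership
ov::Tensor t(ov::element::i64, ov::Shape{1}, static_cast<void*>(&axis));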
3 changes: 3 additions & 0 deletions src/core/shape_inference/include/tensor_data_accessor.hpp
@@ -18,6 +18,9 @@ class ITensorAccessor {
* @return Tensor to data at port.
*/
virtual Tensor operator()(size_t port) const = 0;

protected:
~ITensorAccessor() = default;
};

/**
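The only change to the core header is the protected, non-virtual destructor on ITensorAccessor, which prevents deletion through the interface pointer while leaving the interface usable as a base. A short sketch of the effect, using a hypothetical accessor that is not part of this commit:

#include "tensor_data_accessor.hpp"

struct DummyAccessor : ov::ITensorAccessor {          // hypothetical implementation
    ov::Tensor operator()(size_t port) const override { return {}; }
};

void infer_with(const ov::ITensorAccessor& acc) {
    ov::Tensor t = acc(0);                             // calling through the interface still works
}

// ov::ITensorAccessor* p = new DummyAccessor();
// delete p;  // no longer compiles: ~ITensorAccessor() is protected and non-virtual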
14 changes: 0 additions & 14 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
@@ -9,8 +9,6 @@
#include "event.hpp"
#include "engine_configuration.hpp"

#include "ngraph/runtime/host_tensor.hpp"

#include <type_traits>

#ifdef ENABLE_ONEDNN_FOR_GPU
@@ -247,16 +245,4 @@ inline std::vector<T> read_vector(cldnn::memory::ptr mem, const cldnn::stream& s
return out_vecs;
}

inline std::shared_ptr<ngraph::runtime::HostTensor> make_host_tensor(layout l, void* memory_pointer) {
ov::element::Type et = data_type_to_element_type(l.data_type);

return std::make_shared<ngraph::runtime::HostTensor>(et, l.get_shape(), memory_pointer);
}

inline ov::Tensor make_tensor(layout l, void* memory_pointer) {
ov::element::Type et = data_type_to_element_type(l.data_type);

return ov::Tensor(et, l.get_shape(), memory_pointer);
}

} // namespace cldnn
89 changes: 89 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/tensor_accessor.hpp
@@ -0,0 +1,89 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/runtime/tensor.hpp"
#include "tensor_data_accessor.hpp"

#include "memory.hpp"
#include "layout.hpp"

namespace cldnn {

inline ov::Tensor make_tensor(const layout& l, void* memory_pointer) {
ov::element::Type et = data_type_to_element_type(l.data_type);

return ov::Tensor(et, l.get_shape(), memory_pointer);
}

struct TensorsContainer final {
using MemoryMap = std::unordered_map<size_t, cldnn::memory::ptr>;
using TensorsMap = std::unordered_map<size_t, ov::Tensor>;

TensorsContainer(const cldnn::stream* stream, const std::map<size_t, cldnn::memory::ptr>& deps_map = {})
: m_stream(stream)
, m_memories(deps_map.begin(), deps_map.end()) { }

~TensorsContainer() {
for (auto& port : m_locked_memories) {
m_memories.at(port)->unlock(*m_stream);
}
}

void emplace(size_t port, cldnn::memory::ptr mem) {
m_memories.emplace(port, mem);
}

void emplace(size_t port, const ov::Tensor& tensor) {
auto res = m_tensors.emplace(port, tensor);
OPENVINO_ASSERT(res.first != m_tensors.end());
}

template<typename ElementType>
void emplace(size_t port, std::vector<ElementType>& vector, data_types dt = data_types::i64) {
ov::Shape shape{vector.size()};
auto tensor = make_tensor({shape, dt, format::bfyx}, static_cast<void*>(vector.data()));
m_tensors.emplace(port, tensor);
}

size_t size() const { return m_tensors.size(); }
ov::Tensor operator[](std::size_t port) const {
if (m_memories.count(port) > 0) {
m_locked_memories.insert(port);
auto mem = m_memories.at(port);
auto ptr = mem->lock(*m_stream, cldnn::mem_lock_type::read);
return make_tensor(mem->get_layout(), ptr);
} else if (m_tensors.count(port) > 0) {
return m_tensors.at(port);
} else {
OPENVINO_THROW("[GPU] Can't get tensor for ", port, " port!\n");
}
}

private:
const cldnn::stream* m_stream;
MemoryMap m_memories;
TensorsMap m_tensors;

mutable std::set<size_t> m_locked_memories = {};
};

class TensorAccessor final : public ov::ITensorAccessor {
public:
explicit TensorAccessor(const TensorsContainer& container) : m_container(container) { }

ov::Tensor operator()(size_t port) const override {
return m_container[port];
}

private:
const TensorsContainer& m_container;
};

inline cldnn::TensorAccessor make_tensor_accessor(const TensorsContainer& c) {
return cldnn::TensorAccessor(c);
}

} // namespace cldnn
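A condensed usage sketch of the new helpers, mirroring the call sites rewritten below (op, input_shapes, and impl_param stand in for node-specific locals; the port numbers and pad values are illustrative):

cldnn::TensorsContainer const_data(&impl_param.get_stream());

// GPU memory dependency: locked lazily on first access through operator[], unlocked in the destructor
const_data.emplace(1, impl_param.memory_deps.at(1));

// Host-side constant: wrapped in place as an ov::Tensor (i64 element type by default)
std::vector<int64_t> pads_end = {0, 0, 1, 1};
const_data.emplace(2, pads_end);

auto ta = cldnn::make_tensor_accessor(const_data);
auto output_shapes = ov::op::shape_infer(&op, input_shapes, ta);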
3 changes: 1 addition & 2 deletions src/plugins/intel_gpu/src/graph/CMakeLists.txt
@@ -33,10 +33,9 @@ target_include_directories(${TARGET_NAME} PUBLIC
target_compile_options(${TARGET_NAME} PRIVATE
$<$<CONFIG:Release>:$<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os>>)

target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL)
target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL ov_shape_inference)
target_link_libraries(${TARGET_NAME} PRIVATE openvino_intel_gpu_kernels
openvino_intel_gpu_runtime
ov_shape_inference
openvino::itt
openvino::runtime::dev
openvino::runtime)
8 changes: 4 additions & 4 deletions src/plugins/intel_gpu/src/graph/arg_max_min.cpp
@@ -87,17 +87,17 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const

auto& constant_mem = impl_param.memory_deps;
if (desc->top_k > 0) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;
auto topk = desc->top_k;
auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
auto top_k_tensor = ov::Tensor(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
const_data = { {1, top_k_tensor} };

output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
} else if (constant_mem.count(1)) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;
auto target_shape_mem = constant_mem.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));

output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
} else {
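For nodes whose constant inputs are already host-side values, the lighter pattern used above suffices: collect ov::Tensor views in an unordered_map keyed by input port and hand it to ov::make_tensor_accessor. A sketch mirroring the top_k branch (topk, op, and input_shapes stand in for the locals of the real function):

uint32_t topk = 5;  // illustrative value
std::unordered_map<size_t, ov::Tensor> const_data = {
    {1, ov::Tensor(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk))},
};
auto output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));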
14 changes: 7 additions & 7 deletions src/plugins/intel_gpu/src/graph/batch_to_space.cpp
@@ -112,7 +112,7 @@ std::vector<layout> batch_to_space_inst::calc_output_layouts(batch_to_space_node
end_shape
};

std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;
if (desc->shape_constant) {
auto block_sizes = tensor_to_vec(block_data, input0_format);
auto begin_sizes = tensor_to_vec(begin_data, input0_format);
@@ -122,9 +122,9 @@
auto begin_values = static_cast<void*>(begin_sizes.data());
auto end_values = static_cast<void*>(end_sizes.data());

auto block_tensor = make_host_tensor({ block_shape, data_types::i32, input0_format }, block_values);
auto begin_tensor = make_host_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
auto end_tensor = make_host_tensor({ end_shape, data_types::i32, input0_format }, end_values);
auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values);
auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values);
auto end_tensor = make_tensor({ end_shape, data_types::i32, input0_format }, end_values);

const_data.emplace(1, block_tensor);
const_data.emplace(2, begin_tensor);
@@ -140,9 +140,9 @@
cldnn::mem_lock<uint8_t, mem_lock_type::read> lock2(begin_mem, impl_param.get_stream());
cldnn::mem_lock<uint8_t, mem_lock_type::read> lock3(end_mem, impl_param.get_stream());

auto block_tensor = make_host_tensor(block_mem->get_layout(), lock1.data());
auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock2.data());
auto end_tensor = make_host_tensor(end_mem->get_layout(), lock3.data());
auto block_tensor = make_tensor(block_mem->get_layout(), lock1.data());
auto begin_tensor = make_tensor(begin_mem->get_layout(), lock2.data());
auto end_tensor = make_tensor(end_mem->get_layout(), lock3.data());

const_data.emplace(1, block_tensor);
const_data.emplace(2, begin_tensor);
95 changes: 33 additions & 62 deletions src/plugins/intel_gpu/src/graph/border.cpp
@@ -3,6 +3,7 @@
//

#include "border_inst.h"
#include "intel_gpu/runtime/tensor_accessor.hpp"
#include "pad_shape_inference.hpp"

#include "intel_gpu/runtime/error_handler.hpp"
@@ -41,85 +42,55 @@ std::vector<layout> border_inst::calc_output_layouts(border_node const& /*node*/
output_type = impl_param.get_fused_output_layout().data_type;
}

size_t in_rank = input0_layout.get_partial_shape().size();

ov::op::v1::Pad op;
op.set_pad_mode(desc->pad_mode);

const bool is_begin_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::BEGIN);
const bool is_end_mem = (desc->non_constant_input_mask & border::PAD_NON_CONST_INPUT::END);

layout pads_begin_layout, pads_end_layout;
if (is_begin_mem) {
pads_begin_layout = impl_param.get_input_layout(1);
}
if (is_end_mem) {
pads_end_layout = is_begin_mem ? impl_param.get_input_layout(2) : impl_param.get_input_layout(1);
const size_t begin_mem_idx = is_begin_mem ? 1 : 0;
const size_t end_mem_idx = is_begin_mem ? 2 : 1;

auto& memory_deps = impl_param.memory_deps;
if ((is_begin_mem && memory_deps.count(begin_mem_idx) == 0) ||
(is_end_mem && memory_deps.count(end_mem_idx) == 0)) {
return {layout{ShapeType::dynamic(static_cast<int64_t>(in_rank)), input0_layout.data_type, input0_layout.format}};
}

ShapeType pads_begin_shape = is_begin_mem ? pads_begin_layout.get<ShapeType>() : ov::Shape{ desc->pads_begin.size() };
ShapeType pads_end_shape = is_end_mem ? pads_end_layout.get<ShapeType>() : ov::Shape{ desc->pads_end.size() };
std::vector<ShapeType> output_shapes;
int64_t begin_size = desc->pads_begin.size();
int64_t end_size = desc->pads_end.size();

layout pads_begin_layout = is_begin_mem ? impl_param.get_input_layout(begin_mem_idx) : layout({ begin_size }, data_types::i64, format::bfyx);
layout pads_end_layout = is_end_mem ? impl_param.get_input_layout(end_mem_idx) : layout({ end_size }, data_types::i64, format::bfyx);

std::vector<ShapeType> input_shapes = {
input0_layout.get<ShapeType>(),
pads_begin_shape,
pads_end_shape,
pads_begin_layout.get<ShapeType>(),
pads_end_layout.get<ShapeType>(),
};

auto& memory_deps = impl_param.memory_deps;
std::map<size_t, ngraph::HostTensorPtr> const_data;
auto ta = ov::make_tensor_accessor(const_data);

if ((is_begin_mem && memory_deps.count(1)) && (is_end_mem && memory_deps.count(2))) {
auto pads_begin_mem = memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_begin_lock(pads_begin_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data()));

auto pads_end_mem = memory_deps.at(2);
cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_end_lock(pads_end_mem, impl_param.get_stream());
const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data()));

output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
} else if ((is_begin_mem || is_end_mem) && memory_deps.count(1)) {
if (is_begin_mem) {
auto pads_begin_mem = memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_begin_lock(pads_begin_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(pads_begin_mem->get_layout(), pads_begin_lock.data()));

auto pads_end_data = desc->pads_end;
auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_end_data.data()));
const_data.emplace(2, pads_end_tensor);

output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
} else {
auto pads_begin_data = desc->pads_begin;
auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_begin_data.data()));
const_data.emplace(1, pads_begin_tensor);

auto pads_end_mem = memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> pads_end_lock(pads_end_mem, impl_param.get_stream());
const_data.emplace(2, make_host_tensor(pads_end_mem->get_layout(), pads_end_lock.data()));

output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
}
} else {
std::ptrdiff_t val = desc->pad_value;
TensorsContainer const_data(&impl_param.get_stream());

auto pads_begin_data = desc->pads_begin;
if (is_begin_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) {
pads_begin_data = {val, val, val, val};
}
auto pads_begin_tensor = make_host_tensor({pads_begin_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_begin_data.data()));
const_data.emplace(1, pads_begin_tensor);
auto pads_begin_data = desc->pads_begin;
auto pads_end_data = desc->pads_end;

auto pads_end_data = desc->pads_end;
if (is_end_mem && desc->pad_mode == ov::op::PadMode::CONSTANT) {
pads_end_data = {val, val, val, val};
}
auto pads_end_tensor = make_host_tensor({pads_end_shape, data_types::i64, format::bfyx}, static_cast<void*>(pads_end_data.data()));
const_data.emplace(2, pads_end_tensor);
if (is_begin_mem) {
const_data.emplace(1, memory_deps.at(begin_mem_idx));
} else {
const_data.emplace(1, make_tensor(pads_begin_layout, static_cast<void*>(pads_begin_data.data())));
}

output_shapes = ov::op::shape_infer(&op, input_shapes, ta);
if (is_end_mem) {
const_data.emplace(2, memory_deps.at(end_mem_idx));
} else {
const_data.emplace(2, make_tensor(pads_end_layout, static_cast<void*>(pads_end_data.data())));
}

auto ta = cldnn::make_tensor_accessor(const_data);
std::vector<ShapeType> output_shapes = ov::op::shape_infer(&op, input_shapes, ta);

format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());

return { layout{output_shapes[0], output_type, output_format} };
9 changes: 4 additions & 5 deletions src/plugins/intel_gpu/src/graph/broadcast.cpp
@@ -61,11 +61,11 @@ std::vector<layout> broadcast_inst::calc_output_layouts(broadcast_node const& /*
auto axes_mapping = desc->axes_mapping.to_vector();
ShapeType axes_mapping_shape = ov::Shape{axes_mapping.size()};

std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;
if (third_input_needed) {
input_shapes.emplace_back(axes_mapping_shape);

auto axes_mapping_tensor = make_host_tensor({axes_mapping_shape, data_types::i64, format::bfyx},
auto axes_mapping_tensor = make_tensor({axes_mapping_shape, data_types::i64, format::bfyx},
static_cast<void*>(axes_mapping.data()));
const_data.emplace(2, axes_mapping_tensor);
}
@@ -74,12 +74,11 @@ std::vector<layout> broadcast_inst::calc_output_layouts(broadcast_node const& /*
if (constant_mem.count(1)) {
auto target_shape_mem = constant_mem.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
const_data.emplace(1, make_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
} else if (impl_param.input_layouts.size() == 1) {
// predefined pattern shape
auto target_shape_tensor = make_host_tensor({pattern_shape, data_types::i64, format::bfyx},
static_cast<void*>(target_shape.data()));
auto target_shape_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast<void*>(target_shape.data()));
const_data.emplace(1, target_shape_tensor);
output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
} else if (impl_param.input_layouts.size() >= 2) {
10 changes: 5 additions & 5 deletions src/plugins/intel_gpu/src/graph/crop.cpp
@@ -56,17 +56,17 @@ std::vector<layout> crop_inst::calc_output_layouts(const crop_node& /*node*/, co

// TODO: calling shape_infer for all cropped outpus is redundant... Need to optimize.
if (desc->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;

OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph VariadicSplit op mode) axis values memory dependency");
auto axis_values_mem = impl_param.memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> axis_values_mem_lock(axis_values_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));

if (impl_param.memory_deps.count(2) > 0) {
auto split_length_mem = impl_param.memory_deps.at(2);
cldnn::mem_lock<uint8_t, mem_lock_type::read> split_length_mem_lock(split_length_mem, impl_param.get_stream());
const_data.emplace(2, make_host_tensor(split_length_mem->get_layout(), split_length_mem_lock.data()));
const_data.emplace(2, make_tensor(split_length_mem->get_layout(), split_length_mem_lock.data()));

ov::op::v1::VariadicSplit op;
output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));
@@ -76,12 +76,12 @@
return { layout{out_shape, input0_layout.data_type, input0_layout.format } };
}
} else if (desc->op_mode == cldnn::crop_ngraph_op_mode::split) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
std::unordered_map<size_t, ov::Tensor> const_data;

OPENVINO_ASSERT(impl_param.memory_deps.count(1) > 0, "[GPU] Can't find Crop(ngraph Split op mode) axis values memory dependency");
auto axis_values_mem = impl_param.memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> axis_values_mem_lock(axis_values_mem, impl_param.get_stream());
const_data.emplace(1, make_host_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));
const_data.emplace(1, make_tensor(axis_values_mem->get_layout(), axis_values_mem_lock.data()));

ov::op::v1::Split op;
op.set_num_splits(desc->num_splits);
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/gather.cpp
@@ -88,8 +88,8 @@ std::vector<layout> gather_inst::calc_output_layouts(gather_node const& /*node*/

int64_t axis = desc->axis;

auto axis_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, static_cast<void*>(&axis));
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {{2, axis_tensor}};
auto axis_tensor = ov::Tensor(ov::element::i64, ov::Shape{1}, static_cast<void*>(&axis));
std::unordered_map<size_t, ov::Tensor> const_data = {{2, axis_tensor}};
output_shapes = ov::op::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data));

format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());
