Skip to content

Commit

Permalink
[GPU] Graph serialization for GPU #2 (openvinotoolkit#13986)
Browse files Browse the repository at this point in the history
* moved serialization include path

* quiet onednn-gpu patching

* save and load kernels in _impls

* changed to use OPENVINO_ASSERT

* fix errata

* updated to follow OpenVINO naming convention

* updated error messages

* binary buffer by vector<uint8_t>

* partial_shape serialization

* removed object_type

* added a new storage class for primitive_type_string and id

* updated to throw an exception when _node is null in build_deps().

* removed redundant memory_pool clearing

* added a new net_id creator

* newline at eof

* updated CLDNN with GPU

* added cache blob descriptions

* updated output allocation logic in serialization

* added ov::device::architecture in supported properties

* overrode save and load in data_inst and mutable_data_inst

* removed save and load functions in mutable_data

* baseline for serialization unit tests

* added serialization unit tests

* added serialization unit tests

* updated not to execute build_deps when deserialized

* make_data without namespace

* updated to use default layout c-tor

* updated get_unique_net_id()

* updated get_type_id() to a pure virtual method

* updated ov::caching_properties

* added [GPU] tags

* updated network c-tor

* updated unit tests
  • Loading branch information
e-ddykim authored Nov 22, 2022
1 parent 97878de commit 0b1e366
Show file tree
Hide file tree
Showing 272 changed files with 2,486 additions and 1,630 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/runtime/compounds.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"

#include <map>
#include <vector>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {

void write(void const * data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
if (written_size != size) {
throw std::runtime_error("Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand All @@ -38,9 +37,8 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {

void read(void* const data, std::streamsize size) {
auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
if (read_size != size) {
throw std::runtime_error("Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand Down Expand Up @@ -85,9 +83,14 @@ class Serializer<BinaryInputBuffer, Data<T>> {

} // namespace cldnn

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name, obj_type) \
#define ASSIGN_TYPE_NAME(cls_name) \
namespace cldnn { \
const object_type cls_name::type = obj_type; \
const std::string cls_name::type = #cls_name; \
}

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name) \
namespace cldnn { \
const std::string cls_name::type = #cls_name; \
BIND_TO_BUFFER(BinaryOutputBuffer, cls_name) \
BIND_TO_BUFFER(BinaryInputBuffer, cls_name) \
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@
#include <functional>
#include "buffer.hpp"
#include "static_instance.hpp"
#include "object_types.hpp"

#define DECLARE_OBJECT_TYPE_SERIALIZATION \
static const object_type type; \
object_type get_type() const override { return type; }
static const std::string type; \
std::string get_type() const override { return type; }

#define BIND_TO_BUFFER(buffer, type) \
template <> \
Expand All @@ -25,26 +24,19 @@
const instance_creator<buffer, type>& bind_creator<buffer, type>::creator = \
static_instance<instance_creator<buffer, type>>::get_instance().instantiate();

// It's a defect, and was fixed in C++14
// https://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2148
struct enum_class_hash {
template <typename T>
std::size_t operator()(T t) const { return static_cast<std::size_t>(t); }
};

namespace cldnn {

template <typename BufferType>
struct saver_storage {
using save_function = std::function<void(BufferType&, const void*)>;
using value_type = typename std::unordered_map<object_type, save_function, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, save_function>::value_type;

static saver_storage<BufferType>& instance() {
static saver_storage<BufferType> instance;
return instance;
}

const save_function& get_save_function(const object_type& type) const {
const save_function& get_save_function(const std::string& type) const {
return map.at(type);
}

Expand All @@ -57,7 +49,7 @@ struct saver_storage {
saver_storage(const saver_storage&) = delete;
void operator=(const saver_storage&) = delete;

std::unordered_map<object_type, save_function, enum_class_hash> map;
std::unordered_map<std::string, save_function> map;
};

template <typename T>
Expand All @@ -67,14 +59,14 @@ struct void_deleter {

template <typename BufferType, typename FuncT>
struct loader_storage {
using value_type = typename std::unordered_map<object_type, FuncT, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, FuncT>::value_type;

static loader_storage& instance() {
static loader_storage instance;
return instance;
}

const FuncT& get_load_function(const object_type& type) {
const FuncT& get_load_function(const std::string& type) {
return map.at(type);
}

Expand All @@ -87,7 +79,7 @@ struct loader_storage {
loader_storage(const loader_storage&) = delete;
void operator=(const loader_storage&) = delete;

std::unordered_map<object_type, FuncT, enum_class_hash> map;
std::unordered_map<std::string, FuncT> map;
};

template <typename BufferType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,33 @@
#include "intel_gpu/runtime/layout.hpp"

namespace cldnn {
template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
    // Writes a PartialShape as: dimension count, then (min, max) interval
    // bounds for every dimension, matching the InputBuffer-side load order.
    // NOTE(review): copies the shape into a vector<ov::Dimension> first, which
    // assumes a static rank — confirm callers never pass rank-dynamic shapes.
    static void save(BufferType& buffer, const ov::PartialShape& partial_shape) {
        const std::vector<ov::Dimension> dims(partial_shape);
        buffer << dims.size();
        for (const auto& dim : dims) {
            const auto& interval = dim.get_interval();
            buffer << interval.get_min_val();
            buffer << interval.get_max_val();
        }
    }
};

template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, ov::PartialShape& partial_shape) {
size_t num_dimensions;
buffer >> num_dimensions;
for (size_t i = 0; i < num_dimensions; i++) {
ov::Dimension::value_type min_val, max_val;
buffer >> min_val >> max_val;
partial_shape.push_back(ov::Dimension(min_val, max_val));
}
}
};

template <typename BufferType>
class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
Expand All @@ -21,15 +48,7 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
buffer << _layout.data_padding.filling_value();
buffer << _layout.data_padding.lower_size().sizes();
buffer << _layout.data_padding.upper_size().sizes();

std::vector<cldnn::tensor::value_type> _sizes = _layout.get_tensor().sizes(_layout.format);
// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
std::vector<cldnn::tensor::value_type> _tmp_sizes = _layout.get_tensor().sizes();
_sizes[0] = _tmp_sizes[0];
_sizes[1] = _tmp_sizes[1];
}
buffer << _sizes;
buffer << _layout.get_partial_shape();
}
};

Expand All @@ -50,15 +69,9 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
_layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value);
}

std::vector<cldnn::tensor::value_type> _sizes;
buffer >> _sizes;

// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
_layout.set_tensor(tensor(_sizes));
} else {
_layout.set_tensor(tensor(_layout.format, _sizes));
}
ov::PartialShape partial_shape;
buffer >> partial_shape;
_layout.set_partial_shape(partial_shape);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "buffer.hpp"
#include "bind.hpp"
#include "helpers.hpp"
#include "object_types.hpp"

namespace cldnn {

Expand All @@ -21,7 +20,7 @@ class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is
public:
static void save(BufferType& buffer, const std::unique_ptr<T>& ptr) {
const auto& type = ptr->get_type();
buffer << cldnn::make_data(&type, sizeof(object_type));
buffer << type;
const auto save_func = saver_storage<BufferType>::instance().get_save_function(type);
save_func(buffer, ptr.get());
}
Expand All @@ -31,17 +30,17 @@ template <typename BufferType, typename T>
class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, std::unique_ptr<T>& ptr, engine& engine) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = dif<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result, engine);
ptr.reset(static_cast<T*>(result.release()));
}

static void load(BufferType& buffer, std::unique_ptr<T>& ptr) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = def<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand All @@ -47,6 +46,9 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
Config m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;

private:
bool is_serializable();
};

} // namespace intel_gpu
Expand Down
24 changes: 24 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,30 @@ struct primitive_info {
CLDNN_DEFINE_TYPE_ID(PType) \
CLDNN_DEFINE_TYPE_STRING(PType)

// Defines PType::type_id() as a pointer to a function-local singleton
// primitive_type_base<PType>, and registers the stringified primitive name in
// prim_map_storage via a namespace-scope bool whose initializer runs during
// static initialization of the expanding translation unit. Comments cannot be
// placed on the continuation lines below: '//' would swallow the trailing '\'.
#define GPU_DEFINE_PRIMITIVE_TYPE_ID(PType) \
primitive_type_id PType::type_id() { \
static primitive_type_base<PType> instance; \
return &instance; \
} \
bool _##PType##_added_ = prim_map_storage::instance().set_type_id(#PType, PType::type_id());

/// Process-wide registry mapping a primitive's type string (e.g. "activation")
/// to its primitive_type_id. Populated at static-initialization time by
/// GPU_DEFINE_PRIMITIVE_TYPE_ID so deserialization can recover a type id from
/// the serialized type name.
struct prim_map_storage {
    static prim_map_storage& instance() {
        static prim_map_storage instance;
        return instance;
    }

    /// Returns the type id registered under @p type_string.
    /// @throws std::out_of_range if the name was never registered.
    /// (Top-level const on the by-value return was dropped: it has no effect
    /// on callers and clang-tidy flags it as readability-const-return-type.)
    cldnn::primitive_type_id get_type_id(const std::string& type_string) const {
        return map.at(type_string);
    }

    /// Registers @p type_id under @p type_string.
    /// @return true if newly inserted, false if the name was already present.
    bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) {
        return map.insert({type_string, type_id}).second;
    }

private:
    std::unordered_map<std::string, cldnn::primitive_type_id> map;
};
/// @}
/// @}
} // namespace cldnn
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/activation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <vector>

namespace cldnn {
primitive_type_id activation::type_id() {
static primitive_type_base<activation> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(activation)

layout activation_inst::calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
#include <string>

namespace cldnn {
primitive_type_id adaptive_pooling::type_id() {
static primitive_type_base<adaptive_pooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(adaptive_pooling)

layout adaptive_pooling_inst::calc_output_layout(const adaptive_pooling_node& node, kernel_impl_params const& impl_param) {
const auto data_layout = impl_param.get_input_layout();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/arg_max_min.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
#include "topk_shape_inference.hpp"

namespace cldnn {
primitive_type_id arg_max_min::type_id() {
static primitive_type_base<arg_max_min> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(arg_max_min)

layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<arg_max_min>();
Expand Down
6 changes: 1 addition & 5 deletions src/plugins/intel_gpu/src/graph/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
#include <data_inst.h>

namespace cldnn {

primitive_type_id assign::type_id() {
static primitive_type_base<assign> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(assign)

assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
parent{network, node, false},
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/average_unpooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <string>

namespace cldnn {
primitive_type_id average_unpooling::type_id() {
static primitive_type_base<average_unpooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(average_unpooling)

layout average_unpooling_inst::calc_output_layout(average_unpooling_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/batch_to_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <vector>

namespace cldnn {
primitive_type_id cldnn::batch_to_space::type_id() {
static primitive_type_base<batch_to_space> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(batch_to_space)

layout batch_to_space_inst::calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<batch_to_space>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/binary_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <string>

namespace cldnn {
primitive_type_id binary_convolution::type_id() {
static primitive_type_base<binary_convolution> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(binary_convolution)

layout binary_convolution_inst::calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<binary_convolution>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/border.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <algorithm>

namespace cldnn {
primitive_type_id border::type_id() {
static primitive_type_base<border> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(border)

layout border_inst::calc_output_layout(border_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
Loading

0 comments on commit 0b1e366

Please sign in to comment.