Skip to content

Commit

Permalink
[GPU] Graph serialization for GPU #2 (openvinotoolkit#13986)
Browse files Browse the repository at this point in the history
* moved serialization include path

* quiet onednn-gpu patching

* save and load kernels in _impls

* changed to use OPENVINO_ASSERT

* fix errata

* updated to follow OpenVINO naming convention

* updated error messages

* binary buffer by vector<uint8_t>

* partial_shape serialization

* removed object_type

* added a new storage class for primitive_type_string and id

* updated to throw an exception when _node is null in build_deps().

* removed redundant memory_pool clearing

* added a new net_id creator

* newline at eof

* updated CLDNN with GPU

* added cache blob descriptions

* updated output allocation logic in serialization

* added ov::device::architecture in supported properties

* overrode save and load in data_inst and mutable_data_inst

* removed save and load functions in mutable_data

* baseline for serialization unit tests

* added serialization unit tests

* added serialization unit tests

* updated not to execute build_deps when deserialized

* make_data without namespace

* updated to use default layout c-tor

* updated get_unique_net_id()

* updated get_type_id() to a pure virtual method

* updated ov::caching_properties

* added [GPU] tags

* updated network c-tor

* updated unit tests
  • Loading branch information
e-ddykim authored Nov 22, 2022
1 parent 97878de commit 0b1e366
Show file tree
Hide file tree
Showing 272 changed files with 2,486 additions and 1,630 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/runtime/compounds.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"

#include <map>
#include <vector>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {

void write(void const * data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
if (written_size != size) {
throw std::runtime_error("Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand All @@ -38,9 +37,8 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {

void read(void* const data, std::streamsize size) {
auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
if (read_size != size) {
throw std::runtime_error("Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand Down Expand Up @@ -85,9 +83,14 @@ class Serializer<BinaryInputBuffer, Data<T>> {

} // namespace cldnn

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name, obj_type) \
#define ASSIGN_TYPE_NAME(cls_name) \
namespace cldnn { \
const object_type cls_name::type = obj_type; \
const std::string cls_name::type = #cls_name; \
}

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name) \
namespace cldnn { \
const std::string cls_name::type = #cls_name; \
BIND_TO_BUFFER(BinaryOutputBuffer, cls_name) \
BIND_TO_BUFFER(BinaryInputBuffer, cls_name) \
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@
#include <functional>
#include "buffer.hpp"
#include "static_instance.hpp"
#include "object_types.hpp"

#define DECLARE_OBJECT_TYPE_SERIALIZATION \
static const object_type type; \
object_type get_type() const override { return type; }
static const std::string type; \
std::string get_type() const override { return type; }

#define BIND_TO_BUFFER(buffer, type) \
template <> \
Expand All @@ -25,26 +24,19 @@
const instance_creator<buffer, type>& bind_creator<buffer, type>::creator = \
static_instance<instance_creator<buffer, type>>::get_instance().instantiate();

// It's a defect, and was fixed in C++14
// https://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2148
struct enum_class_hash {
template <typename T>
std::size_t operator()(T t) const { return static_cast<std::size_t>(t); }
};

namespace cldnn {

template <typename BufferType>
struct saver_storage {
using save_function = std::function<void(BufferType&, const void*)>;
using value_type = typename std::unordered_map<object_type, save_function, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, save_function>::value_type;

static saver_storage<BufferType>& instance() {
static saver_storage<BufferType> instance;
return instance;
}

const save_function& get_save_function(const object_type& type) const {
const save_function& get_save_function(const std::string& type) const {
return map.at(type);
}

Expand All @@ -57,7 +49,7 @@ struct saver_storage {
saver_storage(const saver_storage&) = delete;
void operator=(const saver_storage&) = delete;

std::unordered_map<object_type, save_function, enum_class_hash> map;
std::unordered_map<std::string, save_function> map;
};

template <typename T>
Expand All @@ -67,14 +59,14 @@ struct void_deleter {

template <typename BufferType, typename FuncT>
struct loader_storage {
using value_type = typename std::unordered_map<object_type, FuncT, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, FuncT>::value_type;

static loader_storage& instance() {
static loader_storage instance;
return instance;
}

const FuncT& get_load_function(const object_type& type) {
const FuncT& get_load_function(const std::string& type) {
return map.at(type);
}

Expand All @@ -87,7 +79,7 @@ struct loader_storage {
loader_storage(const loader_storage&) = delete;
void operator=(const loader_storage&) = delete;

std::unordered_map<object_type, FuncT, enum_class_hash> map;
std::unordered_map<std::string, FuncT> map;
};

template <typename BufferType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,33 @@
#include "intel_gpu/runtime/layout.hpp"

namespace cldnn {
template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
    // Writes a PartialShape as: dimension count, then (min, max) interval
    // bounds for every dimension, matching the InputBuffer-side load order.
    // NOTE(review): copies the shape into a vector<ov::Dimension> first, which
    // assumes a static rank — confirm callers never pass rank-dynamic shapes.
    static void save(BufferType& buffer, const ov::PartialShape& partial_shape) {
        const std::vector<ov::Dimension> dims(partial_shape);
        buffer << dims.size();
        for (const auto& dim : dims) {
            const auto& interval = dim.get_interval();
            buffer << interval.get_min_val();
            buffer << interval.get_max_val();
        }
    }
};

template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, ov::PartialShape& partial_shape) {
size_t num_dimensions;
buffer >> num_dimensions;
for (size_t i = 0; i < num_dimensions; i++) {
ov::Dimension::value_type min_val, max_val;
buffer >> min_val >> max_val;
partial_shape.push_back(ov::Dimension(min_val, max_val));
}
}
};

template <typename BufferType>
class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
Expand All @@ -21,15 +48,7 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
buffer << _layout.data_padding.filling_value();
buffer << _layout.data_padding.lower_size().sizes();
buffer << _layout.data_padding.upper_size().sizes();

std::vector<cldnn::tensor::value_type> _sizes = _layout.get_tensor().sizes(_layout.format);
// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
std::vector<cldnn::tensor::value_type> _tmp_sizes = _layout.get_tensor().sizes();
_sizes[0] = _tmp_sizes[0];
_sizes[1] = _tmp_sizes[1];
}
buffer << _sizes;
buffer << _layout.get_partial_shape();
}
};

Expand All @@ -50,15 +69,9 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
_layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value);
}

std::vector<cldnn::tensor::value_type> _sizes;
buffer >> _sizes;

// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
_layout.set_tensor(tensor(_sizes));
} else {
_layout.set_tensor(tensor(_layout.format, _sizes));
}
ov::PartialShape partial_shape;
buffer >> partial_shape;
_layout.set_partial_shape(partial_shape);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "buffer.hpp"
#include "bind.hpp"
#include "helpers.hpp"
#include "object_types.hpp"

namespace cldnn {

Expand All @@ -21,7 +20,7 @@ class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is
public:
static void save(BufferType& buffer, const std::unique_ptr<T>& ptr) {
const auto& type = ptr->get_type();
buffer << cldnn::make_data(&type, sizeof(object_type));
buffer << type;
const auto save_func = saver_storage<BufferType>::instance().get_save_function(type);
save_func(buffer, ptr.get());
}
Expand All @@ -31,17 +30,17 @@ template <typename BufferType, typename T>
class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, std::unique_ptr<T>& ptr, engine& engine) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = dif<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result, engine);
ptr.reset(static_cast<T*>(result.release()));
}

static void load(BufferType& buffer, std::unique_ptr<T>& ptr) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = def<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand All @@ -47,6 +46,9 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
Config m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;

private:
bool is_serializable();
};

} // namespace intel_gpu
Expand Down
24 changes: 24 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,30 @@ struct primitive_info {
CLDNN_DEFINE_TYPE_ID(PType) \
CLDNN_DEFINE_TYPE_STRING(PType)

// Defines PType::type_id() as a pointer to a function-local singleton
// primitive_type_base<PType>, and registers the stringified primitive name in
// prim_map_storage via a namespace-scope bool whose initializer runs during
// static initialization of the expanding translation unit. Comments cannot be
// placed on the continuation lines below: '//' would swallow the trailing '\'.
#define GPU_DEFINE_PRIMITIVE_TYPE_ID(PType) \
primitive_type_id PType::type_id() { \
static primitive_type_base<PType> instance; \
return &instance; \
} \
bool _##PType##_added_ = prim_map_storage::instance().set_type_id(#PType, PType::type_id());

/// Process-wide registry mapping a primitive's type string (e.g. "activation")
/// to its primitive_type_id. Populated at static-initialization time by
/// GPU_DEFINE_PRIMITIVE_TYPE_ID so deserialization can recover a type id from
/// the serialized type name.
struct prim_map_storage {
    static prim_map_storage& instance() {
        static prim_map_storage instance;
        return instance;
    }

    /// Returns the type id registered under @p type_string.
    /// @throws std::out_of_range if the name was never registered.
    /// (Top-level const on the by-value return was dropped: it has no effect
    /// on callers and clang-tidy flags it as readability-const-return-type.)
    cldnn::primitive_type_id get_type_id(const std::string& type_string) const {
        return map.at(type_string);
    }

    /// Registers @p type_id under @p type_string.
    /// @return true if newly inserted, false if the name was already present.
    bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) {
        return map.insert({type_string, type_id}).second;
    }

private:
    std::unordered_map<std::string, cldnn::primitive_type_id> map;
};
/// @}
/// @}
} // namespace cldnn
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/activation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <vector>

namespace cldnn {
primitive_type_id activation::type_id() {
static primitive_type_base<activation> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(activation)

layout activation_inst::calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
#include <string>

namespace cldnn {
primitive_type_id adaptive_pooling::type_id() {
static primitive_type_base<adaptive_pooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(adaptive_pooling)

layout adaptive_pooling_inst::calc_output_layout(const adaptive_pooling_node& node, kernel_impl_params const& impl_param) {
const auto data_layout = impl_param.get_input_layout();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/arg_max_min.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
#include "topk_shape_inference.hpp"

namespace cldnn {
primitive_type_id arg_max_min::type_id() {
static primitive_type_base<arg_max_min> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(arg_max_min)

layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<arg_max_min>();
Expand Down
6 changes: 1 addition & 5 deletions src/plugins/intel_gpu/src/graph/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
#include <data_inst.h>

namespace cldnn {

primitive_type_id assign::type_id() {
static primitive_type_base<assign> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(assign)

assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
parent{network, node, false},
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/average_unpooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <string>

namespace cldnn {
primitive_type_id average_unpooling::type_id() {
static primitive_type_base<average_unpooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(average_unpooling)

layout average_unpooling_inst::calc_output_layout(average_unpooling_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/batch_to_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <vector>

namespace cldnn {
primitive_type_id cldnn::batch_to_space::type_id() {
static primitive_type_base<batch_to_space> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(batch_to_space)

layout batch_to_space_inst::calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<batch_to_space>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/binary_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <string>

namespace cldnn {
primitive_type_id binary_convolution::type_id() {
static primitive_type_base<binary_convolution> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(binary_convolution)

layout binary_convolution_inst::calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<binary_convolution>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/border.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <algorithm>

namespace cldnn {
primitive_type_id border::type_id() {
static primitive_type_base<border> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(border)

layout border_inst::calc_output_layout(border_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
Loading

0 comments on commit 0b1e366

Please sign in to comment.