From c563ce1be196435d9c65057e9cf65ebc815aedfe Mon Sep 17 00:00:00 2001 From: Lyamin-Roman Date: Fri, 15 Nov 2024 01:43:58 +0900 Subject: [PATCH] [GPU] Moved RMSFusion higher in pipeline and added output type fuse --- .../transformations/include/ov_ops/rms.hpp | 6 +++++ .../src/transformations/convert_precision.cpp | 19 +++++++++++++- .../src/plugin/transformations_pipeline.cpp | 25 +++++++++++-------- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/common/transformations/include/ov_ops/rms.hpp b/src/common/transformations/include/ov_ops/rms.hpp index 4e22e505819a10..38ac0a73bda3cb 100644 --- a/src/common/transformations/include/ov_ops/rms.hpp +++ b/src/common/transformations/include/ov_ops/rms.hpp @@ -43,6 +43,12 @@ class TRANSFORMATIONS_API RMS : public ov::op::Op { m_epsilon = epsilon; } + void set_output_type(const element::Type& output_type) { + m_output_type = output_type; + } + // Overload collision with method on Node + using Node::set_output_type; + private: double m_epsilon{0}; ov::element::Type m_output_type; diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index d8fd21699a5c20..6c56a91079a2f9 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "openvino/reference/convert.hpp" +#include "ov_ops/rms.hpp" #include "ov_ops/type_relaxed.hpp" #include "transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp" #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" @@ -59,6 +60,7 @@ bool fuse_type_to_maxpool(const std::shared_ptr& node, const precision bool fuse_type_to_nonzero(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_bucketize(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_ctc_greedy_decoder_seq_len(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_rms(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_random_uniform_v8(const std::shared_ptr& node, const precisions_map& precisions); @@ -465,7 +467,8 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& {ov::op::v0::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, {ov::op::v8::PriorBox::get_type_info_static(), fuse_type_to_prior_box}, {ov::op::v0::PriorBoxClustered::get_type_info_static(), fuse_type_to_prior_box}, - {ov::op::v15::SearchSorted::get_type_info_static(), fuse_type_to_search_sorted_v15}}; + {ov::op::v15::SearchSorted::get_type_info_static(), fuse_type_to_search_sorted_v15}, + {ov::op::internal::RMS::get_type_info_static(), fuse_type_to_rms}}; for (const auto& it : m_additional_type_to_fuse_map) { type_to_fuse[it.first] = it.second; @@ -858,6 +861,20 @@ bool fuse_type_to_nms_rotated(const std::shared_ptr& node, const preci return res; } +bool fuse_type_to_rms(const std::shared_ptr& node, const precisions_map& precisions) { + auto it = precisions.find(node->get_output_element_type(0)); + if (it == precisions.end()) + return false; + const auto& to = it->second; + if (auto rms = ov::as_type_ptr(node)) { + if (to.is_real()) { + rms->set_output_type(to); + return true; + } + } + return false; +} + namespace { bool update_type(size_t idx, diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index db93696865a971..cefec2b409c36d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -107,6 +107,7 @@ #include "transformations/op_conversions/convert_broadcast3.hpp" #include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" #include "transformations/op_conversions/convert_depth_to_space.hpp" +#include "transformations/op_conversions/convert_divide.hpp" #include "transformations/op_conversions/convert_gather_0d.hpp" #include "transformations/op_conversions/convert_gather_downgrade.hpp" #include "transformations/op_conversions/convert_gelu.hpp" @@ -338,6 +339,19 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }); } + manager.register_pass(); + manager.register_pass(); + + pass_config->set_callback([=](const_node_ptr& root) -> bool { + if (!root->get_input_partial_shape(0).is_static()) { + return false; + } + const auto& gamma_shape = root->get_input_partial_shape(0).to_shape(); + const int32_t vec_size = 8; + return static_cast((gamma_shape.back() / vec_size)) > static_cast(device_info.max_work_group_size); + }); + manager.register_pass(false); + const bool keep_precision_sensitive_in_fp32_1 = true; const bool convert_input_output_precision = false; const bool store_original_precision_as_rt_attribute = true; @@ -855,16 +869,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); auto pass_config = manager.get_pass_config(); - pass_config->set_callback([=](const_node_ptr& root) -> bool { - if (!root->get_input_node_ptr(0)->get_input_partial_shape(0).is_static()) { - return false; - } - const auto& gamma_shape = root->get_input_node_ptr(0)->get_input_partial_shape(0).to_shape(); - const int32_t vec_size = 8; - return static_cast((gamma_shape.back() / vec_size)) > static_cast(device_info.max_work_group_size); - }); - - manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(device_info.supports_immad); @@ -930,7 +934,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->verbose >= 1) { manager.register_pass(); } - manager.run_passes(func); } }