diff --git a/src/common/snippets/docs/debug_capabilities/parameters_dump.md b/src/common/snippets/docs/debug_capabilities/parameters_dump.md new file mode 100644 index 00000000000000..40fa46a55d1d9a --- /dev/null +++ b/src/common/snippets/docs/debug_capabilities/parameters_dump.md @@ -0,0 +1,20 @@ +# Snippet parameters dump + +The pass dumps selected properties of some performance-critical operations in Subgraphs. Only MatMuls are currently supported by this pass. + +To turn on the snippet parameters dump feature, set the following environment variable: +```sh + OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=<path_to_csv_file>" binary ... +``` + +Example: +```sh + OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=brgemm.csv" binary ... +``` + +Output example (`acc_max_time` and `avg_max_time` are reported in nanoseconds): + +| subgraph_name | name | in_type | out_type | in_shapes | out_shapes | in_layouts | out_layouts | M | N | K | m_block | n_block | k_block | acc_max_time | avg_max_time | +|--------------------|------------|-------------|----------|-------------------------------------|----------------------|--------------------------|-------------|-----|-----|-----|---------|----------|----------|---------------|---------------| +| FakeQuantize_457 | MatMul_438 | i8;i8;f32 | i32 | 1 16 128 64;1 16 64 128;1 16 64 128 | 1 16 128 128 | 0 2 1 3;0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 128 | 64 | 32 | FULL_DIM | FULL_DIM | 41482 | 5185 | +| FakeQuantize_457 | MatMul_452 | u8;i8 | i32 | 1 16 128 128;1 16 128 64 | 1 16 128 64 | 0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 64 | 128 | 32 | FULL_DIM | FULL_DIM | 39427 | 4928 | diff --git a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp new file mode 100644 index 00000000000000..294d45467ed72e --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS + +#pragma
once + +#include "snippets/itt.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/specific_loop_iter_handlers.hpp" +#include "snippets/lowered/pass/iter_handler.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/utils/utils.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface BrgemmDebugParams + * @brief Brgemm parameters dump pass: attaches collected Brgemm parameters to inserted perf-count nodes + * @ingroup snippets + */ +class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { +public: + BrgemmDebugParams(const std::string& subgraph_name) : m_subgraph_name(subgraph_name) {} + OPENVINO_RTTI("BrgemmDebugParams", "", RangedPass); + + bool run(snippets::lowered::LinearIR& linear_ir, + snippets::lowered::LinearIR::constExprIt begin, + snippets::lowered::LinearIR::constExprIt end) override; + +private: + std::string collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr, + const snippets::lowered::LinearIR& linear_ir); + + std::string m_subgraph_name; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov + +#endif // SNIPPETS_DEBUG_CAPS diff --git a/src/common/snippets/include/snippets/op/perf_count.hpp b/src/common/snippets/include/snippets/op/perf_count.hpp index b6c8eb4264f1b5..d89125c0daecaf 100644 --- a/src/common/snippets/include/snippets/op/perf_count.hpp +++ b/src/common/snippets/include/snippets/op/perf_count.hpp @@ -11,6 +11,37 @@ namespace ov { namespace snippets { + +namespace op { +class PerfCountEnd; +} // namespace op + +namespace utils { + +/** + * @interface Dumper + * @brief Dumper for node debug properties: stores Brgemm params with timings, dumps them to CSV on last destruction + * @ingroup snippets + */ +class Dumper { +public: + Dumper(); + ~Dumper(); + + void update(const op::PerfCountEnd* node, + ov::threading::ThreadLocal accumulation, + ov::threading::ThreadLocal iteration); + +private: + void dump_brgemm_params_to_csv(); + + static std::string brgemm_csv_path; + static std::map m_debug_params_map; + static
size_t nodes_count; +}; + +} // namespace utils + namespace op { /** @@ -74,10 +105,9 @@ class PerfCountEnd : public PerfCountEndBase { public: OPENVINO_OP("PerfCountEnd", "SnippetsOpset", PerfCountEndBase); PerfCountEnd(const Output& pc_begin); - PerfCountEnd() = default; - ~PerfCountEnd() { - output_perf_count(); - } + PerfCountEnd(); + ~PerfCountEnd(); + void output_perf_count(); std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; @@ -87,6 +117,8 @@ class PerfCountEnd : public PerfCountEndBase { private: ov::threading::ThreadLocal accumulation; ov::threading::ThreadLocal iteration; + + utils::Dumper csv_dumper; std::shared_ptr m_pc_begin = nullptr; }; diff --git a/src/common/snippets/include/snippets/utils/debug_caps_config.hpp b/src/common/snippets/include/snippets/utils/debug_caps_config.hpp index 67f791009e5167..a8726395d5393c 100644 --- a/src/common/snippets/include/snippets/utils/debug_caps_config.hpp +++ b/src/common/snippets/include/snippets/utils/debug_caps_config.hpp @@ -60,6 +60,15 @@ class DebugCapsConfig { } } dumpLIR; + struct : PropertyGroup { + std::string csv_path; + std::vector getPropertySetters() override { + return { + PropertySetterPtr(new StringPropertySetter("path", csv_path, "path to dumped brgemm params")), + }; + } + } dumpParams; + // Snippets performance count mode // Disabled - default, w/o perf count for snippets // Chrono - perf count with chrono call. This is a universal method, and support multi-thread case to output perf diff --git a/src/common/snippets/include/snippets/utils/utils.hpp b/src/common/snippets/include/snippets/utils/utils.hpp index dc480c4a81e3f9..c450e24573b66e 100644 --- a/src/common/snippets/include/snippets/utils/utils.hpp +++ b/src/common/snippets/include/snippets/utils/utils.hpp @@ -324,6 +324,15 @@ void visit_path(const lowered::ExpressionPtr& expr, std::function func, bool visit_parent_path); +/** + * @brief Converts a tensor to a string representation. 
+ * Each value in the tensor is converted to a string. If the value is a full dimension, it is represented as + * "FULL_DIM". If the value is dynamic, it is represented as "?". Values are joined with @p delimiter. + * @param tensor The tensor to be converted to a string. + * @return A string representation of the tensor. + */ +std::string tensor2str(const VectorDims& tensor, const std::string& delimiter = ", "); + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 245470ae1a48af..7389990cba54dc 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -170,18 +170,6 @@ ExpressionPtr Expression::clone() const { } bool Expression::visit_attributes(AttributeVisitor &visitor) { - auto subtensor2str = [](const VectorDims& subtensor) { - std::stringstream ss; - for (size_t i = 0; i < subtensor.size(); ++i) { - const auto& v = subtensor[i]; - const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : - utils::is_dynamic_value(v) ? "?" : std::to_string(v); - const auto del = i < subtensor.size() - 1 ?
", " : ""; - ss << v_str << del; - } - return ss.str(); - }; - std::ostringstream in_regs, out_regs; std::vector> shapes; std::vector> subtensors; @@ -194,7 +182,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { const auto& subtensor = desc->get_subtensor(); if (!subtensor.empty()) - subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + subtensors.emplace_back("in_subtensor_" + std::to_string(i), utils::tensor2str(subtensor)); const auto& layout = desc->get_layout(); if (!layout.empty() && !utils::is_planar_layout(layout)) @@ -210,7 +198,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { const auto& subtensor = desc->get_subtensor(); if (!subtensor.empty()) - subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + subtensors.emplace_back("out_subtensor_" + std::to_string(i), utils::tensor2str(subtensor)); const auto& layout = desc->get_layout(); if (!layout.empty() && !utils::is_planar_layout(layout)) diff --git a/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp b/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp new file mode 100644 index 00000000000000..7fbc129eedade7 --- /dev/null +++ b/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp @@ -0,0 +1,154 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS +#include "snippets/lowered/pass/brgemm_debug_params.hpp" + +#include "snippets/itt.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/pass/propagate_subtensors.hpp" +#include "snippets/lowered/pass/iter_handler.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/utils/utils.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool BrgemmDebugParams::run(snippets::lowered::LinearIR& linear_ir, + 
snippets::lowered::LinearIR::constExprIt begin, + snippets::lowered::LinearIR::constExprIt end) { /* Surrounds each Brgemm expression in [begin, end) with a pair of perf-count nodes and stores the collected parameters in the end node's rt_info; returns true if anything was inserted. */ + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmDebugParams") + if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) { + return false; /* dump is disabled: no csv path was configured */ + } + static size_t seq_number = 0; /* shared by all calls so the inserted node names stay unique */ + bool modified = false; + const auto& csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; /* bind to the config string instead of copying it */ + for (auto expr_it = begin; expr_it != end; ++expr_it) { + const auto& brgemm_expr = *expr_it; + const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); + if (!brgemm) + continue; + // Collect brgemm parameters + const auto params = collect_params(brgemm_expr, linear_ir); + const auto& perf_count_begin = std::make_shared(); + perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) + + "_DebugParams"); + const auto empty_inputs = std::vector{}; + linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it); + + const auto& perf_count_end = std::make_shared(perf_count_begin->output(0)); + perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) + + "_DebugParams"); + // Attach brgemm parameters to PerfCountEnd node + perf_count_end->get_rt_info()["brgemm_params"] = params; + perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path; + linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it)); + seq_number++; + modified = true; + } + return modified; +} + +std::string BrgemmDebugParams::collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr, + const snippets::lowered::LinearIR& linear_ir) { + const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); + OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!"); + std::stringstream ss; + ss << m_subgraph_name << ','; + ss << brgemm_expr->get_node()->get_friendly_name() << ','; + for (size_t i = 0; i < brgemm->get_input_size();
++i) { + ss << brgemm->get_input_element_type(i); + if (i != brgemm->get_input_size() - 1) { + ss << ';'; + } + } + ss << ','; + for (size_t i = 0; i < brgemm->get_output_size(); ++i) { + ss << brgemm->get_output_element_type(i); + if (i != brgemm->get_output_size() - 1) { + ss << ';'; + } + } + ss << ','; + for (size_t i = 0; i < brgemm->inputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); + const auto& shape = ov::snippets::utils::get_planar_vdims(port_desc->get_shape(), port_desc->get_layout()); + ss << utils::tensor2str(shape, " "); + ss << ';'; + } + ss.seekp(-1, ss.cur); /* step back over the trailing ';' so the ',' written next overwrites it */ + ss << ','; + for (size_t i = 0; i < brgemm->outputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); + const auto& shape = ov::snippets::utils::get_preordered_vdims(port_desc->get_shape(), port_desc->get_layout()); + ss << utils::tensor2str(shape, " "); + ss << ';'; + } + ss.seekp(-1, ss.cur); /* same as above: the trailing ';' is replaced by the column separator */ + ss << ','; + for (size_t i = 0; i < brgemm->inputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); + ss << utils::tensor2str(port_desc->get_layout(), " "); + ss << ';'; + } + ss << ','; /* no seekp here: the layout columns keep their trailing ';' */ + for (size_t i = 0; i < brgemm->outputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); + ss << utils::tensor2str(port_desc->get_layout(), " "); + ss << ';'; + } + ss << ','; + + const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0); + const auto& in_1_desc = brgemm_expr->get_input_port_descriptor(1); + const auto& out_desc = brgemm_expr->get_output_port_descriptor(0); + + const auto& in_0_planar_dims = + ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout()); + const auto& in_1_planar_dims = + ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout()); + const auto& out_preordered_dims = + ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout()); + + const
auto& m = *++out_preordered_dims.rbegin(); + const auto& n = *out_preordered_dims.rbegin(); + const auto& k0 = *in_0_planar_dims.rbegin(); + const auto& k1 = *++in_1_planar_dims.rbegin(); + size_t k = 0; + OPENVINO_ASSERT(utils::merge_dynamic_dim(k, k0, k1), + "Brgemm input descriptors have incompatible K dimension value."); + ss << static_cast(m) << ',' << static_cast(n) << ',' << static_cast(k) << ','; + + size_t m_block = in_0_desc->get_subtensor().front(); + size_t n_block = in_1_desc->get_subtensor().back(); + size_t k_block = in_0_desc->get_subtensor().back(); /* input 0 is M x K, so its last subtensor entry is the K block; the output has no K dimension */ + + auto append_block_info = [&](size_t block) { /* writes one block column: FULL_DIM, ? (dynamic) or the number, then ',' */ + if (block == utils::get_full_dim_value()) { + ss << "FULL_DIM"; + } else if (block == utils::get_dynamic_value()) { + ss << "?"; + } else { + ss << block; + } + ss << ','; + }; + + append_block_info(m_block); + append_block_info(n_block); + append_block_info(k_block); + return ss.str(); +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov + +#endif // SNIPPETS_DEBUG_CAPS diff --git a/src/common/snippets/src/lowered/pass/validate.cpp b/src/common/snippets/src/lowered/pass/validate.cpp index 5e6f31ae3f80ea..3ab2d91164dc0e 100644 --- a/src/common/snippets/src/lowered/pass/validate.cpp +++ b/src/common/snippets/src/lowered/pass/validate.cpp @@ -153,9 +153,14 @@ bool Validate::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lo if (found != m_validation_map.cend()) { (found->second)(expr, linear_ir); } - OPENVINO_ASSERT(expr->get_output_count() == node->get_output_size() || - ov::is_type(node) || - ov::is_type(node), "Incorrect count of output port descriptors!"); + bool bypass_output_size_check = +#ifdef SNIPPETS_DEBUG_CAPS + ov::is_type(node) || ov::is_type(node) || +#endif // SNIPPETS_DEBUG_CAPS + ov::is_type(node) || ov::is_type(node); + + OPENVINO_ASSERT(expr->get_output_count() == node->get_output_size() || bypass_output_size_check, + "Incorrect count of output port descriptors!"); expr->validate();
// Loop expr doesn't have shapes and layouts if (!ov::is_type(node)) diff --git a/src/common/snippets/src/op/perf_count.cpp b/src/common/snippets/src/op/perf_count.cpp index 45ed4018751676..d70e215efa7e08 100644 --- a/src/common/snippets/src/op/perf_count.cpp +++ b/src/common/snippets/src/op/perf_count.cpp @@ -3,10 +3,94 @@ // #ifdef SNIPPETS_DEBUG_CAPS +#include + #include "snippets/op/perf_count.hpp" namespace ov { namespace snippets { + +//////////////////utils::Dumper/////////////// + +namespace utils { + +Dumper::Dumper() { /* counts live Dumper instances */ + ++nodes_count; +} + +Dumper::~Dumper() { /* the instance that brings the count back to zero writes the collected rows to the CSV file */ + --nodes_count; + if (nodes_count == 0) { + dump_brgemm_params_to_csv(); + } +} + +void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocal accumulation, ov::threading::ThreadLocal iteration) { /* Prints the per-thread timings of the node to stdout and computes the maximum average time. NOTE(review): both ThreadLocal arguments are taken by value - confirm the copy is intended. */ + OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node."); + auto iterator_iter = iteration.begin(); + auto iterator_acc = accumulation.begin(); + int t_num = 0; + uint64_t avg_max = 0; + std::cout << "Perf count data in perfCountEnd node with name " << node->get_friendly_name() << " is:" << std::endl; + for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) { + const auto iter = *iterator_iter; + const auto acc = *iterator_acc; + uint64_t avg = iter == 0 ? 0 : acc / iter; /* a thread with zero iterations reports an average of 0 instead of dividing by zero */ + if (avg > avg_max) + avg_max = avg; + std::cout << "accumulated time:" << acc << "ns, iteration:" << iter << " avg time:" << avg << "ns" << " on thread:" << t_num << std::endl; + t_num++; + } + + // max time of all threads: combine for reduce max + auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) { + return a >= b ?
a : b; + }; + + // max accumulation + uint64_t acc_max = accumulation.combine(BinaryFunc); + std::cout << "max accumulated time:" << acc_max << "ns" << std::endl; + // max avg + std::cout << "max avg time:" << avg_max << "ns" << std::endl; + + if (acc_max == 0 || avg_max == 0 || node->get_friendly_name().find("_DebugParams") == std::string::npos) { + return; + } + const auto& rt_info = node->get_rt_info(); + auto brgemm_params_it = rt_info.find("brgemm_params"); + if (brgemm_params_it == rt_info.end()) { + return; + } + if (brgemm_csv_path.empty()) { + auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path"); + if (brgemm_csv_path_it != rt_info.end()) { + brgemm_csv_path = brgemm_csv_path_it->second.as(); + } + } + m_debug_params_map[node->get_friendly_name()] = + brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max); +} + +size_t Dumper::nodes_count = 0; +std::map Dumper::m_debug_params_map; +std::string Dumper::brgemm_csv_path; // NOLINT + +void Dumper::dump_brgemm_params_to_csv() { /* writes the header plus one row per recorded node; does nothing when no rows or no path were collected */ + if (m_debug_params_map.empty() || brgemm_csv_path.empty()) { + return; + } + std::ofstream csv_file(brgemm_csv_path); + OPENVINO_ASSERT(csv_file.is_open(), "Failed to open csv file for brgemm debug parameters."); /* NOTE(review): this runs from ~Dumper() - confirm a failed assert cannot escape the destructor */ + csv_file << "subgraph_name,name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time," + "avg_max_time\n"; /* column order mirrors the row built by BrgemmDebugParams::collect_params: subgraph name first, then node name */ + for (const auto& [_, params] : m_debug_params_map) { + csv_file << params << '\n'; + } + csv_file.close(); +} + +} // namespace utils + namespace op { /////////////////PerfCountBeginBase///////////////// @@ -62,11 +146,22 @@ void PerfCountBegin::set_start_time() { } //////////////////PerfCountEnd/////////////// -PerfCountEnd::PerfCountEnd(const Output& pc_begin) : PerfCountEndBase({pc_begin}), accumulation(0ul), iteration(0u) { + +PerfCountEnd::PerfCountEnd() : PerfCountEndBase() {} + + +PerfCountEnd::PerfCountEnd(const Output& pc_begin) + : PerfCountEndBase({pc_begin}), +
accumulation(0ul), + iteration(0u) { constructor_validate_and_infer_types(); init_pc_begin(); } +PerfCountEnd::~PerfCountEnd() { + output_perf_count(); +} + std::shared_ptr PerfCountEnd::clone_with_new_inputs(const OutputVector& inputs) const { return std::make_shared(inputs.at(0)); } @@ -84,31 +179,7 @@ void PerfCountEnd::init_pc_begin() { } void PerfCountEnd::output_perf_count() { - OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node."); - auto iterator_iter = iteration.begin(); - auto iterator_acc = accumulation.begin(); - int t_num = 0; - uint64_t avg_max = 0; - std::cout << "Perf count data in perfCountEnd node with name " << get_friendly_name() << " is:"<< std::endl; - for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) { - const auto iter = *iterator_iter; - const auto acc = *iterator_acc; - uint64_t avg = iter == 0 ? 0 : acc / iter; - if (avg > avg_max) - avg_max = avg; - std::cout << "accumulated time:" << acc << "ns, iteration:" << iter << " avg time:" << avg << "ns"<< " on thread:" << t_num << std::endl; - t_num++; - } - - // max time of all threads: combine for reduce max - auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) { - return a >= b ? 
a : b; - }; - // max accumulation - uint64_t acc_max = accumulation.combine(BinaryFunc); - std::cout << "max accumulated time:" << acc_max << "ns" << std::endl; - // max avg - std::cout << "max avg time:" << avg_max << "ns" << std::endl; + csv_dumper.update(this, accumulation, iteration); } } // namespace op diff --git a/src/common/snippets/src/utils/debug_caps_config.cpp b/src/common/snippets/src/utils/debug_caps_config.cpp index b80795b12912cd..5f8b58e1aff45b 100644 --- a/src/common/snippets/src/utils/debug_caps_config.cpp +++ b/src/common/snippets/src/utils/debug_caps_config.cpp @@ -22,6 +22,9 @@ void DebugCapsConfig::readProperties() { dumpLIR.parseAndSet(envVarValue); OPENVINO_ASSERT(!dumpLIR.passes.empty(), "Passes option in OV_SNIPPETS_DUMP_LIR must be provided."); } + if ((envVarValue = readEnv("OV_SNIPPETS_DUMP_BRGEMM_PARAMS"))) { + dumpParams.parseAndSet(envVarValue); + } } void DebugCapsConfig::PropertyGroup::parseAndSet(const std::string& str) { diff --git a/src/common/snippets/src/utils/utils.cpp b/src/common/snippets/src/utils/utils.cpp index 8f815a88dc56ff..248ef989790f7a 100644 --- a/src/common/snippets/src/utils/utils.cpp +++ b/src/common/snippets/src/utils/utils.cpp @@ -368,6 +368,24 @@ void visit_path(const lowered::ExpressionPtr& expr, } } +std::string tensor2str(const VectorDims& tensor, const std::string& delimiter) { + std::stringstream ss; + for (size_t i = 0; i < tensor.size(); ++i) { + const auto& v = tensor[i]; + std::string v_str; + if (utils::is_full_dim_value(v)) { + v_str = "FULL_DIM"; + } else if (utils::is_dynamic_value(v)) { + v_str = "?"; + } else { + v_str = std::to_string(v); + } + const auto del = i < tensor.size() - 1 ? 
delimiter : ""; + ss << v_str << del; + } + return ss.str(); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 43a005b27cb450..ba87ed7c13750f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -8,6 +8,7 @@ #include "onednn/dnnl.h" #include "openvino/core/parallel.hpp" #include "shape_inference/custom/subgraph.hpp" +#include "snippets/lowered/pass/brgemm_debug_params.hpp" #include "snippets/lowered/pass/init_loops.hpp" #include "snippets/lowered/pass/insert_buffers.hpp" #include "snippets/lowered/pass/insert_loops.hpp" @@ -525,6 +526,13 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ov::snippets::lowered::pass::MarkLoops, ov::intel_cpu::pass::BrgemmCPUBlocking); +#ifdef SNIPPETS_DEBUG_CAPS + SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, + ov::intel_cpu::pass::BrgemmCPUBlocking, + ov::snippets::lowered::pass::BrgemmDebugParams, + getName()); +#endif // SNIPPETS_DEBUG_CAPS + SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, ov::snippets::lowered::pass::InitLoops, ov::intel_cpu::pass::AdjustBrgemmCopyBLoopPorts);