From 4a75747b5b9b779aedb300b3344a0d4c10403c1d Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Fri, 17 Jan 2025 11:34:24 +0100 Subject: [PATCH 1/6] [Snippets] Add debug caps for dumping snippets parameters --- .../debug_capabilities/parameters_dump.md | 19 ++ .../lowered/pass/brgemm_debug_params.hpp | 171 ++++++++++++++++++ .../include/snippets/op/perf_count.hpp | 13 +- .../snippets/utils/debug_caps_config.hpp | 9 + .../snippets/include/snippets/utils/utils.hpp | 9 + .../snippets/src/lowered/expression.cpp | 16 +- src/common/snippets/src/op/perf_count.cpp | 54 +++++- .../snippets/src/utils/debug_caps_config.cpp | 3 + src/common/snippets/src/utils/utils.cpp | 12 ++ src/plugins/intel_cpu/src/nodes/subgraph.cpp | 8 + 10 files changed, 295 insertions(+), 19 deletions(-) create mode 100644 src/common/snippets/docs/debug_capabilities/parameters_dump.md create mode 100644 src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp diff --git a/src/common/snippets/docs/debug_capabilities/parameters_dump.md b/src/common/snippets/docs/debug_capabilities/parameters_dump.md new file mode 100644 index 00000000000000..26b49357dbabfd --- /dev/null +++ b/src/common/snippets/docs/debug_capabilities/parameters_dump.md @@ -0,0 +1,19 @@ +# Snippet parameters dump + +Snippet parameters can be captured during the pass flow, which can be useful for debugging and optimization purposes. + +To turn on snippet parameters dump feature, the following environment variable should be used: +```sh + OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=" binary ... +``` + +Examples: +```sh + OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=brgemm.csv" binary ... 
+``` + +Output example: +| subgraph_name | name | in_type | out_type | in_shapes | out_shapes | in_layouts | out_layouts | M | N | K | m_block | n_block | k_block | acc_max_time | avg_max_time | +|--------------------|------------|-------------|----------|-------------------------------------|----------------------|--------------------------|-------------|-----|-----|-----|---------|----------|----------|---------------|---------------| +| FakeQuantitze_457 | MatMul_438 | i8;i8;f32 | i32 | 1 16 128 64;1 16 64 128;1 16 64 128 | 1 16 128 128 | 0 2 1 3;0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 128 | 64 | 32 | FULL_DIM | FULL_DIM | 41482 | 5185 | +| FakeQuantitze_457 | MatMul_452 | u8;i8 | i32 | 1 16 128 128;1 16 128 64 | 1 16 128 64 | 0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 64 | 128 | 32 | FULL_DIM | FULL_DIM | 39427 | 4928 | diff --git a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp new file mode 100644 index 00000000000000..6f559cbe4ad69f --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp @@ -0,0 +1,171 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS + +#pragma once + +#include "snippets/itt.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/lowered/specific_loop_iter_handlers.hpp" +#include "snippets/lowered/pass/iter_handler.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/utils/utils.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface BrgemmDebugParams + * @brief Brgemm parameters dump pass + * @ingroup snippets + */ +template ::value, bool>::type = true> +class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { +public: + BrgemmDebugParams(const std::string& subgraph_name) : m_subgraph_name(subgraph_name) {} + OPENVINO_RTTI("BrgemmDebugParams", "", RangedPass); 
+ + bool run(snippets::lowered::LinearIR& linear_ir, + snippets::lowered::LinearIR::constExprIt begin, + snippets::lowered::LinearIR::constExprIt end) override final { // NOLINT + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmDebugParams") + if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) { + return false; + } + static size_t seq_number = 0; + bool modified = false; + for (auto expr_it = begin; expr_it != end; expr_it++) { + const auto& brgemm_expr = *expr_it; + const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); + if (!brgemm) + continue; + // Collect brgemm parameters + auto params = collect_params(brgemm_expr, linear_ir); + const auto& perf_count_begin = std::make_shared(); + perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) + + "_DebugParams"); + const auto empty_inputs = std::vector{}; + linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it); + + const auto& perf_count_end = std::make_shared(perf_count_begin->output(0)); + perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) + + "_DebugParams"); + // Attach brgemm parameters to PerfCountEnd node + auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; + perf_count_end->get_rt_info()["brgemm_params"] = params; + perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path; + linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it)); + seq_number++; + modified = true; + } + return modified; + } + +private: + std::string collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr, + const snippets::lowered::LinearIR& linear_ir) { + auto debug_config = linear_ir.get_config().debug_config; + const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); + OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!"); + std::stringstream ss; + ss << 
m_subgraph_name << ','; + ss << brgemm_expr->get_node()->get_friendly_name() << ','; + for (size_t i = 0; i < brgemm->get_input_size(); ++i) { + ss << brgemm->get_input_element_type(i); + if (i != brgemm->get_input_size() - 1) { + ss << ';'; + } + } + ss << ','; + for (size_t i = 0; i < brgemm->get_output_size(); ++i) { + ss << brgemm->get_output_element_type(i); + if (i != brgemm->get_output_size() - 1) { + ss << ';'; + } + } + ss << ','; + for (size_t i = 0; i < brgemm->inputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); + const auto& shape = ov::snippets::utils::get_planar_vdims(port_desc->get_shape(), port_desc->get_layout()); + ss << utils::tensor2str(shape, " "); + ss << ';'; + } + ss.seekp(-1, ss.cur); + ss << ','; + for (size_t i = 0; i < brgemm->outputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); + const auto& shape = + ov::snippets::utils::get_preordered_vdims(port_desc->get_shape(), port_desc->get_layout()); + ss << utils::tensor2str(shape, " "); + ss << ';'; + } + ss.seekp(-1, ss.cur); + ss << ','; + for (size_t i = 0; i < brgemm->inputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); + ss << utils::tensor2str(port_desc->get_layout(), " "); + ss << ';'; + } + ss << ','; + for (size_t i = 0; i < brgemm->outputs().size(); ++i) { + const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); + ss << utils::tensor2str(port_desc->get_layout(), " "); + ss << ';'; + } + ss << ','; + + const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0); + const auto& in_1_desc = brgemm_expr->get_input_port_descriptor(1); + const auto& out_desc = brgemm_expr->get_output_port_descriptor(0); + + const auto& in_0_planar_dims = + ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout()); + const auto& in_1_planar_dims = + ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout()); + 
const auto& out_preordered_dims = + ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout()); + + const auto& m = *++out_preordered_dims.rbegin(); + const auto& n = *out_preordered_dims.rbegin(); + const auto& k0 = *in_0_planar_dims.rbegin(); + const auto& k1 = *++in_1_planar_dims.rbegin(); + size_t k = 0; + OPENVINO_ASSERT(utils::merge_dynamic_dim(k, k0, k1), + "Brgemm input descriptors have incompatible K dimension value."); + ss << static_cast(m) << ',' << static_cast(n) << ',' << static_cast(k) << ','; + + size_t m_block = in_0_desc->get_subtensor().front(); + size_t n_block = in_1_desc->get_subtensor().back(); + size_t k_block = out_desc->get_subtensor().back(); + + auto append_block_info = [&](size_t block) { + if (block == utils::get_full_dim_value()) { + ss << "FULL_DIM"; + } else if (block == utils::get_dynamic_value()) { + ss << "?"; + } else { + ss << block; + } + ss << ','; + }; + + append_block_info(m_block); + append_block_info(n_block); + append_block_info(k_block); + return ss.str(); + } + + std::string m_subgraph_name; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov + +#endif // SNIPPETS_DEBUG_CAPS diff --git a/src/common/snippets/include/snippets/op/perf_count.hpp b/src/common/snippets/include/snippets/op/perf_count.hpp index b6c8eb4264f1b5..144a0bd8b7be62 100644 --- a/src/common/snippets/include/snippets/op/perf_count.hpp +++ b/src/common/snippets/include/snippets/op/perf_count.hpp @@ -74,20 +74,25 @@ class PerfCountEnd : public PerfCountEndBase { public: OPENVINO_OP("PerfCountEnd", "SnippetsOpset", PerfCountEndBase); PerfCountEnd(const Output& pc_begin); - PerfCountEnd() = default; - ~PerfCountEnd() { - output_perf_count(); - } + PerfCountEnd(); + ~PerfCountEnd(); + void output_perf_count(); std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; void init_pc_begin(); void set_accumulated_time(); + void dump_brgemm_params_to_csv(); + private: 
ov::threading::ThreadLocal accumulation; ov::threading::ThreadLocal iteration; std::shared_ptr m_pc_begin = nullptr; + + static std::string brgemm_csv_path; + static std::map m_debug_params_map; + static size_t nodes_count; }; } // namespace op diff --git a/src/common/snippets/include/snippets/utils/debug_caps_config.hpp b/src/common/snippets/include/snippets/utils/debug_caps_config.hpp index 67f791009e5167..a8726395d5393c 100644 --- a/src/common/snippets/include/snippets/utils/debug_caps_config.hpp +++ b/src/common/snippets/include/snippets/utils/debug_caps_config.hpp @@ -60,6 +60,15 @@ class DebugCapsConfig { } } dumpLIR; + struct : PropertyGroup { + std::string csv_path; + std::vector getPropertySetters() override { + return { + PropertySetterPtr(new StringPropertySetter("path", csv_path, "path to dumped brgemm params")), + }; + } + } dumpParams; + // Snippets performance count mode // Disabled - default, w/o perf count for snippets // Chrono - perf count with chrono call. This is a universal method, and support multi-thread case to output perf diff --git a/src/common/snippets/include/snippets/utils/utils.hpp b/src/common/snippets/include/snippets/utils/utils.hpp index dc480c4a81e3f9..c450e24573b66e 100644 --- a/src/common/snippets/include/snippets/utils/utils.hpp +++ b/src/common/snippets/include/snippets/utils/utils.hpp @@ -324,6 +324,15 @@ void visit_path(const lowered::ExpressionPtr& expr, std::function func, bool visit_parent_path); +/** + * @brief Converts a tensor to a string representation. + * Each value in the tensor is converted to a string. If the value is a full dimension, it is represented as + * "FULL_DIM". If the value is dynamic, it is represented as "?". + * @param tensor The tensor to be converted to a string. + * @return A string representation of the tensor. 
+ */ +std::string tensor2str(const VectorDims& tensor, const std::string& delimiter = ", "); + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/lowered/expression.cpp b/src/common/snippets/src/lowered/expression.cpp index 245470ae1a48af..7389990cba54dc 100644 --- a/src/common/snippets/src/lowered/expression.cpp +++ b/src/common/snippets/src/lowered/expression.cpp @@ -170,18 +170,6 @@ ExpressionPtr Expression::clone() const { } bool Expression::visit_attributes(AttributeVisitor &visitor) { - auto subtensor2str = [](const VectorDims& subtensor) { - std::stringstream ss; - for (size_t i = 0; i < subtensor.size(); ++i) { - const auto& v = subtensor[i]; - const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : - utils::is_dynamic_value(v) ? "?" : std::to_string(v); - const auto del = i < subtensor.size() - 1 ? ", " : ""; - ss << v_str << del; - } - return ss.str(); - }; - std::ostringstream in_regs, out_regs; std::vector> shapes; std::vector> subtensors; @@ -194,7 +182,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { const auto& subtensor = desc->get_subtensor(); if (!subtensor.empty()) - subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + subtensors.emplace_back("in_subtensor_" + std::to_string(i), utils::tensor2str(subtensor)); const auto& layout = desc->get_layout(); if (!layout.empty() && !utils::is_planar_layout(layout)) @@ -210,7 +198,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) { const auto& subtensor = desc->get_subtensor(); if (!subtensor.empty()) - subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor)); + subtensors.emplace_back("out_subtensor_" + std::to_string(i), utils::tensor2str(subtensor)); const auto& layout = desc->get_layout(); if (!layout.empty() && !utils::is_planar_layout(layout)) diff --git a/src/common/snippets/src/op/perf_count.cpp b/src/common/snippets/src/op/perf_count.cpp index 
45ed4018751676..81094a4b4babfe 100644 --- a/src/common/snippets/src/op/perf_count.cpp +++ b/src/common/snippets/src/op/perf_count.cpp @@ -3,6 +3,8 @@ // #ifdef SNIPPETS_DEBUG_CAPS +#include + #include "snippets/op/perf_count.hpp" namespace ov { @@ -62,9 +64,30 @@ void PerfCountBegin::set_start_time() { } //////////////////PerfCountEnd/////////////// -PerfCountEnd::PerfCountEnd(const Output& pc_begin) : PerfCountEndBase({pc_begin}), accumulation(0ul), iteration(0u) { + +size_t PerfCountEnd::nodes_count = 0; +std::map PerfCountEnd::m_debug_params_map; +std::string PerfCountEnd::brgemm_csv_path; // NOLINT + +PerfCountEnd::PerfCountEnd() : PerfCountEndBase() { + ++nodes_count; +} + +PerfCountEnd::PerfCountEnd(const Output& pc_begin) + : PerfCountEndBase({pc_begin}), + accumulation(0ul), + iteration(0u) { constructor_validate_and_infer_types(); init_pc_begin(); + ++nodes_count; +} + +PerfCountEnd::~PerfCountEnd() { + output_perf_count(); + --nodes_count; + if (nodes_count == 0) { + dump_brgemm_params_to_csv(); + } } std::shared_ptr PerfCountEnd::clone_with_new_inputs(const OutputVector& inputs) const { @@ -109,6 +132,35 @@ void PerfCountEnd::output_perf_count() { std::cout << "max accumulated time:" << acc_max << "ns" << std::endl; // max avg std::cout << "max avg time:" << avg_max << "ns" << std::endl; + + // Dump brgemm debug parameters to csv file + if (acc_max != 0 && avg_max != 0 && get_friendly_name().find("_DebugParams") != std::string::npos) { + const auto& rt_info = get_rt_info(); + auto brgemm_params_it = rt_info.find("brgemm_params"); + if (brgemm_params_it == rt_info.end()) { + return; + } + if (brgemm_csv_path.empty()) { + auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path"); + brgemm_csv_path = brgemm_csv_path_it->second.as(); + } + m_debug_params_map[get_friendly_name()] = + brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max); + } +} + +void PerfCountEnd::dump_brgemm_params_to_csv() { + if 
(m_debug_params_map.empty() || brgemm_csv_path.empty()) { + return; + } + std::ofstream csv_file(brgemm_csv_path); + OPENVINO_ASSERT(csv_file.is_open(), "Failed to open csv file for brgemm debug parameters."); + csv_file << "subgraph_name,name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time," + "avg_max_time\n"; /* column order mirrors BrgemmDebugParams::collect_params(): subgraph name first, then the node's friendly name; the two timing columns are appended in output_perf_count() */ + for (const auto& [_, params] : m_debug_params_map) { + csv_file << params << '\n'; + } + csv_file.close(); } } // namespace op diff --git a/src/common/snippets/src/utils/debug_caps_config.cpp b/src/common/snippets/src/utils/debug_caps_config.cpp index b80795b12912cd..5f8b58e1aff45b 100644 --- a/src/common/snippets/src/utils/debug_caps_config.cpp +++ b/src/common/snippets/src/utils/debug_caps_config.cpp @@ -22,6 +22,9 @@ void DebugCapsConfig::readProperties() { dumpLIR.parseAndSet(envVarValue); OPENVINO_ASSERT(!dumpLIR.passes.empty(), "Passes option in OV_SNIPPETS_DUMP_LIR must be provided."); } + if ((envVarValue = readEnv("OV_SNIPPETS_DUMP_BRGEMM_PARAMS"))) { + dumpParams.parseAndSet(envVarValue); + } } void DebugCapsConfig::PropertyGroup::parseAndSet(const std::string& str) { diff --git a/src/common/snippets/src/utils/utils.cpp b/src/common/snippets/src/utils/utils.cpp index 8f815a88dc56ff..5f6f8033c10f8f 100644 --- a/src/common/snippets/src/utils/utils.cpp +++ b/src/common/snippets/src/utils/utils.cpp @@ -368,6 +368,18 @@ void visit_path(const lowered::ExpressionPtr& expr, } } +std::string tensor2str(const VectorDims& tensor, const std::string& delimiter) { + std::stringstream ss; + for (size_t i = 0; i < tensor.size(); ++i) { + const auto& v = tensor[i]; + const auto v_str = + utils::is_full_dim_value(v) ? "FULL_DIM" : utils::is_dynamic_value(v) ? "?" : std::to_string(v); + const auto del = i < tensor.size() - 1 ? 
delimiter : ""; + ss << v_str << del; + } + return ss.str(); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 43a005b27cb450..4dc8dd8ef49951 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -8,6 +8,7 @@ #include "onednn/dnnl.h" #include "openvino/core/parallel.hpp" #include "shape_inference/custom/subgraph.hpp" +#include "snippets/lowered/pass/brgemm_debug_params.hpp" #include "snippets/lowered/pass/init_loops.hpp" #include "snippets/lowered/pass/insert_buffers.hpp" #include "snippets/lowered/pass/insert_loops.hpp" @@ -525,6 +526,13 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ov::snippets::lowered::pass::MarkLoops, ov::intel_cpu::pass::BrgemmCPUBlocking); +#ifdef SNIPPETS_DEBUG_CAPS + SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, + ov::intel_cpu::pass::BrgemmCPUBlocking, + ov::snippets::lowered::pass::BrgemmDebugParams, + getName()); +#endif // SNIPPETS_DEBUG_CAPS + SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, ov::snippets::lowered::pass::InitLoops, ov::intel_cpu::pass::AdjustBrgemmCopyBLoopPorts); From e8387b7ec2d7f0ff6897023bc7894fbf3829af9b Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Fri, 17 Jan 2025 12:40:27 +0100 Subject: [PATCH 2/6] validation --- src/common/snippets/src/lowered/pass/validate.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/common/snippets/src/lowered/pass/validate.cpp b/src/common/snippets/src/lowered/pass/validate.cpp index 5e6f31ae3f80ea..3ab2d91164dc0e 100644 --- a/src/common/snippets/src/lowered/pass/validate.cpp +++ b/src/common/snippets/src/lowered/pass/validate.cpp @@ -153,9 +153,14 @@ bool Validate::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lo if (found != m_validation_map.cend()) { (found->second)(expr, linear_ir); } - 
OPENVINO_ASSERT(expr->get_output_count() == node->get_output_size() || - ov::is_type(node) || - ov::is_type(node), "Incorrect count of output port descriptors!"); + bool bypass_output_size_check = +#ifdef SNIPPETS_DEBUG_CAPS + ov::is_type(node) || ov::is_type(node) || +#endif // SNIPPETS_DEBUG_CAPS + ov::is_type(node) || ov::is_type(node); + + OPENVINO_ASSERT(expr->get_output_count() == node->get_output_size() || bypass_output_size_check, + "Incorrect count of output port descriptors!"); expr->validate(); // Loop expr doesn't have shapes and layouts if (!ov::is_type(node)) From 06eee03067117bab48eef6bac7971b7b0962024f Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Fri, 24 Jan 2025 19:16:17 +0100 Subject: [PATCH 3/6] small fixes --- .../snippets/docs/debug_capabilities/parameters_dump.md | 5 +++-- .../include/snippets/lowered/pass/brgemm_debug_params.hpp | 3 +-- src/common/snippets/src/op/perf_count.cpp | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/common/snippets/docs/debug_capabilities/parameters_dump.md b/src/common/snippets/docs/debug_capabilities/parameters_dump.md index 26b49357dbabfd..40fa46a55d1d9a 100644 --- a/src/common/snippets/docs/debug_capabilities/parameters_dump.md +++ b/src/common/snippets/docs/debug_capabilities/parameters_dump.md @@ -1,8 +1,8 @@ # Snippet parameters dump -Snippet parameters can be captured during the pass flow, which can be useful for debugging and optimization purposes. +The pass dumps selected properties of some performance-critical operations in Subgraphs. Only MatMuls are currently supported by this pass. -To turn on snippet parameters dump feature, the following environment variable should be used: +To turn on snippet properties dump feature, the following environment variable should be used: ```sh OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=" binary ... 
``` @@ -13,6 +13,7 @@ Examples: ``` Output example: + | subgraph_name | name | in_type | out_type | in_shapes | out_shapes | in_layouts | out_layouts | M | N | K | m_block | n_block | k_block | acc_max_time | avg_max_time | |--------------------|------------|-------------|----------|-------------------------------------|----------------------|--------------------------|-------------|-----|-----|-----|---------|----------|----------|---------------|---------------| | FakeQuantitze_457 | MatMul_438 | i8;i8;f32 | i32 | 1 16 128 64;1 16 64 128;1 16 64 128 | 1 16 128 128 | 0 2 1 3;0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 128 | 64 | 32 | FULL_DIM | FULL_DIM | 41482 | 5185 | diff --git a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp index 6f559cbe4ad69f..96dc258dc05fa8 100644 --- a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp @@ -39,6 +39,7 @@ class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { } static size_t seq_number = 0; bool modified = false; + auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; for (auto expr_it = begin; expr_it != end; expr_it++) { const auto& brgemm_expr = *expr_it; const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); @@ -56,7 +57,6 @@ class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) + "_DebugParams"); // Attach brgemm parameters to PerfCountEnd node - auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; perf_count_end->get_rt_info()["brgemm_params"] = params; perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path; linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it)); @@ -69,7 +69,6 @@ class 
BrgemmDebugParams : public snippets::lowered::pass::RangedPass { private: std::string collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr, const snippets::lowered::LinearIR& linear_ir) { - auto debug_config = linear_ir.get_config().debug_config; const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!"); std::stringstream ss; diff --git a/src/common/snippets/src/op/perf_count.cpp b/src/common/snippets/src/op/perf_count.cpp index 81094a4b4babfe..f27eb355ba10d2 100644 --- a/src/common/snippets/src/op/perf_count.cpp +++ b/src/common/snippets/src/op/perf_count.cpp @@ -142,7 +142,9 @@ void PerfCountEnd::output_perf_count() { } if (brgemm_csv_path.empty()) { auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path"); - brgemm_csv_path = brgemm_csv_path_it->second.as(); + if (brgemm_csv_path_it != rt_info.end()) { + brgemm_csv_path = brgemm_csv_path_it->second.as(); + } } m_debug_params_map[get_friendly_name()] = brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max); From f9790ce7059781160b4aa13640d3607afe149a20 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Mon, 27 Jan 2025 09:08:10 +0100 Subject: [PATCH 4/6] remove template --- .../lowered/pass/brgemm_debug_params.hpp | 127 +-------------- .../src/lowered/pass/brgemm_debug_params.cpp | 154 ++++++++++++++++++ src/plugins/intel_cpu/src/nodes/subgraph.cpp | 2 +- 3 files changed, 157 insertions(+), 126 deletions(-) create mode 100644 src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp diff --git a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp index 96dc258dc05fa8..294d45467ed72e 100644 --- a/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/brgemm_debug_params.hpp @@ -23,8 +23,6 @@ namespace pass { * @brief Brgemm 
parameters dump pass * @ingroup snippets */ -template ::value, bool>::type = true> class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { public: BrgemmDebugParams(const std::string& subgraph_name) : m_subgraph_name(subgraph_name) {} @@ -32,132 +30,11 @@ class BrgemmDebugParams : public snippets::lowered::pass::RangedPass { bool run(snippets::lowered::LinearIR& linear_ir, snippets::lowered::LinearIR::constExprIt begin, - snippets::lowered::LinearIR::constExprIt end) override final { // NOLINT - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmDebugParams") - if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) { - return false; - } - static size_t seq_number = 0; - bool modified = false; - auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; - for (auto expr_it = begin; expr_it != end; expr_it++) { - const auto& brgemm_expr = *expr_it; - const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); - if (!brgemm) - continue; - // Collect brgemm parameters - auto params = collect_params(brgemm_expr, linear_ir); - const auto& perf_count_begin = std::make_shared(); - perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) + - "_DebugParams"); - const auto empty_inputs = std::vector{}; - linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it); - - const auto& perf_count_end = std::make_shared(perf_count_begin->output(0)); - perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) + - "_DebugParams"); - // Attach brgemm parameters to PerfCountEnd node - perf_count_end->get_rt_info()["brgemm_params"] = params; - perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path; - linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it)); - seq_number++; - modified = true; - } - return modified; - } + 
snippets::lowered::LinearIR::constExprIt end) override; private: std::string collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr, - const snippets::lowered::LinearIR& linear_ir) { - const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); - OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!"); - std::stringstream ss; - ss << m_subgraph_name << ','; - ss << brgemm_expr->get_node()->get_friendly_name() << ','; - for (size_t i = 0; i < brgemm->get_input_size(); ++i) { - ss << brgemm->get_input_element_type(i); - if (i != brgemm->get_input_size() - 1) { - ss << ';'; - } - } - ss << ','; - for (size_t i = 0; i < brgemm->get_output_size(); ++i) { - ss << brgemm->get_output_element_type(i); - if (i != brgemm->get_output_size() - 1) { - ss << ';'; - } - } - ss << ','; - for (size_t i = 0; i < brgemm->inputs().size(); ++i) { - const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); - const auto& shape = ov::snippets::utils::get_planar_vdims(port_desc->get_shape(), port_desc->get_layout()); - ss << utils::tensor2str(shape, " "); - ss << ';'; - } - ss.seekp(-1, ss.cur); - ss << ','; - for (size_t i = 0; i < brgemm->outputs().size(); ++i) { - const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); - const auto& shape = - ov::snippets::utils::get_preordered_vdims(port_desc->get_shape(), port_desc->get_layout()); - ss << utils::tensor2str(shape, " "); - ss << ';'; - } - ss.seekp(-1, ss.cur); - ss << ','; - for (size_t i = 0; i < brgemm->inputs().size(); ++i) { - const auto& port_desc = brgemm_expr->get_input_port_descriptor(i); - ss << utils::tensor2str(port_desc->get_layout(), " "); - ss << ';'; - } - ss << ','; - for (size_t i = 0; i < brgemm->outputs().size(); ++i) { - const auto& port_desc = brgemm_expr->get_output_port_descriptor(i); - ss << utils::tensor2str(port_desc->get_layout(), " "); - ss << ';'; - } - ss << ','; - - const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0); - const auto& in_1_desc = 
brgemm_expr->get_input_port_descriptor(1); - const auto& out_desc = brgemm_expr->get_output_port_descriptor(0); - - const auto& in_0_planar_dims = - ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout()); - const auto& in_1_planar_dims = - ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout()); - const auto& out_preordered_dims = - ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout()); - - const auto& m = *++out_preordered_dims.rbegin(); - const auto& n = *out_preordered_dims.rbegin(); - const auto& k0 = *in_0_planar_dims.rbegin(); - const auto& k1 = *++in_1_planar_dims.rbegin(); - size_t k = 0; - OPENVINO_ASSERT(utils::merge_dynamic_dim(k, k0, k1), - "Brgemm input descriptors have incompatible K dimension value."); - ss << static_cast(m) << ',' << static_cast(n) << ',' << static_cast(k) << ','; - - size_t m_block = in_0_desc->get_subtensor().front(); - size_t n_block = in_1_desc->get_subtensor().back(); - size_t k_block = out_desc->get_subtensor().back(); - - auto append_block_info = [&](size_t block) { - if (block == utils::get_full_dim_value()) { - ss << "FULL_DIM"; - } else if (block == utils::get_dynamic_value()) { - ss << "?"; - } else { - ss << block; - } - ss << ','; - }; - - append_block_info(m_block); - append_block_info(n_block); - append_block_info(k_block); - return ss.str(); - } + const snippets::lowered::LinearIR& linear_ir); std::string m_subgraph_name; }; diff --git a/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp b/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp new file mode 100644 index 00000000000000..7fbc129eedade7 --- /dev/null +++ b/src/common/snippets/src/lowered/pass/brgemm_debug_params.cpp @@ -0,0 +1,154 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS +#include "snippets/lowered/pass/brgemm_debug_params.hpp" + +#include "snippets/itt.hpp" 
+#include "snippets/lowered/linear_ir.hpp"
+#include "snippets/lowered/loop_manager.hpp"
+#include "snippets/lowered/pass/pass.hpp"
+#include "snippets/lowered/pass/propagate_subtensors.hpp"
+#include "snippets/lowered/pass/iter_handler.hpp"
+#include "snippets/snippets_isa.hpp"
+#include "snippets/utils/utils.hpp"
+
+namespace ov {
+namespace snippets {
+namespace lowered {
+namespace pass {
+// Wraps every Brgemm in [begin, end) with PerfCountBegin/PerfCountEnd and stores its CSV row in the End node's rt_info.
+bool BrgemmDebugParams::run(snippets::lowered::LinearIR& linear_ir,
+                            snippets::lowered::LinearIR::constExprIt begin,
+                            snippets::lowered::LinearIR::constExprIt end) {
+    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmDebugParams")
+    if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) {
+        return false;  // dumping was not requested: leave the IR untouched
+    }
+    static size_t seq_number = 0;  // function-local static: the numbering continues across calls
+    bool modified = false;
+    const auto& csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path;
+    for (auto expr_it = begin; expr_it != end; ++expr_it) {
+        const auto& brgemm_expr = *expr_it;
+        const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node());
+        if (!brgemm)
+            continue;
+        // Collect brgemm parameters
+        auto params = collect_params(brgemm_expr, linear_ir);
+        const auto& perf_count_begin = std::make_shared();
+        perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) +
+                                            "_DebugParams");
+        const auto empty_inputs = std::vector{};
+        linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it);
+
+        const auto& perf_count_end = std::make_shared(perf_count_begin->output(0));
+        perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) +
+                                          "_DebugParams");
+        // Attach brgemm parameters to PerfCountEnd node
+        perf_count_end->get_rt_info()["brgemm_params"] = params;
+        perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path;
+        linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it));
+        ++seq_number;
+        modified = true;
+    }
+    return modified;
+}
+// Builds the comma-terminated CSV prefix of one Brgemm: names, types, shapes, layouts, M/N/K and block sizes.
+std::string BrgemmDebugParams::collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr,
+                                              const snippets::lowered::LinearIR& linear_ir) {
+    const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node());
+    OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!");
+    std::stringstream ss;
+    ss << m_subgraph_name << ',';
+    ss << brgemm_expr->get_node()->get_friendly_name() << ',';
+    for (size_t i = 0; i < brgemm->get_input_size(); ++i) {
+        ss << brgemm->get_input_element_type(i);
+        if (i != brgemm->get_input_size() - 1) {
+            ss << ';';
+        }
+    }
+    ss << ',';
+    for (size_t i = 0; i < brgemm->get_output_size(); ++i) {
+        ss << brgemm->get_output_element_type(i);
+        if (i != brgemm->get_output_size() - 1) {
+            ss << ';';
+        }
+    }
+    ss << ',';
+    for (size_t i = 0; i < brgemm->inputs().size(); ++i) {
+        const auto& port_desc = brgemm_expr->get_input_port_descriptor(i);
+        const auto& shape = ov::snippets::utils::get_planar_vdims(port_desc->get_shape(), port_desc->get_layout());
+        ss << utils::tensor2str(shape, " ");
+        ss << ';';
+    }
+    ss.seekp(-1, ss.cur);  // step back one character so that the ',' below overwrites the trailing ';'
+    ss << ',';
+    for (size_t i = 0; i < brgemm->outputs().size(); ++i) {
+        const auto& port_desc = brgemm_expr->get_output_port_descriptor(i);
+        const auto& shape = ov::snippets::utils::get_preordered_vdims(port_desc->get_shape(), port_desc->get_layout());
+        ss << utils::tensor2str(shape, " ");
+        ss << ';';
+    }
+    ss.seekp(-1, ss.cur);  // same trick: drop the trailing ';' of the output shapes
+    ss << ',';
+    for (size_t i = 0; i < brgemm->inputs().size(); ++i) {
+        const auto& port_desc = brgemm_expr->get_input_port_descriptor(i);
+        ss << utils::tensor2str(port_desc->get_layout(), " ");
+        ss << ';';  // layout lists keep their trailing ';'
+    }
+    ss << ',';
+    for (size_t i = 0; i < brgemm->outputs().size(); ++i) {
+        const auto& port_desc = brgemm_expr->get_output_port_descriptor(i);
+        ss << utils::tensor2str(port_desc->get_layout(), " ");
+        ss << ';';
+    }
+    ss << ',';
+
+    const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0);
+    const auto& in_1_desc = brgemm_expr->get_input_port_descriptor(1);
+    const auto& out_desc = brgemm_expr->get_output_port_descriptor(0);
+
+    const auto& in_0_planar_dims =
+        ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout());
+    const auto& in_1_planar_dims =
+        ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout());
+    const auto& out_preordered_dims =
+        ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout());
+
+    const auto& m = *++out_preordered_dims.rbegin();
+    const auto& n = *out_preordered_dims.rbegin();
+    const auto& k0 = *in_0_planar_dims.rbegin();
+    const auto& k1 = *++in_1_planar_dims.rbegin();
+    size_t k = 0;
+    OPENVINO_ASSERT(utils::merge_dynamic_dim(k, k0, k1),
+                    "Brgemm input descriptors have incompatible K dimension value.");
+    ss << static_cast(m) << ',' << static_cast(n) << ',' << static_cast(k) << ',';
+
+    size_t m_block = in_0_desc->get_subtensor().front();
+    size_t n_block = in_1_desc->get_subtensor().back();
+    size_t k_block = in_0_desc->get_subtensor().back();  // K is the last dim of input 0; the output's last dim is N
+
+    auto append_block_info = [&](size_t block) {
+        if (block == utils::get_full_dim_value()) {
+            ss << "FULL_DIM";
+        } else if (block == utils::get_dynamic_value()) {
+            ss << "?";
+        } else {
+            ss << block;
+        }
+        ss << ',';
+    };
+
+    append_block_info(m_block);
+    append_block_info(n_block);
+    append_block_info(k_block);
+    return ss.str();
+}
+
+}  // namespace pass
+}  // namespace lowered
+}  // namespace snippets
+}  // namespace ov
+
+#endif  // SNIPPETS_DEBUG_CAPS
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
index 4dc8dd8ef49951..ba87ed7c13750f 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -529,7 +529,7 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const {
 #ifdef SNIPPETS_DEBUG_CAPS
     SNIPPETS_REGISTER_PASS_RELATIVE(Place::After,
                                     ov::intel_cpu::pass::BrgemmCPUBlocking,
-
ov::snippets::lowered::pass::BrgemmDebugParams, + ov::snippets::lowered::pass::BrgemmDebugParams, getName()); #endif // SNIPPETS_DEBUG_CAPS From ad5254736082972be34d5430e090ec90745855f0 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Mon, 27 Jan 2025 10:04:02 +0100 Subject: [PATCH 5/6] take on extracting dumper --- .../include/snippets/op/perf_count.hpp | 40 ++++- src/common/snippets/src/op/perf_count.cpp | 151 ++++++++++-------- 2 files changed, 118 insertions(+), 73 deletions(-) diff --git a/src/common/snippets/include/snippets/op/perf_count.hpp b/src/common/snippets/include/snippets/op/perf_count.hpp index 144a0bd8b7be62..fa049e32b4a056 100644 --- a/src/common/snippets/include/snippets/op/perf_count.hpp +++ b/src/common/snippets/include/snippets/op/perf_count.hpp @@ -11,6 +11,37 @@ namespace ov { namespace snippets { + +namespace op { +class PerfCountEnd; +} // namespace op + +namespace utils { + +/** + * @interface Dumper + * @brief Prints the perf-count results of PerfCountEnd nodes and dumps the Brgemm debug parameters attached to them to a CSV file once the last Dumper instance is destroyed + * @ingroup snippets + */ +class Dumper { +public: + Dumper(); + ~Dumper(); + + void update(const op::PerfCountEnd* node, + ov::threading::ThreadLocal accumulation, + ov::threading::ThreadLocal iteration); + +private: + void dump_brgemm_params_to_csv(); + + static std::string brgemm_csv_path; + static std::map m_debug_params_map; + static size_t nodes_count; +}; + +} // namespace utils + namespace op { /** @@ -83,16 +114,13 @@ class PerfCountEnd : public PerfCountEndBase { void init_pc_begin(); void set_accumulated_time(); - void dump_brgemm_params_to_csv(); - private: + ov::threading::ThreadLocal accumulation; ov::threading::ThreadLocal iteration; - std::shared_ptr m_pc_begin = nullptr; - static std::string brgemm_csv_path; - static std::map m_debug_params_map; - static size_t nodes_count; + utils::Dumper csv_dumper; + std::shared_ptr m_pc_begin = nullptr; }; } // namespace op diff --git a/src/common/snippets/src/op/perf_count.cpp b/src/common/snippets/src/op/perf_count.cpp index 
f27eb355ba10d2..d70e215efa7e08 100644
--- a/src/common/snippets/src/op/perf_count.cpp
+++ b/src/common/snippets/src/op/perf_count.cpp
@@ -9,6 +9,88 @@
 
 namespace ov {
 namespace snippets {
+
+//////////////////utils::Dumper///////////////
+
+namespace utils {
+// Live Dumper instances are counted; the last one to be destroyed writes the collected rows to the CSV file.
+Dumper::Dumper() {
+    ++nodes_count;
+}
+
+Dumper::~Dumper() {
+    --nodes_count;
+    if (nodes_count == 0) {
+        dump_brgemm_params_to_csv();  // reports failures on stderr instead of throwing out of the destructor
+    }
+}
+// Prints the per-thread timings of `node` and, for "_DebugParams" nodes, records a CSV row built from its rt_info.
+void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocal accumulation, ov::threading::ThreadLocal iteration) {
+    OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node.");
+    auto iterator_iter = iteration.begin();
+    auto iterator_acc = accumulation.begin();
+    int t_num = 0;
+    uint64_t avg_max = 0;
+    std::cout << "Perf count data in perfCountEnd node with name " << node->get_friendly_name() << " is:" << std::endl;
+    for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) {
+        const auto iter = *iterator_iter;
+        const auto acc = *iterator_acc;
+        uint64_t avg = iter == 0 ? 0 : acc / iter;  // a thread that never ran contributes 0 instead of dividing by 0
+        if (avg > avg_max)
+            avg_max = avg;
+        std::cout << "accumulated time:" << acc << "ns, iteration:" << iter << " avg time:" << avg << "ns" << " on thread:" << t_num << std::endl;
+        t_num++;
+    }
+
+    // max time of all threads: combine for reduce max
+    auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) {
+        return a >= b ? a : b;
+    };
+
+    uint64_t acc_max = accumulation.combine(BinaryFunc);  // max accumulation
+    std::cout << "max accumulated time:" << acc_max << "ns" << std::endl;
+    std::cout << "max avg time:" << avg_max << "ns" << std::endl;  // max avg
+
+    if (acc_max == 0 || avg_max == 0 || node->get_friendly_name().find("_DebugParams") == std::string::npos) {
+        return;  // nothing was measured, or the node was not inserted for the parameters dump
+    }
+    const auto& rt_info = node->get_rt_info();
+    auto brgemm_params_it = rt_info.find("brgemm_params");
+    if (brgemm_params_it == rt_info.end()) {
+        return;
+    }
+    if (brgemm_csv_path.empty()) {
+        auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path");
+        if (brgemm_csv_path_it != rt_info.end()) {
+            brgemm_csv_path = brgemm_csv_path_it->second.as();
+        }
+    }
+    m_debug_params_map[node->get_friendly_name()] =
+        brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max);
+}
+
+size_t Dumper::nodes_count = 0;
+std::map Dumper::m_debug_params_map;
+std::string Dumper::brgemm_csv_path;  // NOLINT
+// Writes the CSV header and all rows; the columns follow the row layout (subgraph name first, then the Brgemm name).
+void Dumper::dump_brgemm_params_to_csv() {
+    if (m_debug_params_map.empty() || brgemm_csv_path.empty()) {
+        return;
+    }
+    std::ofstream csv_file(brgemm_csv_path);
+    if (!csv_file.is_open()) {
+        std::cerr << "Failed to open csv file for brgemm debug parameters." << std::endl;
+        return;  // called from ~Dumper(): an escaping exception would terminate the process
+    }
+    csv_file << "subgraph_name,name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time,"
+                "avg_max_time\n";
+    for (const auto& [_, params] : m_debug_params_map) {
+        csv_file << params << '\n';
+    }
+}
+
+}  // namespace utils
+
 namespace op {
 
 /////////////////PerfCountBeginBase/////////////////
@@ -65,13 +147,8 @@ void PerfCountBegin::set_start_time() {
 
 //////////////////PerfCountEnd///////////////
 
-size_t PerfCountEnd::nodes_count = 0;
-std::map PerfCountEnd::m_debug_params_map;
-std::string PerfCountEnd::brgemm_csv_path;  // NOLINT
+PerfCountEnd::PerfCountEnd() : PerfCountEndBase() {}
 
-PerfCountEnd::PerfCountEnd() : PerfCountEndBase() {
-    ++nodes_count;
-}
PerfCountEnd::PerfCountEnd(const Output& pc_begin) : PerfCountEndBase({pc_begin}), @@ -79,15 +156,10 @@ PerfCountEnd::PerfCountEnd(const Output& pc_begin) iteration(0u) { constructor_validate_and_infer_types(); init_pc_begin(); - ++nodes_count; } PerfCountEnd::~PerfCountEnd() { output_perf_count(); - --nodes_count; - if (nodes_count == 0) { - dump_brgemm_params_to_csv(); - } } std::shared_ptr PerfCountEnd::clone_with_new_inputs(const OutputVector& inputs) const { @@ -107,62 +179,7 @@ void PerfCountEnd::init_pc_begin() { } void PerfCountEnd::output_perf_count() { - OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node."); - auto iterator_iter = iteration.begin(); - auto iterator_acc = accumulation.begin(); - int t_num = 0; - uint64_t avg_max = 0; - std::cout << "Perf count data in perfCountEnd node with name " << get_friendly_name() << " is:"<< std::endl; - for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) { - const auto iter = *iterator_iter; - const auto acc = *iterator_acc; - uint64_t avg = iter == 0 ? 0 : acc / iter; - if (avg > avg_max) - avg_max = avg; - std::cout << "accumulated time:" << acc << "ns, iteration:" << iter << " avg time:" << avg << "ns"<< " on thread:" << t_num << std::endl; - t_num++; - } - - // max time of all threads: combine for reduce max - auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) { - return a >= b ? 
a : b; - }; - // max accumulation - uint64_t acc_max = accumulation.combine(BinaryFunc); - std::cout << "max accumulated time:" << acc_max << "ns" << std::endl; - // max avg - std::cout << "max avg time:" << avg_max << "ns" << std::endl; - - // Dump brgemm debug parameters to csv file - if (acc_max != 0 && avg_max != 0 && get_friendly_name().find("_DebugParams") != std::string::npos) { - const auto& rt_info = get_rt_info(); - auto brgemm_params_it = rt_info.find("brgemm_params"); - if (brgemm_params_it == rt_info.end()) { - return; - } - if (brgemm_csv_path.empty()) { - auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path"); - if (brgemm_csv_path_it != rt_info.end()) { - brgemm_csv_path = brgemm_csv_path_it->second.as(); - } - } - m_debug_params_map[get_friendly_name()] = - brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max); - } -} - -void PerfCountEnd::dump_brgemm_params_to_csv() { - if (m_debug_params_map.empty() || brgemm_csv_path.empty()) { - return; - } - std::ofstream csv_file(brgemm_csv_path); - OPENVINO_ASSERT(csv_file.is_open(), "Failed to open csv file for brgemm debug parameters."); - csv_file << "name,subgraph_name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time," - "avg_max_time\n"; - for (const auto& [_, params] : m_debug_params_map) { - csv_file << params << '\n'; - } - csv_file.close(); + csv_dumper.update(this, accumulation, iteration); } } // namespace op From 867a53d92dc40ab690aa1d694857084417c01987 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Mon, 27 Jan 2025 10:24:13 +0100 Subject: [PATCH 6/6] ternary --- src/common/snippets/include/snippets/op/perf_count.hpp | 1 - src/common/snippets/src/utils/utils.cpp | 10 ++++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/common/snippets/include/snippets/op/perf_count.hpp b/src/common/snippets/include/snippets/op/perf_count.hpp index fa049e32b4a056..d89125c0daecaf 
100644 --- a/src/common/snippets/include/snippets/op/perf_count.hpp +++ b/src/common/snippets/include/snippets/op/perf_count.hpp @@ -115,7 +115,6 @@ class PerfCountEnd : public PerfCountEndBase { void set_accumulated_time(); private: - ov::threading::ThreadLocal accumulation; ov::threading::ThreadLocal iteration; diff --git a/src/common/snippets/src/utils/utils.cpp b/src/common/snippets/src/utils/utils.cpp index 5f6f8033c10f8f..248ef989790f7a 100644 --- a/src/common/snippets/src/utils/utils.cpp +++ b/src/common/snippets/src/utils/utils.cpp @@ -372,8 +372,14 @@ std::string tensor2str(const VectorDims& tensor, const std::string& delimiter) { std::stringstream ss; for (size_t i = 0; i < tensor.size(); ++i) { const auto& v = tensor[i]; - const auto v_str = - utils::is_full_dim_value(v) ? "FULL_DIM" : utils::is_dynamic_value(v) ? "?" : std::to_string(v); + std::string v_str; + if (utils::is_full_dim_value(v)) { + v_str = "FULL_DIM"; + } else if (utils::is_dynamic_value(v)) { + v_str = "?"; + } else { + v_str = std::to_string(v); + } const auto del = i < tensor.size() - 1 ? delimiter : ""; ss << v_str << del; }