From 094f1bc182acbefa03b4f00d667ebfb9bae751ca Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Wed, 25 Sep 2024 17:10:59 +0300 Subject: [PATCH 1/6] [`intel_npu`] [`DUPLICATE`] [`master`] [`reduce memory consumption`] Avoid creating a blob copy while exporting a compiled model (#26783) ### Details: - *Duplicates PR https://github.com/openvinotoolkit/openvino/pull/26754* - *Add support for new L0 API 1.7* - *Change return type of `getCompiledNetwork` to new custom `CompiledNetwork` container* ### Tickets: - *[151912](https://jira.devtools.intel.com/browse/CVS-151912)* --- .../src/al/include/intel_npu/al/icompiler.hpp | 32 +++++- .../src/backend/include/zero_types.hpp | 8 +- .../include/driver_compiler_adapter.hpp | 2 +- .../include/zero_compiler_in_driver.hpp | 21 +++- .../compiler/src/driver_compiler_adapter.cpp | 11 +- .../compiler/src/zero_compiler_in_driver.cpp | 106 ++++++++++++------ .../src/plugin/src/compiled_model.cpp | 19 ++-- .../intel_npu/thirdparty/level-zero-ext | 2 +- 8 files changed, 150 insertions(+), 51 deletions(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index f3e96e8650b672..570e2057d9b5e5 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -151,6 +151,32 @@ struct NetworkDescription final { NetworkMetadata metadata; }; +/** + * @struct CompiledNetwork + * @brief Custom container for compiled network, used for export + * @var CompiledNetwork::data + * Pointer to the address of compiled network + * @var CompiledNetwork::size + * Size of the compiled network + * @var CompiledNetwork::ownedStorage + * Plugin owned compiled network storage that is required in case of a driver that + * doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network. 
+ * @note It's unsafe to store either data or size outside of the compiled network object as its destructor + * would release the owning container + */ + +struct CompiledNetwork { + const uint8_t* data; + size_t size; + CompiledNetwork(const uint8_t* data, size_t size, std::vector storage) + : data(data), + size(size), + ownedStorage(std::move(storage)) {} + +private: + std::vector ownedStorage; +}; + /** * @interface ICompiler * @brief An interface to be implemented by a concrete compiler to provide @@ -203,8 +229,10 @@ class ICompiler : public std::enable_shared_from_this { // Driver compiler can use this to release graphHandle, if we do not have executor virtual void release([[maybe_unused]] std::shared_ptr networkDescription){}; - virtual std::vector getCompiledNetwork(std::shared_ptr networkDescription) { - return networkDescription->compiledNetwork; + virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) { + return CompiledNetwork(networkDescription.compiledNetwork.data(), + networkDescription.compiledNetwork.size(), + networkDescription.compiledNetwork); } protected: diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp index b0dbef843868fe..834d66a45a80d9 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp @@ -16,7 +16,7 @@ /** * @brief Last version of Table of Graph Extension functions used within plugin */ -using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_6_t; +using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t; /** * @brief Last version of the Command Queue functions used within plugin */ @@ -155,6 +155,12 @@ struct ze_graph_dditable_ext_decorator final { throwWhenUnsupported("pfnDeviceGetGraphProperties2", ZE_GRAPH_EXT_VERSION_1_6); return _impl->pfnDeviceGetGraphProperties2(hDevice, pDeviceGraphProperties); } + + // version 1.7 + 
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) { + throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7); + return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary); + } }; /** diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp index 3f02cecd2b0f19..99de755e1c49aa 100644 --- a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp @@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::vector getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; private: /** diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index 108b48cf0c6f73..523fc87a7f9dd3 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -43,6 +43,11 @@ using SerializedIR = std::pair>; (std::is_same::value || std::is_same::value || \ std::is_same::value || std::is_same::value) +#define UseCopyForNativeBinary(T) \ + (std::is_same::value || std::is_same::value || \ + std::is_same::value || std::is_same::value || \ + std::is_same::value) + /** * Adapter to use CiD through ZeroAPI */ @@ -100,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler { void release(std::shared_ptr networkDescription) override; - std::vector getCompiledNetwork(std::shared_ptr networkDescription) override; + CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; private: NetworkMetadata 
getNetworkMeta(ze_graph_handle_t graphHandle) const; @@ -123,6 +128,20 @@ class LevelZeroCompilerInDriver final : public ICompiler { std::vector& inputs, std::vector& outputs) const; + template = true> + void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& blob, + uint8_t*& blobPtr, + size_t& blobSize) const; + + template = true> + void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& /* unusedBlob */, + uint8_t*& blobPtr, + size_t& blobSize) const; + template = true> ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, const Config& config, diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index ceacd9cda037a5..1f2a23539a99f5 100644 --- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -64,6 +64,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr>(driverHandle, + deviceHandle, + zeContext, + graph_ddi_table_ext); + break; default: apiAdapter = std::make_shared>(driverHandle, deviceHandle, @@ -109,10 +115,9 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr apiAdapter->release(std::move(networkDescription)); } -std::vector LevelZeroCompilerAdapter::getCompiledNetwork( - std::shared_ptr networkDescription) { +CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) { _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)"); - return apiAdapter->getCompiledNetwork(std::move(networkDescription)); + return apiAdapter->getCompiledNetwork(networkDescription); } } // namespace driverCompilerAdapter diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp 
b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index c1398d227820da..0e02bb48f3a4b7 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -363,46 +363,83 @@ void LevelZeroCompilerInDriver::release(std::shared_ptr -std::vector LevelZeroCompilerInDriver::getCompiledNetwork( - std::shared_ptr networkDescription) { - if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) { +template > +void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& blob, + uint8_t*& blobPtr, + size_t& blobSize) const { + // Get blob size first + auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr); + blob.resize(blobSize); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. L0 pfnGetNativeBinary get blob size", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". ", + getLatestBuildError()); + + // Get blob data + result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data()); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. L0 pfnGetNativeBinary get blob data", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". ", + getLatestBuildError()); + + blobPtr = blob.data(); +} + +template +template > +void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, + ze_graph_handle_t graphHandle, + std::vector& /* unusedBlob */, + uint8_t*& blobPtr, + size_t& blobSize) const { + // Get blob ptr and size + auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr); + + OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, + "Failed to compile network. 
L0 pfnGetNativeBinary get blob size", + " result: ", + ze_result_to_string(result), + ", code 0x", + std::hex, + uint64_t(result), + ". ", + getLatestBuildError()); +} + +template +CompiledNetwork LevelZeroCompilerInDriver::getCompiledNetwork( + const NetworkDescription& networkDescription) { + if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) { _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle"); - ze_graph_handle_t graphHandle = static_cast(networkDescription->metadata.graphHandle); + ze_graph_handle_t graphHandle = static_cast(networkDescription.metadata.graphHandle); - // Get blob size first + uint8_t* blobPtr = nullptr; size_t blobSize = -1; + std::vector blob; + + getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize); - auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr); - - OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, - "Failed to compile network. L0 pfnGetNativeBinary get blob size", - " result: ", - ze_result_to_string(result), - ", code 0x", - std::hex, - uint64_t(result), - ". ", - getLatestBuildError()); - - std::vector blob(blobSize); - // Get blob data - result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data()); - - OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS, - "Failed to compile network. L0 pfnGetNativeBinary get blob data", - " result: ", - ze_result_to_string(result), - ", code 0x", - std::hex, - uint64_t(result), - ". 
", - getLatestBuildError()); _logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob"); - return blob; - } else { - _logger.info("return the blob from network description"); - return networkDescription->compiledNetwork; + return CompiledNetwork(blobPtr, blobSize, std::move(blob)); } + _logger.info("return the blob from network description"); + return CompiledNetwork(networkDescription.compiledNetwork.data(), + networkDescription.compiledNetwork.size(), + networkDescription.compiledNetwork); } template @@ -1201,6 +1238,7 @@ template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; template class LevelZeroCompilerInDriver; +template class LevelZeroCompilerInDriver; } // namespace driverCompilerAdapter } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 914879feee359f..51ed0e2c5c4858 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -27,10 +27,11 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE = "Can't create infer request!\n" "Please make sure that the device is available. 
Only exports can be made."; -std::uint32_t hash(const std::vector& data) { +std::uint32_t hash(const intel_npu::CompiledNetwork& blob) { std::uint32_t result = 1171117u; - for (const auto& c : data) - result = ((result << 7) + result) + static_cast(c); + for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) { + result = ((result << 7) + result) + static_cast(*it); + } return result; } @@ -139,15 +140,17 @@ std::shared_ptr CompiledModel::create_sync_infer_request( void CompiledModel::export_model(std::ostream& stream) const { _logger.debug("CompiledModel::export_model"); - const auto&& blob = _compiler->getCompiledNetwork(_networkPtr); - stream.write(reinterpret_cast(blob.data()), blob.size()); - std::stringstream str; - str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob); - _logger.info(str.str().c_str()); + const auto blob = _compiler->getCompiledNetwork(*_networkPtr); + stream.write(reinterpret_cast(blob.data), blob.size); if (!stream) { _logger.error("Write blob to stream failed. 
Blob is broken!"); } else { + if (_logger.level() >= ov::log::Level::INFO) { + std::stringstream str; + str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob); + _logger.info(str.str().c_str()); + } _logger.info("Write blob to stream successfully."); } } diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext index 16c85231a82ee1..816b5ce120096c 160000 --- a/src/plugins/intel_npu/thirdparty/level-zero-ext +++ b/src/plugins/intel_npu/thirdparty/level-zero-ext @@ -1 +1 @@ -Subproject commit 16c85231a82ee1a0b06ed7ab7da3f411a0878ed7 +Subproject commit 816b5ce120096cbc115b56ed43f8a030eb420b19 From 416bfb44b054a69ed9e09622c8c7ec47c232f059 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 25 Sep 2024 17:10:42 +0200 Subject: [PATCH 2/6] [TESTS] Fix rerun mechanism in case of failed tracing (#26758) ### Details: - *Fix rerun mechanism in case of failed tracing* ### Tickets: - *CVS-153111* Co-authored-by: Andrei Kochin --- .../py_frontend_tests/test_torchvision_preprocessor.py | 2 +- tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py index 78bd4d526dddb3..ea731d4a7aefb9 100644 --- a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py +++ b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py @@ -36,7 +36,7 @@ def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3): try: return _infer_pipelines_impl(test_input, preprocess_pipeline, input_channels) except RuntimeError as e: - if "builtin cannot be used as a value" in e: + if "builtin cannot be used as a value" in str(e): # This is a potentially sporadic issue print(f"An error occurred: {e}. 
Retrying...") retries += 1 diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 2481f1d65ef8fb..a2f54076de9d7f 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -76,7 +76,7 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti try: return self._test_impl(model, ref_net, kind, ie_device, precision, ir_version, infer_timeout, dynamic_shapes, **kwargs) except RuntimeError as e: - if "builtin cannot be used as a value" in e: + if "builtin cannot be used as a value" in str(e): # This is a potentially sporadic issue print(f"An error occurred: {e}. Retrying...") retries += 1 From 288c5f961e40bdfbb44e5765a424a29ab13ffe2b Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 25 Sep 2024 16:24:29 +0100 Subject: [PATCH 3/6] [NPUW] Support i4 patterns for compute pipeline (#26785) Following up with i4 patterns on https://github.com/openvinotoolkit/openvino/pull/25679 --- .../npuw/partitioning/online/compiler.cpp | 9 +- .../npuw/partitioning/online/snapshot.cpp | 17 ++- .../npuw/partitioning/patterns/compute.cpp | 119 ++++++++++++++---- .../npuw/partitioning/patterns/compute.hpp | 18 ++- 4 files changed, 127 insertions(+), 36 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 9acdb396293f3c..6a9cf017fded81 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -140,10 +140,8 @@ std::vector getIsolates(const std::string isolates_unparsed) { if (!isolates.empty()) { LOG_INFO("Online partitioning will isolate subgraphs containing specified patterns."); } else { - LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE!" 
- << " Please, follow the example: " - << "Op:Select/NPU,P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute. " - << "No isolate rules will be taken into account during partitioning!"); + LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE! No isolate rules will be taken into account during " + "partitioning!"); } return isolates; @@ -193,7 +191,8 @@ std::vector getNoFolds(const std::string& nofolds_unparsed) { void setComputeConfig(PassContext& ctx) { // FIXME: initialize via a dedicated function instead of parsing - ctx.isolates = detail::getIsolates("P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute"); + ctx.isolates = detail::getIsolates("P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,P:DQMatMulGQi4/" + "compute,P:DQMatMulCWi4/compute,P:RMSNorm/compute"); ctx.nofolds = detail::getNoFolds("compute"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 2ee36fcb09361a..a35f33eab49178 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -404,14 +404,21 @@ void Snapshot::earlyRegroup() { if (isolate.pattern == "RMSNorm") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; - } else if (isolate.pattern == "DQMatMulCW") { - rewr.add_matcher(shared_from_this(), isolate.tag); + } else if (isolate.pattern == "DQMatMulCWu4") { + rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; - } else if (isolate.pattern == "DQMatMulGQ") { - rewr.add_matcher(shared_from_this(), isolate.tag); + } else if (isolate.pattern == "DQMatMulGQu4") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; + } else if (isolate.pattern == "DQMatMulCWi4") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; + } else if (isolate.pattern == "DQMatMulGQi4") { + 
rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; } else { - LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCW, DQMatMulGQ " + LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCWu4, DQMatMulGQu4, DQMatMulCWi4, " + "DQMatMulGQi4 " << "as patterns. Isolate pattern " << isolate.pattern << " is skipped!"); } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp index d43fc8d95c3ae8..e7f09b00cde2a2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp @@ -7,24 +7,7 @@ #include "../../logging.hpp" #include "../online/group.hpp" // online::Group #include "../online/snapshot.hpp" // online::Snapshot -#include "openvino/op/add.hpp" -#include "openvino/op/broadcast.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/convert.hpp" -#include "openvino/op/divide.hpp" -#include "openvino/op/gather.hpp" -#include "openvino/op/greater.hpp" -#include "openvino/op/matmul.hpp" -#include "openvino/op/mod.hpp" -#include "openvino/op/multiply.hpp" -#include "openvino/op/power.hpp" -#include "openvino/op/reduce_mean.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/sqrt.hpp" -#include "openvino/op/subtract.hpp" -#include "openvino/op/util/op_types.hpp" -#include "openvino/op/variadic_split.hpp" +#include "openvino/op/ops.hpp" #include "openvino/pass/pattern/op/label.hpp" // any_input #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/util/common_util.hpp" @@ -37,7 +20,7 @@ namespace compute { namespace opp = ov::pass::pattern; // TODO: visualize -DQMatMulGQ::DQMatMulGQ(const std::shared_ptr& snapshot, const std::string& isol_tag) { +DQMatMulGQu4::DQMatMulGQu4(const std::shared_ptr& snapshot, const std::string& isol_tag) { auto qweight = 
opp::wrap_type(); auto qzerop = opp::wrap_type(); auto qcoeff = opp::wrap_type(); @@ -87,11 +70,11 @@ DQMatMulGQ::DQMatMulGQ(const std::shared_ptr& snapsh return false; // root hasn't changed }; - register_matcher(std::make_shared(qmm, "TagDQMatMulGQ"), std::move(callback)); + register_matcher(std::make_shared(qmm, "TagDQMatMulGQu4"), std::move(callback)); } // TODO: visualize -DQMatMulCW::DQMatMulCW(const std::shared_ptr& snapshot, const std::string& isol_tag) { +DQMatMulCWu4::DQMatMulCWu4(const std::shared_ptr& snapshot, const std::string& isol_tag) { auto qweight = opp::wrap_type(); auto qzerop = opp::wrap_type(); auto qcoeff = opp::wrap_type(); @@ -140,7 +123,99 @@ DQMatMulCW::DQMatMulCW(const std::shared_ptr& snapsh return false; // root hasn't changed }; - register_matcher(std::make_shared(qmm, "TagDQMatMulCW"), std::move(callback)); + register_matcher(std::make_shared(qmm, "TagDQMatMulCWu4"), std::move(callback)); +} + +// TODO: visualize +DQMatMulGQi4::DQMatMulGQi4(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto qweight = opp::wrap_type(); + auto qcoeff = opp::wrap_type(); + + auto qcvtw = opp::wrap_type({qweight}); + + auto qmuls = opp::wrap_type({qcvtw, qcoeff}); + auto qreshp = opp::wrap_type({qmuls, opp::any_input()}); + auto qcvtr = opp::wrap_type({qreshp}); + auto qmm = opp::wrap_type({opp::any_input(), qcvtr}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); + auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); + + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight)); + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff)); + + auto matched_qweight = std::static_pointer_cast(matched_node_qweight); + auto matched_qcoeff = 
std::static_pointer_cast(matched_node_qcoeff); + + if ((ov::element::i4 == matched_qweight->get_element_type() || + ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::f16 == matched_qcoeff->get_element_type()) { + // Partitioning ignores Const->Convert nodes, so qcvtw is not used + auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + auto matched_qreshp = node_to_output.at(qreshp).get_node_shared_ptr(); + auto matched_qcvtr = node_to_output.at(qcvtr).get_node_shared_ptr(); + auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr(); + + node_to_gptr->at(matched_qmuls)->isolate(isol_tag); + node_to_gptr->at(matched_qreshp)->isolate(isol_tag); + node_to_gptr->at(matched_qcvtr)->isolate(isol_tag); + node_to_gptr->at(matched_qmm)->isolate(isol_tag); + } + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(qmm, "TagDQMatMulGQi4"), std::move(callback)); +} + +// TODO: visualize +DQMatMulCWi4::DQMatMulCWi4(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto qweight = opp::wrap_type(); + auto qcoeff = opp::wrap_type(); + + auto qcvtw = opp::wrap_type({qweight}); + + auto qmuls = opp::wrap_type({qcvtw, qcoeff}); + + auto qcvtm = opp::wrap_type({qmuls}); + auto qmm = opp::wrap_type({opp::any_input(), qcvtm}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr(); + auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr(); + + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight)); + NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff)); + + auto matched_qweight = std::static_pointer_cast(matched_node_qweight); + auto matched_qcoeff = std::static_pointer_cast(matched_node_qcoeff); + + if 
((ov::element::i4 == matched_qweight->get_element_type() || + ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::f16 == matched_qcoeff->get_element_type()) { + // Partitioning ignores Const->Convert nodes, so qcvtw is not used + auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr(); + auto matched_qcvtm = node_to_output.at(qcvtm).get_node_shared_ptr(); + auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr(); + + node_to_gptr->at(matched_qmuls)->isolate(isol_tag); + node_to_gptr->at(matched_qcvtm)->isolate(isol_tag); + node_to_gptr->at(matched_qmm)->isolate(isol_tag); + } + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(qmm, "TagDQMatMulCWi4"), std::move(callback)); } // TODO: visualize diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp index 80aa4d095d3c9f..92e60cb95fbdbe 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp @@ -21,14 +21,24 @@ class Snapshot; // Forward declaration namespace patterns { namespace compute { -class DQMatMulGQ : public ov::pass::MatcherPass { +class DQMatMulGQu4 : public ov::pass::MatcherPass { public: - DQMatMulGQ(const std::shared_ptr& snapshot, const std::string& isol_tag); + DQMatMulGQu4(const std::shared_ptr& snapshot, const std::string& isol_tag); }; -class DQMatMulCW : public ov::pass::MatcherPass { +class DQMatMulCWu4 : public ov::pass::MatcherPass { public: - DQMatMulCW(const std::shared_ptr& snapshot, const std::string& isol_tag); + DQMatMulCWu4(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + +class DQMatMulGQi4 : public ov::pass::MatcherPass { +public: + DQMatMulGQi4(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + +class DQMatMulCWi4 : public ov::pass::MatcherPass { +public: + 
DQMatMulCWi4(const std::shared_ptr& snapshot, const std::string& isol_tag); }; class RMSNorm : public ov::pass::MatcherPass { From 11abf3f9d7ec5be1140a6c14079963682fecc5ee Mon Sep 17 00:00:00 2001 From: Zoran Zomborat Date: Wed, 25 Sep 2024 18:26:11 +0300 Subject: [PATCH 4/6] Extend support to BF16 in npu plugin (#26469) ### Details: Extend BF16 logic in NPU plugin. For what it's worth, functional tests on the NPU side are running with these changes, although there are some open issues in the NPU compiler which cause accuracy problems. Until then, this PR should be merged to ease integration on our side. ### Tickets: - [*ticket-id*](https://jira.devtools.intel.com/browse/EISW-140090) --- .../src/backend/include/zero_device.hpp | 1 + .../src/backend/src/zero_infer_request.cpp | 4 ++- .../skip_tests_config.cpp | 1 - .../tools/single-image-test/main.cpp | 31 ++++++++++++++++--- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp index 7453cfc300815e..9d034b1bb4038b 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp @@ -69,6 +69,7 @@ class ZeroDevice : public IDevice { std::map device_gops = {{ov::element::f32, 0.f}, {ov::element::f16, 0.f}, + {ov::element::bf16, 0.f}, {ov::element::u8, 0.f}, {ov::element::i8, 0.f}}; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index ec17b0e137cf25..0a8d8dded5e97d 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -551,6 +551,8 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi break; case ov::element::Type_t::f16: break; + case ov::element::Type_t::bf16: + break; case ov::element::Type_t::u4: break; case ov::element::Type_t::i4: 
@@ -575,7 +577,7 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi break; default: OPENVINO_THROW("Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() + - "! Supported precisions: FP32, FP16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64"); + "! Supported precisions: FP32, FP16, BF16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64"); } } diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp index c38125a6458e7d..4eb829045c964a 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -266,7 +266,6 @@ std::vector disabledTestPatterns() { _skipRegistry.addPatterns( "Tests with unsupported precision", { ".*InferRequestCheckTensorPrecision.*type=boolean.*", - ".*InferRequestCheckTensorPrecision.*type=bf16.*", ".*InferRequestCheckTensorPrecision.*type=f64.*", ".*InferRequestCheckTensorPrecision.*type=u1\\D.*", // [Track number: E#97469] diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 14fce26bdd7458..3b3009bb5f459c 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -287,7 +287,8 @@ std::vector ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co "Unsupported layout: ", layout.to_string()); OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 || - precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32, + precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 || + precision == ov::element::Type_t::i32, "Unsupported precision: ", precision.get_type_name()); int cvType = 0; @@ -302,6 +303,9 @@ 
std::vector ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co } else if (precision == ov::element::Type_t::f16) { cvType = CV_16SC1; elemSize = sizeof(ov::float16); + } else if (precision == ov::element::Type_t::bf16) { + cvType = CV_16SC1; + elemSize = sizeof(ov::bfloat16); } else if (precision == ov::element::Type_t::i32) { cvType = CV_32SC1; elemSize = sizeof(int32_t); @@ -392,11 +396,14 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha cvType = static_cast(CV_32FC(C)); } else if (precision == ov::element::Type_t::f16) { cvType = static_cast(CV_16SC(C)); + } else if (precision == ov::element::Type_t::bf16) { + cvType = static_cast(CV_16SC(C)); } else if (precision == ov::element::Type_t::i32) { cvType = static_cast(CV_32SC(C)); } else { OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 || - precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32, + precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 || + precision == ov::element::Type_t::i32, "Unsupported precision ", precision.get_type_name()); } @@ -437,6 +444,10 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha const auto inPtr = in.ptr(); const auto outPtr = out.ptr(); convertBufferType(outPtr, inPtr, out.size().area() * C); + } else if (precision == ov::element::Type_t::bf16) { + const auto inPtr = in.ptr(); + const auto outPtr = out.ptr(); + convertBufferType(outPtr, inPtr, out.size().area() * C); } else if (precision == ov::element::Type_t::i32) { in.convertTo(out, CV_32S); } else { @@ -451,7 +462,8 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha } else if (layout == ov::Layout("NCHW")) { auto tensorPlanes = ovToCV(tensor, shape, layout, 0); - if (precision != ov::element::Type_t::f16) { + if (!(precision == ov::element::Type_t::f16 || + precision == ov::element::Type_t::bf16)) { cv::split(in, 
tensorPlanes); } else { std::vector inPlanes; @@ -461,8 +473,13 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha for (size_t i = 0; i < tensorPlanes.size(); ++i) { const auto inPtr = inPlanes[i].ptr(); - const auto outPtr = tensorPlanes[i].ptr(); - convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + if (precision == ov::element::Type_t::f16) { + const auto outPtr = tensorPlanes[i].ptr(); + convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + } else if (precision == ov::element::Type_t::bf16) { + const auto outPtr = tensorPlanes[i].ptr(); + convertBufferType(outPtr, inPtr, inPlanes[i].size().area()); + } } } @@ -1761,6 +1778,8 @@ static int runSingleImageTest() { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f32; } else if (strEq(precision, "FP16")) { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f16; + } else if (strEq(precision, "BF16")) { + inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::bf16; } else if (strEq(precision, "I32")) { inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::i32; } else if (strEq(precision, "I64")) { @@ -1808,6 +1827,8 @@ static int runSingleImageTest() { ov::element::Type prc_in = ov::element::u8; if (FLAGS_ip == "FP16") prc_in = ov::element::f16; + else if (FLAGS_ip == "BF16") + prc_in = ov::element::bf16; else if (FLAGS_ip == "FP32") prc_in = ov::element::f32; else if (FLAGS_ip == "I32") From a17efa6f20ec83348bcd077aca9c05d90c81aa48 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Wed, 25 Sep 2024 17:27:08 +0200 Subject: [PATCH 5/6] [GHA] Avoid running actions in forks (#26749) ### Tickets: - 153157 --- .github/actions/smart-ci/action.yml | 5 +++++ .github/actions/smart-ci/smart_ci.py | 11 +++++++++++ .github/workflows/android_arm64.yml | 2 +- .github/workflows/android_x64.yml | 2 +- .github/workflows/assign_issue.yml | 1 + .github/workflows/build_doc.yml | 1 + .github/workflows/check_pr_commits.yml 
| 1 + .github/workflows/cleanup_caches.yml | 2 ++ .github/workflows/code_snippets.yml | 1 + .github/workflows/code_style.yml | 1 + .github/workflows/coverity.yml | 1 + .github/workflows/debian_10_arm.yml | 1 + .github/workflows/dependency_review.yml | 1 + .github/workflows/fedora_29.yml | 2 +- .github/workflows/files_size.yml | 1 + .github/workflows/linux_arm64.yml | 2 +- .github/workflows/linux_conditional_compilation.yml | 3 ++- .github/workflows/linux_riscv.yml | 3 ++- .github/workflows/linux_sanitizers.yml | 1 + .github/workflows/mac.yml | 1 + .github/workflows/mac_arm64.yml | 1 + .github/workflows/mo.yml | 1 + .github/workflows/ovc.yml | 1 + .github/workflows/py_checks.yml | 1 + .github/workflows/send_workflows_to_opentelemetry.yml | 2 +- .github/workflows/stale_prs_and_issues.yml | 1 + .github/workflows/ubuntu_20.yml | 2 +- .github/workflows/ubuntu_22.yml | 2 +- .github/workflows/ubuntu_22_dpcpp.yml | 2 +- .github/workflows/ubuntu_24.yml | 2 +- .github/workflows/webassembly.yml | 2 +- .github/workflows/workflow_rerunner.yml | 3 ++- 32 files changed, 50 insertions(+), 13 deletions(-) diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml index 007db90d13251b..cd111d617ddc1b 100644 --- a/.github/actions/smart-ci/action.yml +++ b/.github/actions/smart-ci/action.yml @@ -43,6 +43,10 @@ inputs: description: "Comma-separated list of patterns (fnmatch-style). 
If PR has only matching files changed, return indicator that CI can be skipped" required: false + enable_for_org: + description: "Enables running workflows for a given organization; triggers from other orgs are skipped" + required: false + default: "openvinotoolkit" outputs: all_components: @@ -99,6 +103,7 @@ runs: -c "${{ inputs.components_config }}" \ -m "${{ inputs.components_config_schema }}" \ -l "${{ inputs.labeler_config }}" \ + --enable_for_org "${{ inputs.enable_for_org }}" \ --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \ --skip-when-only-listed-files-changed "${{ inputs.skip_when_only_listed_files_changed }}" shell: bash diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py index 1c8558f4779108..e922d3d2ef5263 100644 --- a/.github/actions/smart-ci/smart_ci.py +++ b/.github/actions/smart-ci/smart_ci.py @@ -4,6 +4,8 @@ import os import re import argparse +import sys + import yaml import json import jsonschema @@ -146,6 +148,8 @@ def parse_args(): parser.add_argument('--skip-when-only-listed-files-changed', help="Comma-separated list of patterns (fnmatch-style). If PR has only matching files changed, " "return indicator that CI can be skipped") + parser.add_argument('--enable_for_org', default='openvinotoolkit', + help='Enable running workflows for a given organization; triggers from other orgs are skipped') args = parser.parse_args() return args @@ -176,6 +180,13 @@ def main(): components_config = yaml.safe_load(config) owner, repository = args.repo.split('/') + + if owner != args.enable_for_org: + logger.info(f"Running workflows is enabled only for repos in {args.enable_for_org} organization. 
" + f"The current workflow was initiated from other org: {owner}, skipping") + set_github_output("skip_workflow", "True") + sys.exit(0) + gh_api = GhApi(owner=owner, repo=repository, token=os.getenv("GITHUB_TOKEN")) pr = gh_api.pulls.get(args.pr) if args.pr else None diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 35c18b43e1f95d..bbd737dff0e569 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -43,6 +43,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -95,7 +96,6 @@ jobs: VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache' VCPKG_FORCE_SYSTEM_BINARIES: '1' SCCACHE_AZURE_KEY_PREFIX: android_arm64 - if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index e8fb4902e37612..3504b8cf2fdeb1 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -46,6 +46,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -94,7 +95,6 @@ jobs: ANDROID_SDK_VERSION: 29 ANDROID_ABI_CONFIG: x86_64 SCCACHE_AZURE_KEY_PREFIX: android_x64 - if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml index f466715f5cfcd3..b13c6736e27b0b 100644 --- a/.github/workflows/assign_issue.yml +++ b/.github/workflows/assign_issue.yml @@ -15,6 +15,7 @@ jobs: permissions: issues: write timeout-minutes: 10 + if: ${{ 
github.repository_owner == 'openvinotoolkit' }} steps: - name: take an issue uses: bdougie/take-action@1439165ac45a7461c2d89a59952cd7d941964b87 # v1.6.1 diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 7b380530cfaecd..d6d3a63e431ecd 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -15,6 +15,7 @@ permissions: read-all jobs: Build_Doc: runs-on: ubuntu-20.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml index 75d18695c6e2f9..690b85046a108b 100644 --- a/.github/workflows/check_pr_commits.yml +++ b/.github/workflows/check_pr_commits.yml @@ -6,6 +6,7 @@ permissions: read-all jobs: Checks: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml index 6ba1a4164d9022..9c3992e2a85184 100644 --- a/.github/workflows/cleanup_caches.yml +++ b/.github/workflows/cleanup_caches.yml @@ -10,6 +10,7 @@ permissions: read-all jobs: Cleanup_PIP: runs-on: aks-linux-2-cores-8gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: @@ -35,6 +36,7 @@ jobs: Cleanup_CCACHE: runs-on: aks-linux-2-cores-8gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index ae5f9ee25624d3..82daec9ee791f9 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -25,6 +25,7 @@ jobs: matrix: os: ['ubuntu-22.04', 'macos-latest', 
'windows-latest'] runs-on: ${{ matrix.os }} + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 2fbcc6b5f87761..c2db68edca3956 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -12,6 +12,7 @@ jobs: runs-on: ubuntu-22.04 permissions: pull-requests: write + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 1d2f8e3ff54820..0de4cb045bfeb4 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,6 +31,7 @@ jobs: run: shell: bash runs-on: aks-linux-16-cores-32gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 env: diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index 84496a50b9a480..7fab775b45b886 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-16-cores-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml index fd2d4f02d57368..e73acd765ed7f9 100644 --- a/.github/workflows/dependency_review.yml +++ b/.github/workflows/dependency_review.yml @@ -6,6 +6,7 @@ permissions: read-all jobs: dependency-review: runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/fedora_29.yml 
b/.github/workflows/fedora_29.yml index a79b0f86af28f3..b3a540fb287bd8 100644 --- a/.github/workflows/fedora_29.yml +++ b/.github/workflows/fedora_29.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -71,7 +72,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml index 065fe71bc75893..6a006cc7d4ada8 100644 --- a/.github/workflows/files_size.yml +++ b/.github/workflows/files_size.yml @@ -10,6 +10,7 @@ permissions: read-all jobs: Check_Files_Size: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 7c0282b5519705..2e557ebc5ef477 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -52,6 +52,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-16-cores-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -75,7 +76,6 @@ jobs: Build: needs: [ Docker, Smart_CI ] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-arm' diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index f0ce141e8c004f..f9359e323d4baf 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -53,6 +53,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: 
aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -104,7 +105,7 @@ jobs: SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat MODELS_PATH: /__w/openvino/openvino/testdata SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release_faster_build - if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} + if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 6c26bb8b3859f0..3de8b56ad773cc 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -44,6 +44,7 @@ jobs: Docker: needs: Smart_CI runs-on: aks-linux-4-cores-16gb-docker-build + if: "!needs.smart_ci.outputs.skip_workflow" container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -88,7 +89,7 @@ jobs: CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp CCACHE_MAXSIZE: 2G - if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} + if: ${{ github.event_name != 'merge_group' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 9105b3b00bf84c..367fce8eb98683 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -25,6 +25,7 @@ jobs: run: shell: bash runs-on: aks-linux-16-cores-32gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 715380811d6870..d60ef4608093b2 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -78,6 +78,7 @@ jobs: INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install 
BUILD_DIR: ${{ github.workspace }}/build + if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 2615fe16316ea7..73890d1284222e 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -78,6 +78,7 @@ jobs: INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install BUILD_DIR: ${{ github.workspace }}/build + if: "!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index 7bbf3ba28001b9..9a112e7e53ced2 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -21,6 +21,7 @@ permissions: read-all jobs: Pylint-UT: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index a296f93e5a5187..1e2668f26cb579 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -16,6 +16,7 @@ permissions: read-all jobs: Pylint-UT: runs-on: ubuntu-22.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index db0918d0eb61c0..75a8a1b83f03d0 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -25,6 +25,7 @@ permissions: read-all jobs: linters: runs-on: ubuntu-20.04 + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git 
a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index 8f70389e645906..ef597e55858b0e 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -37,7 +37,7 @@ jobs: otel-export-trace: name: Export finished workflow metrics runs-on: aks-linux-2-cores-8gb - if: github.repository == 'openvinotoolkit/openvino' + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - name: Checkout diff --git a/.github/workflows/stale_prs_and_issues.yml b/.github/workflows/stale_prs_and_issues.yml index 395fc6a350e2ba..d246a7e83f4f73 100644 --- a/.github/workflows/stale_prs_and_issues.yml +++ b/.github/workflows/stale_prs_and_issues.yml @@ -12,6 +12,7 @@ jobs: issues: write pull-requests: write runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} steps: - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 with: diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 5d74284b8c16fc..df1450a98e46a1 100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -54,6 +54,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -77,7 +78,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index b2a2f78410e9f7..90618357b2c63f 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -56,6 +56,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ 
-80,7 +81,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml index 1a5c69f03fe690..1ee9df0095ff02 100644 --- a/.github/workflows/ubuntu_22_dpcpp.yml +++ b/.github/workflows/ubuntu_22_dpcpp.yml @@ -44,6 +44,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -66,7 +67,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index a0e1b314391b24..474e8a46ae57aa 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -51,6 +51,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -74,7 +75,6 @@ jobs: Build: needs: [Docker, Smart_CI] - if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: runner: 'aks-linux-16-cores-32gb' diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index 902fb0dfcb00f0..a4a1a17af9d5ce 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -48,6 +48,7 @@ jobs: Docker: needs: Smart_CI + if: "!needs.smart_ci.outputs.skip_workflow" runs-on: aks-linux-4-cores-16gb-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 @@ -88,7 +89,6 @@ jobs: OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: webassembly_Release - if: 
"!needs.smart_ci.outputs.skip_workflow" steps: - name: Clone OpenVINO uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 81af7aede704ea..1c193f35dfa17e 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -21,7 +21,8 @@ permissions: read-all jobs: rerun: name: Rerun Workflow - if: ${{ github.event.workflow_run.conclusion == 'failure' }} # Run only for the failed workflows + # Run only for the failed workflows in openvinotoolkit org + if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }} runs-on: aks-linux-2-cores-8gb permissions: actions: write From 2659786992f5fdeb8ea968ccc9ecfb6cd5717f68 Mon Sep 17 00:00:00 2001 From: Sun Xiaoxia Date: Wed, 25 Sep 2024 23:44:06 +0800 Subject: [PATCH 6/6] Support machines with more than 1024 cores (#26303) ### Details: - *Because sizeof(cpu_set_t) is a fixed size of 128 bytes, that is the maximum CPU number is 1023. So `sched_getaffinity(0, sizeof(cpu_set_t), mask)` returns error on machines with more than 1024 cores. 
The solution is to pass a dynamic size to sched_getaffinity(), retrying until it returns successfully.* ### Tickets: - *https://github.com/openvinotoolkit/openvino/issues/26140* --------- Co-authored-by: Wanglei Shen --- .../src/dev/threading/thread_affinity.cpp | 5 +++-- src/inference/src/os/lin/lin_system_conf.cpp | 10 ++++++---- src/inference/src/system_conf.cpp | 20 ++----------------- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/inference/src/dev/threading/thread_affinity.cpp b/src/inference/src/dev/threading/thread_affinity.cpp index f53941f270af99..791e5a7fc70f07 100644 --- a/src/inference/src/dev/threading/thread_affinity.cpp +++ b/src/inference/src/dev/threading/thread_affinity.cpp @@ -87,8 +87,9 @@ bool pin_thread_to_vacant_core(int thrIdx, } bool pin_current_thread_to_socket(int socket) { - const int sockets = ov::get_available_numa_nodes().size(); - const int cores = ov::get_number_of_cpu_cores(); + auto proc_type_table = get_org_proc_type_table(); + const int sockets = proc_type_table.size() > 1 ? 
proc_type_table.size() - 1 : 1; + const int cores = proc_type_table[0][MAIN_CORE_PROC]; const int cores_per_socket = cores / sockets; int ncpus = 0; diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index 2dcf9eaa4d6ad4..e30bcbbe8bc55e 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -12,6 +12,7 @@ #include #include "dev/threading/parallel_custom_arena.hpp" +#include "dev/threading/thread_affinity.hpp" #include "openvino/core/except.hpp" #include "openvino/runtime/system_conf.hpp" #include "os/cpu_map_info.hpp" @@ -114,10 +115,11 @@ CPU::CPU() { }; auto check_valid_cpu = [&]() { - cpu_set_t mask; - CPU_ZERO(&mask); + ov::threading::CpuSet mask; + int ncpus = 0; + std::tie(mask, ncpus) = ov::threading::get_process_mask(); - if ((_processors == 0) || (sched_getaffinity(0, sizeof(cpu_set_t), &mask) == -1)) { + if ((_processors == 0) || mask == nullptr) { return -1; } @@ -128,7 +130,7 @@ CPU::CPU() { numa_node_list.assign(_sockets, std::vector()); for (int i = 0; i < _processors; i++) { - if (CPU_ISSET(i, &mask)) { + if (CPU_ISSET(i, mask)) { valid_cpu_mapping_table.emplace_back(_cpu_mapping_table[i]); if (_cpu_mapping_table[i][CPU_MAP_CORE_TYPE] == MAIN_CORE_PROC) { phy_core_list.emplace_back(_cpu_mapping_table[i][CPU_MAP_CORE_ID]); diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index 6ebec87feccba1..9de1eeb78e1547 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -327,26 +327,10 @@ int get_org_numa_id(int numa_node_id) { # ifndef _WIN32 int get_number_of_cpu_cores(bool bigCoresOnly) { CPU& cpu = cpu_info(); - unsigned numberOfProcessors = cpu._processors; unsigned totalNumberOfCpuCores = cpu._cores; OPENVINO_ASSERT(totalNumberOfCpuCores != 0, "Total number of cpu cores can not be 0."); - cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet; - CPU_ZERO(&currentCpuSet); - 
CPU_ZERO(&usedCoreSet); - CPU_ZERO(&currentCoreSet); - - sched_getaffinity(0, sizeof(currentCpuSet), &currentCpuSet); - - for (unsigned processorId = 0u; processorId < numberOfProcessors; processorId++) { - if (CPU_ISSET(processorId, &currentCpuSet)) { - unsigned coreId = processorId % totalNumberOfCpuCores; - if (!CPU_ISSET(coreId, &usedCoreSet)) { - CPU_SET(coreId, &usedCoreSet); - CPU_SET(processorId, &currentCoreSet); - } - } - } - int phys_cores = CPU_COUNT(&currentCoreSet); + + int phys_cores = totalNumberOfCpuCores; # if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) auto core_types = custom::info::core_types(); if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {