From 094f1bc182acbefa03b4f00d667ebfb9bae751ca Mon Sep 17 00:00:00 2001
From: Mircea-Aurelian Dan <mircea-aurelian.dan@intel.com>
Date: Wed, 25 Sep 2024 17:10:59 +0300
Subject: [PATCH 1/6] [`intel_npu`] [`DUPLICATE`] [`master`] [`reduce memory
 consumption`] Avoid creating a blob copy while exporting a compiled model
 (#26783)

### Details:
- *Duplicates PR https://github.com/openvinotoolkit/openvino/pull/26754*
 - *Add support for new L0 API 1.7*
- *Change return type of `getCompiledNetwork` to new custom
`CompiledNetwork` container*

### Tickets:
 - *[151912](https://jira.devtools.intel.com/browse/CVS-151912)*
---
 .../src/al/include/intel_npu/al/icompiler.hpp |  32 +++++-
 .../src/backend/include/zero_types.hpp        |   8 +-
 .../include/driver_compiler_adapter.hpp       |   2 +-
 .../include/zero_compiler_in_driver.hpp       |  21 +++-
 .../compiler/src/driver_compiler_adapter.cpp  |  11 +-
 .../compiler/src/zero_compiler_in_driver.cpp  | 106 ++++++++++++------
 .../src/plugin/src/compiled_model.cpp         |  19 ++--
 .../intel_npu/thirdparty/level-zero-ext       |   2 +-
 8 files changed, 150 insertions(+), 51 deletions(-)
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
index f3e96e8650b672..570e2057d9b5e5 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
@@ -151,6 +151,32 @@ struct NetworkDescription final {
     NetworkMetadata metadata;
 };
 
+/**
+ * @struct CompiledNetwork
+ * @brief Custom container for compiled network, used for export
+ * @var CompiledNetwork::data
+ * Pointer to the address of compiled network
+ * @var CompiledNetwork::size
+ * Size of the compiled network
+ * @var CompiledNetwork::ownedStorage
+ * Plugin owned compiled network storage that is required in case of a driver that
+ * doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network.
+ * @note It's unsafe to store either data or size outside of the compiled network object as its destructor
+ * would release the owning container
+ */
+
+struct CompiledNetwork {
+    const uint8_t* data;
+    size_t size;
+    CompiledNetwork(const uint8_t* data, size_t size, std::vector<uint8_t> storage)
+        : data(data),
+          size(size),
+          ownedStorage(std::move(storage)) {}
+
+private:
+    std::vector<uint8_t> ownedStorage;
+};
+
 /**
  * @interface ICompiler
  * @brief An interface to be implemented by a concrete compiler to provide
@@ -203,8 +229,10 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
     // Driver compiler can use this to release graphHandle, if we do not have executor
     virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription){};
 
-    virtual std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) {
-        return networkDescription->compiledNetwork;
+    virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) {
+        return CompiledNetwork(networkDescription.compiledNetwork.data(),
+                               networkDescription.compiledNetwork.size(),
+                               networkDescription.compiledNetwork);
     }
 
 protected:
diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
index b0dbef843868fe..834d66a45a80d9 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
@@ -16,7 +16,7 @@
 /**
  * @brief Last version of Table of Graph Extension functions used within plugin
  */
-using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_6_t;
+using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t;
 /**
  * @brief Last version of the Command Queue functions used within plugin
  */
@@ -155,6 +155,12 @@ struct ze_graph_dditable_ext_decorator final {
         throwWhenUnsupported("pfnDeviceGetGraphProperties2", ZE_GRAPH_EXT_VERSION_1_6);
         return _impl->pfnDeviceGetGraphProperties2(hDevice, pDeviceGraphProperties);
     }
+
+    // version 1.7
+    ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) {
+        throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7);
+        return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary);
+    }
 };
 
 /**
diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp
index 3f02cecd2b0f19..99de755e1c49aa 100644
--- a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp
+++ b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp
@@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler {
 
     void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
 
-    std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
+    CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
 
 private:
     /**
diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
index 108b48cf0c6f73..523fc87a7f9dd3 100644
--- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
+++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
@@ -43,6 +43,11 @@ using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
     (std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
      std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value)
 
+#define UseCopyForNativeBinary(T)                                                                                  \
+    (std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
+     std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value || \
+     std::is_same<T, ze_graph_dditable_ext_1_6_t>::value)
+
 /**
  * Adapter to use CiD through ZeroAPI
  */
@@ -100,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {
 
     void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
 
-    std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
+    CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
 
 private:
     NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
@@ -123,6 +128,20 @@ class LevelZeroCompilerInDriver final : public ICompiler {
                      std::vector<IODescriptor>& inputs,
                      std::vector<IODescriptor>& outputs) const;
 
+    template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
+    void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
+                         ze_graph_handle_t graphHandle,
+                         std::vector<uint8_t>& blob,
+                         uint8_t*& blobPtr,
+                         size_t& blobSize) const;
+
+    template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
+    void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
+                         ze_graph_handle_t graphHandle,
+                         std::vector<uint8_t>& /* unusedBlob */,
+                         uint8_t*& blobPtr,
+                         size_t& blobSize) const;
+
     template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
     ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,
                                                          const Config& config,
diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
index ceacd9cda037a5..1f2a23539a99f5 100644
--- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
@@ -64,6 +64,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr<IEngineBacken
                                                                                               zeContext,
                                                                                               graph_ddi_table_ext);
         break;
+    case ZE_GRAPH_EXT_VERSION_1_7:
+        apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>>(driverHandle,
+                                                                                              deviceHandle,
+                                                                                              zeContext,
+                                                                                              graph_ddi_table_ext);
+        break;
     default:
         apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_2_t>>(driverHandle,
                                                                                               deviceHandle,
@@ -109,10 +115,9 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr<const NetworkDescription>
     apiAdapter->release(std::move(networkDescription));
 }
 
-std::vector<uint8_t> LevelZeroCompilerAdapter::getCompiledNetwork(
-    std::shared_ptr<const NetworkDescription> networkDescription) {
+CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
     _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)");
-    return apiAdapter->getCompiledNetwork(std::move(networkDescription));
+    return apiAdapter->getCompiledNetwork(networkDescription);
 }
 
 }  // namespace driverCompilerAdapter
diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index c1398d227820da..0e02bb48f3a4b7 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -363,46 +363,83 @@ void LevelZeroCompilerInDriver<TableExtension>::release(std::shared_ptr<const Ne
 }
 
 template <typename TableExtension>
-std::vector<uint8_t> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
-    std::shared_ptr<const NetworkDescription> networkDescription) {
-    if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) {
+template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
+void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
+                                                                ze_graph_handle_t graphHandle,
+                                                                std::vector<uint8_t>& blob,
+                                                                uint8_t*& blobPtr,
+                                                                size_t& blobSize) const {
+    // Copy path (DDI tables 1.2-1.6): query the size first, then let the driver fill the caller's `blob`
+    auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
+
+    OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
+                    "Failed to compile network. L0 pfnGetNativeBinary get blob size",
+                    " result: ",
+                    ze_result_to_string(result),
+                    ", code 0x",
+                    std::hex,
+                    uint64_t(result),
+                    ". ",
+                    getLatestBuildError());
+
+    // Size `blob` only after the size query succeeded, so a failed query cannot trigger a bogus allocation
+    blob.resize(blobSize);
+    result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
+
+    OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
+                    "Failed to compile network. L0 pfnGetNativeBinary get blob data",
+                    " result: ",
+                    ze_result_to_string(result),
+                    ", code 0x",
+                    std::hex,
+                    uint64_t(result),
+                    ". ",
+                    getLatestBuildError());
+
+    blobPtr = blob.data();
+}
+
+template <typename TableExtension>
+template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
+void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
+                                                                ze_graph_handle_t graphHandle,
+                                                                std::vector<uint8_t>& /* unusedBlob */,
+                                                                uint8_t*& blobPtr,
+                                                                size_t& blobSize) const {
+    // No-copy path: one call yields both the blob pointer and its size; the vector argument stays untouched
+    auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
+
+    OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
+                    "Failed to compile network. L0 pfnGetNativeBinary2 get blob ptr and size",
+                    " result: ",
+                    ze_result_to_string(result),
+                    ", code 0x",
+                    std::hex,
+                    uint64_t(result),
+                    ". ",
+                    getLatestBuildError());
+}
+
+template <typename TableExtension>
+CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
+    const NetworkDescription& networkDescription) {
+    if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) {
         _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
-        ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription->metadata.graphHandle);
+        ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);
 
-        // Get blob size first
+        uint8_t* blobPtr = nullptr;
         size_t blobSize = -1;
+        std::vector<uint8_t> blob;
+
+        getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize);
 
-        auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
-
-        OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
-                        "Failed to compile network. L0 pfnGetNativeBinary get blob size",
-                        " result: ",
-                        ze_result_to_string(result),
-                        ", code 0x",
-                        std::hex,
-                        uint64_t(result),
-                        ". ",
-                        getLatestBuildError());
-
-        std::vector<uint8_t> blob(blobSize);
-        // Get blob data
-        result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
-
-        OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
-                        "Failed to compile network. L0 pfnGetNativeBinary get blob data",
-                        " result: ",
-                        ze_result_to_string(result),
-                        ", code 0x",
-                        std::hex,
-                        uint64_t(result),
-                        ". ",
-                        getLatestBuildError());
         _logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
-        return blob;
-    } else {
-        _logger.info("return the blob from network description");
-        return networkDescription->compiledNetwork;
+        return CompiledNetwork(blobPtr, blobSize, std::move(blob));
     }
+    _logger.info("return the blob from network description");
+    return CompiledNetwork(networkDescription.compiledNetwork.data(),
+                           networkDescription.compiledNetwork.size(),
+                           networkDescription.compiledNetwork);
 }
 
 template <typename TableExtension>
@@ -1201,6 +1238,7 @@ template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_3_t>;
 template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_4_t>;
 template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_5_t>;
 template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_6_t>;
+template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>;
 
 }  // namespace driverCompilerAdapter
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
index 914879feee359f..51ed0e2c5c4858 100644
--- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -27,10 +27,11 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
     "Can't create infer request!\n"
     "Please make sure that the device is available. Only exports can be made.";
 
-std::uint32_t hash(const std::vector<uint8_t>& data) {
+std::uint32_t hash(const intel_npu::CompiledNetwork& blob) {
     std::uint32_t result = 1171117u;
-    for (const auto& c : data)
-        result = ((result << 7) + result) + static_cast<uint32_t>(c);
+    for (const uint8_t* it = blob.data; it != blob.data + blob.size; ++it) {
+        result = ((result << 7) + result) + static_cast<uint32_t>(*it);
+    }
     return result;
 }
 
@@ -139,15 +140,17 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
 
 void CompiledModel::export_model(std::ostream& stream) const {
     _logger.debug("CompiledModel::export_model");
-    const auto&& blob = _compiler->getCompiledNetwork(_networkPtr);
-    stream.write(reinterpret_cast<const char*>(blob.data()), blob.size());
-    std::stringstream str;
-    str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob);
-    _logger.info(str.str().c_str());
+    const auto blob = _compiler->getCompiledNetwork(*_networkPtr);
+    stream.write(reinterpret_cast<const char*>(blob.data), blob.size);
 
     if (!stream) {
         _logger.error("Write blob to stream failed. Blob is broken!");
     } else {
+        if (_logger.level() >= ov::log::Level::INFO) {
+            std::stringstream str;
+            str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob);
+            _logger.info(str.str().c_str());
+        }
         _logger.info("Write blob to stream successfully.");
     }
 }
diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index 16c85231a82ee1..816b5ce120096c 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit 16c85231a82ee1a0b06ed7ab7da3f411a0878ed7
+Subproject commit 816b5ce120096cbc115b56ed43f8a030eb420b19

From 416bfb44b054a69ed9e09622c8c7ec47c232f059 Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Wed, 25 Sep 2024 17:10:42 +0200
Subject: [PATCH 2/6] [TESTS] Fix rerun mechanism in case of failed tracing
 (#26758)

### Details:
 - *Fix rerun mechanism in case of failed tracing*

### Tickets:
 - *CVS-153111*

Co-authored-by: Andrei Kochin <andrei.kochin@intel.com>
---
 .../py_frontend_tests/test_torchvision_preprocessor.py          | 2 +-
 tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py
index 78bd4d526dddb3..ea731d4a7aefb9 100644
--- a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py
+++ b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py
@@ -36,7 +36,7 @@ def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3):
         try:
             return _infer_pipelines_impl(test_input, preprocess_pipeline, input_channels)
         except RuntimeError as e:
-            if "builtin cannot be used as a value" in e:
+            if "builtin cannot be used as a value" in str(e):
                 # This is a potentially sporadic issue
                 print(f"An error occurred: {e}. Retrying...")
                 retries += 1
diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py
index 2481f1d65ef8fb..a2f54076de9d7f 100644
--- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py
+++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py
@@ -76,7 +76,7 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti
             try:
                 return self._test_impl(model, ref_net, kind, ie_device, precision, ir_version, infer_timeout, dynamic_shapes, **kwargs)
             except RuntimeError as e:
-                if "builtin cannot be used as a value" in e:
+                if "builtin cannot be used as a value" in str(e):
                     # This is a potentially sporadic issue
                     print(f"An error occurred: {e}. Retrying...")
                     retries += 1

From 288c5f961e40bdfbb44e5765a424a29ab13ffe2b Mon Sep 17 00:00:00 2001
From: Alexey Smirnov <alexey.smirnov@intel.com>
Date: Wed, 25 Sep 2024 16:24:29 +0100
Subject: [PATCH 3/6] [NPUW] Support i4 patterns for compute pipeline (#26785)

Following up with i4 patterns on
https://github.com/openvinotoolkit/openvino/pull/25679
---
 .../npuw/partitioning/online/compiler.cpp     |   9 +-
 .../npuw/partitioning/online/snapshot.cpp     |  17 ++-
 .../npuw/partitioning/patterns/compute.cpp    | 119 ++++++++++++++----
 .../npuw/partitioning/patterns/compute.hpp    |  18 ++-
 4 files changed, 127 insertions(+), 36 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
index 9acdb396293f3c..6a9cf017fded81 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp
@@ -140,10 +140,8 @@ std::vector<Isolate> getIsolates(const std::string isolates_unparsed) {
     if (!isolates.empty()) {
         LOG_INFO("Online partitioning will isolate subgraphs containing specified patterns.");
     } else {
-        LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE!"
-                 << " Please, follow the example: "
-                 << "Op:Select/NPU,P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute. "
-                 << "No isolate rules will be taken into account during partitioning!");
+        LOG_WARN("Incorect pattern in NPUW_ONLINE_ISOLATE! No isolate rules will be taken into account during "
+                 "partitioning!");
     }
 
     return isolates;
@@ -193,7 +191,8 @@ std::vector<std::string> getNoFolds(const std::string& nofolds_unparsed) {
 
 void setComputeConfig(PassContext& ctx) {
     // FIXME: initialize via a dedicated function instead of parsing
-    ctx.isolates = detail::getIsolates("P:DQMatMulGQ/compute,P:DQMatMulCW/compute,P:RMSNorm/compute");
+    ctx.isolates = detail::getIsolates("P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,P:DQMatMulGQi4/"
+                                       "compute,P:DQMatMulCWi4/compute,P:RMSNorm/compute");
     ctx.nofolds = detail::getNoFolds("compute");
 }
 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp
index 2ee36fcb09361a..a35f33eab49178 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp
@@ -404,14 +404,21 @@ void Snapshot::earlyRegroup() {
             if (isolate.pattern == "RMSNorm") {
                 rewr.add_matcher<ov::npuw::patterns::compute::RMSNorm>(shared_from_this(), isolate.tag);
                 handle_patterns = true;
-            } else if (isolate.pattern == "DQMatMulCW") {
-                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulCW>(shared_from_this(), isolate.tag);
+            } else if (isolate.pattern == "DQMatMulCWu4") {
+                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulCWu4>(shared_from_this(), isolate.tag);
                 handle_patterns = true;
-            } else if (isolate.pattern == "DQMatMulGQ") {
-                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulGQ>(shared_from_this(), isolate.tag);
+            } else if (isolate.pattern == "DQMatMulGQu4") {
+                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulGQu4>(shared_from_this(), isolate.tag);
+                handle_patterns = true;
+            } else if (isolate.pattern == "DQMatMulCWi4") {
+                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulCWi4>(shared_from_this(), isolate.tag);
+                handle_patterns = true;
+            } else if (isolate.pattern == "DQMatMulGQi4") {
+                rewr.add_matcher<ov::npuw::patterns::compute::DQMatMulGQi4>(shared_from_this(), isolate.tag);
                 handle_patterns = true;
             } else {
-                LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCW, DQMatMulGQ "
+                LOG_WARN("OPENVINO_NPUW_ISOLATE only supports RMSNorm, DQMatMulCWu4, DQMatMulGQu4, DQMatMulCWi4, "
+                         "DQMatMulGQi4 "
                          << "as patterns. Isolate pattern " << isolate.pattern << " is skipped!");
             }
         }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp
index d43fc8d95c3ae8..e7f09b00cde2a2 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp
@@ -7,24 +7,7 @@
 #include "../../logging.hpp"
 #include "../online/group.hpp"     // online::Group
 #include "../online/snapshot.hpp"  // online::Snapshot
-#include "openvino/op/add.hpp"
-#include "openvino/op/broadcast.hpp"
-#include "openvino/op/concat.hpp"
-#include "openvino/op/convert.hpp"
-#include "openvino/op/divide.hpp"
-#include "openvino/op/gather.hpp"
-#include "openvino/op/greater.hpp"
-#include "openvino/op/matmul.hpp"
-#include "openvino/op/mod.hpp"
-#include "openvino/op/multiply.hpp"
-#include "openvino/op/power.hpp"
-#include "openvino/op/reduce_mean.hpp"
-#include "openvino/op/reshape.hpp"
-#include "openvino/op/shape_of.hpp"
-#include "openvino/op/sqrt.hpp"
-#include "openvino/op/subtract.hpp"
-#include "openvino/op/util/op_types.hpp"
-#include "openvino/op/variadic_split.hpp"
+#include "openvino/op/ops.hpp"
 #include "openvino/pass/pattern/op/label.hpp"  // any_input
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "openvino/util/common_util.hpp"
@@ -37,7 +20,7 @@ namespace compute {
 namespace opp = ov::pass::pattern;
 
 // TODO: visualize
-DQMatMulGQ::DQMatMulGQ(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
+DQMatMulGQu4::DQMatMulGQu4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
@@ -87,11 +70,11 @@ DQMatMulGQ::DQMatMulGQ(const std::shared_ptr<ov::npuw::online::Snapshot>& snapsh
 
         return false;  // root hasn't changed
     };
-    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulGQ"), std::move(callback));
+    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulGQu4"), std::move(callback));
 }
 
 // TODO: visualize
-DQMatMulCW::DQMatMulCW(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
+DQMatMulCWu4::DQMatMulCWu4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
     auto qweight = opp::wrap_type<ov::op::v0::Constant>();
     auto qzerop = opp::wrap_type<ov::op::v0::Constant>();
     auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
@@ -140,7 +123,99 @@ DQMatMulCW::DQMatMulCW(const std::shared_ptr<ov::npuw::online::Snapshot>& snapsh
 
         return false;  // root hasn't changed
     };
-    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulCW"), std::move(callback));
+    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulCWu4"), std::move(callback));
+}
+
+// TODO: visualize
+DQMatMulGQi4::DQMatMulGQi4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
+    auto qweight = opp::wrap_type<ov::op::v0::Constant>();
+    auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
+
+    auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
+
+    auto qmuls = opp::wrap_type<ov::op::v1::Multiply>({qcvtw, qcoeff});
+    auto qreshp = opp::wrap_type<ov::op::v1::Reshape>({qmuls, opp::any_input()});
+    auto qcvtr = opp::wrap_type<ov::op::v0::Convert>({qreshp});
+    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), qcvtr});
+
+    auto node_to_gptr = snapshot->getNodeToGroupMap();
+
+    // Note: Use [=] to make sure the above objects stay alive in the callback
+    auto callback = [=](ov::pass::pattern::Matcher& m) {
+        auto& node_to_output = m.get_pattern_value_map();
+
+        auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr();
+        auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr();
+
+        NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight));
+        NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff));
+
+        auto matched_qweight = std::static_pointer_cast<ov::op::v0::Constant>(matched_node_qweight);
+        auto matched_qcoeff = std::static_pointer_cast<ov::op::v0::Constant>(matched_node_qcoeff);
+
+        if ((ov::element::i4 == matched_qweight->get_element_type() ||
+             ov::element::i8 == matched_qweight->get_element_type()) &&
+            ov::element::f16 == matched_qcoeff->get_element_type()) {
+            // Partitioning ignores Const->Convert nodes, so qcvtw is not used
+            auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr();
+            auto matched_qreshp = node_to_output.at(qreshp).get_node_shared_ptr();
+            auto matched_qcvtr = node_to_output.at(qcvtr).get_node_shared_ptr();
+            auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr();
+
+            node_to_gptr->at(matched_qmuls)->isolate(isol_tag);
+            node_to_gptr->at(matched_qreshp)->isolate(isol_tag);
+            node_to_gptr->at(matched_qcvtr)->isolate(isol_tag);
+            node_to_gptr->at(matched_qmm)->isolate(isol_tag);
+        }
+
+        return false;  // root hasn't changed
+    };
+    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulGQi4"), std::move(callback));
+}
+
+// TODO: visualize
+DQMatMulCWi4::DQMatMulCWi4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag) {
+    auto qweight = opp::wrap_type<ov::op::v0::Constant>();
+    auto qcoeff = opp::wrap_type<ov::op::v0::Constant>();
+
+    auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
+
+    auto qmuls = opp::wrap_type<ov::op::v1::Multiply>({qcvtw, qcoeff});
+
+    auto qcvtm = opp::wrap_type<ov::op::v0::Convert>({qmuls});
+    auto qmm = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), qcvtm});
+
+    auto node_to_gptr = snapshot->getNodeToGroupMap();
+
+    // Note: Use [=] to make sure the above objects stay alive in the callback
+    auto callback = [=](ov::pass::pattern::Matcher& m) {
+        auto& node_to_output = m.get_pattern_value_map();
+
+        auto matched_node_qweight = node_to_output.at(qweight).get_node_shared_ptr();
+        auto matched_node_qcoeff = node_to_output.at(qcoeff).get_node_shared_ptr();
+
+        NPUW_ASSERT(ov::op::util::is_constant(matched_node_qweight));
+        NPUW_ASSERT(ov::op::util::is_constant(matched_node_qcoeff));
+
+        auto matched_qweight = std::static_pointer_cast<ov::op::v0::Constant>(matched_node_qweight);
+        auto matched_qcoeff = std::static_pointer_cast<ov::op::v0::Constant>(matched_node_qcoeff);
+
+        if ((ov::element::i4 == matched_qweight->get_element_type() ||
+             ov::element::i8 == matched_qweight->get_element_type()) &&
+            ov::element::f16 == matched_qcoeff->get_element_type()) {
+            // Partitioning ignores Const->Convert nodes, so qcvtw is not used
+            auto matched_qmuls = node_to_output.at(qmuls).get_node_shared_ptr();
+            auto matched_qcvtm = node_to_output.at(qcvtm).get_node_shared_ptr();
+            auto matched_qmm = node_to_output.at(qmm).get_node_shared_ptr();
+
+            node_to_gptr->at(matched_qmuls)->isolate(isol_tag);
+            node_to_gptr->at(matched_qcvtm)->isolate(isol_tag);
+            node_to_gptr->at(matched_qmm)->isolate(isol_tag);
+        }
+
+        return false;  // root hasn't changed
+    };
+    register_matcher(std::make_shared<opp::Matcher>(qmm, "TagDQMatMulCWi4"), std::move(callback));
 }
 
 // TODO: visualize
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp
index 80aa4d095d3c9f..92e60cb95fbdbe 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp
@@ -21,14 +21,24 @@ class Snapshot;  // Forward declaration
 namespace patterns {
 namespace compute {
 
-class DQMatMulGQ : public ov::pass::MatcherPass {
+class DQMatMulGQu4 : public ov::pass::MatcherPass {
 public:
-    DQMatMulGQ(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
+    DQMatMulGQu4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
 };
 
-class DQMatMulCW : public ov::pass::MatcherPass {
+class DQMatMulCWu4 : public ov::pass::MatcherPass {
 public:
-    DQMatMulCW(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
+    DQMatMulCWu4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
+};
+
+class DQMatMulGQi4 : public ov::pass::MatcherPass {
+public:
+    DQMatMulGQi4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
+};
+
+class DQMatMulCWi4 : public ov::pass::MatcherPass {
+public:
+    DQMatMulCWi4(const std::shared_ptr<ov::npuw::online::Snapshot>& snapshot, const std::string& isol_tag);
 };
 
 class RMSNorm : public ov::pass::MatcherPass {

From 11abf3f9d7ec5be1140a6c14079963682fecc5ee Mon Sep 17 00:00:00 2001
From: Zoran Zomborat <zoran.zomborat@intel.com>
Date: Wed, 25 Sep 2024 18:26:11 +0300
Subject: [PATCH 4/6] Extend support to BF16 in npu plugin (#26469)

### Details:
Extend BF16 logic in NPU plugin.
For what it's worth, functional tests on the NPU side are running with
these changes, although there are some open issues in the NPU compiler
which cause an accuracy issue.
Until those are resolved, this PR should be merged to ease integration on our side.

### Tickets:
 - *[EISW-140090](https://jira.devtools.intel.com/browse/EISW-140090)*
---
 .../src/backend/include/zero_device.hpp       |  1 +
 .../src/backend/src/zero_infer_request.cpp    |  4 ++-
 .../skip_tests_config.cpp                     |  1 -
 .../tools/single-image-test/main.cpp          | 31 ++++++++++++++++---
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp
index 7453cfc300815e..9d034b1bb4038b 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp
@@ -69,6 +69,7 @@ class ZeroDevice : public IDevice {
 
     std::map<ov::element::Type, float> device_gops = {{ov::element::f32, 0.f},
                                                       {ov::element::f16, 0.f},
+                                                      {ov::element::bf16, 0.f},
                                                       {ov::element::u8, 0.f},
                                                       {ov::element::i8, 0.f}};
 
diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index ec17b0e137cf25..0a8d8dded5e97d 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -551,6 +551,8 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
         break;
     case ov::element::Type_t::f16:
         break;
+    case ov::element::Type_t::bf16:
+        break;
     case ov::element::Type_t::u4:
         break;
     case ov::element::Type_t::i4:
@@ -575,7 +577,7 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
         break;
     default:
         OPENVINO_THROW("Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() +
-                       "! Supported precisions: FP32, FP16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64");
+                       "! Supported precisions: FP32, FP16, BF16, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64");
     }
 }
 
diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index c38125a6458e7d..4eb829045c964a 100644
--- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -266,7 +266,6 @@ std::vector<std::string> disabledTestPatterns() {
         _skipRegistry.addPatterns(
                 "Tests with unsupported precision", {
                 ".*InferRequestCheckTensorPrecision.*type=boolean.*",
-                ".*InferRequestCheckTensorPrecision.*type=bf16.*",
                 ".*InferRequestCheckTensorPrecision.*type=f64.*",
                 ".*InferRequestCheckTensorPrecision.*type=u1\\D.*",
                 // [Track number: E#97469]
diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp
index 14fce26bdd7458..3b3009bb5f459c 100644
--- a/src/plugins/intel_npu/tools/single-image-test/main.cpp
+++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp
@@ -287,7 +287,8 @@ std::vector<cv::Mat> ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co
                     "Unsupported layout: ", layout.to_string());
 
     OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 ||
-                            precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32,
+                            precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 ||
+                            precision == ov::element::Type_t::i32,
                     "Unsupported precision: ", precision.get_type_name());
 
     int cvType = 0;
@@ -302,6 +303,9 @@ std::vector<cv::Mat> ovToCV(const ov::Tensor& tensor, const ov::Shape& shape, co
     } else if (precision == ov::element::Type_t::f16) {
         cvType = CV_16SC1;
         elemSize = sizeof(ov::float16);
+    } else if (precision == ov::element::Type_t::bf16) {
+        cvType = CV_16SC1;
+        elemSize = sizeof(ov::bfloat16);
     } else if (precision == ov::element::Type_t::i32) {
         cvType = CV_32SC1;
         elemSize = sizeof(int32_t);
@@ -392,11 +396,14 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
         cvType = static_cast<int>(CV_32FC(C));
     } else if (precision == ov::element::Type_t::f16) {
         cvType = static_cast<int>(CV_16SC(C));
+    } else if (precision == ov::element::Type_t::bf16) {
+        cvType = static_cast<int>(CV_16SC(C));
     } else if (precision == ov::element::Type_t::i32) {
         cvType = static_cast<int>(CV_32SC(C));
     } else {
         OPENVINO_ASSERT(precision == ov::element::Type_t::u8 || precision == ov::element::Type_t::f32 ||
-                                precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::i32,
+                                precision == ov::element::Type_t::f16 || precision == ov::element::Type_t::bf16 ||
+                                precision == ov::element::Type_t::i32,
                         "Unsupported precision ", precision.get_type_name());
     }
 
@@ -437,6 +444,10 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
             const auto inPtr = in.ptr<float>();
             const auto outPtr = out.ptr<ov::float16>();
             convertBufferType(outPtr, inPtr, out.size().area() * C);
+        } else if (precision == ov::element::Type_t::bf16) {
+            const auto inPtr = in.ptr<float>();
+            const auto outPtr = out.ptr<ov::bfloat16>();
+            convertBufferType(outPtr, inPtr, out.size().area() * C);
         } else if (precision == ov::element::Type_t::i32) {
             in.convertTo(out, CV_32S);
         } else {
@@ -451,7 +462,8 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
     } else if (layout == ov::Layout("NCHW")) {
         auto tensorPlanes = ovToCV(tensor, shape, layout, 0);
 
-        if (precision != ov::element::Type_t::f16) {
+        if (!(precision == ov::element::Type_t::f16 ||
+            precision == ov::element::Type_t::bf16)) {
             cv::split(in, tensorPlanes);
         } else {
             std::vector<cv::Mat> inPlanes;
@@ -461,8 +473,13 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& sha
 
             for (size_t i = 0; i < tensorPlanes.size(); ++i) {
                 const auto inPtr = inPlanes[i].ptr<float>();
-                const auto outPtr = tensorPlanes[i].ptr<ov::float16>();
-                convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
+                if (precision == ov::element::Type_t::f16) {
+                    const auto outPtr = tensorPlanes[i].ptr<ov::float16>();
+                    convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
+                } else if (precision == ov::element::Type_t::bf16) {
+                    const auto outPtr = tensorPlanes[i].ptr<ov::bfloat16>();
+                    convertBufferType(outPtr, inPtr, inPlanes[i].size().area());
+                }
             }
         }
 
@@ -1761,6 +1778,8 @@ static int runSingleImageTest() {
                         inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f32;
                     } else if (strEq(precision, "FP16")) {
                         inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::f16;
+                    } else if (strEq(precision, "BF16")) {
+                        inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::bf16;
                     } else if (strEq(precision, "I32")) {
                         inputBinPrecisionForOneInfer[inferIdx][precisionIdx] = ov::element::i32;
                     } else if (strEq(precision, "I64")) {
@@ -1808,6 +1827,8 @@ static int runSingleImageTest() {
                 ov::element::Type prc_in = ov::element::u8;
                 if (FLAGS_ip == "FP16")
                     prc_in = ov::element::f16;
+                else if (FLAGS_ip == "BF16")
+                    prc_in = ov::element::bf16;
                 else if (FLAGS_ip == "FP32")
                     prc_in = ov::element::f32;
                 else if (FLAGS_ip == "I32")

From a17efa6f20ec83348bcd077aca9c05d90c81aa48 Mon Sep 17 00:00:00 2001
From: Alina Kladieva <alina.kladieva@intel.com>
Date: Wed, 25 Sep 2024 17:27:08 +0200
Subject: [PATCH 5/6] [GHA] Avoid running actions in forks (#26749)

### Tickets:
 - 153157
---
 .github/actions/smart-ci/action.yml                   |  5 +++++
 .github/actions/smart-ci/smart_ci.py                  | 11 +++++++++++
 .github/workflows/android_arm64.yml                   |  2 +-
 .github/workflows/android_x64.yml                     |  2 +-
 .github/workflows/assign_issue.yml                    |  1 +
 .github/workflows/build_doc.yml                       |  1 +
 .github/workflows/check_pr_commits.yml                |  1 +
 .github/workflows/cleanup_caches.yml                  |  2 ++
 .github/workflows/code_snippets.yml                   |  1 +
 .github/workflows/code_style.yml                      |  1 +
 .github/workflows/coverity.yml                        |  1 +
 .github/workflows/debian_10_arm.yml                   |  1 +
 .github/workflows/dependency_review.yml               |  1 +
 .github/workflows/fedora_29.yml                       |  2 +-
 .github/workflows/files_size.yml                      |  1 +
 .github/workflows/linux_arm64.yml                     |  2 +-
 .github/workflows/linux_conditional_compilation.yml   |  3 ++-
 .github/workflows/linux_riscv.yml                     |  3 ++-
 .github/workflows/linux_sanitizers.yml                |  1 +
 .github/workflows/mac.yml                             |  1 +
 .github/workflows/mac_arm64.yml                       |  1 +
 .github/workflows/mo.yml                              |  1 +
 .github/workflows/ovc.yml                             |  1 +
 .github/workflows/py_checks.yml                       |  1 +
 .github/workflows/send_workflows_to_opentelemetry.yml |  2 +-
 .github/workflows/stale_prs_and_issues.yml            |  1 +
 .github/workflows/ubuntu_20.yml                       |  2 +-
 .github/workflows/ubuntu_22.yml                       |  2 +-
 .github/workflows/ubuntu_22_dpcpp.yml                 |  2 +-
 .github/workflows/ubuntu_24.yml                       |  2 +-
 .github/workflows/webassembly.yml                     |  2 +-
 .github/workflows/workflow_rerunner.yml               |  3 ++-
 32 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml
index 007db90d13251b..cd111d617ddc1b 100644
--- a/.github/actions/smart-ci/action.yml
+++ b/.github/actions/smart-ci/action.yml
@@ -43,6 +43,10 @@ inputs:
     description: "Comma-separated list of patterns (fnmatch-style). If PR has only matching files changed, 
                   return indicator that CI can be skipped"
     required: false
+  enable_for_org:
+    description: "Enables running workflows for a given organization; triggers from other orgs are skipped"
+    required: false
+    default: "openvinotoolkit"
 
 outputs:
   all_components:
@@ -99,6 +103,7 @@ runs:
           -c "${{ inputs.components_config }}" \
           -m "${{ inputs.components_config_schema }}" \
           -l "${{ inputs.labeler_config }}" \
+          --enable_for_org "${{ inputs.enable_for_org }}" \
           --skip-when-only-listed-labels-set "${{ inputs.skip_when_only_listed_labels_set }}" \
           --skip-when-only-listed-files-changed "${{ inputs.skip_when_only_listed_files_changed }}"
       shell: bash
diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py
index 1c8558f4779108..e922d3d2ef5263 100644
--- a/.github/actions/smart-ci/smart_ci.py
+++ b/.github/actions/smart-ci/smart_ci.py
@@ -4,6 +4,8 @@
 import os
 import re
 import argparse
+import sys
+
 import yaml
 import json
 import jsonschema
@@ -146,6 +148,8 @@ def parse_args():
     parser.add_argument('--skip-when-only-listed-files-changed',
                         help="Comma-separated list of patterns (fnmatch-style). If PR has only matching files changed, "
                              "return indicator that CI can be skipped")
+    parser.add_argument('--enable_for_org', default='openvinotoolkit',
+                        help='Enable running workflows for a given organization; triggers from other orgs are skipped')
     args = parser.parse_args()
     return args
 
@@ -176,6 +180,13 @@ def main():
         components_config = yaml.safe_load(config)
 
     owner, repository = args.repo.split('/')
+
+    if owner != args.enable_for_org:
+        logger.info(f"Running workflows is enabled only for repos in {args.enable_for_org} organization. "
+                    f"The current workflow was initiated from other org: {owner}, skipping")
+        set_github_output("skip_workflow", "True")
+        sys.exit(0)
+
     gh_api = GhApi(owner=owner, repo=repository, token=os.getenv("GITHUB_TOKEN"))
     pr = gh_api.pulls.get(args.pr) if args.pr else None
 
diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml
index 35c18b43e1f95d..bbd737dff0e569 100644
--- a/.github/workflows/android_arm64.yml
+++ b/.github/workflows/android_arm64.yml
@@ -43,6 +43,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -95,7 +96,6 @@ jobs:
       VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache'
       VCPKG_FORCE_SYSTEM_BINARIES: '1'
       SCCACHE_AZURE_KEY_PREFIX: android_arm64
-    if: "!needs.smart_ci.outputs.skip_workflow"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml
index e8fb4902e37612..3504b8cf2fdeb1 100644
--- a/.github/workflows/android_x64.yml
+++ b/.github/workflows/android_x64.yml
@@ -46,6 +46,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -94,7 +95,6 @@ jobs:
       ANDROID_SDK_VERSION: 29
       ANDROID_ABI_CONFIG: x86_64
       SCCACHE_AZURE_KEY_PREFIX: android_x64
-    if: "!needs.smart_ci.outputs.skip_workflow"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml
index f466715f5cfcd3..b13c6736e27b0b 100644
--- a/.github/workflows/assign_issue.yml
+++ b/.github/workflows/assign_issue.yml
@@ -15,6 +15,7 @@ jobs:
     permissions:
       issues: write
     timeout-minutes: 10
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: take an issue
         uses: bdougie/take-action@1439165ac45a7461c2d89a59952cd7d941964b87 # v1.6.1
diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml
index 7b380530cfaecd..d6d3a63e431ecd 100644
--- a/.github/workflows/build_doc.yml
+++ b/.github/workflows/build_doc.yml
@@ -15,6 +15,7 @@ permissions: read-all
 jobs:
   Build_Doc:
     runs-on: ubuntu-20.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/check_pr_commits.yml b/.github/workflows/check_pr_commits.yml
index 75d18695c6e2f9..690b85046a108b 100644
--- a/.github/workflows/check_pr_commits.yml
+++ b/.github/workflows/check_pr_commits.yml
@@ -6,6 +6,7 @@ permissions: read-all
 jobs:
   Checks:
     runs-on: ubuntu-22.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/cleanup_caches.yml b/.github/workflows/cleanup_caches.yml
index 6ba1a4164d9022..9c3992e2a85184 100644
--- a/.github/workflows/cleanup_caches.yml
+++ b/.github/workflows/cleanup_caches.yml
@@ -10,6 +10,7 @@ permissions: read-all
 jobs:
   Cleanup_PIP:
     runs-on: aks-linux-2-cores-8gb
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     container:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
@@ -35,6 +36,7 @@ jobs:
 
   Cleanup_CCACHE:
     runs-on: aks-linux-2-cores-8gb
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     container:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml
index ae5f9ee25624d3..82daec9ee791f9 100644
--- a/.github/workflows/code_snippets.yml
+++ b/.github/workflows/code_snippets.yml
@@ -25,6 +25,7 @@ jobs:
       matrix:
         os: ['ubuntu-22.04', 'macos-latest', 'windows-latest']
     runs-on: ${{ matrix.os }}
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml
index 2fbcc6b5f87761..c2db68edca3956 100644
--- a/.github/workflows/code_style.yml
+++ b/.github/workflows/code_style.yml
@@ -12,6 +12,7 @@ jobs:
     runs-on: ubuntu-22.04
     permissions:
       pull-requests: write
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index 1d2f8e3ff54820..0de4cb045bfeb4 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -31,6 +31,7 @@ jobs:
       run:
         shell: bash
     runs-on: aks-linux-16-cores-32gb
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     container:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
     env:
diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml
index 84496a50b9a480..7fab775b45b886 100644
--- a/.github/workflows/debian_10_arm.yml
+++ b/.github/workflows/debian_10_arm.yml
@@ -48,6 +48,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-16-cores-arm-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
diff --git a/.github/workflows/dependency_review.yml b/.github/workflows/dependency_review.yml
index fd2d4f02d57368..e73acd765ed7f9 100644
--- a/.github/workflows/dependency_review.yml
+++ b/.github/workflows/dependency_review.yml
@@ -6,6 +6,7 @@ permissions: read-all
 jobs:
   dependency-review:
     runs-on: ubuntu-latest
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/fedora_29.yml b/.github/workflows/fedora_29.yml
index a79b0f86af28f3..b3a540fb287bd8 100644
--- a/.github/workflows/fedora_29.yml
+++ b/.github/workflows/fedora_29.yml
@@ -48,6 +48,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -71,7 +72,6 @@ jobs:
 
   Build:
     needs: [Docker, Smart_CI]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-32gb'
diff --git a/.github/workflows/files_size.yml b/.github/workflows/files_size.yml
index 065fe71bc75893..6a006cc7d4ada8 100644
--- a/.github/workflows/files_size.yml
+++ b/.github/workflows/files_size.yml
@@ -10,6 +10,7 @@ permissions: read-all
 jobs:
   Check_Files_Size:
     runs-on: ubuntu-22.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
 
diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml
index 7c0282b5519705..2e557ebc5ef477 100644
--- a/.github/workflows/linux_arm64.yml
+++ b/.github/workflows/linux_arm64.yml
@@ -52,6 +52,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-16-cores-arm-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -75,7 +76,6 @@ jobs:
 
   Build:
     needs: [ Docker, Smart_CI ]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-arm'
diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml
index f0ce141e8c004f..f9359e323d4baf 100644
--- a/.github/workflows/linux_conditional_compilation.yml
+++ b/.github/workflows/linux_conditional_compilation.yml
@@ -53,6 +53,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -104,7 +105,7 @@ jobs:
       SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat
       MODELS_PATH: /__w/openvino/openvino/testdata
       SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release_faster_build
-    if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }}
+    if: ${{ github.event_name != 'merge_group' }}
 
     steps:
       - name: Clone OpenVINO
diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml
index 6c26bb8b3859f0..3de8b56ad773cc 100644
--- a/.github/workflows/linux_riscv.yml
+++ b/.github/workflows/linux_riscv.yml
@@ -44,6 +44,7 @@ jobs:
   Docker:
     needs: Smart_CI
     runs-on: aks-linux-4-cores-16gb-docker-build
+    if: "!needs.smart_ci.outputs.skip_workflow"
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
       volumes:
@@ -88,7 +89,7 @@ jobs:
       CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp
       CCACHE_MAXSIZE: 2G
 
-    if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }}
+    if: ${{ github.event_name != 'merge_group' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml
index 9105b3b00bf84c..367fce8eb98683 100644
--- a/.github/workflows/linux_sanitizers.yml
+++ b/.github/workflows/linux_sanitizers.yml
@@ -25,6 +25,7 @@ jobs:
       run:
         shell: bash
     runs-on: aks-linux-16-cores-32gb
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     container:
       image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
       volumes:
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 715380811d6870..d60ef4608093b2 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -78,6 +78,7 @@ jobs:
       INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js
       INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install
       BUILD_DIR: ${{ github.workspace }}/build
+    if: "!needs.smart_ci.outputs.skip_workflow"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml
index 2615fe16316ea7..73890d1284222e 100644
--- a/.github/workflows/mac_arm64.yml
+++ b/.github/workflows/mac_arm64.yml
@@ -78,6 +78,7 @@ jobs:
       INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js
       INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install
       BUILD_DIR: ${{ github.workspace }}/build
+    if: "!needs.smart_ci.outputs.skip_workflow"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml
index 7bbf3ba28001b9..9a112e7e53ced2 100644
--- a/.github/workflows/mo.yml
+++ b/.github/workflows/mo.yml
@@ -21,6 +21,7 @@ permissions: read-all
 jobs:
   Pylint-UT:
     runs-on: ubuntu-22.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml
index a296f93e5a5187..1e2668f26cb579 100644
--- a/.github/workflows/ovc.yml
+++ b/.github/workflows/ovc.yml
@@ -16,6 +16,7 @@ permissions: read-all
 jobs:
   Pylint-UT:
     runs-on: ubuntu-22.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml
index db0918d0eb61c0..75a8a1b83f03d0 100644
--- a/.github/workflows/py_checks.yml
+++ b/.github/workflows/py_checks.yml
@@ -25,6 +25,7 @@ permissions: read-all
 jobs:
   linters:
     runs-on: ubuntu-20.04
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml
index 8f70389e645906..ef597e55858b0e 100644
--- a/.github/workflows/send_workflows_to_opentelemetry.yml
+++ b/.github/workflows/send_workflows_to_opentelemetry.yml
@@ -37,7 +37,7 @@ jobs:
   otel-export-trace:
     name: Export finished workflow metrics
     runs-on: aks-linux-2-cores-8gb
-    if: github.repository == 'openvinotoolkit/openvino'
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
 
     steps:
       - name: Checkout
diff --git a/.github/workflows/stale_prs_and_issues.yml b/.github/workflows/stale_prs_and_issues.yml
index 395fc6a350e2ba..d246a7e83f4f73 100644
--- a/.github/workflows/stale_prs_and_issues.yml
+++ b/.github/workflows/stale_prs_and_issues.yml
@@ -12,6 +12,7 @@ jobs:
       issues: write
       pull-requests: write
     runs-on: ubuntu-latest
+    if: ${{ github.repository_owner == 'openvinotoolkit' }}
     steps:
       - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
         with:
diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml
index 5d74284b8c16fc..df1450a98e46a1 100644
--- a/.github/workflows/ubuntu_20.yml
+++ b/.github/workflows/ubuntu_20.yml
@@ -54,6 +54,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -77,7 +78,6 @@ jobs:
 
   Build:
     needs: [Docker, Smart_CI]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-32gb'
diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml
index b2a2f78410e9f7..90618357b2c63f 100644
--- a/.github/workflows/ubuntu_22.yml
+++ b/.github/workflows/ubuntu_22.yml
@@ -56,6 +56,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -80,7 +81,6 @@ jobs:
 
   Build:
     needs: [Docker, Smart_CI]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-32gb'
diff --git a/.github/workflows/ubuntu_22_dpcpp.yml b/.github/workflows/ubuntu_22_dpcpp.yml
index 1a5c69f03fe690..1ee9df0095ff02 100644
--- a/.github/workflows/ubuntu_22_dpcpp.yml
+++ b/.github/workflows/ubuntu_22_dpcpp.yml
@@ -44,6 +44,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -66,7 +67,6 @@ jobs:
 
   Build:
     needs: [Docker, Smart_CI]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-32gb'
diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml
index a0e1b314391b24..474e8a46ae57aa 100644
--- a/.github/workflows/ubuntu_24.yml
+++ b/.github/workflows/ubuntu_24.yml
@@ -51,6 +51,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -74,7 +75,6 @@ jobs:
 
   Build:
     needs: [Docker, Smart_CI]
-    if: "!needs.smart_ci.outputs.skip_workflow"
     uses: ./.github/workflows/job_build_linux.yml
     with:
       runner: 'aks-linux-16-cores-32gb'
diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml
index 902fb0dfcb00f0..a4a1a17af9d5ce 100644
--- a/.github/workflows/webassembly.yml
+++ b/.github/workflows/webassembly.yml
@@ -48,6 +48,7 @@ jobs:
 
   Docker:
     needs: Smart_CI
+    if: "!needs.smart_ci.outputs.skip_workflow"
     runs-on: aks-linux-4-cores-16gb-docker-build
     container:
       image: openvinogithubactions.azurecr.io/docker_build:0.2
@@ -88,7 +89,6 @@ jobs:
       OPENVINO_REPO: /__w/openvino/openvino/openvino
       OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build
       SCCACHE_AZURE_KEY_PREFIX: webassembly_Release
-    if: "!needs.smart_ci.outputs.skip_workflow"
     steps:
       - name: Clone OpenVINO
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml
index 81af7aede704ea..1c193f35dfa17e 100644
--- a/.github/workflows/workflow_rerunner.yml
+++ b/.github/workflows/workflow_rerunner.yml
@@ -21,7 +21,8 @@ permissions: read-all
 jobs:
   rerun:
     name: Rerun Workflow
-    if: ${{ github.event.workflow_run.conclusion == 'failure' }}  # Run only for the failed workflows
+    # Run only for the failed workflows in openvinotoolkit org
+    if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }}
     runs-on: aks-linux-2-cores-8gb
     permissions:
       actions: write

From 2659786992f5fdeb8ea968ccc9ecfb6cd5717f68 Mon Sep 17 00:00:00 2001
From: Sun Xiaoxia <xiaoxia.sun@intel.com>
Date: Wed, 25 Sep 2024 23:44:06 +0800
Subject: [PATCH 6/6] Support machines with more than 1024 cores (#26303)

### Details:
- *Because sizeof(cpu_set_t) is a fixed size of 128 bytes, the highest CPU
number it can represent is 1023. As a result, `sched_getaffinity(0,
sizeof(cpu_set_t), mask)` returns an error on machines with more than 1024
cores. The solution is to pass a dynamically sized mask to
sched_getaffinity(), retrying until it returns successfully.*

### Tickets:
 - *https://github.com/openvinotoolkit/openvino/issues/26140*

---------

Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
---
 .../src/dev/threading/thread_affinity.cpp     |  5 +++--
 src/inference/src/os/lin/lin_system_conf.cpp  | 10 ++++++----
 src/inference/src/system_conf.cpp             | 20 ++-----------------
 3 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/src/inference/src/dev/threading/thread_affinity.cpp b/src/inference/src/dev/threading/thread_affinity.cpp
index f53941f270af99..791e5a7fc70f07 100644
--- a/src/inference/src/dev/threading/thread_affinity.cpp
+++ b/src/inference/src/dev/threading/thread_affinity.cpp
@@ -87,8 +87,9 @@ bool pin_thread_to_vacant_core(int thrIdx,
 }
 
 bool pin_current_thread_to_socket(int socket) {
-    const int sockets = ov::get_available_numa_nodes().size();
-    const int cores = ov::get_number_of_cpu_cores();
+    auto proc_type_table = get_org_proc_type_table();
+    const int sockets = proc_type_table.size() > 1 ? proc_type_table.size() - 1 : 1;
+    const int cores = proc_type_table[0][MAIN_CORE_PROC];
     const int cores_per_socket = cores / sockets;
 
     int ncpus = 0;
diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp
index 2dcf9eaa4d6ad4..e30bcbbe8bc55e 100644
--- a/src/inference/src/os/lin/lin_system_conf.cpp
+++ b/src/inference/src/os/lin/lin_system_conf.cpp
@@ -12,6 +12,7 @@
 #include <vector>
 
 #include "dev/threading/parallel_custom_arena.hpp"
+#include "dev/threading/thread_affinity.hpp"
 #include "openvino/core/except.hpp"
 #include "openvino/runtime/system_conf.hpp"
 #include "os/cpu_map_info.hpp"
@@ -114,10 +115,11 @@ CPU::CPU() {
     };
 
     auto check_valid_cpu = [&]() {
-        cpu_set_t mask;
-        CPU_ZERO(&mask);
+        ov::threading::CpuSet mask;
+        int ncpus = 0;
+        std::tie(mask, ncpus) = ov::threading::get_process_mask();
 
-        if ((_processors == 0) || (sched_getaffinity(0, sizeof(cpu_set_t), &mask) == -1)) {
+        if ((_processors == 0) || mask == nullptr) {
             return -1;
         }
 
@@ -128,7 +130,7 @@ CPU::CPU() {
 
         numa_node_list.assign(_sockets, std::vector<int>());
         for (int i = 0; i < _processors; i++) {
-            if (CPU_ISSET(i, &mask)) {
+            if (CPU_ISSET(i, mask)) {
                 valid_cpu_mapping_table.emplace_back(_cpu_mapping_table[i]);
                 if (_cpu_mapping_table[i][CPU_MAP_CORE_TYPE] == MAIN_CORE_PROC) {
                     phy_core_list.emplace_back(_cpu_mapping_table[i][CPU_MAP_CORE_ID]);
diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp
index 6ebec87feccba1..9de1eeb78e1547 100644
--- a/src/inference/src/system_conf.cpp
+++ b/src/inference/src/system_conf.cpp
@@ -327,26 +327,10 @@ int get_org_numa_id(int numa_node_id) {
 #    ifndef _WIN32
 int get_number_of_cpu_cores(bool bigCoresOnly) {
     CPU& cpu = cpu_info();
-    unsigned numberOfProcessors = cpu._processors;
     unsigned totalNumberOfCpuCores = cpu._cores;
     OPENVINO_ASSERT(totalNumberOfCpuCores != 0, "Total number of cpu cores can not be 0.");
-    cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet;
-    CPU_ZERO(&currentCpuSet);
-    CPU_ZERO(&usedCoreSet);
-    CPU_ZERO(&currentCoreSet);
-
-    sched_getaffinity(0, sizeof(currentCpuSet), &currentCpuSet);
-
-    for (unsigned processorId = 0u; processorId < numberOfProcessors; processorId++) {
-        if (CPU_ISSET(processorId, &currentCpuSet)) {
-            unsigned coreId = processorId % totalNumberOfCpuCores;
-            if (!CPU_ISSET(coreId, &usedCoreSet)) {
-                CPU_SET(coreId, &usedCoreSet);
-                CPU_SET(processorId, &currentCoreSet);
-            }
-        }
-    }
-    int phys_cores = CPU_COUNT(&currentCoreSet);
+
+    int phys_cores = totalNumberOfCpuCores;
 #        if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
     auto core_types = custom::info::core_types();
     if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {