From 5e1833dad1e65f2b06b2ecf73a0d256e6ea074bb Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Mon, 21 Oct 2024 10:56:22 +0200
Subject: [PATCH 01/22] [TRANSFORMATIONS] Add support for 'inputs_embeds'
 input in SDPAToPA

Add support for the 'inputs_embeds' input in the SDPAToPA transformation.
VLMs use this input in place of the 'input_ids' input of text-only models.

The changes enable support of the SDPAToPA transformation for the
following models:
 * llava-hf/llava-1.5-7b-hf
 * llava-hf/llava-v1.6-mistral-7b-hf
 * llava-hf/llava-v1.6-vicuna-7b-hf
 * llava-hf/llama3-llava-next-8b-hf
 * openbmb/MiniCPM-V-2_6

Signed-off-by: Andrii Staikov <andrii.staikov@intel.com>

- Tickets:
 * CVS-152288
---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 8b19b07f2f5d76..457f2a7138716c 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -53,8 +53,21 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     auto sliding_window = v0::Constant::create(element::i32, Shape{}, {0});  // sliding_window
 
+    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model, const std::string& name) -> bool {
+        for (auto& t : model->inputs()) {
+            const auto& names = t.get_names();
+            if (names.find(name) != names.end()) {
+                return true;
+            }
+        }
+
+        return false;
+    };
+
+    auto input_ids_name = has_parameter(model, "input_ids") ? "input_ids" : "inputs_embeds";
+
     std::shared_ptr<v0::Parameter> input_ids_node =
-        std::dynamic_pointer_cast<v0::Parameter>(model->input("input_ids").get_node_shared_ptr());
+        std::dynamic_pointer_cast<v0::Parameter>(model->input(input_ids_name).get_node_shared_ptr());
     input_ids_node->set_partial_shape(PartialShape{-1});
     auto unsqueezed_input_ids =
         std::make_shared<v0::Unsqueeze>(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1}));
@@ -66,17 +79,6 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
     auto prev_max_seq_len =
         std::make_shared<v1::Subtract>(max_context_len, std::make_shared<v0::Convert>(cur_seq_len, element::i32));
 
-    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model, const std::string& name) -> bool {
-        for (auto& t : model->inputs()) {
-            const auto& names = t.get_names();
-            if (names.find(name) != names.end()) {
-                return true;
-            }
-        }
-
-        return false;
-    };
-
     ParameterVector kv_parameters;
     ParameterVector parameters_to_remove;
     ResultVector results_to_remove;  // # used, but cannot really track all Results in stateless model

From ae8902ca270d1801cb2069819935a063965857f8 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 6 Nov 2024 12:39:05 +0100
Subject: [PATCH 02/22] Force push to trigger update


From 96a7f59efa4cb249bd62c63d578da3a806c54e93 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 6 Nov 2024 12:44:53 +0100
Subject: [PATCH 03/22] added tests

---
 .../generate_ref_diffs.py                     | 46 +++++++++++---
 .../models/hf-tiny-random-vl-models-precommit |  2 +
 .../transformation_tests/sdpa2pa_ref_diff.py  | 28 +++++++++
 .../test_pa_transformation.py                 | 61 +++++++++++++++----
 4 files changed, 118 insertions(+), 19 deletions(-)
 create mode 100644 tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit

diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
index 6823256b3ccfc5..a5ed450e0af898 100644
--- a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
+++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
@@ -43,9 +43,36 @@
 from openvino._offline_transformations import paged_attention_transformation
 from openvino._pyopenvino.op import _PagedAttentionExtension, Parameter, Result
 from optimum.intel import OVModelForCausalLM
+from optimum.intel.openvino import OVModelForVisualCausalLM
+from typing import Type, Union
 
 nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign")
 
+def get_models_list_type(file_name: str, cls: Union[Type[OVModelForCausalLM], Type[OVModelForVisualCausalLM]]):
+    models = []
+    for line_items in utils.parse_list_file(file_name):
+        if len(line_items) == 2:
+            model_name, model_link = line_items
+            models.append((model_name, model_link, None, None, cls))
+        elif len(line_items) == 4:
+            model_name, model_link, mark, reason = line_items
+            models.append((model_name, model_link, mark, reason))
+        elif len(line_items) > 4:
+            model_name, model_link, mark, reason = line_items[:4]
+            if not mark:
+                mark = None
+            if not reason:
+                reason = None
+            other = line_items[4:]
+            transformations = [item[8:] for item in other if item.startswith('ts_name:')]
+            layers = [item[6:] for item in other if item.startswith('layer:')]
+            models.append((model_name, model_link, mark, reason, transformations, layers))
+        else:
+            items = ','.join(line_items)
+            assert False, \
+                f'Incorrect model info fields {items}. It must contain either 2 or 4 or more than 4 fields.'
+    return models
+
 def main():
     use_cache_eviction = False
     if len(sys.argv) >= 2:
@@ -55,32 +82,37 @@ def main():
 
     if OUTPUT_FILE.exists() and OUTPUT_FILE.is_file():
         OUTPUT_FILE.unlink()
-    
+
     with open(OUTPUT_FILE, 'w') as file:
-        model_list = utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))
+        model_list = get_models_list_type(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"), OVModelForCausalLM)
+        model_list.extend(get_models_list_type(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"), OVModelForVisualCausalLM))
         print(OUTPUT_FILE)
         print('ref_diff_map_cache_eviction = {' if use_cache_eviction else 'ref_diff_map = {', file=file)
 
-        for model_id, _, _, _ in model_list:
+        for model_id, _, _, _, cls in model_list:
             # wrapping in try/catch block to continue printing models even if one has failed
             try:
-                model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
+                model = cls.from_pretrained(model_id, export=True, trust_remote_code=True)
             except:
+                print(f"Couldn't read {model_id}.")
                 continue
 
+            ov_model = model.model if cls is OVModelForCausalLM else model.lm_model
+
             before_map = {}
-            for op in model.model.get_ordered_ops():
+            for op in ov_model.get_ordered_ops():
                 if op.get_type_name() in nodes_to_compare:
                     before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1
 
             # wrapping in try/catch block to continue printing models even if one has failed
             try:
-                paged_attention_transformation(model.model, use_cache_eviction, use_cache_eviction)
+                paged_attention_transformation(ov_model, use_cache_eviction, use_cache_eviction)
             except:
+                print(f"Couldn't run SDPAToPA transformation on {model_id} and generate diffs.")
                 continue
 
             after_map = {}
-            for op in model.model.get_ordered_ops():
+            for op in ov_model.get_ordered_ops():
                 if op.get_type_name() in nodes_to_compare:
                     after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1
 
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
new file mode 100644
index 00000000000000..02bf0dd1d4d7c6
--- /dev/null
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
@@ -0,0 +1,2 @@
+katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next
+katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6
\ No newline at end of file
diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
index 23af913d9d102f..c2ab0ea51b887e 100644
--- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
+++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
@@ -305,6 +305,20 @@
 		"ReadValue" : -12,
 		"Assign" : -12,
 	},
+    "katuni4ka/tiny-random-llava-next" : {
+        "PagedAttentionExtension" : 2,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "ScaledDotProductAttention" : -2,
+        "Assign" : -4,
+    },
+    "katuni4ka/tiny-random-minicpmv-2_6" : {
+            "PagedAttentionExtension" : 2,
+            "Parameter" : 7,
+            "ReadValue" : -4,
+            "ScaledDotProductAttention" : -2,
+            "Assign" : -4,
+    },
 }
 
 ref_diff_map_cache_eviction = {
@@ -609,4 +623,18 @@
 		"Parameter" : 20,
 		"Assign" : -12,
 	},
+    "katuni4ka/tiny-random-llava-next" : {
+            "Parameter" : 8,
+            "Assign" : -4,
+            "ReadValue" : -4,
+            "PagedAttentionExtension" : 2,
+            "ScaledDotProductAttention" : -2,
+    },
+    "katuni4ka/tiny-random-minicpmv-2_6" : {
+            "Parameter" : 8,
+            "Assign" : -4,
+            "ReadValue" : -4,
+            "PagedAttentionExtension" : 2,
+            "ScaledDotProductAttention" : -2,
+    },
 }
diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
index 02481439818f28..2bc6726dff030f 100644
--- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
+++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
@@ -4,6 +4,9 @@
 from openvino._offline_transformations import paged_attention_transformation
 from openvino._pyopenvino.op import _PagedAttentionExtension
 from optimum.intel import OVModelForCausalLM
+from optimum.intel.openvino import OVModelForVisualCausalLM
+from typing import Type, Union
+import openvino as ov
 from models_hub_common.utils import retry
 import models_hub_common.utils as utils
 from sdpa2pa_ref_diff import ref_diff_map, ref_diff_map_cache_eviction, nodes_to_compare
@@ -11,19 +14,19 @@
 import os
 import re
 
-@retry(3, exceptions=(OSError,), delay=1)
-def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_outputs):
-    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
-
+def compare_diffs(ov_model: ov.Model,
+                  model_id: str,
+                  use_block_indices_inputs: bool,
+                  use_score_outputs: bool):
     before_map = {}
-    for op in model.model.get_ordered_ops():
+    for op in ov_model.get_ordered_ops():
         if op.get_type_name() in nodes_to_compare:
             before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1
 
-    paged_attention_transformation(model.model, use_block_indices_inputs, use_score_outputs)
+    paged_attention_transformation(ov_model, use_block_indices_inputs, use_score_outputs)
 
     after_map = {}
-    for op in model.model.get_ordered_ops():
+    for op in ov_model.get_ordered_ops():
         if op.get_type_name() in nodes_to_compare:
             after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1
 
@@ -38,7 +41,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
 
     assert reference_map == resulting_map
 
-    model_inputs = model.model.inputs
+    model_inputs = ov_model.inputs
     for input in model_inputs:
         names = list(input.get_names()) # names stored in as set (in this case usually of 1 element)
         for name in names:
@@ -53,7 +56,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
         block_indices_pattern = r'block_indices\.[0-9]+'
         block_indices_counter = 0
 
-        model_inputs = model.model.inputs
+        model_inputs = ov_model.inputs
         for input in model_inputs:
             for name in list(input.get_names()):
                 if re.search(block_indices_pattern, name):
@@ -66,7 +69,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
         score_pattern = r'scores\.[0-9]+'
         score_outputs_counter = 0
 
-        model_outputs = model.model.outputs
+        model_outputs = ov_model.outputs
         for output in model_outputs:
             for name in list(output.get_names()):
                 if re.search(score_pattern, name):
@@ -75,6 +78,18 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
         assert block_indices_counter == resulting_map["PagedAttentionExtension"], \
                f"The number of scores outputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {block_indices_counter}"
 
+@retry(3, exceptions=(OSError,), delay=1)
+def run_pa(tmp_path,
+           model_id,
+           model_link,
+           cls: Union[Type[OVModelForCausalLM], Type[OVModelForVisualCausalLM]],
+           use_block_indices_inputs,
+           use_score_outputs):
+    model = cls.from_pretrained(model_id, export=True, trust_remote_code=True)
+    ov_model = model.model if cls is OVModelForCausalLM else model.lm_model
+
+    compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs)
+
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
 def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device):
@@ -84,7 +99,7 @@ def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device)
         pytest.skip(reason)
     elif mark == 'xfail':
         pytest.xfail(reason)
-    run_pa(tmp_path, model_name, model_link, False, False)
+    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False)
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
@@ -95,4 +110,26 @@ def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark,
         pytest.skip(reason)
     elif mark == 'xfail':
         pytest.xfail(reason)
-    run_pa(tmp_path, model_name, model_link, True, True)
\ No newline at end of file
+    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True)
+
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
+def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False)
+
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
+def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True)
\ No newline at end of file

From be847341f75e05d8d1b42c564a495aa1a8e2b272 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 18:38:26 +0100
Subject: [PATCH 04/22] added more models

---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 54 ++++++------
 .../models/hf-tiny-random-models-precommit    | 86 +++++++++----------
 .../models/hf-tiny-random-vl-models-precommit |  6 +-
 .../transformation_tests/sdpa2pa_ref_diff.py  | 28 ++++++
 .../test_pa_transformation.py                 | 64 +++++++-------
 5 files changed, 134 insertions(+), 104 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 457f2a7138716c..1b738971cb38dc 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -53,15 +53,21 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     auto sliding_window = v0::Constant::create(element::i32, Shape{}, {0});  // sliding_window
 
-    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model, const std::string& name) -> bool {
-        for (auto& t : model->inputs()) {
-            const auto& names = t.get_names();
+    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model, const std::string& name) -> std::shared_ptr<v0::Parameter> {
+        for (auto& param : model->inputs()) {
+            const auto& names = param.get_names();
             if (names.find(name) != names.end()) {
-                return true;
+                if (auto casted_param = std::dynamic_pointer_cast<v0::Parameter>(param.get_node_shared_ptr())) {
+                    return casted_param;
+                } else {
+                    OPENVINO_THROW("The model is in the inconsistent state. Found input '",
+                                   name,
+                                   "', but couldn't cast it to v0::Parameter.");
+                }
             }
         }
 
-        return false;
+        return nullptr;
     };
 
     auto input_ids_name = has_parameter(model, "input_ids") ? "input_ids" : "inputs_embeds";
@@ -138,30 +144,22 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
     }
 
     for (auto& param_name : {"beam_idx", "attention_mask"}) {
-        if (has_parameter(model, param_name)) {
-            if (const auto& param =
-                    std::dynamic_pointer_cast<v0::Parameter>(model->input(param_name).get_node_shared_ptr())) {
-                model->remove_parameter(param);
-
-                if (param->output(0).get_target_inputs().size() == 0) {
-                    std::stringstream consumers;
-                    consumers << std::endl;
-                    for (auto& input : param->output(0).get_target_inputs()) {
-                        consumers << *input.get_node() << std::endl;
-                    }
-                    OPENVINO_ASSERT(param->output(0).get_target_inputs().size() == 0,
-                                    "PagedAttention transformation failed: couldn't remove ",
-                                    param->output(0).get_target_inputs().size(),
-                                    " inputs of ",
-                                    param_name,
-                                    " input: ",
-                                    consumers.str());
+        if (auto param = has_parameter(model, param_name)) {
+            model->remove_parameter(param);
+
+            if (param->output(0).get_target_inputs().size() == 0) {
+                std::stringstream consumers;
+                consumers << std::endl;
+                for (auto& input : param->output(0).get_target_inputs()) {
+                    consumers << *input.get_node() << std::endl;
                 }
-            } else {
-                OPENVINO_THROW("The model is in the inconsistent state. Found input '",
-                               param_name,
-                               "', but couldn't cast it to v0::Parameter.");
-                return false;
+                OPENVINO_ASSERT(param->output(0).get_target_inputs().size() == 0,
+                                "PagedAttention transformation failed: couldn't remove ",
+                                param->output(0).get_target_inputs().size(),
+                                " inputs of ",
+                                param_name,
+                                " input: ",
+                                consumers.str());
             }
         }
     }
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
index 7c89c451ea4be5..c0145f6a773c21 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
@@ -1,43 +1,43 @@
-hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM
-hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
-hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
-hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM
-hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
-hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
-hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
-hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny
-hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM
-hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM
-hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM
-hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2
-hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM
-hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM
-hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM
-hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM
-hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM
-hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM
-hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM
-hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM
-katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse
-katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b
-katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen
-katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat
-katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2
-katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe
-katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2
-katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf
-katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2
-katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais
-katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm
-katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2
-katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm
-katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b
-katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx
-fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM
-fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2
-fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing
-Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM
-facebook/opt-125m,https://huggingface.co/facebook/opt-125m
-facebook/opt-350m,https://huggingface.co/facebook/opt-350m
-katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
-katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
\ No newline at end of file
+#hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM
+#hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
+#hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
+#hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM
+#hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
+#hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
+#hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
+#hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny
+#hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM
+#hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM
+#hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM
+#hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2
+#hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM
+#hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM
+#hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM
+#hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM
+#hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM
+#hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM
+#hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM
+#hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM
+#katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse
+#katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b
+#katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen
+#katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat
+#katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2
+#katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe
+#katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2
+#katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf
+#katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2
+#katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais
+#katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm
+#katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2
+#katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm
+#katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b
+#katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx
+#fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM
+#fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2
+#fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing
+#Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM
+#facebook/opt-125m,https://huggingface.co/facebook/opt-125m
+#facebook/opt-350m,https://huggingface.co/facebook/opt-350m
+#katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
+#katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
\ No newline at end of file
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
index 02bf0dd1d4d7c6..87f7aa99d5534b 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
@@ -1,2 +1,4 @@
-katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next
-katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6
\ No newline at end of file
+#katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next
+#katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6
+katuni4ka/tiny-random-llava,https://huggingface.co/katuni4ka/tiny-random-llava
+katuni4ka/tiny-random-nanollava,https://huggingface.co/katuni4ka/tiny-random-nanollava
\ No newline at end of file
diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
index c2ab0ea51b887e..f704bb9d45f210 100644
--- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
+++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
@@ -319,6 +319,20 @@
             "ScaledDotProductAttention" : -2,
             "Assign" : -4,
     },
+    "katuni4ka/tiny-random-llava" : {
+            "Assign" : -4,
+            "Parameter" : 7,
+            "ReadValue" : -4,
+            "ScaledDotProductAttention" : -2,
+            "PagedAttentionExtension" : 2,
+    },
+    "katuni4ka/tiny-random-nanollava" : {
+            "Assign" : -4,
+            "Parameter" : 7,
+            "ReadValue" : -4,
+            "ScaledDotProductAttention" : -2,
+            "PagedAttentionExtension" : 2,
+    },
 }
 
 ref_diff_map_cache_eviction = {
@@ -637,4 +651,18 @@
             "PagedAttentionExtension" : 2,
             "ScaledDotProductAttention" : -2,
     },
+    "katuni4ka/tiny-random-llava" : {
+        "ReadValue" : -4,
+        "Parameter" : 8,
+        "ScaledDotProductAttention" : -2,
+        "PagedAttentionExtension" : 2,
+        "Assign" : -4,
+	},
+	"katuni4ka/tiny-random-nanollava" : {
+        "ReadValue" : -4,
+        "Parameter" : 8,
+        "ScaledDotProductAttention" : -2,
+        "PagedAttentionExtension" : 2,
+        "Assign" : -4,
+	},
 }
diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
index 2bc6726dff030f..19a04361e0a669 100644
--- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
+++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
@@ -90,31 +90,33 @@ def run_pa(tmp_path,
 
     compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs)
 
-@pytest.mark.precommit
-@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
-def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device):
-    assert mark is None or mark == 'skip' or mark == 'xfail', \
-        "Incorrect test case: {}, {}".format(model_name, model_link)
-    if mark == 'skip':
-        pytest.skip(reason)
-    elif mark == 'xfail':
-        pytest.xfail(reason)
-    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False)
-
-@pytest.mark.precommit
-@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
-def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
-    assert mark is None or mark == 'skip' or mark == 'xfail', \
-        "Incorrect test case: {}, {}".format(model_name, model_link)
-    if mark == 'skip':
-        pytest.skip(reason)
-    elif mark == 'xfail':
-        pytest.xfail(reason)
-    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True)
+#@pytest.mark.precommit
+#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
+#def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device):
+#    assert mark is None or mark == 'skip' or mark == 'xfail', \
+#        "Incorrect test case: {}, {}".format(model_name, model_link)
+#    if mark == 'skip':
+#        pytest.skip(reason)
+#    elif mark == 'xfail':
+#        pytest.xfail(reason)
+#    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False)
+#
+#@pytest.mark.precommit
+#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
+#def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
+#    assert mark is None or mark == 'skip' or mark == 'xfail', \
+#        "Incorrect test case: {}, {}".format(model_name, model_link)
+#    if mark == 'skip':
+#        pytest.skip(reason)
+#    elif mark == 'xfail':
+#        pytest.xfail(reason)
+#    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True)
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
 def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device):
+    if ie_device == 'GPU':
+        pytest.skip("SKIPPING GPU")
     assert mark is None or mark == 'skip' or mark == 'xfail', \
         "Incorrect test case: {}, {}".format(model_name, model_link)
     if mark == 'skip':
@@ -123,13 +125,13 @@ def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device):
         pytest.xfail(reason)
     run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False)
 
-@pytest.mark.precommit
-@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
-def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
-    assert mark is None or mark == 'skip' or mark == 'xfail', \
-        "Incorrect test case: {}, {}".format(model_name, model_link)
-    if mark == 'skip':
-        pytest.skip(reason)
-    elif mark == 'xfail':
-        pytest.xfail(reason)
-    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True)
\ No newline at end of file
+#@pytest.mark.precommit
+#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
+#def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
+#    assert mark is None or mark == 'skip' or mark == 'xfail', \
+#        "Incorrect test case: {}, {}".format(model_name, model_link)
+#    if mark == 'skip':
+#        pytest.skip(reason)
+#    elif mark == 'xfail':
+#        pytest.xfail(reason)
+#    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True)
\ No newline at end of file

From e41adc675a3ade813a1766b134b6fa7c282701c3 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 18:49:59 +0100
Subject: [PATCH 05/22] uncomment

---
 .../models/hf-tiny-random-models-precommit    | 86 +++++++++----------
 .../models/hf-tiny-random-vl-models-precommit |  4 +-
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
index c0145f6a773c21..7c89c451ea4be5 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
@@ -1,43 +1,43 @@
-#hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM
-#hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
-#hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
-#hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM
-#hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
-#hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
-#hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
-#hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny
-#hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM
-#hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM
-#hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM
-#hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2
-#hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM
-#hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM
-#hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM
-#hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM
-#hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM
-#hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM
-#hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM
-#hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM
-#katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse
-#katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b
-#katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen
-#katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat
-#katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2
-#katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe
-#katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2
-#katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf
-#katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2
-#katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais
-#katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm
-#katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2
-#katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm
-#katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b
-#katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx
-#fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM
-#fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2
-#fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing
-#Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM
-#facebook/opt-125m,https://huggingface.co/facebook/opt-125m
-#facebook/opt-350m,https://huggingface.co/facebook/opt-350m
-#katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
-#katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
\ No newline at end of file
+hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM
+hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
+hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
+hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM
+hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
+hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
+hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
+hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny
+hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM
+hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM
+hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM
+hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2
+hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM
+hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM
+hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM
+hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM
+hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM
+hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM
+hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM
+hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM
+katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse
+katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b
+katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen
+katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat
+katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2
+katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe
+katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2
+katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf
+katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2
+katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais
+katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm
+katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2
+katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm
+katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b
+katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx
+fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM
+fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2
+fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing
+Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM
+facebook/opt-125m,https://huggingface.co/facebook/opt-125m
+facebook/opt-350m,https://huggingface.co/facebook/opt-350m
+katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
+katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
\ No newline at end of file
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
index 87f7aa99d5534b..7cdd3fdb3527be 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit
@@ -1,4 +1,4 @@
-#katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next
-#katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6
+katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next
+katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6
 katuni4ka/tiny-random-llava,https://huggingface.co/katuni4ka/tiny-random-llava
 katuni4ka/tiny-random-nanollava,https://huggingface.co/katuni4ka/tiny-random-nanollava
\ No newline at end of file

From f1a3f5f6fcb68696c16228ce25be410a837ad9bd Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 18:51:02 +0100
Subject: [PATCH 06/22] uncomment v2

---
 .../test_pa_transformation.py                 | 64 +++++++++----------
 1 file changed, 31 insertions(+), 33 deletions(-)

diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
index 19a04361e0a669..2bc6726dff030f 100644
--- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
+++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py
@@ -90,33 +90,31 @@ def run_pa(tmp_path,
 
     compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs)
 
-#@pytest.mark.precommit
-#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
-#def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device):
-#    assert mark is None or mark == 'skip' or mark == 'xfail', \
-#        "Incorrect test case: {}, {}".format(model_name, model_link)
-#    if mark == 'skip':
-#        pytest.skip(reason)
-#    elif mark == 'xfail':
-#        pytest.xfail(reason)
-#    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False)
-#
-#@pytest.mark.precommit
-#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
-#def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
-#    assert mark is None or mark == 'skip' or mark == 'xfail', \
-#        "Incorrect test case: {}, {}".format(model_name, model_link)
-#    if mark == 'skip':
-#        pytest.skip(reason)
-#    elif mark == 'xfail':
-#        pytest.xfail(reason)
-#    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True)
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
+def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False)
+
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))
+def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True)
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
 def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device):
-    if ie_device == 'GPU':
-        pytest.skip("SKIPPING GPU")
     assert mark is None or mark == 'skip' or mark == 'xfail', \
         "Incorrect test case: {}, {}".format(model_name, model_link)
     if mark == 'skip':
@@ -125,13 +123,13 @@ def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device):
         pytest.xfail(reason)
     run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False)
 
-#@pytest.mark.precommit
-#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
-#def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
-#    assert mark is None or mark == 'skip' or mark == 'xfail', \
-#        "Incorrect test case: {}, {}".format(model_name, model_link)
-#    if mark == 'skip':
-#        pytest.skip(reason)
-#    elif mark == 'xfail':
-#        pytest.xfail(reason)
-#    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True)
\ No newline at end of file
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit")))
+def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True)
\ No newline at end of file

From c461407c732c74435030c0800b2ceddd51cc5fb3 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 18:53:38 +0100
Subject: [PATCH 07/22] fix indent

---
 .../transformation_tests/sdpa2pa_ref_diff.py  | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
index f704bb9d45f210..85b4fd860dda53 100644
--- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
+++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
@@ -638,18 +638,18 @@
 		"Assign" : -12,
 	},
     "katuni4ka/tiny-random-llava-next" : {
-            "Parameter" : 8,
-            "Assign" : -4,
-            "ReadValue" : -4,
-            "PagedAttentionExtension" : 2,
-            "ScaledDotProductAttention" : -2,
+        "Parameter" : 8,
+        "Assign" : -4,
+        "ReadValue" : -4,
+        "PagedAttentionExtension" : 2,
+        "ScaledDotProductAttention" : -2,
     },
     "katuni4ka/tiny-random-minicpmv-2_6" : {
-            "Parameter" : 8,
-            "Assign" : -4,
-            "ReadValue" : -4,
-            "PagedAttentionExtension" : 2,
-            "ScaledDotProductAttention" : -2,
+        "Parameter" : 8,
+        "Assign" : -4,
+        "ReadValue" : -4,
+        "PagedAttentionExtension" : 2,
+        "ScaledDotProductAttention" : -2,
     },
     "katuni4ka/tiny-random-llava" : {
         "ReadValue" : -4,

From 8f7e81af950071582b8969dc264c8a70724f154f Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 18:57:19 +0100
Subject: [PATCH 08/22] indent

---
 .../transformation_tests/sdpa2pa_ref_diff.py  | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
index 85b4fd860dda53..acc46ab936f146 100644
--- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
+++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
@@ -313,25 +313,25 @@
         "Assign" : -4,
     },
     "katuni4ka/tiny-random-minicpmv-2_6" : {
-            "PagedAttentionExtension" : 2,
-            "Parameter" : 7,
-            "ReadValue" : -4,
-            "ScaledDotProductAttention" : -2,
-            "Assign" : -4,
+        "PagedAttentionExtension" : 2,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "ScaledDotProductAttention" : -2,
+        "Assign" : -4,
     },
     "katuni4ka/tiny-random-llava" : {
-            "Assign" : -4,
-            "Parameter" : 7,
-            "ReadValue" : -4,
-            "ScaledDotProductAttention" : -2,
-            "PagedAttentionExtension" : 2,
+        "Assign" : -4,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "ScaledDotProductAttention" : -2,
+        "PagedAttentionExtension" : 2,
     },
     "katuni4ka/tiny-random-nanollava" : {
-            "Assign" : -4,
-            "Parameter" : 7,
-            "ReadValue" : -4,
-            "ScaledDotProductAttention" : -2,
-            "PagedAttentionExtension" : 2,
+        "Assign" : -4,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "ScaledDotProductAttention" : -2,
+        "PagedAttentionExtension" : 2,
     },
 }
 

From d2558222735c3ba8b99ba464f4eeb7940520c536 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Tue, 12 Nov 2024 19:04:04 +0100
Subject: [PATCH 09/22] new indent

---
 .../transformation_tests/sdpa2pa_ref_diff.py  | 120 +++++++++---------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
index acc46ab936f146..01112e254fed99 100644
--- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
+++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py
@@ -305,34 +305,34 @@
 		"ReadValue" : -12,
 		"Assign" : -12,
 	},
-    "katuni4ka/tiny-random-llava-next" : {
-        "PagedAttentionExtension" : 2,
-        "Parameter" : 7,
-        "ReadValue" : -4,
-        "ScaledDotProductAttention" : -2,
-        "Assign" : -4,
-    },
-    "katuni4ka/tiny-random-minicpmv-2_6" : {
-        "PagedAttentionExtension" : 2,
-        "Parameter" : 7,
-        "ReadValue" : -4,
-        "ScaledDotProductAttention" : -2,
-        "Assign" : -4,
-    },
-    "katuni4ka/tiny-random-llava" : {
-        "Assign" : -4,
-        "Parameter" : 7,
-        "ReadValue" : -4,
-        "ScaledDotProductAttention" : -2,
-        "PagedAttentionExtension" : 2,
-    },
-    "katuni4ka/tiny-random-nanollava" : {
-        "Assign" : -4,
-        "Parameter" : 7,
-        "ReadValue" : -4,
-        "ScaledDotProductAttention" : -2,
-        "PagedAttentionExtension" : 2,
-    },
+	"katuni4ka/tiny-random-llava-next" : {
+		"PagedAttentionExtension" : 2,
+		"Parameter" : 7,
+		"ReadValue" : -4,
+		"ScaledDotProductAttention" : -2,
+		"Assign" : -4,
+	},
+	"katuni4ka/tiny-random-minicpmv-2_6" : {
+		"PagedAttentionExtension" : 2,
+		"Parameter" : 7,
+		"ReadValue" : -4,
+		"ScaledDotProductAttention" : -2,
+		"Assign" : -4,
+	},
+	"katuni4ka/tiny-random-llava" : {
+		"Assign" : -4,
+		"Parameter" : 7,
+		"ReadValue" : -4,
+		"ScaledDotProductAttention" : -2,
+		"PagedAttentionExtension" : 2,
+	},
+	"katuni4ka/tiny-random-nanollava" : {
+		"Assign" : -4,
+		"Parameter" : 7,
+		"ReadValue" : -4,
+		"ScaledDotProductAttention" : -2,
+		"PagedAttentionExtension" : 2,
+	},
 }
 
 ref_diff_map_cache_eviction = {
@@ -560,13 +560,13 @@
 		"Parameter" : 14,
 		"Assign" : -8,
 	},
-    "katuni4ka/tiny-random-minicpm" : {
-        "ScaledDotProductAttention" : -4,
-        "Parameter" : 14,
-        "PagedAttentionExtension" : 4,
-        "ReadValue" : -8,
-        "Assign" : -8,
-    },
+	"katuni4ka/tiny-random-minicpm" : {
+		"ScaledDotProductAttention" : -4,
+		"Parameter" : 14,
+		"PagedAttentionExtension" : 4,
+		"ReadValue" : -8,
+		"Assign" : -8,
+	},
 	"katuni4ka/tiny-random-falcon-40b" : {
 		"ScaledDotProductAttention" : -2,
 		"ReadValue" : -4,
@@ -637,32 +637,32 @@
 		"Parameter" : 20,
 		"Assign" : -12,
 	},
-    "katuni4ka/tiny-random-llava-next" : {
-        "Parameter" : 8,
-        "Assign" : -4,
-        "ReadValue" : -4,
-        "PagedAttentionExtension" : 2,
-        "ScaledDotProductAttention" : -2,
-    },
-    "katuni4ka/tiny-random-minicpmv-2_6" : {
-        "Parameter" : 8,
-        "Assign" : -4,
-        "ReadValue" : -4,
-        "PagedAttentionExtension" : 2,
-        "ScaledDotProductAttention" : -2,
-    },
-    "katuni4ka/tiny-random-llava" : {
-        "ReadValue" : -4,
-        "Parameter" : 8,
-        "ScaledDotProductAttention" : -2,
-        "PagedAttentionExtension" : 2,
-        "Assign" : -4,
+	"katuni4ka/tiny-random-llava-next" : {
+		"Parameter" : 8,
+		"Assign" : -4,
+		"ReadValue" : -4,
+		"PagedAttentionExtension" : 2,
+		"ScaledDotProductAttention" : -2,
+	},
+	"katuni4ka/tiny-random-minicpmv-2_6" : {
+		"Parameter" : 8,
+		"Assign" : -4,
+		"ReadValue" : -4,
+		"PagedAttentionExtension" : 2,
+		"ScaledDotProductAttention" : -2,
+	},
+	"katuni4ka/tiny-random-llava" : {
+		"ReadValue" : -4,
+		"Parameter" : 8,
+		"ScaledDotProductAttention" : -2,
+		"PagedAttentionExtension" : 2,
+		"Assign" : -4,
 	},
 	"katuni4ka/tiny-random-nanollava" : {
-        "ReadValue" : -4,
-        "Parameter" : 8,
-        "ScaledDotProductAttention" : -2,
-        "PagedAttentionExtension" : 2,
-        "Assign" : -4,
+		"ReadValue" : -4,
+		"Parameter" : 8,
+		"ScaledDotProductAttention" : -2,
+		"PagedAttentionExtension" : 2,
+		"Assign" : -4,
 	},
 }

From 8dc6631a628c81076413090a5d8a732455074788 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 10:34:42 +0100
Subject: [PATCH 10/22] code style

---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 3 ++-
 tests/requirements_pytorch                    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 1b738971cb38dc..1babfc5b4ed80c 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -53,7 +53,8 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     auto sliding_window = v0::Constant::create(element::i32, Shape{}, {0});  // sliding_window
 
-    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model, const std::string& name) -> std::shared_ptr<v0::Parameter> {
+    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model,
+                             const std::string& name) -> std::shared_ptr<v0::Parameter> {
         for (auto& param : model->inputs()) {
             const auto& names = param.get_names();
             if (names.find(name) != names.end()) {
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index 56446beba12600..0b51189a0ae98a 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -55,3 +55,4 @@ rjieba==0.1.11
 # - katuni4ka/tiny-random-internlm2
 transformers_stream_generator==0.0.5
 einops==0.8.0
+flash_attn==2.6.3
\ No newline at end of file

From a1eef49252e5591f3211e9a7be7f971b6b94aa9d Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 12:17:18 +0100
Subject: [PATCH 11/22] via link

---
 tests/requirements_pytorch | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index 0b51189a0ae98a..dae4e45b721891 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -55,4 +55,5 @@ rjieba==0.1.11
 # - katuni4ka/tiny-random-internlm2
 transformers_stream_generator==0.0.5
 einops==0.8.0
-flash_attn==2.6.3
\ No newline at end of file
+
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
\ No newline at end of file

From 6f63934b3f0e6018929302d71239a3461590c75d Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 12:48:11 +0100
Subject: [PATCH 12/22] install flash_attn separately

---
 .github/workflows/job_pytorch_layer_tests.yml | 5 +++++
 tests/requirements_pytorch                    | 4 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index b0eba0a278e582..bc12af7b49ab62 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -118,6 +118,11 @@ jobs:
         run: |
           # pytorch test requirements
           python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch
+      
+      - name: Install flash_attn module
+        run: |
+          # due to flash_attn issues, it needs to be installed separately from other packages
+          pip install flash_attn
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index dae4e45b721891..f90d43590149da 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -54,6 +54,4 @@ rjieba==0.1.11
 # - katuni4ka/tiny-random-qwen
 # - katuni4ka/tiny-random-internlm2
 transformers_stream_generator==0.0.5
-einops==0.8.0
-
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
\ No newline at end of file
+einops==0.8.0
\ No newline at end of file

From 5b05187226259a0e53f9dff42112c64fa619e37e Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 13:14:05 +0100
Subject: [PATCH 13/22] no-build-isolation

---
 .github/workflows/job_pytorch_layer_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index bc12af7b49ab62..a7ef5210e5e05b 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -122,7 +122,7 @@ jobs:
       - name: Install flash_attn module
         run: |
           # due to flash_attn issues, it needs to be installed separately from other packages
-          pip install flash_attn
+          pip install flash_attn --no-build-isolation
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196

From ecb5f474f980e95a4852d36decfbc043d18b7228 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 17:28:51 +0100
Subject: [PATCH 14/22] use another link

---
 .github/workflows/job_pytorch_layer_tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index a7ef5210e5e05b..3c49830f073af6 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -122,7 +122,8 @@ jobs:
       - name: Install flash_attn module
         run: |
           # due to flash_attn issues, it needs to be installed separately from other packages
-          pip install flash_attn --no-build-isolation
+          # pip install flash_attn --no-build-isolation
+          pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196

From aabe792d58151bcaeef335744aba025bd727ff3a Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 18:14:45 +0100
Subject: [PATCH 15/22] use another flash_attn

---
 .github/workflows/job_pytorch_layer_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index 3c49830f073af6..dbf0e1913c3db1 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -123,7 +123,7 @@ jobs:
         run: |
           # due to flash_attn issues, it needs to be installed separately from other packages
           # pip install flash_attn --no-build-isolation
-          pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
+          pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post1/flash_attn-2.7.0.post1+cu12torch2.5cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196

From aef65609e1ae2bf5d03c4a2827b5fea626b433a9 Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Wed, 13 Nov 2024 18:44:39 +0100
Subject: [PATCH 16/22] skip cuda

---
 .github/workflows/job_pytorch_layer_tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index dbf0e1913c3db1..4eeed787d1678e 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -122,8 +122,8 @@ jobs:
       - name: Install flash_attn module
         run: |
           # due to flash_attn issues, it needs to be installed separately from other packages
-          # pip install flash_attn --no-build-isolation
-          pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post1/flash_attn-2.7.0.post1+cu12torch2.5cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
+          export FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE
+          pip install flash_attn --no-build-isolation
 
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196

From 32aaabbfe503aad518391ef3600494f12c5871bc Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Thu, 14 Nov 2024 10:57:02 +0100
Subject: [PATCH 17/22] remove flash_attn

---
 .github/workflows/job_pytorch_layer_tests.yml | 6 ------
 tests/requirements_pytorch                    | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index 4eeed787d1678e..5f7e678cc37f95 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -119,12 +119,6 @@ jobs:
           # pytorch test requirements
           python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch
       
-      - name: Install flash_attn module
-        run: |
-          # due to flash_attn issues, it needs to be installed separately from other packages
-          export FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE
-          pip install flash_attn --no-build-isolation
-
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196
         # due to CVS-152795, parallel run is not possible on Windows
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index f90d43590149da..56446beba12600 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -54,4 +54,4 @@ rjieba==0.1.11
 # - katuni4ka/tiny-random-qwen
 # - katuni4ka/tiny-random-internlm2
 transformers_stream_generator==0.0.5
-einops==0.8.0
\ No newline at end of file
+einops==0.8.0

From 89ad50fd95d5f94822d0f488570efdf22aba278f Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Thu, 14 Nov 2024 12:54:54 +0100
Subject: [PATCH 18/22] use new transformers

---
 .github/workflows/job_pytorch_layer_tests.yml | 2 +-
 tests/requirements_pytorch                    | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index 5f7e678cc37f95..b0eba0a278e582 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -118,7 +118,7 @@ jobs:
         run: |
           # pytorch test requirements
           python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch
-      
+
       - name: PyTorch Layer Tests
         if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196
         # due to CVS-152795, parallel run is not possible on Windows
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index 56446beba12600..deb851a96b76b2 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -11,9 +11,7 @@ torchvision==0.20.1; platform_system != "Darwin" or platform_machine != "x86_64"
 torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
 torchaudio==2.5.1; platform_system != "Darwin" or platform_machine != "x86_64"
 torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64"
-# transformers 4.45.1 is available
-# but optimum still requires <4.45.0
-transformers==4.44.2
+transformers==4.46.2
 pytest==7.0.1
 pytest-html==4.1.1
 pytest-xdist[psutil]==3.6.1

From c3ca9e44626512fdbf6033e396b2be625474c96e Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Thu, 14 Nov 2024 16:43:57 +0100
Subject: [PATCH 19/22] disable models

---
 src/core/src/pass/sdpa_to_paged_attention.cpp          |  8 ++++----
 .../models/hf-tiny-random-models-precommit             | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 1babfc5b4ed80c..ee501cbfe20822 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -28,7 +28,7 @@ static std::shared_ptr<v0::Parameter> setName(std::shared_ptr<v0::Parameter> nod
     // Set name for both node and output tensor (should be only one tensor, and any other names will be overriden by a
     // given single name)
     node->set_friendly_name(name);
-    OPENVINO_ASSERT(node->get_output_size() == 1);  // Should I use assert here?
+    OPENVINO_ASSERT(node->get_output_size() == 1);
     node->get_output_tensor(0).set_names({name});
     return node;
 }
@@ -55,10 +55,10 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     auto has_parameter = [=](const std::shared_ptr<ov::Model>& model,
                              const std::string& name) -> std::shared_ptr<v0::Parameter> {
-        for (auto& param : model->inputs()) {
+        for (const auto& param : model->inputs()) {
             const auto& names = param.get_names();
-            if (names.find(name) != names.end()) {
-                if (auto casted_param = std::dynamic_pointer_cast<v0::Parameter>(param.get_node_shared_ptr())) {
+            if (names.count(name)) {
+                if (auto casted_param = ov::as_type_ptr<v0::Parameter>(param.get_node_shared_ptr())) {
                     return casted_param;
                 } else {
                     OPENVINO_THROW("The model is in the inconsistent state. Found input '",
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
index 7c89c451ea4be5..df8ea51874094b 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
@@ -1,8 +1,8 @@
 hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM
 hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
 hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
-hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM
-hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
+hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM,xfail,CVS-157416
+hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLMk
 hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
 hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
 hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny
@@ -17,7 +17,7 @@ hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-interna
 hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM
 hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM
 hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM
-hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM
+hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM,xfail,CVS-157416
 katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse
 katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b
 katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen
@@ -37,7 +37,7 @@ fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-
 fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2
 fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing
 Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM
-facebook/opt-125m,https://huggingface.co/facebook/opt-125m
-facebook/opt-350m,https://huggingface.co/facebook/opt-350m
+facebook/opt-125m,https://huggingface.co/facebook/opt-125m,xfail,CVS-157416
+facebook/opt-350m,https://huggingface.co/facebook/opt-350m,xfail,CVS-157416
 katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
 katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
\ No newline at end of file

From 8831a8327e24767da0b4428482af357f306165be Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Thu, 14 Nov 2024 16:51:29 +0100
Subject: [PATCH 20/22] fix stray 'k' in GPTNeoX model link

---
 .../transformation_tests/models/hf-tiny-random-models-precommit | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
index df8ea51874094b..0cf588ca25edb4 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
@@ -2,7 +2,7 @@ hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-inte
 hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM
 hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM
 hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM,xfail,CVS-157416
-hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLMk
+hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM
 hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM
 hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM
 hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny

From 94037c169d94927cb6cd4227919177b44e55829b Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Fri, 15 Nov 2024 10:55:08 +0100
Subject: [PATCH 21/22] minor review concerns

---
 src/core/src/pass/sdpa_to_paged_attention.cpp  | 18 ++++++++++++------
 .../transformation_tests/generate_ref_diffs.py |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index ee501cbfe20822..d52b0f7cc5967d 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -53,7 +53,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     auto sliding_window = v0::Constant::create(element::i32, Shape{}, {0});  // sliding_window
 
-    auto has_parameter = [=](const std::shared_ptr<ov::Model>& model,
+    auto get_parameter = [=](const std::shared_ptr<ov::Model>& model,
                              const std::string& name) -> std::shared_ptr<v0::Parameter> {
         for (const auto& param : model->inputs()) {
             const auto& names = param.get_names();
@@ -71,10 +71,16 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
         return nullptr;
     };
 
-    auto input_ids_name = has_parameter(model, "input_ids") ? "input_ids" : "inputs_embeds";
+    std::shared_ptr<v0::Parameter> input_ids_node;
+    for (const auto& name : {"input_ids", "inputs_embeds"}) {
+        input_ids_node = get_parameter(model, name);
+    }
+
+    if (!input_ids_node) {
+        OPENVINO_THROW("The model doesn't contain input_ids or inputs_embeds input. Aborting.");
+        return false;
+    }
 
-    std::shared_ptr<v0::Parameter> input_ids_node =
-        std::dynamic_pointer_cast<v0::Parameter>(model->input(input_ids_name).get_node_shared_ptr());
     input_ids_node->set_partial_shape(PartialShape{-1});
     auto unsqueezed_input_ids =
         std::make_shared<v0::Unsqueeze>(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1}));
@@ -93,7 +99,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
     ResultVector score_results;
 
     std::shared_ptr<v0::Parameter> position_ids;
-    if (!has_parameter(model, "position_ids")) {
+    if (!get_parameter(model, "position_ids")) {
         position_ids = setName(std::make_shared<v0::Parameter>(element::i64, PartialShape{-1}), "position_ids");
         model->add_parameters({position_ids});
     } else {
@@ -145,7 +151,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
     }
 
     for (auto& param_name : {"beam_idx", "attention_mask"}) {
-        if (auto param = has_parameter(model, param_name)) {
+        if (auto param = get_parameter(model, param_name)) {
             model->remove_parameter(param);
 
             if (param->output(0).get_target_inputs().size() == 0) {
diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
index a5ed450e0af898..72051783fa7422 100644
--- a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
+++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
@@ -58,7 +58,7 @@ def get_models_list_type(file_name: str, cls: Union[Type[OVModelForCausalLM], Ty
             model_name, model_link, mark, reason = line_items
             models.append((model_name, model_link, mark, reason))
         elif len(line_items) > 4:
-            model_name, model_link, mark, reason = line_items[:4]
+            model_name, model_link, mark, reason, *other = line_items
             if not mark:
                 mark = None
             if not reason:

From 541b7145ccada7a9b08de62a0ab33d5afadf47dd Mon Sep 17 00:00:00 2001
From: Andrii Staikov <andrii.staikov@intel.com>
Date: Fri, 15 Nov 2024 10:58:23 +0100
Subject: [PATCH 22/22] break out of input lookup loop on first match

---
 src/core/src/pass/sdpa_to_paged_attention.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index d52b0f7cc5967d..1c43795151cab7 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -73,7 +73,9 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr<ov::Mode
 
     std::shared_ptr<v0::Parameter> input_ids_node;
     for (const auto& name : {"input_ids", "inputs_embeds"}) {
-        input_ids_node = get_parameter(model, name);
+        if (input_ids_node = get_parameter(model, name)) {
+            break;
+        }
     }
 
     if (!input_ids_node) {