From 5e1833dad1e65f2b06b2ecf73a0d256e6ea074bb Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Mon, 21 Oct 2024 10:56:22 +0200 Subject: [PATCH 01/22] [TRANSFROMATIONS] Add support for 'input_embeds'input in SDPAToPA Add support for 'input_embeds' input in SDPAToPA transformation. The input is used in VLM instead of 'input_ids' in text-only models. The changes enable support of the SDPAToPA transformation for the following models: * llava-hf/llava-1.5-7b-hf * llava-hf/llava-v1.6-mistral-7b-hf * llava-hf/llava-v1.6-vicuna-7b-hf * llava-hf/llama3-llava-next-8b-hf * openbmb/MiniCPM-V-2_6 Signed-off-by: Andrii Staikov - Tickets: * CVS-152288 --- src/core/src/pass/sdpa_to_paged_attention.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 8b19b07f2f5d76..457f2a7138716c 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -53,8 +53,21 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr& model, const std::string& name) -> bool { + for (auto& t : model->inputs()) { + const auto& names = t.get_names(); + if (names.find(name) != names.end()) { + return true; + } + } + + return false; + }; + + auto input_ids_name = has_parameter(model, "input_ids") ? "input_ids" : "inputs_embeds"; + std::shared_ptr input_ids_node = - std::dynamic_pointer_cast(model->input("input_ids").get_node_shared_ptr()); + std::dynamic_pointer_cast(model->input(input_ids_name).get_node_shared_ptr()); input_ids_node->set_partial_shape(PartialShape{-1}); auto unsqueezed_input_ids = std::make_shared(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1})); @@ -66,17 +79,6 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr(max_context_len, std::make_shared(cur_seq_len, element::i32)); - auto has_parameter = [=](const std::shared_ptr& model, const std::string& name) -> bool { - for (auto& t : model->inputs()) { - const auto& names = t.get_names(); - if (names.find(name) != names.end()) { - return true; - } - } - - return false; - }; - ParameterVector kv_parameters; ParameterVector parameters_to_remove; ResultVector results_to_remove; // # used, but cannot really track all Results in stateless model From ae8902ca270d1801cb2069819935a063965857f8 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 6 Nov 2024 12:39:05 +0100 Subject: [PATCH 02/22] Force push to trigger update From 96a7f59efa4cb249bd62c63d578da3a806c54e93 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 6 Nov 2024 12:44:53 +0100 Subject: [PATCH 03/22] added tests --- .../generate_ref_diffs.py | 46 +++++++++++--- .../models/hf-tiny-random-vl-models-precommit | 2 + .../transformation_tests/sdpa2pa_ref_diff.py | 28 +++++++++ .../test_pa_transformation.py | 61 +++++++++++++++---- 4 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py index 6823256b3ccfc5..a5ed450e0af898 100644 --- a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py +++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py @@ -43,9 +43,36 @@ from openvino._offline_transformations import paged_attention_transformation from openvino._pyopenvino.op import _PagedAttentionExtension, Parameter, Result from optimum.intel import OVModelForCausalLM +from optimum.intel.openvino import OVModelForVisualCausalLM +from typing import Type, Union nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign") +def get_models_list_type(file_name: str, cls: Union[Type[OVModelForCausalLM], Type[OVModelForVisualCausalLM]]): + models = [] + for line_items in utils.parse_list_file(file_name): + if len(line_items) == 2: + model_name, model_link = line_items + models.append((model_name, model_link, None, None, cls)) + elif len(line_items) == 4: + model_name, model_link, mark, reason = line_items + models.append((model_name, model_link, mark, reason)) + elif len(line_items) > 4: + model_name, model_link, mark, reason = line_items[:4] + if not mark: + mark = None + if not reason: + reason = None + other = line_items[4:] + transformations = [item[8:] for item in other if item.startswith('ts_name:')] + layers = [item[6:] for item in other if item.startswith('layer:')] + models.append((model_name, model_link, mark, reason, transformations, layers)) + else: + items = ','.join(line_items) + assert False, \ + f'Incorrect model info fields {items}. It must contain either 2 or 4 or more than 4 fields.' + return models + def main(): use_cache_eviction = False if len(sys.argv) >= 2: @@ -55,32 +82,37 @@ def main(): if OUTPUT_FILE.exists() and OUTPUT_FILE.is_file(): OUTPUT_FILE.unlink() - + with open(OUTPUT_FILE, 'w') as file: - model_list = utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")) + model_list = get_models_list_type(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"), OVModelForCausalLM) + model_list.extend(get_models_list_type(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"), OVModelForVisualCausalLM)) print(OUTPUT_FILE) print('ref_diff_map_cache_eviction = {' if use_cache_eviction else 'ref_diff_map = {', file=file) - for model_id, _, _, _ in model_list: + for model_id, _, _, _, cls in model_list: # wrapping in try/catch block to continue printing models even if one has failed try: - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True) + model = cls.from_pretrained(model_id, export=True, trust_remote_code=True) except: + print(f"Couldn't read {model_id}.") continue + ov_model = model.model if cls is OVModelForCausalLM else model.lm_model + before_map = {} - for op in model.model.get_ordered_ops(): + for op in ov_model.get_ordered_ops(): if op.get_type_name() in nodes_to_compare: before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1 # wrapping in try/catch block to continue printing models even if one has failed try: - paged_attention_transformation(model.model, use_cache_eviction, use_cache_eviction) + paged_attention_transformation(ov_model, use_cache_eviction, use_cache_eviction) except: + print(f"Couldn't run SDPAToPA transformation on {model_id} and generate diffs.") continue after_map = {} - for op in model.model.get_ordered_ops(): + for op in ov_model.get_ordered_ops(): if op.get_type_name() in nodes_to_compare: after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1 diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit new file mode 100644 index 00000000000000..02bf0dd1d4d7c6 --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit @@ -0,0 +1,2 @@ +katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next +katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6 \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py index 23af913d9d102f..c2ab0ea51b887e 100644 --- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -305,6 +305,20 @@ "ReadValue" : -12, "Assign" : -12, }, + "katuni4ka/tiny-random-llava-next" : { + "PagedAttentionExtension" : 2, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "Assign" : -4, + }, + "katuni4ka/tiny-random-minicpmv-2_6" : { + "PagedAttentionExtension" : 2, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "Assign" : -4, + }, } ref_diff_map_cache_eviction = { @@ -609,4 +623,18 @@ "Parameter" : 20, "Assign" : -12, }, + "katuni4ka/tiny-random-llava-next" : { + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + }, + "katuni4ka/tiny-random-minicpmv-2_6" : { + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + }, } diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py index 02481439818f28..2bc6726dff030f 100644 --- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py +++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py @@ -4,6 +4,9 @@ from openvino._offline_transformations import paged_attention_transformation from openvino._pyopenvino.op import _PagedAttentionExtension from optimum.intel import OVModelForCausalLM +from optimum.intel.openvino import OVModelForVisualCausalLM +from typing import Type, Union +import openvino as ov from models_hub_common.utils import retry import models_hub_common.utils as utils from sdpa2pa_ref_diff import ref_diff_map, ref_diff_map_cache_eviction, nodes_to_compare @@ -11,19 +14,19 @@ import os import re -@retry(3, exceptions=(OSError,), delay=1) -def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_outputs): - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True) - +def compare_diffs(ov_model: ov.Model, + model_id: str, + use_block_indices_inputs: bool, + use_score_outputs: bool): before_map = {} - for op in model.model.get_ordered_ops(): + for op in ov_model.get_ordered_ops(): if op.get_type_name() in nodes_to_compare: before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1 - paged_attention_transformation(model.model, use_block_indices_inputs, use_score_outputs) + paged_attention_transformation(ov_model, use_block_indices_inputs, use_score_outputs) after_map = {} - for op in model.model.get_ordered_ops(): + for op in ov_model.get_ordered_ops(): if op.get_type_name() in nodes_to_compare: after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1 @@ -38,7 +41,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o assert reference_map == resulting_map - model_inputs = model.model.inputs + model_inputs = ov_model.inputs for input in model_inputs: names = list(input.get_names()) # names stored in as set (in this case usually of 1 element) for name in names: @@ -53,7 +56,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o block_indices_pattern = r'block_indices\.[0-9]+' block_indices_counter = 0 - model_inputs = model.model.inputs + model_inputs = ov_model.inputs for input in model_inputs: for name in list(input.get_names()): if re.search(block_indices_pattern, name): @@ -66,7 +69,7 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o score_pattern = r'scores\.[0-9]+' score_outputs_counter = 0 - model_outputs = model.model.outputs + model_outputs = ov_model.outputs for output in model_outputs: for name in list(output.get_names()): if re.search(score_pattern, name): @@ -75,6 +78,18 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o assert block_indices_counter == resulting_map["PagedAttentionExtension"], \ f"The number of scores outputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {block_indices_counter}" +@retry(3, exceptions=(OSError,), delay=1) +def run_pa(tmp_path, + model_id, + model_link, + cls: Union[Type[OVModelForCausalLM], Type[OVModelForVisualCausalLM]], + use_block_indices_inputs, + use_score_outputs): + model = cls.from_pretrained(model_id, export=True, trust_remote_code=True) + ov_model = model.model if cls is OVModelForCausalLM else model.lm_model + + compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs) + @pytest.mark.precommit @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): @@ -84,7 +99,7 @@ def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device) pytest.skip(reason) elif mark == 'xfail': pytest.xfail(reason) - run_pa(tmp_path, model_name, model_link, False, False) + run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False) @pytest.mark.precommit @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) @@ -95,4 +110,26 @@ def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, pytest.skip(reason) elif mark == 'xfail': pytest.xfail(reason) - run_pa(tmp_path, model_name, model_link, True, True) \ No newline at end of file + run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True) + +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) +def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False) + +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) +def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True) \ No newline at end of file From be847341f75e05d8d1b42c564a495aa1a8e2b272 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 18:38:26 +0100 Subject: [PATCH 04/22] added more models --- src/core/src/pass/sdpa_to_paged_attention.cpp | 54 ++++++------ .../models/hf-tiny-random-models-precommit | 86 +++++++++---------- .../models/hf-tiny-random-vl-models-precommit | 6 +- .../transformation_tests/sdpa2pa_ref_diff.py | 28 ++++++ .../test_pa_transformation.py | 64 +++++++------- 5 files changed, 134 insertions(+), 104 deletions(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 457f2a7138716c..1b738971cb38dc 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -53,15 +53,21 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr& model, const std::string& name) -> bool { - for (auto& t : model->inputs()) { - const auto& names = t.get_names(); + auto has_parameter = [=](const std::shared_ptr& model, const std::string& name) -> std::shared_ptr { + for (auto& param : model->inputs()) { + const auto& names = param.get_names(); if (names.find(name) != names.end()) { - return true; + if (auto casted_param = std::dynamic_pointer_cast(param.get_node_shared_ptr())) { + return casted_param; + } else { + OPENVINO_THROW("The model is in the inconsistent state. Found input '", + name, + "', but couldn't cast it to v0::Parameter."); + } } } - return false; + return nullptr; }; auto input_ids_name = has_parameter(model, "input_ids") ? "input_ids" : "inputs_embeds"; @@ -138,30 +144,22 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr(model->input(param_name).get_node_shared_ptr())) { - model->remove_parameter(param); - - if (param->output(0).get_target_inputs().size() == 0) { - std::stringstream consumers; - consumers << std::endl; - for (auto& input : param->output(0).get_target_inputs()) { - consumers << *input.get_node() << std::endl; - } - OPENVINO_ASSERT(param->output(0).get_target_inputs().size() == 0, - "PagedAttention transformation failed: couldn't remove ", - param->output(0).get_target_inputs().size(), - " inputs of ", - param_name, - " input: ", - consumers.str()); + if (auto param = has_parameter(model, param_name)) { + model->remove_parameter(param); + + if (param->output(0).get_target_inputs().size() == 0) { + std::stringstream consumers; + consumers << std::endl; + for (auto& input : param->output(0).get_target_inputs()) { + consumers << *input.get_node() << std::endl; } - } else { - OPENVINO_THROW("The model is in the inconsistent state. Found input '", - param_name, - "', but couldn't cast it to v0::Parameter."); - return false; + OPENVINO_ASSERT(param->output(0).get_target_inputs().size() == 0, + "PagedAttention transformation failed: couldn't remove ", + param->output(0).get_target_inputs().size(), + " inputs of ", + param_name, + " input: ", + consumers.str()); } } } diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit index 7c89c451ea4be5..c0145f6a773c21 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit @@ -1,43 +1,43 @@ -hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM -hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM -hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM -hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM -hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM -hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM -hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM -hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny -hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM -hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM -hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM -hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2 -hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM -hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM -hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM -hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM -hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM -hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM -hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM -hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM -katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse -katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b -katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen -katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat -katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2 -katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe -katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2 -katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf -katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2 -katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais -katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm -katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2 -katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm -katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b -katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx -fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM -fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2 -fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing -Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM -facebook/opt-125m,https://huggingface.co/facebook/opt-125m -facebook/opt-350m,https://huggingface.co/facebook/opt-350m -katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 -katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file +#hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM +#hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM +#hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM +#hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM +#hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM +#hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM +#hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM +#hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny +#hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM +#hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM +#hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM +#hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2 +#hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM +#hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM +#hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM +#hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM +#hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM +#hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM +#hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM +#hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM +#katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse +#katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b +#katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen +#katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat +#katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2 +#katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe +#katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2 +#katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf +#katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2 +#katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais +#katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm +#katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2 +#katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm +#katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b +#katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx +#fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM +#fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2 +#fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing +#Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM +#facebook/opt-125m,https://huggingface.co/facebook/opt-125m +#facebook/opt-350m,https://huggingface.co/facebook/opt-350m +#katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 +#katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit index 02bf0dd1d4d7c6..87f7aa99d5534b 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit @@ -1,2 +1,4 @@ -katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next -katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6 \ No newline at end of file +#katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next +#katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6 +katuni4ka/tiny-random-llava,https://huggingface.co/katuni4ka/tiny-random-llava +katuni4ka/tiny-random-nanollava,https://huggingface.co/katuni4ka/tiny-random-nanollava \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py index c2ab0ea51b887e..f704bb9d45f210 100644 --- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -319,6 +319,20 @@ "ScaledDotProductAttention" : -2, "Assign" : -4, }, + "katuni4ka/tiny-random-llava" : { + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + }, + "katuni4ka/tiny-random-nanollava" : { + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + }, } ref_diff_map_cache_eviction = { @@ -637,4 +651,18 @@ "PagedAttentionExtension" : 2, "ScaledDotProductAttention" : -2, }, + "katuni4ka/tiny-random-llava" : { + "ReadValue" : -4, + "Parameter" : 8, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + "Assign" : -4, + }, + "katuni4ka/tiny-random-nanollava" : { + "ReadValue" : -4, + "Parameter" : 8, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + "Assign" : -4, + }, } diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py index 2bc6726dff030f..19a04361e0a669 100644 --- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py +++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py @@ -90,31 +90,33 @@ def run_pa(tmp_path, compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs) -@pytest.mark.precommit -@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) -def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): - assert mark is None or mark == 'skip' or mark == 'xfail', \ - "Incorrect test case: {}, {}".format(model_name, model_link) - if mark == 'skip': - pytest.skip(reason) - elif mark == 'xfail': - pytest.xfail(reason) - run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False) - -@pytest.mark.precommit -@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) -def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): - assert mark is None or mark == 'skip' or mark == 'xfail', \ - "Incorrect test case: {}, {}".format(model_name, model_link) - if mark == 'skip': - pytest.skip(reason) - elif mark == 'xfail': - pytest.xfail(reason) - run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True) +#@pytest.mark.precommit +#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) +#def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): +# assert mark is None or mark == 'skip' or mark == 'xfail', \ +# "Incorrect test case: {}, {}".format(model_name, model_link) +# if mark == 'skip': +# pytest.skip(reason) +# elif mark == 'xfail': +# pytest.xfail(reason) +# run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False) +# +#@pytest.mark.precommit +#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) +#def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): +# assert mark is None or mark == 'skip' or mark == 'xfail', \ +# "Incorrect test case: {}, {}".format(model_name, model_link) +# if mark == 'skip': +# pytest.skip(reason) +# elif mark == 'xfail': +# pytest.xfail(reason) +# run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True) @pytest.mark.precommit @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device): + if ie_device == 'GPU': + pytest.skip("SKIPPING GPU") assert mark is None or mark == 'skip' or mark == 'xfail', \ "Incorrect test case: {}, {}".format(model_name, model_link) if mark == 'skip': @@ -123,13 +125,13 @@ def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device): pytest.xfail(reason) run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False) -@pytest.mark.precommit -@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) -def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): - assert mark is None or mark == 'skip' or mark == 'xfail', \ - "Incorrect test case: {}, {}".format(model_name, model_link) - if mark == 'skip': - pytest.skip(reason) - elif mark == 'xfail': - pytest.xfail(reason) - run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True) \ No newline at end of file +#@pytest.mark.precommit +#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) +#def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): +# assert mark is None or mark == 'skip' or mark == 'xfail', \ +# "Incorrect test case: {}, {}".format(model_name, model_link) +# if mark == 'skip': +# pytest.skip(reason) +# elif mark == 'xfail': +# pytest.xfail(reason) +# run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True) \ No newline at end of file From e41adc675a3ade813a1766b134b6fa7c282701c3 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 18:49:59 +0100 Subject: [PATCH 05/22] uncomment --- .../models/hf-tiny-random-models-precommit | 86 +++++++++---------- .../models/hf-tiny-random-vl-models-precommit | 4 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit index c0145f6a773c21..7c89c451ea4be5 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit @@ -1,43 +1,43 @@ -#hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM -#hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM -#hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM -#hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM -#hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM -#hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM -#hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM -#hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny -#hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM -#hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM -#hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM -#hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2 -#hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM -#hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM -#hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM -#hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM -#hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM -#hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM -#hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM -#hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM -#katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse -#katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b -#katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen -#katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat -#katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2 -#katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe -#katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2 -#katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf -#katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2 -#katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais -#katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm -#katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2 -#katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm -#katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b -#katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx -#fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM -#fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2 -#fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing -#Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM -#facebook/opt-125m,https://huggingface.co/facebook/opt-125m -#facebook/opt-350m,https://huggingface.co/facebook/opt-350m -#katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 -#katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file +hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM +hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM +hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM +hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM +hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM +hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM +hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny +hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTBigCodeForCausalLM +hf-internal-testing/tiny-random-Starcoder2ForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-Starcoder2ForCausalLM +hf-internal-testing/tiny-random-BloomForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BloomForCausalLM +hf-internal-testing/tiny-random-gpt2,https://huggingface.co/hf-internal-testing/tiny-random-gpt2 +hf-internal-testing/tiny-random-BlenderbotForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BlenderbotForCausalLM +hf-internal-testing/tiny-random-PegasusForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PegasusForCausalLM +hf-internal-testing/tiny-random-PhiForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PhiForCausalLM +hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MptForCausalLM +hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM +hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM +hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM +hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM +katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse +katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b +katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen +katuni4ka/tiny-random-aquilachat,https://huggingface.co/katuni4ka/tiny-random-aquilachat +katuni4ka/tiny-random-aquila2,https://huggingface.co/katuni4ka/tiny-random-aquila2 +katuni4ka/tiny-random-qwen1.5-moe,https://huggingface.co/katuni4ka/tiny-random-qwen1.5-moe +katuni4ka/tiny-random-codegen2,https://huggingface.co/katuni4ka/tiny-random-codegen2 +katuni4ka/tiny-random-olmo-hf,https://huggingface.co/katuni4ka/tiny-random-olmo-hf +katuni4ka/tiny-random-baichuan2,https://huggingface.co/katuni4ka/tiny-random-baichuan2 +katuni4ka/tiny-random-jais,https://huggingface.co/katuni4ka/tiny-random-jais +katuni4ka/tiny-random-internlm,https://huggingface.co/katuni4ka/tiny-random-internlm +katuni4ka/tiny-random-internlm2,https://huggingface.co/katuni4ka/tiny-random-internlm2 +katuni4ka/tiny-random-minicpm,https://huggingface.co/katuni4ka/tiny-random-minicpm +katuni4ka/tiny-random-falcon-40b,https://huggingface.co/katuni4ka/tiny-random-falcon-40b +katuni4ka/tiny-random-dbrx,https://huggingface.co/katuni4ka/tiny-random-dbrx +fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM +fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2 +fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing +Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM +facebook/opt-125m,https://huggingface.co/facebook/opt-125m +facebook/opt-350m,https://huggingface.co/facebook/opt-350m +katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 +katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit index 87f7aa99d5534b..7cdd3fdb3527be 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-vl-models-precommit @@ -1,4 +1,4 @@ -#katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next -#katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6 +katuni4ka/tiny-random-llava-next,https://huggingface.co/katuni4ka/tiny-random-llava-next +katuni4ka/tiny-random-minicpmv-2_6,https://huggingface.co/katuni4ka/tiny-random-minicpmv-2_6 katuni4ka/tiny-random-llava,https://huggingface.co/katuni4ka/tiny-random-llava katuni4ka/tiny-random-nanollava,https://huggingface.co/katuni4ka/tiny-random-nanollava \ No newline at end of file From f1a3f5f6fcb68696c16228ce25be410a837ad9bd Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 18:51:02 +0100 Subject: [PATCH 06/22] uncomment v2 --- .../test_pa_transformation.py | 64 +++++++++---------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py index 19a04361e0a669..2bc6726dff030f 100644 --- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py +++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py @@ -90,33 +90,31 @@ def run_pa(tmp_path, compare_diffs(ov_model, model_id, use_block_indices_inputs, use_score_outputs) -#@pytest.mark.precommit -#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) -#def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): -# assert mark is None or mark == 'skip' or mark == 'xfail', \ -# "Incorrect test case: {}, {}".format(model_name, model_link) -# if mark == 'skip': -# pytest.skip(reason) -# elif mark == 'xfail': -# pytest.xfail(reason) -# run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False) -# -#@pytest.mark.precommit -#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) -#def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): -# assert mark is None or mark == 'skip' or mark == 'xfail', \ -# "Incorrect test case: {}, {}".format(model_name, model_link) -# if mark == 'skip': -# pytest.skip(reason) -# elif mark == 'xfail': -# pytest.xfail(reason) -# run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True) +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) +def test_pa_precommit(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, False, False) + +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))) +def test_pa_precommit_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link, OVModelForCausalLM, True, True) @pytest.mark.precommit @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device): - if ie_device == 'GPU': - pytest.skip("SKIPPING GPU") assert mark is None or mark == 'skip' or mark == 'xfail', \ "Incorrect test case: {}, {}".format(model_name, model_link) if mark == 'skip': @@ -125,13 +123,13 @@ def test_pa_vlm(tmp_path, model_name, model_link, mark, reason, ie_device): pytest.xfail(reason) run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, False, False) -#@pytest.mark.precommit -#@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) -#def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): -# assert mark is None or mark == 'skip' or mark == 'xfail', \ -# "Incorrect test case: {}, {}".format(model_name, model_link) -# if mark == 'skip': -# pytest.skip(reason) -# elif mark == 'xfail': -# pytest.xfail(reason) -# run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True) \ No newline at end of file +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-vl-models-precommit"))) +def test_pa_vlm_use_cache_eviction(tmp_path, model_name, model_link, mark, reason, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_pa(tmp_path, model_name, model_link, OVModelForVisualCausalLM, True, True) \ No newline at end of file From c461407c732c74435030c0800b2ceddd51cc5fb3 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 18:53:38 +0100 Subject: [PATCH 07/22] fix ident --- .../transformation_tests/sdpa2pa_ref_diff.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py index f704bb9d45f210..85b4fd860dda53 100644 --- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -638,18 +638,18 @@ "Assign" : -12, }, "katuni4ka/tiny-random-llava-next" : { - "Parameter" : 8, - "Assign" : -4, - "ReadValue" : -4, - "PagedAttentionExtension" : 2, - "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, }, "katuni4ka/tiny-random-minicpmv-2_6" : { - "Parameter" : 8, - "Assign" : -4, - "ReadValue" : -4, - "PagedAttentionExtension" : 2, - "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, }, "katuni4ka/tiny-random-llava" : { "ReadValue" : -4, From 8f7e81af950071582b8969dc264c8a70724f154f Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 18:57:19 +0100 Subject: [PATCH 08/22] ident --- .../transformation_tests/sdpa2pa_ref_diff.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py index 85b4fd860dda53..acc46ab936f146 100644 --- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -313,25 +313,25 @@ "Assign" : -4, }, "katuni4ka/tiny-random-minicpmv-2_6" : { - "PagedAttentionExtension" : 2, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "Assign" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "Assign" : -4, }, "katuni4ka/tiny-random-llava" : { - "Assign" : -4, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, }, "katuni4ka/tiny-random-nanollava" : { - "Assign" : -4, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, }, } From d2558222735c3ba8b99ba464f4eeb7940520c536 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 12 Nov 2024 19:04:04 +0100 Subject: [PATCH 09/22] new ident --- .../transformation_tests/sdpa2pa_ref_diff.py | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py index acc46ab936f146..01112e254fed99 100644 --- a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -305,34 +305,34 @@ "ReadValue" : -12, "Assign" : -12, }, - "katuni4ka/tiny-random-llava-next" : { - "PagedAttentionExtension" : 2, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "Assign" : -4, - }, - "katuni4ka/tiny-random-minicpmv-2_6" : { - "PagedAttentionExtension" : 2, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "Assign" : -4, - }, - "katuni4ka/tiny-random-llava" : { - "Assign" : -4, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, - }, - "katuni4ka/tiny-random-nanollava" : { - "Assign" : -4, - "Parameter" : 7, - "ReadValue" : -4, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, - }, + "katuni4ka/tiny-random-llava-next" : { + "PagedAttentionExtension" : 2, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "Assign" : -4, + }, + "katuni4ka/tiny-random-minicpmv-2_6" : { + "PagedAttentionExtension" : 2, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "Assign" : -4, + }, + "katuni4ka/tiny-random-llava" : { + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + }, + "katuni4ka/tiny-random-nanollava" : { + "Assign" : -4, + "Parameter" : 7, + "ReadValue" : -4, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + }, } ref_diff_map_cache_eviction = { @@ -560,13 +560,13 @@ "Parameter" : 14, "Assign" : -8, }, - "katuni4ka/tiny-random-minicpm" : { - "ScaledDotProductAttention" : -4, - "Parameter" : 14, - "PagedAttentionExtension" : 4, - "ReadValue" : -8, - "Assign" : -8, - }, + "katuni4ka/tiny-random-minicpm" : { + "ScaledDotProductAttention" : -4, + "Parameter" : 14, + "PagedAttentionExtension" : 4, + "ReadValue" : -8, + "Assign" : -8, + }, "katuni4ka/tiny-random-falcon-40b" : { "ScaledDotProductAttention" : -2, "ReadValue" : -4, @@ -637,32 +637,32 @@ "Parameter" : 20, "Assign" : -12, }, - "katuni4ka/tiny-random-llava-next" : { - "Parameter" : 8, - "Assign" : -4, - "ReadValue" : -4, - "PagedAttentionExtension" : 2, - "ScaledDotProductAttention" : -2, - }, - "katuni4ka/tiny-random-minicpmv-2_6" : { - "Parameter" : 8, - "Assign" : -4, - "ReadValue" : -4, - "PagedAttentionExtension" : 2, - "ScaledDotProductAttention" : -2, - }, - "katuni4ka/tiny-random-llava" : { - "ReadValue" : -4, - "Parameter" : 8, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, - "Assign" : -4, + "katuni4ka/tiny-random-llava-next" : { + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + }, + "katuni4ka/tiny-random-minicpmv-2_6" : { + "Parameter" : 8, + "Assign" : -4, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + }, + "katuni4ka/tiny-random-llava" : { + "ReadValue" : -4, + "Parameter" : 8, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + "Assign" : -4, }, "katuni4ka/tiny-random-nanollava" : { - "ReadValue" : -4, - "Parameter" : 8, - "ScaledDotProductAttention" : -2, - "PagedAttentionExtension" : 2, - "Assign" : -4, + "ReadValue" : -4, + "Parameter" : 8, + "ScaledDotProductAttention" : -2, + "PagedAttentionExtension" : 2, + "Assign" : -4, }, } From 8dc6631a628c81076413090a5d8a732455074788 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 10:34:42 +0100 Subject: [PATCH 10/22] code style --- src/core/src/pass/sdpa_to_paged_attention.cpp | 3 ++- tests/requirements_pytorch | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 1b738971cb38dc..1babfc5b4ed80c 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -53,7 +53,8 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr& model, const std::string& name) -> std::shared_ptr { + auto has_parameter = [=](const std::shared_ptr& model, + const std::string& name) -> std::shared_ptr { for (auto& param : model->inputs()) { const auto& names = param.get_names(); if (names.find(name) != names.end()) { diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index 56446beba12600..0b51189a0ae98a 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -55,3 +55,4 @@ rjieba==0.1.11 # - katuni4ka/tiny-random-internlm2 transformers_stream_generator==0.0.5 einops==0.8.0 +flash_attn==2.6.3 \ No newline at end of file From a1eef49252e5591f3211e9a7be7f971b6b94aa9d Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 12:17:18 +0100 Subject: [PATCH 11/22] via link --- tests/requirements_pytorch | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index 0b51189a0ae98a..dae4e45b721891 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -55,4 +55,5 @@ rjieba==0.1.11 # - katuni4ka/tiny-random-internlm2 transformers_stream_generator==0.0.5 einops==0.8.0 -flash_attn==2.6.3 \ No newline at end of file + +https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl \ No newline at end of file From 6f63934b3f0e6018929302d71239a3461590c75d Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 12:48:11 +0100 Subject: [PATCH 12/22] install flash_attn separately --- .github/workflows/job_pytorch_layer_tests.yml | 5 +++++ tests/requirements_pytorch | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index b0eba0a278e582..bc12af7b49ab62 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -118,6 +118,11 @@ jobs: run: | # pytorch test requirements python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch + + - name: Install flash_attn module + run: | + # due to flash_attn issues, it needs to be installed separately from other packages + pip install flash_attn - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index dae4e45b721891..f90d43590149da 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -54,6 +54,4 @@ rjieba==0.1.11 # - katuni4ka/tiny-random-qwen # - katuni4ka/tiny-random-internlm2 transformers_stream_generator==0.0.5 -einops==0.8.0 - -https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl \ No newline at end of file +einops==0.8.0 \ No newline at end of file From 5b05187226259a0e53f9dff42112c64fa619e37e Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 13:14:05 +0100 Subject: [PATCH 13/22] no-build-isolation --- .github/workflows/job_pytorch_layer_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index bc12af7b49ab62..a7ef5210e5e05b 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -122,7 +122,7 @@ jobs: - name: Install flash_attn module run: | # due to flash_attn issues, it needs to be installed separately from other packages - pip install flash_attn + pip install flash_attn --no-build-isolation - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 From ecb5f474f980e95a4852d36decfbc043d18b7228 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 17:28:51 +0100 Subject: [PATCH 14/22] use another link --- .github/workflows/job_pytorch_layer_tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index a7ef5210e5e05b..3c49830f073af6 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -122,7 +122,8 @@ jobs: - name: Install flash_attn module run: | # due to flash_attn issues, it needs to be installed separately from other packages - pip install flash_attn --no-build-isolation + # pip install flash_attn --no-build-isolation + pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp312-cp312-linux_x86_64.whl - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 From aabe792d58151bcaeef335744aba025bd727ff3a Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 18:14:45 +0100 Subject: [PATCH 15/22] use another flash_attn --- .github/workflows/job_pytorch_layer_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 3c49830f073af6..dbf0e1913c3db1 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -123,7 +123,7 @@ jobs: run: | # due to flash_attn issues, it needs to be installed separately from other packages # pip install flash_attn --no-build-isolation - pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp312-cp312-linux_x86_64.whl + pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post1/flash_attn-2.7.0.post1+cu12torch2.5cxx11abiTRUE-cp312-cp312-linux_x86_64.whl - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 From aef65609e1ae2bf5d03c4a2827b5fea626b433a9 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 13 Nov 2024 18:44:39 +0100 Subject: [PATCH 16/22] skip cuda --- .github/workflows/job_pytorch_layer_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index dbf0e1913c3db1..4eeed787d1678e 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -122,8 +122,8 @@ jobs: - name: Install flash_attn module run: | # due to flash_attn issues, it needs to be installed separately from other packages - # pip install flash_attn --no-build-isolation - pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post1/flash_attn-2.7.0.post1+cu12torch2.5cxx11abiTRUE-cp312-cp312-linux_x86_64.whl + export FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE + pip install flash_attn --no-build-isolation - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 From 32aaabbfe503aad518391ef3600494f12c5871bc Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 14 Nov 2024 10:57:02 +0100 Subject: [PATCH 17/22] remove flash_attn --- .github/workflows/job_pytorch_layer_tests.yml | 6 ------ tests/requirements_pytorch | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 4eeed787d1678e..5f7e678cc37f95 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -119,12 +119,6 @@ jobs: # pytorch test requirements python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch - - name: Install flash_attn module - run: | - # due to flash_attn issues, it needs to be installed separately from other packages - export FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE - pip install flash_attn --no-build-isolation - - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 # due to CVS-152795, parallel run is not possible on Windows diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index f90d43590149da..56446beba12600 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -54,4 +54,4 @@ rjieba==0.1.11 # - katuni4ka/tiny-random-qwen # - katuni4ka/tiny-random-internlm2 transformers_stream_generator==0.0.5 -einops==0.8.0 \ No newline at end of file +einops==0.8.0 From 89ad50fd95d5f94822d0f488570efdf22aba278f Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 14 Nov 2024 12:54:54 +0100 Subject: [PATCH 18/22] use new transformers --- .github/workflows/job_pytorch_layer_tests.yml | 2 +- tests/requirements_pytorch | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 5f7e678cc37f95..b0eba0a278e582 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -118,7 +118,7 @@ jobs: run: | # pytorch test requirements python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_pytorch - + - name: PyTorch Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 # due to CVS-152795, parallel run is not possible on Windows diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index 56446beba12600..deb851a96b76b2 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -11,9 +11,7 @@ torchvision==0.20.1; platform_system != "Darwin" or platform_machine != "x86_64" torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64" torchaudio==2.5.1; platform_system != "Darwin" or platform_machine != "x86_64" torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64" -# transformers 4.45.1 is available -# but optimum still requires <4.45.0 -transformers==4.44.2 +transformers==4.46.2 pytest==7.0.1 pytest-html==4.1.1 pytest-xdist[psutil]==3.6.1 From c3ca9e44626512fdbf6033e396b2be625474c96e Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 14 Nov 2024 16:43:57 +0100 Subject: [PATCH 19/22] disable models --- src/core/src/pass/sdpa_to_paged_attention.cpp | 8 ++++---- .../models/hf-tiny-random-models-precommit | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index 1babfc5b4ed80c..ee501cbfe20822 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -28,7 +28,7 @@ static std::shared_ptr setName(std::shared_ptr nod // Set name for both node and output tensor (should be only one tensor, and any other names will be overriden by a // given single name) node->set_friendly_name(name); - OPENVINO_ASSERT(node->get_output_size() == 1); // Should I use assert here? + OPENVINO_ASSERT(node->get_output_size() == 1); node->get_output_tensor(0).set_names({name}); return node; } @@ -55,10 +55,10 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr& model, const std::string& name) -> std::shared_ptr { - for (auto& param : model->inputs()) { + for (const auto& param : model->inputs()) { const auto& names = param.get_names(); - if (names.find(name) != names.end()) { - if (auto casted_param = std::dynamic_pointer_cast(param.get_node_shared_ptr())) { + if (names.count(name)) { + if (auto casted_param = ov::as_type_ptr(param.get_node_shared_ptr())) { return casted_param; } else { OPENVINO_THROW("The model is in the inconsistent state. Found input '", diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit index 7c89c451ea4be5..df8ea51874094b 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit @@ -1,8 +1,8 @@ hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-LlamaForCausalLM hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM -hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM -hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM,xfail,CVS-157416 +hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLMk hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny @@ -17,7 +17,7 @@ hf-internal-testing/tiny-random-MptForCausalLM,https://huggingface.co/hf-interna hf-internal-testing/tiny-random-StableLmForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-StableLmForCausalLM hf-internal-testing/tiny-random-PersimmonForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-PersimmonForCausalLM hf-internal-testing/tiny-random-FalconForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-FalconForCausalLM -hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM +hf-tiny-model-private/tiny-random-OPTForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-OPTForCausalLM,xfail,CVS-157416 katuni4ka/tiny-random-xverse,https://huggingface.co/katuni4ka/tiny-random-xverse katuni4ka/tiny-random-baichuan2-13b,https://huggingface.co/katuni4ka/tiny-random-baichuan2-13b katuni4ka/tiny-random-qwen,https://huggingface.co/katuni4ka/tiny-random-qwen @@ -37,7 +37,7 @@ fxmarty/tiny-random-GemmaForCausalLM,https://huggingface.co/fxmarty/tiny-random- fxmarty/tiny-dummy-qwen2,https://huggingface.co/fxmarty/tiny-dummy-qwen2 fxmarty/really-tiny-falcon-testing,https://huggingface.co/fxmarty/really-tiny-falcon-testing Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi3ForCausalLM -facebook/opt-125m,https://huggingface.co/facebook/opt-125m -facebook/opt-350m,https://huggingface.co/facebook/opt-350m +facebook/opt-125m,https://huggingface.co/facebook/opt-125m,xfail,CVS-157416 +facebook/opt-350m,https://huggingface.co/facebook/opt-350m,xfail,CVS-157416 katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2 katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file From 8831a8327e24767da0b4428482af357f306165be Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Thu, 14 Nov 2024 16:51:29 +0100 Subject: [PATCH 20/22] fix k --- .../transformation_tests/models/hf-tiny-random-models-precommit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit index df8ea51874094b..0cf588ca25edb4 100644 --- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit +++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit @@ -2,7 +2,7 @@ hf-internal-testing/tiny-random-LlamaForCausalLM,https://huggingface.co/trl-inte hf-internal-testing/tiny-random-CohereForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CohereForCausalLM hf-internal-testing/tiny-random-GPTJForCausalLM,https://huggingface.co/trl-internal-testing/tiny-random-GPTJForCausalLM hf-internal-testing/tiny-random-GPTNeoForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoForCausalLM,xfail,CVS-157416 -hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLMk +hf-internal-testing/tiny-random-GPTNeoXForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-GPTNeoXForCausalLM hf-internal-testing/tiny-random-MistralForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-MistralForCausalLM hf-internal-testing/tiny-random-CodeGenForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-CodeGenForCausalLM hf-internal-testing/Mixtral-tiny,https://huggingface.co/hf-internal-testing/Mixtral-tiny From 94037c169d94927cb6cd4227919177b44e55829b Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 15 Nov 2024 10:55:08 +0100 Subject: [PATCH 21/22] minor review concerns --- src/core/src/pass/sdpa_to_paged_attention.cpp | 18 ++++++++++++------ .../transformation_tests/generate_ref_diffs.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index ee501cbfe20822..d52b0f7cc5967d 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -53,7 +53,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr& model, + auto get_parameter = [=](const std::shared_ptr& model, const std::string& name) -> std::shared_ptr { for (const auto& param : model->inputs()) { const auto& names = param.get_names(); @@ -71,10 +71,16 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr input_ids_node; + for (const auto& name : {"input_ids", "inputs_embeds"}) { + input_ids_node = get_parameter(model, name); + } + + if (!input_ids_node) { + OPENVINO_THROW("The model doesn't contain input_ids or input_embeds input. Aborting."); + return false; + } - std::shared_ptr input_ids_node = - std::dynamic_pointer_cast(model->input(input_ids_name).get_node_shared_ptr()); input_ids_node->set_partial_shape(PartialShape{-1}); auto unsqueezed_input_ids = std::make_shared(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1})); @@ -93,7 +99,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr position_ids; - if (!has_parameter(model, "position_ids")) { + if (!get_parameter(model, "position_ids")) { position_ids = setName(std::make_shared(element::i64, PartialShape{-1}), "position_ids"); model->add_parameters({position_ids}); } else { @@ -145,7 +151,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptrremove_parameter(param); if (param->output(0).get_target_inputs().size() == 0) { diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py index a5ed450e0af898..72051783fa7422 100644 --- a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py +++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py @@ -58,7 +58,7 @@ def get_models_list_type(file_name: str, cls: Union[Type[OVModelForCausalLM], Ty model_name, model_link, mark, reason = line_items models.append((model_name, model_link, mark, reason)) elif len(line_items) > 4: - model_name, model_link, mark, reason = line_items[:4] + model_name, model_link, mark, reason, *other = line_items if not mark: mark = None if not reason: From 541b7145ccada7a9b08de62a0ab33d5afadf47dd Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Fri, 15 Nov 2024 10:58:23 +0100 Subject: [PATCH 22/22] fast break --- src/core/src/pass/sdpa_to_paged_attention.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp index d52b0f7cc5967d..1c43795151cab7 100644 --- a/src/core/src/pass/sdpa_to_paged_attention.cpp +++ b/src/core/src/pass/sdpa_to_paged_attention.cpp @@ -73,7 +73,9 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr input_ids_node; for (const auto& name : {"input_ids", "inputs_embeds"}) { - input_ids_node = get_parameter(model, name); + if (input_ids_node = get_parameter(model, name)) { + break; + } } if (!input_ids_node) {