From ebdf19fa7e6071f0aba6af44cfc16ae0ef3c05fa Mon Sep 17 00:00:00 2001
From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>
Date: Tue, 7 Jan 2025 13:53:54 +0800
Subject: [PATCH] [NPU] Further fix saving of generation config (#12657)

* Further fix saving of generation config

* Fix based on comments

* Small fix
---
 .../npu_pipeline_model/convert_pipeline.py         | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
index 6eda2d046d8..16f8a7241d4 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -473,10 +473,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)
-        if model.can_generate():
-            model.generation_config.save_pretrained(save_directory)
 
         from .qwen import convert_qwen_layer, convert_fused_qwen_layer
         from .qwen import convert_lm_head_and_embedding
@@ -537,8 +533,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)
 
         from .llama import convert_llama_layer, convert_fused_llama_layer
         from .llama import convert_lm_head_and_embedding
@@ -577,8 +571,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)
 
         from .minicpm import convert_minicpm_layer, convert_fused_minicpm_layer
         from .minicpm import convert_lm_head_and_embedding
@@ -595,3 +587,8 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                                   save_directory, weight_dir,
                                   convert_model=True,
                                   max_prompt_len=max_prompt_len)
+
+    model.config.update(update_dict)
+    model.config.save_pretrained(save_directory)
+    if model.can_generate():
+        model.generation_config.save_pretrained(save_directory)