From e767358a644905256f4f3f1a6befe249f36567b3 Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Thu, 5 Dec 2024 23:09:59 +0100
Subject: [PATCH 1/2] bump version (Paligemma-2)

---
 mlx_vlm/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlx_vlm/version.py b/mlx_vlm/version.py
index ae73625..bbab024 100644
--- a/mlx_vlm/version.py
+++ b/mlx_vlm/version.py
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"

From 138c34a87e1c07353658c86a9ae107096e580afa Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Thu, 5 Dec 2024 23:25:40 +0100
Subject: [PATCH 2/2] add prompt with image token

---
 mlx_vlm/prompt_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlx_vlm/prompt_utils.py b/mlx_vlm/prompt_utils.py
index 90422da..2cfc527 100644
--- a/mlx_vlm/prompt_utils.py
+++ b/mlx_vlm/prompt_utils.py
@@ -59,6 +59,7 @@ def add_image_tokens(message, token_format):
             " ".join([f"<|image_{i+1}|>" for i in range(num_images)]),
         ),
         "prompt_only": lambda: prompt,
+        "prompt_with_image_token": lambda: "<image>" * num_images + prompt,
     }
 
     model_to_format = {
@@ -72,7 +73,7 @@ def add_image_tokens(message, token_format):
         "phi3_v": "message_with_numbered_image_tokens",
         "multi_modality": "message_with_image_token",
         "pixtral": "message_list_with_image_type",
-        "paligemma": "prompt_only",
+        "paligemma": "prompt_with_image_token",
        "florence2": "prompt_only",
         "mllama": "message_list_with_image",
         "molmo": "prompt_only",
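
For reference, a minimal sketch of what the new "prompt_with_image_token"
format does for PaliGemma. The standalone helper below is illustrative only,
not mlx-vlm API; in the patch the same expression is the lambda registered in
add_image_tokens, and PaliGemma's literal "<image>" token is assumed from the
line added above.

    # One <image> token is prepended per input image, then the text prompt.
    def prompt_with_image_token(prompt: str, num_images: int) -> str:
        return "<image>" * num_images + prompt

    print(prompt_with_image_token("describe this photo", 1))
    # <image>describe this photo
    print(prompt_with_image_token("compare the two photos", 2))
    # <image><image>compare the two photos

The previous "prompt_only" mapping passed the PaliGemma prompt through
unchanged, so no image placeholder was ever inserted; the new format fixes
that by emitting exactly one placeholder per image.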