From e767358a644905256f4f3f1a6befe249f36567b3 Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Thu, 5 Dec 2024 23:09:59 +0100
Subject: [PATCH 1/2] bump version (Paligemma-2)

---
 mlx_vlm/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlx_vlm/version.py b/mlx_vlm/version.py
index ae73625..bbab024 100644
--- a/mlx_vlm/version.py
+++ b/mlx_vlm/version.py
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"

From 138c34a87e1c07353658c86a9ae107096e580afa Mon Sep 17 00:00:00 2001
From: Prince Canuma
Date: Thu, 5 Dec 2024 23:25:40 +0100
Subject: [PATCH 2/2] add prompt with image token

---
 mlx_vlm/prompt_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlx_vlm/prompt_utils.py b/mlx_vlm/prompt_utils.py
index 90422da..2cfc527 100644
--- a/mlx_vlm/prompt_utils.py
+++ b/mlx_vlm/prompt_utils.py
@@ -59,6 +59,7 @@ def add_image_tokens(message, token_format):
             " ".join([f"<|image_{i+1}|>" for i in range(num_images)]),
         ),
         "prompt_only": lambda: prompt,
+        "prompt_with_image_token": lambda: "<image>" * num_images + prompt,
     }
 
     model_to_format = {
@@ -72,7 +73,7 @@ def add_image_tokens(message, token_format):
         "phi3_v": "message_with_numbered_image_tokens",
         "multi_modality": "message_with_image_token",
         "pixtral": "message_list_with_image_type",
-        "paligemma": "prompt_only",
+        "paligemma": "prompt_with_image_token",
        "florence2": "prompt_only",
         "mllama": "message_list_with_image",
         "molmo": "prompt_only",
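
For reference, a minimal sketch of what the new "prompt_with_image_token"
format does for PaliGemma. The standalone helper below is illustrative only,
not mlx-vlm API; in the patch the same expression is the lambda registered in
add_image_tokens, and PaliGemma's literal "<image>" token is assumed from the
line added above.

    # One <image> token is prepended per input image, then the text prompt.
    def prompt_with_image_token(prompt: str, num_images: int) -> str:
        return "<image>" * num_images + prompt

    print(prompt_with_image_token("describe this photo", 1))
    # <image>describe this photo
    print(prompt_with_image_token("compare the two photos", 2))
    # <image><image>compare the two photos

The previous "prompt_only" mapping passed the PaliGemma prompt through
unchanged, so no image placeholder was ever inserted; the new format fixes
that by emitting exactly one placeholder per image.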