From cd1cba0915a437b9de1ad6cdc8529dba42817a2c Mon Sep 17 00:00:00 2001 From: Vignesh Aigal Date: Wed, 1 May 2024 20:53:33 -0700 Subject: [PATCH 1/2] Add support for creating objrefs for http or data uri --- .../providers/promptly/file_operations.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/llmstack/processors/providers/promptly/file_operations.py b/llmstack/processors/providers/promptly/file_operations.py index 9fc037c4078..68c4d35bcb8 100644 --- a/llmstack/processors/providers/promptly/file_operations.py +++ b/llmstack/processors/providers/promptly/file_operations.py @@ -2,6 +2,7 @@ import json import logging import os +import re import shutil import tempfile import uuid @@ -24,6 +25,9 @@ logger = logging.getLogger(__name__) +IMAGE_HTTP_URI_REGEX = re.compile(r"^https?://.*\.(png|jpg|jpeg|gif|svg|bmp|webp)$") +IMAGE_DATA_URI_REGEX = re.compile(r"^data:image/(png|jpg|jpeg|gif|svg|bmp|webp);.*$") + def _file_extension_from_mime_type(mime_type): if mime_type == "text/plain": @@ -62,6 +66,7 @@ class FileMimeType(str, Enum): MARKDOWN = "text/markdown" PDF = "application/pdf" OCTET_STREAM = "application/octet-stream" + IMAGE = "image/*" def __str__(self): return self.value @@ -85,7 +90,7 @@ class FileOperationsInput(ApiProcessorSchema): description="The contents of the file. 
Skip this field if you want to create an archive of the directory", ) content_mime_type: Optional[FileMimeType] = Field( - default=FileMimeType.TEXT, + default=None, description="The mimetype of the content.", ) content_objref: Optional[str] = Field( @@ -259,9 +264,20 @@ def process(self) -> dict: if input_content_mime_type != self._input.output_mime_type: raise ValueError("Source content mime type does not match provided mime type") - data_uri = create_data_uri( - input_content_bytes, input_content_mime_type, base64_encode=True, filename=full_file_path - ) + if ( + self._input.content + and self._input.output_mime_type == FileMimeType.IMAGE + and (IMAGE_HTTP_URI_REGEX.match(self._input.content) or IMAGE_DATA_URI_REGEX.match(self._input.content)) + ): + objref = self._upload_asset_from_url(self._input.content) + async_to_sync(output_stream.write)( + FileOperationsOutput(directory=directory, filename=filename, objref=objref) + ) + data_uri = None + else: + data_uri = create_data_uri( + input_content_bytes, input_content_mime_type, base64_encode=True, filename=full_file_path + ) elif operation == FileOperationOperation.ARCHIVE: result = self._get_all_session_assets(include_name=True, include_data=True) if result and "assets" in result and len(result["assets"]): From 7031447f5ea2fcde416b82ef86245074fcf48860 Mon Sep 17 00:00:00 2001 From: Vignesh Aigal Date: Wed, 1 May 2024 21:03:08 -0700 Subject: [PATCH 2/2] . 
--- llmstack/processors/providers/promptly/file_operations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llmstack/processors/providers/promptly/file_operations.py b/llmstack/processors/providers/promptly/file_operations.py index 68c4d35bcb8..16db56ebff5 100644 --- a/llmstack/processors/providers/promptly/file_operations.py +++ b/llmstack/processors/providers/promptly/file_operations.py @@ -261,8 +261,6 @@ def process(self) -> dict: elif operation == FileOperationOperation.CREATE: if input_content_bytes is None or input_content_mime_type is None: raise ValueError("Content is missing or invalid") - if input_content_mime_type != self._input.output_mime_type: - raise ValueError("Source content mime type does not match provided mime type") if ( self._input.content @@ -274,7 +272,11 @@ def process(self) -> dict: FileOperationsOutput(directory=directory, filename=filename, objref=objref) ) data_uri = None + else: + if input_content_mime_type != self._input.output_mime_type: + raise ValueError("Source content mime type does not match provided mime type") + data_uri = create_data_uri( input_content_bytes, input_content_mime_type, base64_encode=True, filename=full_file_path ) @@ -290,8 +292,6 @@ def process(self) -> dict: async_to_sync(output_stream.write)( FileOperationsOutput(directory=directory, filename=filename, objref=asset) ) - else: - raise ValueError("Failed to create data uri") # Finalize the output stream output = output_stream.finalize()