Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for creating objrefs for HTTP or data URIs #235

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions llmstack/processors/providers/promptly/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import os
import re
import shutil
import tempfile
import uuid
Expand All @@ -24,6 +25,9 @@

logger = logging.getLogger(__name__)

# Patterns used to recognise content that is a *reference* to an image
# (an http(s) URL or a data URI) rather than raw file contents.
#
# Both are case-insensitive: URL schemes, file extensions seen in the wild
# (".PNG") and MIME types are not reliably lower-case.

# http(s) URL whose path ends in a known image extension, optionally followed
# by a query string and/or fragment (e.g. "https://host/a.png?w=100#top").
IMAGE_HTTP_URI_REGEX = re.compile(
    r"^https?://.*\.(png|jpg|jpeg|gif|svg|bmp|webp)(?:[?#].*)?$",
    re.IGNORECASE,
)

# data URI with an image media type. SVG's registered type is "image/svg+xml",
# so the "+xml" suffix is accepted; the bare "image/svg" spelling is kept for
# backward compatibility. The media type may be followed by parameters (";")
# or directly by the payload (","), both of which RFC 2397 allows.
IMAGE_DATA_URI_REGEX = re.compile(
    r"^data:image/(png|jpg|jpeg|gif|svg(?:\+xml)?|bmp|webp)[;,].*$",
    re.IGNORECASE,
)


def _file_extension_from_mime_type(mime_type):
if mime_type == "text/plain":
Expand Down Expand Up @@ -62,6 +66,7 @@ class FileMimeType(str, Enum):
MARKDOWN = "text/markdown"
PDF = "application/pdf"
OCTET_STREAM = "application/octet-stream"
IMAGE = "image/*"

def __str__(self):
return self.value
Expand All @@ -85,7 +90,7 @@ class FileOperationsInput(ApiProcessorSchema):
description="The contents of the file. Skip this field if you want to create an archive of the directory",
)
content_mime_type: Optional[FileMimeType] = Field(
default=FileMimeType.TEXT,
default=None,
description="The mimetype of the content.",
)
content_objref: Optional[str] = Field(
Expand Down Expand Up @@ -256,12 +261,25 @@ def process(self) -> dict:
elif operation == FileOperationOperation.CREATE:
if input_content_bytes is None or input_content_mime_type is None:
raise ValueError("Content is missing or invalid")
if input_content_mime_type != self._input.output_mime_type:
raise ValueError("Source content mime type does not match provided mime type")

data_uri = create_data_uri(
input_content_bytes, input_content_mime_type, base64_encode=True, filename=full_file_path
)
if (
self._input.content
and self._input.output_mime_type == FileMimeType.IMAGE
and (IMAGE_HTTP_URI_REGEX.match(self._input.content) or IMAGE_DATA_URI_REGEX.match(self._input.content))
):
objref = self._upload_asset_from_url(self._input.content)
async_to_sync(output_stream.write)(
FileOperationsOutput(directory=directory, filename=filename, objref=objref)
)
data_uri = None

else:
if input_content_mime_type != self._input.output_mime_type:
raise ValueError("Source content mime type does not match provided mime type")

data_uri = create_data_uri(
input_content_bytes, input_content_mime_type, base64_encode=True, filename=full_file_path
)
elif operation == FileOperationOperation.ARCHIVE:
result = self._get_all_session_assets(include_name=True, include_data=True)
if result and "assets" in result and len(result["assets"]):
Expand All @@ -274,8 +292,6 @@ def process(self) -> dict:
async_to_sync(output_stream.write)(
FileOperationsOutput(directory=directory, filename=filename, objref=asset)
)
else:
raise ValueError("Failed to create data uri")

# Finalize the output stream
output = output_stream.finalize()
Expand Down
Loading