Skip to content

Commit

Permalink
v0.2.36 png support
Browse files: browse the repository at this point in the history
  • Loading branch information
phact committed Jan 8, 2025
1 parent eded6a9 commit b728e91
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 26 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
v0.2.35
v0.2.36
59 changes: 38 additions & 21 deletions impl/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import sys
import traceback
from typing import Callable, Sequence, Union, Any

import httpx
Expand Down Expand Up @@ -102,29 +103,45 @@ async def shutdown_event():

class APIVersionMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
version_header = request.headers.get("OpenAI-Beta")
if version_header is None or version_header == "assistants=v1":
response = await call_next(request)
return response
if version_header == "assistants=v2":
request.scope['path'] = request.scope['path'].replace("v1", "v2")
if 'raw_path' in request.scope:
request.scope['raw_path'] = request.scope['raw_path'].replace(b'v1', b'v2')
try:
response = await call_next(request)
return response
except Exception as e:
if hasattr(request.state, "dbid"):
logger.error(f"Error: {e}, dbid: {request.state.dbid}")
else:
logger.error(f"Error: {e}")
print(e)
raise e
# Retrieve and normalize the version header
version_header = request.headers.get("OpenAI-Beta", "").lower()

else:
try:
if version_header in [None, "", "assistants=v1"]:
# Default version or v1: Proceed as is
return await call_next(request)

if version_header == "assistants=v2":
# Modify path for v2 requests
request.scope['path'] = request.scope['path'].replace("v1", "v2")
if 'raw_path' in request.scope:
request.scope['raw_path'] = request.scope['raw_path'].replace(b'v1', b'v2')

# Proceed with the modified request
return await call_next(request)

# Unsupported version
return Response(
f"Unsupported version: {version_header})",
status_code=400)
content=f"Unsupported version: {version_header}",
status_code=400
)

except Exception as e:
# Structured logging for errors
trace = traceback.format_exc()
dbid = getattr(request.state, "dbid", None)
error_message = f"Error processing request: {e}\nTraceback: {trace}"
if dbid:
logger.error(f"{error_message}, dbid: {dbid}")
else:
logger.error(error_message)

# Return a generic internal server error response
return Response(
content="An internal server error occurred.",
status_code=500
)



app.add_middleware(APIVersionMiddleware)
Expand Down
21 changes: 17 additions & 4 deletions impl/services/file.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
import csv
import mimetypes
import os
import traceback
from io import BufferedReader
from typing import Optional, List

Expand All @@ -9,7 +10,7 @@
from fastapi import UploadFile, HTTPException
from loguru import logger
from PyPDF2 import PdfReader
from markitdown import MarkItDown
from markitdown import MarkItDown, UnsupportedFormatException
from openai import OpenAI

from impl.astra_vector import HandledResponse
Expand Down Expand Up @@ -112,7 +113,7 @@ def extract_text_from_file(file: BufferedReader, mimetype: str, openai_api_key,
# Extract text from pdf using PyPDF2
reader = PdfReader(file)
extracted_text = " ".join([page.extract_text() for page in reader.pages])
elif mimetype == "text/plain" or mimetype == "text/markdown" or mimetype == "application/sql":
elif mimetype == "text/plain" or mimetype == "text/markdown" or mimetype == "application/sql" or mimetype == "message/rfc822":
# Read text from plain text file
extracted_text = file.read().decode("utf-8")
elif (
Expand Down Expand Up @@ -145,15 +146,27 @@ def extract_text_from_file(file: BufferedReader, mimetype: str, openai_api_key,
extracted_text += run.text + " "
extracted_text += "\n"
else:
if openai_api_key is None:
raise HTTPException(
status_code=400,
detail="OpenAI API key is required for this file type: {}\nRemember to set your OPENAI_API_KEY env var".format(mimetype),
)
client = OpenAI(api_key=openai_api_key)
md = MarkItDown(mlm_client=client, mlm_model="gpt-4o")
md = MarkItDown(llm_client=client, llm_model="gpt-4o")
try:
extracted_text = md.convert(source=filepath).text_content
except UnsupportedFormatException as e:
logger.error(f"markitdown unsupported exception for: {mimetype}\nerror: {e}")
raise HTTPException(
status_code=400,
detail="Unsupported file type: {}\nError: {}".format(mimetype, e)
)
except Exception as e:
logger.error(f"Error: {e}\nTrace: {traceback.format_exc()}")
# Unsupported file type
raise HTTPException(
status_code=400,
detail="Unsupported file type: {}".format(mimetype),
detail="Unknown error parsing file type: {}\nError: {}".format(mimetype, e)
)
return extracted_text

Expand Down

0 comments on commit b728e91

Please sign in to comment.