Skip to content

Commit

Permalink
Support file upload summary for DocSum microservice (#823)
Browse files Browse the repository at this point in the history
* Support file upload summary for DocSum microservice

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add aiofiles

Signed-off-by: lvliang-intel <[email protected]>

* move file handling in gateway

Signed-off-by: lvliang-intel <[email protected]>

* rollback docsum code

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* only import in docsum

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* clean dependency

Signed-off-by: lvliang-intel <[email protected]>

* update code

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: lvliang-intel <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
lvliang-intel and pre-commit-ci[bot] authored Oct 24, 2024
1 parent 5ed428f commit fa2ea64
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 5 deletions.
62 changes: 57 additions & 5 deletions comps/cores/mega/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import base64
import os
from io import BytesIO
from typing import Union
from typing import List, Union

import requests
from fastapi import Request
from fastapi import File, Request, UploadFile
from fastapi.responses import StreamingResponse
from PIL import Image

Expand Down Expand Up @@ -361,11 +361,63 @@ def __init__(self, megaservice, host="0.0.0.0", port=8888):
megaservice, host, port, str(MegaServiceEndpoint.DOC_SUMMARY), ChatCompletionRequest, ChatCompletionResponse
)

async def handle_request(self, request: Request):
data = await request.json()
def read_pdf(self, file):
    """Load a PDF and return the documents produced by the loader.

    Args:
        file: Value handed to ``PyPDFLoader`` to locate the PDF (the caller
            in this file passes the path of the saved upload).

    Returns:
        The result of ``PyPDFLoader(file).load_and_split()``.
    """
    # Imported lazily so the PDF dependency is only needed when a PDF is read.
    from langchain.document_loaders import PyPDFLoader

    return PyPDFLoader(file).load_and_split()

def read_text_from_file(self, file, save_file_name):
    """Extract the text of an uploaded file according to its content type.

    Args:
        file: Uploaded file object exposing ``headers`` (with a
            ``content-type`` entry) and, for plain-text uploads, a binary
            ``file`` stream.
        save_file_name: Path of the on-disk copy of the upload; it is passed
            to the PDF and DOCX readers.

    Returns:
        A list of text chunks for plain-text and PDF uploads, or the value
        returned by ``docx2txt.process`` for DOCX uploads.

    Raises:
        ValueError: If the content type is missing or not supported.
    """
    # Media types are case-insensitive and may carry parameters
    # (e.g. "text/plain; charset=utf-8"), so compare on the bare type only.
    raw_content_type = file.headers.get("content-type") or ""
    content_type = raw_content_type.split(";", 1)[0].strip().lower()

    # Plain text: decode the in-memory upload and split it into chunks.
    if content_type == "text/plain":
        # Imported per branch so one format does not require every parser.
        from langchain.text_splitter import CharacterTextSplitter

        file.file.seek(0)
        content = file.file.read().decode("utf-8")
        return CharacterTextSplitter().split_text(content)

    # PDF: one text entry per document returned by the loader.
    if content_type == "application/pdf":
        documents = self.read_pdf(save_file_name)
        return [doc.page_content for doc in documents]

    # DOCX; "application/octet-stream" is also handled as DOCX, as before.
    if content_type in (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/octet-stream",
    ):
        import docx2txt

        return docx2txt.process(save_file_name)

    # Previously this fell through to an UnboundLocalError on `file_content`.
    raise ValueError(f"Unsupported content type for file upload: {raw_content_type!r}")

async def handle_request(self, request: Request, files: List[UploadFile] = File(...)):
data = await request.form()
stream_opt = data.get("stream", True)
chat_request = ChatCompletionRequest.parse_obj(data)
prompt = self._handle_message(chat_request.messages)
file_summaries = []
for file in files:
file_path = f"/tmp/{file.filename}"

import aiofiles

async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
docs = self.read_text_from_file(file, file_path)
os.remove(file_path)
if isinstance(docs, list):
file_summaries.extend(docs)
else:
file_summaries.append(docs)

if file_summaries:
prompt = self._handle_message(chat_request.messages) + "\n".join(file_summaries)
else:
prompt = self._handle_message(chat_request.messages)

parameters = LLMParams(
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
aiofiles
aiohttp
docarray
docx2txt
fastapi
httpx
kubernetes
langchain
langchain-community
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
prometheus-fastapi-instrumentator
pypdf
python-multipart
pyyaml
requests
shortuuid
Expand Down

0 comments on commit fa2ea64

Please sign in to comment.