You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
The documentation provides sample code for uploading a file from the client side through the upload_api. However, I have observed that uploading files through this API takes longer than uploading through the UI. I would like to know the reason for this discrepancy and how I can make the process faster. Also, if multiple clients send files through this API, how is this handled?
import os
import ast
import time
from tqdm import tqdm
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from gradio_client import Client
class DocumentUploader:
    """Client-side helper for uploading, ingesting, and querying documents.

    Wraps a ``gradio_client.Client`` and talks to three server endpoints:
    ``/upload_api``, ``/add_file_api`` and ``/submit_nochat_api``.
    """

    def __init__(self, host_url: str, api_key: str = 'EMPTY'):
        """Connect to the server at *host_url*.

        Args:
            host_url: Base URL of the Gradio server.
            api_key: Stored on the instance. None of the methods in this
                class currently send it to the server.
        """
        self.client = Client(host_url)
        self.api_key = api_key

    def upload_document(self, local_file_path: str) -> str:
        """Upload a local file and return its path on the server.

        The progress bar is cosmetic: ``predict`` is a single blocking call,
        so the bar jumps from 0 to 100 only after the upload has returned.

        Args:
            local_file_path: Path of the file on the client machine.

        Returns:
            The second element of the ``/upload_api`` response, used by the
            rest of this class as the server-side file path.
        """
        file_name = os.path.basename(local_file_path)
        with tqdm(total=100, desc=f"Uploading {file_name}", unit='%', ncols=80) as pbar:
            _, server_file_path = self.client.predict(
                local_file_path, api_name='/upload_api'
            )
            pbar.update(100)
        return server_file_path

    def add_document_and_ocr(
        self,
        server_file_path: str,
        loaders: list,
        langchain_mode: str = "UserData",
    ) -> dict:
        """Ask the server to ingest an already-uploaded file.

        As with ``upload_document``, the progress bar only moves once the
        blocking ``predict`` call has finished.

        Args:
            server_file_path: Path returned by ``upload_document``.
            loaders: Loader options, unpacked as trailing positional
                arguments of ``/add_file_api``.
            langchain_mode: Collection to add the document to. Defaults to
                ``"UserData"``, the value that was previously hard-coded.

        Returns:
            Whatever ``/add_file_api`` returns.
        """
        file_name = os.path.basename(server_file_path)
        with tqdm(total=100, desc=f"Processing {file_name}", unit='%', ncols=80) as pbar:
            # NOTE(review): True, 512, True are presumably chunk / chunk_size /
            # embed -- confirm against the server's /add_file_api signature.
            res = self.client.predict(
                server_file_path, langchain_mode, True, 512, True, *loaders,
                api_name='/add_file_api',
            )
            pbar.update(100)
        return res

    def query_document(self, langchain_mode: str, instruction: str) -> str:
        """Send *instruction* to ``/submit_nochat_api`` and return the answer.

        The endpoint is called with the ``str()`` of a dict and its reply is
        parsed back with ``ast.literal_eval``; the ``'response'`` entry of the
        parsed reply is returned.
        """
        kwargs = dict(langchain_mode=langchain_mode, instruction=instruction)
        res = self.client.predict(str(kwargs), api_name='/submit_nochat_api')
        return ast.literal_eval(res)['response']

    def process_file(self, file_path: str, loaders: list = None) -> dict:
        """Upload a single file and have the server ingest it.

        Args:
            file_path: Local path of the file to process.
            loaders: Optional loader options forwarded to
                ``add_document_and_ocr``. When omitted, the original default
                set (every listed image and PDF loader) is used. Passing a
                smaller set is the obvious knob to try when ingestion
                through the API is slow.

        Returns:
            The result of ``add_document_and_ocr``.
        """
        if loaders is None:
            # Built fresh on every call so callers can never mutate a shared
            # default. NOTE(review): the four slots are presumably image
            # loaders, PDF loaders and two unset options -- confirm.
            loaders = [
                ['Caption', 'CaptionBlip2', 'Pix2Struct', 'OCR', 'DocTR'],
                ['PyMuPDF', 'Unstructured', 'PyPDF', 'TryHTML', 'OCR'],
                None, None,
            ]
        print(f"Processing single file: {file_path}")
        server_file_path = self.upload_document(file_path)
        return self.add_document_and_ocr(server_file_path, loaders)

    def query_uploaded_documents(self, instruction: str):
        """Query the ``"UserData"`` collection and print the response."""
        response = self.query_document("UserData", instruction)
        print("Query response:", response)
class NewFileHandler(FileSystemEventHandler):
    """Event handler that uploads PDF files created in the watched folder."""

    def __init__(self, uploader: DocumentUploader):
        """Remember the *uploader* used to process newly created files."""
        super().__init__()
        self.uploader = uploader

    def on_created(self, event):
        """Process a newly created PDF file; ignore everything else.

        Directories and files whose name does not end in ``.pdf`` (compared
        case-insensitively, so ``.PDF`` also matches) are skipped.
        """
        if event.is_directory:
            return
        # src_path may be bytes or str; normalise so the suffix test and the
        # uploader always see a str.
        src_path = os.fsdecode(event.src_path)
        if not src_path.lower().endswith(".pdf"):
            return
        # NOTE(review): a creation event can presumably fire before the
        # writer has finished the file -- confirm large files are complete
        # by the time they are uploaded.
        print(f"New file detected: {src_path}")
        self.uploader.process_file(src_path)
def monitor_folder(folder_path: str, uploader: DocumentUploader):
    """Watch *folder_path* and hand newly created files to *uploader*.

    Blocks the calling thread until interrupted with Ctrl-C. The observer is
    stopped and joined on every exit path, not only on ``KeyboardInterrupt``,
    so an unexpected error does not leave the watcher thread running.

    Args:
        folder_path: Directory to watch (non-recursively).
        uploader: Uploader passed to the ``NewFileHandler``.
    """
    event_handler = NewFileHandler(uploader)
    observer = Observer()
    observer.schedule(event_handler, folder_path, recursive=False)
    observer.start()
    print(f"Monitoring folder: {folder_path}")
    try:
        while True:
            time.sleep(1)  # Keep the script running
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to end monitoring; fall through to the
        # cleanup below and return normally.
        pass
    finally:
        observer.stop()
        observer.join()
# Example usage: watch a local folder and send each new PDF to the server.
folder_path = "data"
host_url = "http://xx.xx.x.xx:7860/"
uploader = DocumentUploader(host_url=host_url)
# Begin watching the folder; this call blocks until interrupted.
monitor_folder(folder_path=folder_path, uploader=uploader)
The text was updated successfully, but these errors were encountered:
Hi, maybe the API and UI are using different default options for which handlers (e.g. doctr, unstructured, OCR, vision, etc.) are used. It would be good to compare the logs for each.
Note that the API and UI use the same code and use gradio's unified way of generating API from UI itself.
The documentation provides sample code for uploading a file from the client side through the `upload_api`. However, I have observed that uploading files through this API takes longer than uploading through the UI. I would like to know the reason for this discrepancy and how I can make the process faster. Also, if multiple clients send files through this API, how is this handled?
The text was updated successfully, but these errors were encountered: