-
-
Notifications
You must be signed in to change notification settings - Fork 164
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(deploy): add automated OmniParser deployment
* add working omniparser deploy.py, Dockerfile, pyproject.toml, README.md, .env.example, .dockerignore, client.py
- Loading branch information
Showing
7 changed files
with
1,027 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
AWS_ACCESS_KEY_ID= | ||
AWS_SECRET_ACCESS_KEY= | ||
AWS_REGION= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
``` | ||
# First time setup | ||
cd deploy | ||
uv venv | ||
source .venv/bin/activate | ||
uv pip install -e . | ||
# Subsequent usage | ||
python deploy/models/omniparser/deploy.py start | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
__pycache__ | ||
*.pyc | ||
*.pyo | ||
*.pyd | ||
.Python | ||
env | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
.tox | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.log | ||
.pytest_cache | ||
.env | ||
.venv | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Image that clones OmniParser, installs it into a conda environment, bakes the
# V2 weights and OCR models into the image, and starts the OmniParser server.
FROM nvidia/cuda:12.3.1-devel-ubuntu22.04

# System packages: git-lfs and wget are used by the download steps below.
# NOTE(review): libgl1 / libglib2.0-0 are presumably shared libraries needed
# by OpenCV at import time -- confirm before removing.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    git-lfs \
    wget \
    libgl1 \
    libglib2.0-0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install Miniconda under /opt/conda and put it on PATH.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    bash miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh
ENV PATH="/opt/conda/bin:$PATH"

# Create the "omni" environment. -y answers the confirmation prompt explicitly
# so the build never depends on how conda treats a closed stdin.
RUN conda create -y -n omni python=3.12 && \
    echo "source activate omni" > ~/.bashrc
ENV CONDA_DEFAULT_ENV=omni
# Putting the environment's bin directory first makes `python3` / `pip`
# resolve to the omni environment in later RUN steps and in CMD.
ENV PATH="/opt/conda/envs/omni/bin:$PATH"

WORKDIR /app

# Fetch the OmniParser sources, including any LFS-tracked files.
RUN git clone https://github.com/microsoft/OmniParser.git && \
    cd OmniParser && \
    git lfs install && \
    git lfs pull

WORKDIR /app/OmniParser

# Python dependencies. --no-cache-dir is used on every install so pip's
# download cache is not stored in the image layers.
RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
    pip uninstall -y opencv-python opencv-python-headless && \
    pip install --no-cache-dir opencv-python-headless==4.8.1.78 && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir huggingface_hub fastapi uvicorn

# Download V2 weights
# The caption weights are downloaded as "icon_caption" and then renamed to
# "icon_caption_florence", which is the path passed to the server in CMD.
RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
    mkdir -p /app/OmniParser/weights && \
    cd /app/OmniParser && \
    rm -rf weights/icon_detect weights/icon_caption weights/icon_caption_florence && \
    for folder in icon_caption icon_detect; do \
        huggingface-cli download microsoft/OmniParser-v2.0 --local-dir weights --repo-type model --include "$folder/*"; \
    done && \
    mv weights/icon_caption weights/icon_caption_florence

# Pre-download OCR models during build
# Instantiating the readers once here triggers their model downloads at build
# time instead of on the first request.
RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
    cd /app/OmniParser && \
    python3 -c "import easyocr; reader = easyocr.Reader(['en']); print('Downloaded EasyOCR model')" && \
    python3 -c "from paddleocr import PaddleOCR; ocr = PaddleOCR(lang='en', use_angle_cls=False, use_gpu=False, show_log=False); print('Downloaded PaddleOCR model')"

# Start the OmniParser server, listening on all interfaces on port 8000.
CMD ["python3", "/app/OmniParser/omnitool/omniparserserver/omniparserserver.py", \
    "--som_model_path", "/app/OmniParser/weights/icon_detect/model.pt", \
    "--caption_model_path", "/app/OmniParser/weights/icon_caption_florence", \
    "--device", "cuda", \
    "--BOX_TRESHOLD", "0.05", \
    "--host", "0.0.0.0", \
    "--port", "8000"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
"""Client module for interacting with the OmniParser server.""" | ||
|
||
import base64 | ||
import fire | ||
import requests | ||
|
||
from loguru import logger | ||
from PIL import Image, ImageDraw | ||
|
||
|
||
def image_to_base64(image_path: str) -> str:
    """Read a file from disk and return its contents as base64 text.

    Args:
        image_path: Location of the image file to encode.

    Returns:
        The file's raw bytes, base64-encoded and decoded as a UTF-8 string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
||
|
||
def plot_results(
    original_image_path: str,
    som_image_base64: str,
    parsed_content_list: list[dict],
) -> None:
    """Draw parsed bounding boxes and labels on the original image and show it.

    Args:
        original_image_path: Path to the original image.
        som_image_base64: Base64 encoded SOM image. Accepted to keep the
            call signature stable; it is not used by this function.
        parsed_content_list: Parsed items. Each item must provide ``"bbox"``
            as four values ``(x1, y1, x2, y2)``, which are multiplied by the
            image width/height to obtain pixel coordinates. ``"content"`` is
            used as the label when present.
    """
    # The context manager releases the underlying file once the annotated
    # image has been handed to the viewer.
    with Image.open(original_image_path) as image:
        width, height = image.size
        draw = ImageDraw.Draw(image)

        for item in parsed_content_list:
            # Scale the bbox values by the image size to get pixel coordinates.
            x1, y1, x2, y2 = item["bbox"]
            x1 = int(x1 * width)
            y1 = int(y1 * height)
            x2 = int(x2 * width)
            y2 = int(y2 * height)

            draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=2)

            # A missing or None content must not abort the whole plot; the
            # box is still drawn, only the label is skipped.
            content = item.get("content")
            label = "" if content is None else str(content)
            if not label:
                continue

            # White background behind the label, padded by 2 px on each side.
            text_bbox = draw.textbbox((x1, y1), label)
            draw.rectangle(
                [text_bbox[0] - 2, text_bbox[1] - 2, text_bbox[2] + 2, text_bbox[3] + 2],
                fill="white",
            )
            draw.text((x1, y1), label, fill="red")

        image.show()
|
||
|
||
def parse_image(
    image_path: str,
    server_url: str,
    timeout: float = 60.0,
) -> None:
    """Parse an image using the OmniParser server and plot the result.

    Sends a GET to ``{server_url}/probe/`` to check availability, then POSTs
    the base64-encoded image to ``{server_url}/parse/``. On success the
    parsed content is plotted and the reported latency is logged. Request
    and processing errors inside the request phase are logged, not raised.

    Args:
        image_path: Path to the image file
        server_url: URL of the OmniParser server
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout, an unresponsive server would block forever.
    """
    # Remove trailing slash from server_url if present
    server_url = server_url.rstrip("/")

    # Convert image to base64
    base64_image = image_to_base64(image_path)

    # Prepare request
    url = f"{server_url}/parse/"
    payload = {"base64_image": base64_image}

    try:
        # First, check if the server is available
        probe_url = f"{server_url}/probe/"
        probe_response = requests.get(probe_url, timeout=timeout)
        probe_response.raise_for_status()
        logger.info("Server is available")

        # Make request to API
        response = requests.post(url, json=payload, timeout=timeout)
        response.raise_for_status()

        # Parse response
        result = response.json()
        som_image_base64 = result["som_image_base64"]
        parsed_content_list = result["parsed_content_list"]

        # Plot results
        plot_results(image_path, som_image_base64, parsed_content_list)

        # Print latency
        logger.info(f"API Latency: {result['latency']:.2f} seconds")

    except requests.exceptions.ConnectionError:
        logger.error(f"Error: Could not connect to server at {server_url}")
        logger.error("Please check if the server is running and the URL is correct")
    except requests.exceptions.RequestException as e:
        # Also covers read timeouts and HTTP error statuses.
        logger.error(f"Error making request to API: {e}")
    except Exception as e:
        # Top-level CLI boundary: report anything else (e.g. a response
        # missing an expected key) instead of crashing with a traceback.
        logger.error(f"Error: {e}")
|
||
|
||
def main() -> None:
    """Expose ``parse_image`` as a command-line interface through Fire."""
    fire.Fire(component=parse_image)


# Run the CLI only when this module is executed as a script.
if __name__ == "__main__":
    main()
Oops, something went wrong.