Skip to content

Commit

Permalink
quality fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
anmarques committed Nov 4, 2024
1 parent 24e6527 commit 3946709
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 23 deletions.
7 changes: 4 additions & 3 deletions src/guidellm/backend/openai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import io
from typing import AsyncGenerator, Dict, List, Optional
import io, base64

from loguru import logger
from openai import AsyncOpenAI, OpenAI
Expand Down Expand Up @@ -182,7 +183,7 @@ def _build_messages(self, request: TextGenerationRequest) -> Dict:
image_url = {"url": f"data:image/{im_format.lower()};base64,{im_b64}"}
content.append({"type": "image_url", "image_url": image_url})

content.append({"type": "text", "text": request.prompt})
content.append({"type": "text", "text": request.prompt})
messages = [{"role": "user", "content": content}]

return messages
2 changes: 1 addition & 1 deletion src/guidellm/core/request.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
from typing import Any, Dict, Optional, List
from typing import Any, Dict, List, Optional

from pydantic import Field

Expand Down
8 changes: 4 additions & 4 deletions src/guidellm/request/emulated.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from guidellm.config import settings
from guidellm.core.request import TextGenerationRequest
from guidellm.request.base import GenerationMode, RequestGenerator
from guidellm.utils import clean_text, filter_text, load_text, split_text, load_images
from guidellm.utils import clean_text, filter_text, load_images, load_text, split_text

__all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"]

Expand Down Expand Up @@ -402,9 +402,9 @@ def sample_prompt(self, tokens: int) -> str:
right = mid

return self._tokens.create_text(start_line_index, left)


def sample_images(self):
    """
    Draw a random selection of the loaded images without repetition.

    The number of images drawn is taken from ``self._config.images``; the
    indices are picked by ``self._rng.choice`` with ``replace=False``, so no
    image appears twice in a single sample.

    NOTE(review): ``self._rng`` is presumably a NumPy random generator
    (the ``choice(n, size=..., replace=False)`` call matches that API), in
    which case requesting more images than are loaded raises ``ValueError``
    -- confirm against the constructor.

    :return: the selected entries of ``self._images``, in the order drawn.
    """
    sample_size = self._config.images
    picked = self._rng.choice(
        len(self._images),
        size=sample_size,
        replace=False,
    )

    return [self._images[idx] for idx in picked]
2 changes: 1 addition & 1 deletion src/guidellm/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .images import ImageDescriptor, load_images
from .injector import create_report, inject_data
from .progress import BenchmarkReportProgress
from .text import (
Expand All @@ -12,7 +13,6 @@
split_lines_by_punctuation,
split_text,
)
from .images import load_images, ImageDescriptor
from .transformers import (
load_transformers_dataset,
resolve_transformers_dataset,
Expand Down
27 changes: 13 additions & 14 deletions src/guidellm/utils/images.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from PIL import Image
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from pydantic import Field, ConfigDict
from typing import List, Optional
from io import BytesIO

from loguru import logger
from typing import List, Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from loguru import logger
from PIL import Image
from pydantic import ConfigDict, Field

from guidellm.config import settings
from guidellm.core.serializable import Serializable
Expand All @@ -19,14 +18,14 @@ class ImageDescriptor(Serializable):
A class to represent image data in serializable format.
"""
model_config = ConfigDict(arbitrary_types_allowed=True)

url: Optional[str] = Field(description="url address for image.")
image: Image.Image = Field(description="PIL image", exclude=True)
filename: Optional[int] = Field(
default=None,
description="Image filename.",
)


def load_images(data: str) -> List[ImageDescriptor]:
"""
Expand All @@ -45,25 +44,25 @@ def load_images(data: str) -> List[ImageDescriptor]:
response = requests.get(data, timeout=settings.request_timeout)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(response.text, "html.parser")
for img_tag in soup.find_all("img"):
img_url = img_tag.get("src")

if img_url:
# Handle relative URLs
img_url = urljoin(data, img_url)

# Download the image
logger.debug("Loading image: {}", img_url)
img_response = requests.get(img_url)
img_response.raise_for_status()

# Load image into Pillow
images.append(
ImageDescriptor(
url=img_url,
url=img_url,
image=Image.open(BytesIO(img_response.content)),
)
)

return images
return images

0 comments on commit 3946709

Please sign in to comment.