🚧 WIP

Dmytro Parfeniuk committed Jul 10, 2024
1 parent 71600c1 commit faa88cc
Showing 48 changed files with 531 additions and 372 deletions.
8 changes: 8 additions & 0 deletions .env.example
@@ -0,0 +1,8 @@
## Docker configurations

# You can hardcode the platform for building vLLM locally, since it is supported
# only on the x86 CPU architecture. Building on an ARM CPU may cause issues
# without this setting.
# BUILDPLATFORM=linux/x86_64

# This environment variable defines which port will be available locally
DOCKER_VLLM_PORT_EXPOSE=8000
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -20,6 +20,8 @@ lint.select = ["E", "F", "W"]
max-line-length = 88

[tool.pytest.ini_options]
addopts = '-s -vvv --cache-clear'
asyncio_mode = 'auto'
python_classes = "DisableTestClasses"
markers = [
"smoke: quick tests to check basic functionality",
File renamed without changes.
Empty file added src/domain/__init__.py
Empty file.
File renamed without changes.
22 changes: 11 additions & 11 deletions src/guidellm/backend/base.py → src/domain/backend/base.py
@@ -1,14 +1,12 @@
import functools
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Generic, Iterator, List, Optional, Type, Union
from typing import Iterator, List, Optional, Type, Union

from loguru import logger

from guidellm.core.request import TextGenerationRequest
from guidellm.core.result import TextGenerationResult
from domain.core import TextGenerationRequest, TextGenerationResult

__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]

@@ -46,8 +44,8 @@ class Backend(ABC):

_registry = {}

@staticmethod
def register(backend_type: BackendEngine):
@classmethod
def register(cls, backend_type: BackendEngine):
"""
A decorator to register a backend class in the backend registry.
@@ -56,13 +54,13 @@ def register(backend_type: BackendEngine):
"""

def inner_wrapper(wrapped_class: Type["Backend"]):
Backend._registry[backend_type] = wrapped_class
cls._registry[backend_type] = wrapped_class
return wrapped_class

return inner_wrapper

@staticmethod
def create(backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
@classmethod
def create(cls, backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
"""
Factory method to create a backend based on the backend type.
@@ -76,11 +74,11 @@ def create(backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":

logger.info(f"Creating backend of type {backend_type}")

if backend_type not in Backend._registry:
if backend_type not in cls._registry:
logger.error(f"Unsupported backend type: {backend_type}")
raise ValueError(f"Unsupported backend type: {backend_type}")

return Backend._registry[backend_type](**kwargs)
return cls._registry[backend_type](**kwargs)

def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
"""
@@ -91,6 +89,7 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
:return: The populated result.
:rtype: TextGenerationResult
"""

logger.info(f"Submitting request with prompt: {request.prompt}")
result_id = str(uuid.uuid4())
result = TextGenerationResult(result_id)
@@ -108,6 +107,7 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
break

logger.info(f"Request completed with output: {result.output}")

return result

@abstractmethod
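As a usage note for the registry refactor above: a minimal sketch of how registration and creation are meant to interact (the `BackendEngine.TEST` member and the `EchoBackend` class are hypothetical; the abstract methods a concrete backend must implement are truncated in this diff).

@Backend.register(BackendEngine.TEST)  # hypothetical enum member
class EchoBackend(Backend):
    # A concrete backend must implement Backend's abstract methods
    # (truncated in this diff) before it can be instantiated.
    ...

# The factory looks the type up in cls._registry and instantiates it.
backend = Backend.create(BackendEngine.TEST)

Switching `register` and `create` from staticmethods to classmethods means subclasses that override the registry see their own `cls._registry` rather than always hitting `Backend._registry`.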
12 changes: 7 additions & 5 deletions src/guidellm/backend/openai.py → src/domain/backend/openai.py
@@ -3,12 +3,12 @@
from typing import Any, Dict, Iterable, Iterator, List, Optional

from loguru import logger
from openai import OpenAI, Stream
from openai import OpenAI
from openai.types import Completion
from transformers import AutoTokenizer

from guidellm.backend import Backend, BackendEngine, GenerativeResponse
from guidellm.core.request import TextGenerationRequest
from domain.backend import Backend, BackendEngine, GenerativeResponse
from domain.core import TextGenerationRequest

__all__ = ["OpenAIBackend"]

@@ -49,15 +49,17 @@ def __init__(

if not (_api_key := (openai_api_key or os.getenv("OPENAI_API_KEY", None))):
raise ValueError(
"`OPENAI_API_KEY` environment variable or --openai-api-key CLI parameter "
"`OPENAI_API_KEY` environment variable "
"or --openai-api-key CLI parameter "
"must be specify for the OpenAI backend"
)

if not (
_base_url := (internal_callback_url or os.getenv("OPENAI_BASE_URL", None))
):
raise ValueError(
"`OPENAI_BASE_URL` environment variable or --openai-base-url CLI parameter "
"`OPENAI_BASE_URL` environment variable "
"or --openai-base-url CLI parameter "
"must be specify for the OpenAI backend"
)
self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)
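A minimal sketch of the configuration the two checks above enforce (values are placeholders): both settings must be supplied, via environment variables, CLI parameters, or constructor arguments, otherwise `__init__` raises `ValueError`.

import os

os.environ["OPENAI_API_KEY"] = "sk-example"                 # placeholder key
os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"  # e.g. a local vLLM server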
File renamed without changes.
@@ -1,10 +1,8 @@
from typing import List, Union
from typing import List, Union, Optional

import numpy as np
from loguru import logger

__all__ = ["Distribution"]


class Distribution:
"""
Expand All @@ -16,7 +14,7 @@ class Distribution:
:type data: List[Union[int, float]], optional
"""

def __init__(self, data: List[Union[int, float]] = None):
def __init__(self, data: Optional[List[Union[int, float]]] = None):
"""
Initialize the Distribution with optional data.
2 changes: 0 additions & 2 deletions src/guidellm/core/request.py → src/domain/core/request.py
@@ -3,8 +3,6 @@

from loguru import logger

__all__ = ["TextGenerationRequest"]


class TextGenerationRequest:
"""
73 changes: 42 additions & 31 deletions src/guidellm/core/result.py → src/domain/core/result.py
@@ -4,16 +4,10 @@

from loguru import logger

from guidellm.core.distribution import Distribution
from guidellm.core.request import TextGenerationRequest
from domain.load_generator import LoadGenerationMode

__all__ = [
"TextGenerationResult",
"TextGenerationError",
"TextGenerationBenchmark",
"TextGenerationBenchmarkReport",
"RequestConcurrencyMeasurement",
]
from .distribution import Distribution
from .request import TextGenerationRequest


class TextGenerationResult:
@@ -70,7 +64,7 @@ def __str__(self) -> str:
f"end_time={self._end_time})"
)

def __eq__(self, other: "TextGenerationResult") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationResult instances.
@@ -79,15 +73,18 @@ def __eq__(self, other: "TextGenerationResult") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return (
self._request == other._request
and self._prompt == other._prompt
and self._output == other._output
and self._start_time == other._start_time
and self._end_time == other._end_time
and self._first_token_time == other._first_token_time
and self._decode_times == other._decode_times
)
if not isinstance(other, TextGenerationResult):
raise NotImplementedError("Only TextGenerationResult types are supported.")
else:
return (
self._request == other._request
and self._prompt == other._prompt
and self._output == other._output
and self._start_time == other._start_time
and self._end_time == other._end_time
and self._first_token_time == other._first_token_time
and self._decode_times == other._decode_times
)
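For comparison, the conventional Python pattern returns `NotImplemented` rather than raising, letting the interpreter fall back to the other operand's `__eq__`; a sketch, not part of this commit:

def __eq__(self, other) -> bool:
    if not isinstance(other, TextGenerationResult):
        return NotImplemented  # defer to the other operand instead of raising
    return self._request == other._request  # ...and the remaining fields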

@property
def request(self) -> TextGenerationRequest:
@@ -308,7 +305,7 @@ class RequestConcurrencyMeasurement:


class TextGenerationBenchmark:
def __init__(self, mode: str, rate: Optional[float]):
def __init__(self, mode: LoadGenerationMode, rate: Optional[float]):
"""
Initialize the TextGenerationBenchmark.
@@ -347,7 +344,7 @@ def __str__(self) -> str:
f"request_rate={self.request_rate})"
)

def __eq__(self, other: "TextGenerationBenchmark") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationBenchmark instances.
@@ -356,13 +353,18 @@ def __eq__(self, other: "TextGenerationBenchmark") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return (
self._mode == other._mode
and self._rate == other._rate
and self._results == other._results
and self._errors == other._errors
and self._concurrencies == other._concurrencies
)
if not isinstance(other, TextGenerationBenchmark):
raise NotImplementedError(
"Only TextGenerationBenchmark types are supported."
)
else:
return (
self._mode == other._mode
and self._rate == other._rate
and self._results == other._results
and self._errors == other._errors
and self._concurrencies == other._concurrencies
)

def __iter__(self):
"""
@@ -373,7 +375,7 @@ def __iter__(self):
return iter(self._results)

@property
def mode(self) -> str:
def mode(self) -> LoadGenerationMode:
"""
Get the mode of the result.
@@ -531,6 +533,9 @@ def __init__(self):

logger.debug("Initialized TextGenerationBenchmarkReport")

def __len__(self):
return len(self._benchmarks)

def __repr__(self) -> str:
return (
f"TextGenerationBenchmarkReport("
@@ -545,7 +550,7 @@ def __str__(self) -> str:
f"benchmarks_summary=[{', '.join(str(b) for b in self._benchmarks)}])"
)

def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationBenchmarkReport instances.
@@ -554,7 +559,13 @@ def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return self._benchmarks == other._benchmarks and self._args == other._args

if not isinstance(other, TextGenerationBenchmarkReport):
raise NotImplementedError(
"Only TextGenerationBenchmarkReport types are supported."
)
else:
return self._benchmarks == other._benchmarks and self._args == other._args

def __iter__(self):
return iter(self._benchmarks)
@@ -1,15 +1,15 @@
from .executor import Executor
from .profile_generator import (
Profile,
ProfileGenerationModes,
ProfileGenerationMode,
ProfileGenerator,
SingleProfileGenerator,
SweepProfileGenerator,
)

__all__ = [
"Executor",
"ProfileGenerationModes",
"ProfileGenerationMode",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
@@ -1,41 +1,34 @@
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, Optional

from guidellm.backend import Backend
from guidellm.core import TextGenerationBenchmarkReport
from guidellm.executor.profile_generator import ProfileGenerationModes, ProfileGenerator
from guidellm.request import RequestGenerator
from guidellm.scheduler.scheduler import Scheduler

__all__ = ["Executor"]
from domain.backend import Backend
from domain.core import TextGenerationBenchmarkReport
from domain.executor.profile_generator import ProfileGenerationMode, ProfileGenerator
from domain.request import RequestGenerator
from domain.scheduler.scheduler import Scheduler


class Executor:
def __init__(
self,
request_generator: RequestGenerator,
backend: Backend,
profile_mode: Union[str, ProfileGenerationModes] = "single",
request_generator: RequestGenerator,
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE,
profile_args: Optional[Dict[str, Any]] = None,
max_requests: Optional[int] = None,
max_duration: Optional[float] = None,
):
self.request_generator = request_generator
self.backend = backend
self.profile = ProfileGenerator.create_generator(
self.backend: Backend = backend
self.request_generator: RequestGenerator = request_generator
self.profile_generator: ProfileGenerator = ProfileGenerator.create(
profile_mode, **(profile_args or {})
)
self.max_requests = max_requests
self.max_duration = max_duration
self.max_requests: Optional[int] = max_requests
self.max_duration: Optional[float] = max_duration

def run(self) -> TextGenerationBenchmarkReport:
report = TextGenerationBenchmarkReport()

while True:
profile = self.profile.next_profile(report)

if profile is None:
break

for profile in self.profile_generator:
scheduler = Scheduler(
request_generator=self.request_generator,
backend=self.backend,
@@ -44,7 +37,6 @@ def run(self) -> TextGenerationBenchmarkReport:
max_requests=self.max_requests,
max_duration=self.max_duration,
)

benchmark = scheduler.run()
report.add_benchmark(benchmark)

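The loop above relies on `ProfileGenerator` being iterable, replacing the old explicit `next_profile(report)` calls. A hedged sketch of what that protocol could look like (`_generate` is a hypothetical helper; the real implementation is not part of this diff):

from typing import Iterator, Optional

class ProfileGenerator:
    def __iter__(self) -> Iterator["Profile"]:
        # Yield profiles until the concrete generator signals exhaustion.
        while (profile := self._generate()) is not None:
            yield profile

    def _generate(self) -> Optional["Profile"]:
        raise NotImplementedError  # supplied by concrete generators

Note that the iterator form no longer receives the in-progress report that `next_profile(report)` did, so sweep-style generators would need another way to observe intermediate results.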