🚧 WIP

Dmytro Parfeniuk committed Jul 10, 2024
1 parent 71600c1 commit faa88cc
Showing 48 changed files with 531 additions and 372 deletions.
8 changes: 8 additions & 0 deletions .env.example
@@ -0,0 +1,8 @@
## Docker configurations

# You can hardcode the platform for building vLLM locally, since it is supported
# only on the x86 CPU architecture. Building on an ARM CPU may cause issues
# without this setting.
# BUILDPLATFORM=linux/x86_64

# This environment variable defines which port will be available locally
DOCKER_VLLM_PORT_EXPOSE=8000
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -20,6 +20,8 @@ lint.select = ["E", "F", "W"]
max-line-length = 88

[tool.pytest.ini_options]
addopts = '-s -vvv --cache-clear'
asyncio_mode = 'auto'
python_classes = "DisableTestClasses"
markers = [
"smoke: quick tests to check basic functionality",
File renamed without changes.
Empty file added src/domain/__init__.py
Empty file.
File renamed without changes.
22 changes: 11 additions & 11 deletions src/guidellm/backend/base.py → src/domain/backend/base.py
@@ -1,14 +1,12 @@
import functools
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Generic, Iterator, List, Optional, Type, Union
from typing import Iterator, List, Optional, Type, Union

from loguru import logger

from guidellm.core.request import TextGenerationRequest
from guidellm.core.result import TextGenerationResult
from domain.core import TextGenerationRequest, TextGenerationResult

__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]

@@ -46,8 +44,8 @@ class Backend(ABC):

_registry = {}

@staticmethod
def register(backend_type: BackendEngine):
@classmethod
def register(cls, backend_type: BackendEngine):
"""
A decorator to register a backend class in the backend registry.
@@ -56,13 +54,13 @@ def register(backend_type: BackendEngine):
"""

def inner_wrapper(wrapped_class: Type["Backend"]):
Backend._registry[backend_type] = wrapped_class
cls._registry[backend_type] = wrapped_class
return wrapped_class

return inner_wrapper

@staticmethod
def create(backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
@classmethod
def create(cls, backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
"""
Factory method to create a backend based on the backend type.
@@ -76,11 +74,11 @@ def create(backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":

logger.info(f"Creating backend of type {backend_type}")

if backend_type not in Backend._registry:
if backend_type not in cls._registry:
logger.error(f"Unsupported backend type: {backend_type}")
raise ValueError(f"Unsupported backend type: {backend_type}")

return Backend._registry[backend_type](**kwargs)
return cls._registry[backend_type](**kwargs)

def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
"""
@@ -91,6 +89,7 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
:return: The populated result.
:rtype: TextGenerationResult
"""

logger.info(f"Submitting request with prompt: {request.prompt}")
result_id = str(uuid.uuid4())
result = TextGenerationResult(result_id)
@@ -108,6 +107,7 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
break

logger.info(f"Request completed with output: {result.output}")

return result

@abstractmethod
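As a usage note for the registry refactor above: a minimal sketch of how registration and creation are meant to interact (the `BackendEngine.TEST` member and the `EchoBackend` class are hypothetical; the abstract methods a concrete backend must implement are truncated in this diff).

@Backend.register(BackendEngine.TEST)  # hypothetical enum member
class EchoBackend(Backend):
    # A concrete backend must implement Backend's abstract methods
    # (truncated in this diff) before it can be instantiated.
    ...

# The factory looks the type up in cls._registry and instantiates it.
backend = Backend.create(BackendEngine.TEST)

Switching `register` and `create` from staticmethods to classmethods means subclasses that override the registry see their own `cls._registry` rather than always hitting `Backend._registry`.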
12 changes: 7 additions & 5 deletions src/guidellm/backend/openai.py → src/domain/backend/openai.py
@@ -3,12 +3,12 @@
from typing import Any, Dict, Iterable, Iterator, List, Optional

from loguru import logger
from openai import OpenAI, Stream
from openai import OpenAI
from openai.types import Completion
from transformers import AutoTokenizer

from guidellm.backend import Backend, BackendEngine, GenerativeResponse
from guidellm.core.request import TextGenerationRequest
from domain.backend import Backend, BackendEngine, GenerativeResponse
from domain.core import TextGenerationRequest

__all__ = ["OpenAIBackend"]

@@ -49,15 +49,17 @@ def __init__(

if not (_api_key := (openai_api_key or os.getenv("OPENAI_API_KEY", None))):
raise ValueError(
"`OPENAI_API_KEY` environment variable or --openai-api-key CLI parameter "
"`OPENAI_API_KEY` environment variable "
"or --openai-api-key CLI parameter "
"must be specify for the OpenAI backend"
)

if not (
_base_url := (internal_callback_url or os.getenv("OPENAI_BASE_URL", None))
):
raise ValueError(
"`OPENAI_BASE_URL` environment variable or --openai-base-url CLI parameter "
"`OPENAI_BASE_URL` environment variable "
"or --openai-base-url CLI parameter "
"must be specify for the OpenAI backend"
)
self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)
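A minimal sketch of the configuration the two checks above enforce (values are placeholders): both settings must be supplied, via environment variables, CLI parameters, or constructor arguments, otherwise `__init__` raises `ValueError`.

import os

os.environ["OPENAI_API_KEY"] = "sk-example"                 # placeholder key
os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"  # e.g. a local vLLM server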
File renamed without changes.
@@ -1,10 +1,8 @@
from typing import List, Union
from typing import List, Union, Optional

import numpy as np
from loguru import logger

__all__ = ["Distribution"]


class Distribution:
"""
Expand All @@ -16,7 +14,7 @@ class Distribution:
:type data: List[Union[int, float]], optional
"""

def __init__(self, data: List[Union[int, float]] = None):
def __init__(self, data: Optional[List[Union[int, float]]] = None):
"""
Initialize the Distribution with optional data.
2 changes: 0 additions & 2 deletions src/guidellm/core/request.py → src/domain/core/request.py
@@ -3,8 +3,6 @@

from loguru import logger

__all__ = ["TextGenerationRequest"]


class TextGenerationRequest:
"""
73 changes: 42 additions & 31 deletions src/guidellm/core/result.py → src/domain/core/result.py
@@ -4,16 +4,10 @@

from loguru import logger

from guidellm.core.distribution import Distribution
from guidellm.core.request import TextGenerationRequest
from domain.load_generator import LoadGenerationMode

__all__ = [
"TextGenerationResult",
"TextGenerationError",
"TextGenerationBenchmark",
"TextGenerationBenchmarkReport",
"RequestConcurrencyMeasurement",
]
from .distribution import Distribution
from .request import TextGenerationRequest


class TextGenerationResult:
@@ -70,7 +64,7 @@ def __str__(self) -> str:
f"end_time={self._end_time})"
)

def __eq__(self, other: "TextGenerationResult") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationResult instances.
@@ -79,15 +73,18 @@ def __eq__(self, other: "TextGenerationResult") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return (
self._request == other._request
and self._prompt == other._prompt
and self._output == other._output
and self._start_time == other._start_time
and self._end_time == other._end_time
and self._first_token_time == other._first_token_time
and self._decode_times == other._decode_times
)
if not isinstance(other, TextGenerationResult):
raise NotImplementedError("Only TextGenerationResult types are supported.")
else:
return (
self._request == other._request
and self._prompt == other._prompt
and self._output == other._output
and self._start_time == other._start_time
and self._end_time == other._end_time
and self._first_token_time == other._first_token_time
and self._decode_times == other._decode_times
)
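For comparison, the conventional Python pattern returns `NotImplemented` rather than raising, letting the interpreter fall back to the other operand's `__eq__`; a sketch, not part of this commit:

def __eq__(self, other) -> bool:
    if not isinstance(other, TextGenerationResult):
        return NotImplemented  # defer to the other operand instead of raising
    return self._request == other._request  # ...and the remaining fields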

@property
def request(self) -> TextGenerationRequest:
@@ -308,7 +305,7 @@ class RequestConcurrencyMeasurement:


class TextGenerationBenchmark:
def __init__(self, mode: str, rate: Optional[float]):
def __init__(self, mode: LoadGenerationMode, rate: Optional[float]):
"""
Initialize the TextGenerationBenchmark.
@@ -347,7 +344,7 @@ def __str__(self) -> str:
f"request_rate={self.request_rate})"
)

def __eq__(self, other: "TextGenerationBenchmark") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationBenchmark instances.
@@ -356,13 +353,18 @@ def __eq__(self, other: "TextGenerationBenchmark") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return (
self._mode == other._mode
and self._rate == other._rate
and self._results == other._results
and self._errors == other._errors
and self._concurrencies == other._concurrencies
)
if not isinstance(other, TextGenerationBenchmark):
raise NotImplementedError(
"Only TextGenerationBenchmark types are supported."
)
else:
return (
self._mode == other._mode
and self._rate == other._rate
and self._results == other._results
and self._errors == other._errors
and self._concurrencies == other._concurrencies
)

def __iter__(self):
"""
@@ -373,7 +375,7 @@ def __iter__(self):
return iter(self._results)

@property
def mode(self) -> str:
def mode(self) -> LoadGenerationMode:
"""
Get the mode of the result.
@@ -531,6 +533,9 @@ def __init__(self):

logger.debug("Initialized TextGenerationBenchmarkReport")

def __len__(self):
return len(self._benchmarks)

def __repr__(self) -> str:
return (
f"TextGenerationBenchmarkReport("
@@ -545,7 +550,7 @@ def __str__(self) -> str:
f"benchmarks_summary=[{', '.join(str(b) for b in self._benchmarks)}])"
)

def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
def __eq__(self, other) -> bool:
"""
Check equality between two TextGenerationBenchmarkReport instances.
@@ -554,7 +559,13 @@ def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
:return: True if the instances are equal, False otherwise.
:rtype: bool
"""
return self._benchmarks == other._benchmarks and self._args == other._args

if not isinstance(other, TextGenerationBenchmarkReport):
raise NotImplementedError(
"Only TextGenerationBenchmarkReport types are supported."
)
else:
return self._benchmarks == other._benchmarks and self._args == other._args

def __iter__(self):
return iter(self._benchmarks)
@@ -1,15 +1,15 @@
from .executor import Executor
from .profile_generator import (
Profile,
ProfileGenerationModes,
ProfileGenerationMode,
ProfileGenerator,
SingleProfileGenerator,
SweepProfileGenerator,
)

__all__ = [
"Executor",
"ProfileGenerationModes",
"ProfileGenerationMode",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
@@ -1,41 +1,34 @@
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, Optional

from guidellm.backend import Backend
from guidellm.core import TextGenerationBenchmarkReport
from guidellm.executor.profile_generator import ProfileGenerationModes, ProfileGenerator
from guidellm.request import RequestGenerator
from guidellm.scheduler.scheduler import Scheduler

__all__ = ["Executor"]
from domain.backend import Backend
from domain.core import TextGenerationBenchmarkReport
from domain.executor.profile_generator import ProfileGenerationMode, ProfileGenerator
from domain.request import RequestGenerator
from domain.scheduler.scheduler import Scheduler


class Executor:
def __init__(
self,
request_generator: RequestGenerator,
backend: Backend,
profile_mode: Union[str, ProfileGenerationModes] = "single",
request_generator: RequestGenerator,
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE,
profile_args: Optional[Dict[str, Any]] = None,
max_requests: Optional[int] = None,
max_duration: Optional[float] = None,
):
self.request_generator = request_generator
self.backend = backend
self.profile = ProfileGenerator.create_generator(
self.backend: Backend = backend
self.request_generator: RequestGenerator = request_generator
self.profile_generator: ProfileGenerator = ProfileGenerator.create(
profile_mode, **(profile_args or {})
)
self.max_requests = max_requests
self.max_duration = max_duration
self.max_requests: Optional[int] = max_requests
self.max_duration: Optional[float] = max_duration

def run(self) -> TextGenerationBenchmarkReport:
report = TextGenerationBenchmarkReport()

while True:
profile = self.profile.next_profile(report)

if profile is None:
break

for profile in self.profile_generator:
scheduler = Scheduler(
request_generator=self.request_generator,
backend=self.backend,
@@ -44,7 +37,6 @@ def run(self) -> TextGenerationBenchmarkReport:
max_requests=self.max_requests,
max_duration=self.max_duration,
)

benchmark = scheduler.run()
report.add_benchmark(benchmark)

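The loop above relies on `ProfileGenerator` being iterable, replacing the old explicit `next_profile(report)` calls. A hedged sketch of what that protocol could look like (`_generate` is a hypothetical helper; the real implementation is not part of this diff):

from typing import Iterator, Optional

class ProfileGenerator:
    def __iter__(self) -> Iterator["Profile"]:
        # Yield profiles until the concrete generator signals exhaustion.
        while (profile := self._generate()) is not None:
            yield profile

    def _generate(self) -> Optional["Profile"]:
        raise NotImplementedError  # supplied by concrete generators

Note that the iterator form no longer receives the in-progress report that `next_profile(report)` did, so sweep-style generators would need another way to observe intermediate results.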