diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..8f97b1c --- /dev/null +++ b/.env.example @@ -0,0 +1,4 @@ +# OpenAI compatible server address. +# If you are going to run the +OPENAI_BASE_URL=http://127.0.0.1:8080 +OPENAI_API_KEY=invalid diff --git a/Makefile b/Makefile index 649e728..213f509 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,12 @@ install: python -m pip install -r requirements.txt + .PHONY: install.dev install.dev: python -m pip install -e .[dev] + .PHONY: build build: python setup.py sdist bdist_wheel @@ -15,8 +17,7 @@ build: quality: python -m ruff check src tests python -m isort --check src tests - python -m flake8 src tests --max-line-length 88 - python -m mypy src + python -m mypy .PHONY: style @@ -28,16 +29,19 @@ style: .PHONY: test test: - python -m pytest -s -vvv --cache-clear tests + python -m pytest tests + .PHONY: test.unit test.unit: python -m pytest tests/unit + .PHONY: test.integration test.integration: python -m pytest tests/integration + .PHONY: test.e2e test.e2e: python -m pytest tests/e2e diff --git a/README.md b/README.md index e7ec0b1..d41a8c7 100644 --- a/README.md +++ b/README.md @@ -1 +1,36 @@ -# guidellm \ No newline at end of file +# guidellm + +## Project configuration + +The project is configured with environment variables. Check the example in `.env.example`. + +```sh +# Create .env file and update the configuration +cp .env.example .env + +# Export all variables +set -o allexport; source .env; set +o allexport +``` + +## Environment Variables + +| Variable | Default Value | Description | | --------------- | --------------------- | ----------- | | OPENAI_BASE_URL | http://127.0.0.1:8080 | The base URL the `openai` library sends requests to. Integration tests require an external OpenAI-compatible server running at this address. | | OPENAI_API_KEY | invalid | The OpenAI API key; create one on the [OpenAI Platform](https://platform.openai.com/api-keys). This value is not used for tests. | + +## Running Tests + +The `pytest` package is used as the testing framework. All the tests are in the `tests/` folder. +The `pytest` configuration is in `pyproject.toml`. + +The `Makefile` includes all the necessary commands that can be run from the project root.
+ +```sh +# Using Makefile +make test # run all the tests +make test.integration # run only integration tests +make test.unit # run only unit tests +make test.e2e # run only E2E tests +``` + diff --git a/docs/README.md b/docs/README.md index 30404ce..e69de29 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1 +0,0 @@ -TODO \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 30404ce..1333ed7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1 +1 @@ -TODO \ No newline at end of file +TODO diff --git a/pyproject.toml b/pyproject.toml index 368610b..79bf4ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,30 +1,60 @@ [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +requires = ['setuptools', 'wheel'] +build-backend = 'setuptools.build_meta' + + [tool.black] line-length = 88 target-version = ['py38'] + + [tool.isort] -profile = "black" +profile = 'black' + -[tool.mypy] -files = "src/guidellm" [tool.ruff] -exclude = ["build", "dist", "env", ".venv"] +exclude = ['build', 'dist', 'env', '.venv'] lint.select = ["E", "F", "W"] +line-length = 88 + -[tool.flake8] -max-line-length = 88 [tool.pytest.ini_options] addopts = '-s -vvv --cache-clear' asyncio_mode = 'auto' markers = [ - "smoke: quick tests to check basic functionality", - "sanity: detailed tests to ensure major functions work correctly", - "regression: tests to ensure that new changes do not break existing functionality" + 'smoke: quick tests to check basic functionality', + 'sanity: detailed tests to ensure major functions work correctly', + 'regression: tests to ensure that new changes do not break existing functionality' +] +filterwarnings = [ + 'ignore::RuntimeWarning', + 'ignore::UserWarning', + 'ignore::DeprecationWarning', ] + + +[tool.mypy] +python_version = '3.8' +files = 'src/guidellm' +show_error_codes = true +namespace_packages = false +check_untyped_defs = true + +warn_redundant_casts = true +warn_unused_ignores = true + +# Silence "type import errors" as our 3rd-party libs do not have types +# Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery +follow_imports = 'silent' + +[[tool.mypy.overrides]] +module = ['transformers.*'] +ignore_missing_imports = true + + + diff --git a/setup.py b/setup.py index fa87232..112c885 100644 --- a/setup.py +++ b/setup.py @@ -1,50 +1,51 @@ -from setuptools import setup, find_packages from typing import Tuple +from setuptools import find_packages, setup + def _setup_long_description() -> Tuple[str, str]: return open("README.md", "r", encoding="utf-8").read(), "text/markdown" setup( - name='guidellm', - version='0.1.0', - author='Neuralmagic, Inc.', - description='Guidance platform for deploying and managing large language models.', + name="guidellm", + version="0.1.0", + author="Neuralmagic, Inc.", + description="Guidance platform for deploying and managing large language models.", long_description=_setup_long_description()[0], long_description_content_type=_setup_long_description()[1], license="Apache", url="https://github.com/neuralmagic/guidellm", - packages=find_packages(where='src'), - package_dir={'': 'src'}, + packages=find_packages(where="src"), + package_dir={"": "src"}, include_package_data=True, install_requires=[ - 'click', - 'datasets', - 'loguru', - 'numpy', - 'openai', - 'requests', - 'transformers', + "click", + "datasets", + "loguru", + "numpy", + "openai", + "requests", + "transformers", ], extras_require={ - 'dev': [ - 'pytest', - 'sphinx', - 'ruff', - 'mypy', - 'black', - 'isort',
- 'flake8', - 'pre-commit', + "dev": [ + "black", + "flake8", + "isort", + "mypy", + "pre-commit", + "pytest", + "ruff", + "sphinx", ], }, entry_points={ - 'console_scripts': [ - 'guidellm=guidellm.main:main', + "console_scripts": [ + "guidellm=guidellm.main:main", ], }, - python_requires=">=3.8.0", + python_requires=">=3.8.0,<4.0", classifiers=[ "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index cc5c740..ba6fe69 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,9 +1,9 @@ -from .base import Backend, BackendTypes, GenerativeResponse +from .base import Backend, BackendEngine, GenerativeResponse from .openai import OpenAIBackend __all__ = [ "Backend", - "BackendTypes", + "BackendEngine", "GenerativeResponse", "OpenAIBackend", ] diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 22aab80..cef19cb 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -1,4 +1,3 @@ -import uuid from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum @@ -6,13 +5,19 @@ from loguru import logger -from guidellm.core.request import TextGenerationRequest -from guidellm.core.result import TextGenerationResult +from guidellm.core import TextGenerationRequest, TextGenerationResult -__all__ = ["Backend", "BackendTypes", "GenerativeResponse"] +__all__ = ["Backend", "BackendEngine", "GenerativeResponse"] -class BackendTypes(Enum): +class BackendEngine(str, Enum): + """ + Determines the engine of the LLM backend. + Every backend implemented in the project is registered under an engine. + + NOTE: the `TEST` engine must be used only for testing purposes. + """ + TEST = "test" OPENAI_SERVER = "openai_server" @@ -33,43 +38,46 @@ class GenerativeResponse: class Backend(ABC): """ - An abstract base class for generative AI backends. + An abstract base class with template methods for generative AI backends. """ _registry = {} - @staticmethod - def register_backend(backend_type: BackendTypes): + @classmethod + def register(cls, backend_type: BackendEngine): """ A decorator to register a backend class in the backend registry. :param backend_type: The type of backend to register. - :type backend_type: BackendTypes + :type backend_type: BackendEngine """ def inner_wrapper(wrapped_class: Type["Backend"]): - Backend._registry[backend_type] = wrapped_class + cls._registry[backend_type] = wrapped_class return wrapped_class return inner_wrapper - @staticmethod - def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend": + @classmethod + def create(cls, backend_type: Union[str, BackendEngine], **kwargs) -> "Backend": """ Factory method to create a backend based on the backend type. :param backend_type: The type of backend to create. - :type backend_type: BackendTypes + :type backend_type: BackendEngine :param kwargs: Additional arguments for backend initialization. :type kwargs: dict :return: An instance of a subclass of Backend.
:rtype: Backend """ + logger.info(f"Creating backend of type {backend_type}") - if backend_type not in Backend._registry: + + if backend_type not in cls._registry: logger.error(f"Unsupported backend type: {backend_type}") raise ValueError(f"Unsupported backend type: {backend_type}") - return Backend._registry[backend_type](**kwargs) + + return cls._registry[backend_type](**kwargs) def submit(self, request: TextGenerationRequest) -> TextGenerationResult: """ @@ -80,23 +88,23 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult: :return: The populated result result. :rtype: TextGenerationResult """ + logger.info(f"Submitting request with prompt: {request.prompt}") - result_id = str(uuid.uuid4()) - result = TextGenerationResult(result_id) + + result = TextGenerationResult(request=request) result.start(request.prompt) - for response in self.make_request(request): + for response in self.make_request(request): # GenerativeResponse if response.type_ == "token_iter" and response.add_token: result.output_token(response.add_token) elif response.type_ == "final": result.end( - response.output, response.prompt_token_count, response.output_token_count, ) - break logger.info(f"Request completed with output: {result.output}") + return result @abstractmethod @@ -111,7 +119,8 @@ def make_request( :return: An iterator over the generative responses. :rtype: Iterator[GenerativeResponse] """ - raise NotImplementedError() + + pass @abstractmethod def available_models(self) -> List[str]: @@ -121,8 +130,10 @@ def available_models(self) -> List[str]: :return: A list of available models. :rtype: List[str] """ - raise NotImplementedError() + pass + + @property @abstractmethod def default_model(self) -> str: """ @@ -131,7 +142,8 @@ def default_model(self) -> str: :return: The default model. :rtype: str """ - raise NotImplementedError() + + pass @abstractmethod def model_tokenizer(self, model: str) -> Optional[str]: @@ -143,4 +155,5 @@ def model_tokenizer(self, model: str) -> Optional[str]: :return: The tokenizer for the model, or None if it cannot be created. :rtype: Optional[str] """ - raise NotImplementedError() + + pass diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index ce9f6c2..a55b408 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -1,16 +1,19 @@ -from typing import Any, Iterator, List, Optional +import functools +import os +from typing import Any, Dict, Iterator, List, Optional -import openai from loguru import logger +from openai import OpenAI, Stream +from openai.types import Completion from transformers import AutoTokenizer -from guidellm.backend import Backend, BackendTypes, GenerativeResponse -from guidellm.core.request import TextGenerationRequest +from guidellm.backend import Backend, BackendEngine, GenerativeResponse +from guidellm.core import TextGenerationRequest __all__ = ["OpenAIBackend"] -@Backend.register_backend(BackendTypes.OPENAI_SERVER) +@Backend.register(BackendEngine.OPENAI_SERVER) class OpenAIBackend(Backend): """ An OpenAI backend implementation for the generative AI result. 
@@ -33,34 +36,37 @@ class OpenAIBackend(Backend): def __init__( self, - target: Optional[str] = None, - host: Optional[str] = None, - port: Optional[int] = None, - path: Optional[str] = None, + openai_api_key: Optional[str] = None, + internal_callback_url: Optional[str] = None, model: Optional[str] = None, - api_key: Optional[str] = None, - **request_args, + **request_args: Any, ): - self.target = target - self.model = model - self.request_args = request_args - - if not self.target: - if not host: - raise ValueError("Host is required if target is not provided.") - - port_incl = f":{port}" if port else "" - path_incl = path if path else "" - self.target = f"http://{host}{port_incl}{path_incl}" + """ + Initialize an OpenAI Client + """ - openai.api_base = self.target - openai.api_key = api_key + self.request_args = request_args - if not model: - self.model = self.default_model() + if not (_api_key := (openai_api_key or os.getenv("OPENAI_API_KEY", None))): + raise ValueError( + "`OPENAI_API_KEY` environment variable " + "or --openai-api-key CLI parameter " + "must be specified for the OpenAI backend" + ) + + if not ( + _base_url := (internal_callback_url or os.getenv("OPENAI_BASE_URL", None)) + ): + raise ValueError( + "`OPENAI_BASE_URL` environment variable " + "or --openai-base-url CLI parameter " + "must be specified for the OpenAI backend" + ) + self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url) + self.model = model or self.default_model logger.info( - f"Initialized OpenAIBackend with target: {self.target} " + f"Initialized OpenAIBackend with callback url: {internal_callback_url} " f"and model: {self.model}" ) @@ -75,52 +81,46 @@ def make_request( :return: An iterator over the generative responses. :rtype: Iterator[GenerativeResponse] """ + logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}") - num_gen_tokens = request.params.get("generated_tokens", None) - request_args = { - "n": 1, - } - if num_gen_tokens: - request_args["max_tokens"] = num_gen_tokens - request_args["stop"] = None + # How many completions to generate for each prompt + request_args: Dict = {"n": 1} + + if (num_gen_tokens := request.params.get("generated_tokens", None)) is not None: + request_args.update(max_tokens=num_gen_tokens, stop=None) if self.request_args: request_args.update(self.request_args) - response = openai.Completion.create( - engine=self.model, + response: Stream[Completion] = self.openai_client.completions.create( + model=self.model, prompt=request.prompt, stream=True, **request_args, ) for chunk in response: - if chunk.get("choices"): - choice = chunk["choices"][0] - if choice.get("finish_reason") == "stop": - logger.debug("Received final response from OpenAI backend") - yield GenerativeResponse( - type_="final", - output=choice["text"], - prompt=request.prompt, - prompt_token_count=( - request.token_count - if request.token_count - else self._token_count(request.prompt) - ), - output_token_count=( - num_gen_tokens - if num_gen_tokens - else self._token_count(choice["text"]) - ), - ) - break - else: - logger.debug("Received token from OpenAI backend") - yield GenerativeResponse( - type_="token_iter", add_token=choice["text"] - ) + chunk_content: str = getattr(chunk, "content", "") + + if getattr(chunk, "stop", True) is True: + logger.debug("Received final response from OpenAI backend") + + yield GenerativeResponse( + type_="final", + prompt=getattr(chunk, "prompt", request.prompt), + prompt_token_count=( + request.prompt_token_count or
self._token_count(request.prompt) + ), + output_token_count=( + num_gen_tokens + if num_gen_tokens + else self._token_count(chunk_content) + ), + ) + else: + logger.debug("Received token from OpenAI backend") + yield GenerativeResponse(type_="token_iter", add_token=chunk_content) def available_models(self) -> List[str]: """ @@ -129,10 +129,16 @@ def available_models(self) -> List[str]: :return: A list of available models. :rtype: List[str] """ - models = [model["id"] for model in openai.Engine.list()["data"]] + + models: list[str] = [ + model.id for model in self.openai_client.models.list().data + ] logger.info(f"Available models: {models}") + return models + @property + @functools.lru_cache(maxsize=1) def default_model(self) -> str: """ Get the default model for the backend. @@ -140,10 +146,11 @@ def default_model(self) -> str: :return: The default model. :rtype: str """ - models = self.available_models() - if models: + + if models := self.available_models(): logger.info(f"Default model: {models[0]}") return models[0] + logger.error("No models available.") raise ValueError("No models available.") diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 545c815..92c3bc0 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,11 +1,11 @@ import uuid -from typing import Any, Dict, Optional +from dataclasses import dataclass, field +from typing import Dict, Optional from loguru import logger -__all__ = ["TextGenerationRequest"] - +@dataclass(frozen=True) class TextGenerationRequest: """ A class to represent a text generation request for generative AI workloads. @@ -21,37 +21,21 @@ class TextGenerationRequest: :type params: Optional[Dict[str, Any]] """ - def __init__( - self, - prompt: str, - prompt_token_count: Optional[int] = None, - generated_token_count: Optional[int] = None, - params: Optional[Dict[str, Any]] = None, - ): - """ - Initialize the TextGenerationRequest with a prompt and optional parameters. + prompt: str + id: uuid.UUID = field(default_factory=uuid.uuid4) + prompt_token_count: Optional[int] = None + generated_token_count: Optional[int] = None + params: Dict = field(default_factory=dict) - :param prompt: The input prompt for the text generation request. - :type prompt: str - :param prompt_token_count: The number of tokens in the prompt, defaults to None. - :type prompt_token_count: Optional[int] - :param generated_token_count: The number of tokens to generate, - defaults to None. - :type generated_token_count: Optional[int] - :param params: Optional parameters for the text generation request, - defaults to None. - :type params: Optional[Dict[str, Any]] + def __post_init__(self) -> None: + """ + Log the initialization of the TextGenerationRequest instance. 
""" - self._id = str(uuid.uuid4()) - self._prompt = prompt - self._prompt_token_count = prompt_token_count - self._generated_token_count = generated_token_count - self._params = params or {} logger.debug( - f"Initialized TextGenerationRequest with id={self._id}, " - f"prompt={prompt}, prompt_token_count={prompt_token_count}, " - f"generated_token_count={generated_token_count}, params={params}" + f"Initialized TextGenerationRequest with id={self.id}, " + f"prompt={self.prompt}, prompt_token_count={self.prompt_token_count}, " + f"generated_token_count={self.generated_token_count}, params={self.params}" ) def __repr__(self) -> str: @@ -63,59 +47,9 @@ def __repr__(self) -> str: """ return ( f"TextGenerationRequest(" - f"id={self._id}, " - f"prompt={self._prompt}, " - f"prompt_token_count={self._prompt_token_count}, " - f"generated_token_count={self._generated_token_count}, " - f"params={self._params})" + f"id={self.id}, " + f"prompt={self.prompt}, " + f"prompt_token_count={self.prompt_token_count}, " + f"generated_token_count={self.generated_token_count}, " + f"params={self.params})" ) - - @property - def id(self) -> str: - """ - Get the unique identifier for the text generation request. - - :return: The unique identifier. - :rtype: str - """ - return self._id - - @property - def prompt(self) -> str: - """ - Get the input prompt for the text generation request. - - :return: The input prompt. - :rtype: str - """ - return self._prompt - - @property - def prompt_token_count(self) -> Optional[int]: - """ - Get the number of tokens in the prompt for the text generation request. - - :return: The number of tokens in the prompt. - :rtype: Optional[int] - """ - return self._prompt_token_count - - @property - def generated_token_count(self) -> Optional[int]: - """ - Get the number of tokens to generate for the text generation request. - - :return: The number of tokens to generate. - :rtype: Optional[int] - """ - return self._generated_token_count - - @property - def params(self) -> Dict[str, Any]: - """ - Get the optional parameters for the text generation request. - - :return: The optional parameters. - :rtype: Dict[str, Any] - """ - return self._params diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index a72a03b..45597ae 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -70,7 +70,7 @@ def __str__(self) -> str: f"end_time={self._end_time})" ) - def __eq__(self, other: "TextGenerationResult") -> bool: + def __eq__(self, other: object) -> bool: """ Check equality between two TextGenerationResult instances. @@ -79,6 +79,12 @@ def __eq__(self, other: "TextGenerationResult") -> bool: :return: True if the instances are equal, False otherwise. :rtype: bool """ + + if not isinstance(other, "TextGenerationResult"): + raise NotImplementedError( + "Only TextGenerationResult type could be used in that operation" + ) + return ( self._request == other._request and self._prompt == other._prompt @@ -198,7 +204,6 @@ def output_token(self, token: str): def end( self, - output: str, prompt_token_count: Optional[int] = None, output_token_count: Optional[int] = None, ): @@ -214,21 +219,13 @@ def end( defaults to word count. 
:type output_token_count: Optional[int] """ - self._output = output + self._end_time = time() - self._output_word_count = len(output.split()) - self._output_token_count = ( - output_token_count - if output_token_count is not None - else self._output_word_count - ) - self._prompt_token_count = ( - prompt_token_count - if prompt_token_count is not None - else self._prompt_word_count - ) + self._output_word_count = len(self.output.split()) + self._output_token_count = output_token_count or self._output_word_count + self._prompt_token_count = prompt_token_count or self._prompt_word_count - logger.info(f"Text generation ended with output: '{output}'") + logger.info(f"Text generation ended with output: '{self.output}'") class TextGenerationError: diff --git a/src/guidellm/main.py b/src/guidellm/main.py index adf0aa9..e6ddc65 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -78,7 +78,7 @@ def main( num_requests, ): # Create backend - Backend.create_backend( + Backend.create( backend_type=backend, target=target, host=host, diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py index 3e84819..9583518 100644 --- a/src/guidellm/request/base.py +++ b/src/guidellm/request/base.py @@ -1,3 +1,4 @@ +import contextlib import threading import time from abc import ABC, abstractmethod @@ -8,9 +9,6 @@ from transformers import AutoTokenizer, PreTrainedTokenizer from guidellm.core.request import TextGenerationRequest -from guidellm.utils import STANDARD_SLEEP_INTERVAL - -__all__ = ["RequestGenerator"] class RequestGenerator(ABC): @@ -127,7 +125,6 @@ def create_item(self) -> TextGenerationRequest: :return: A new result request. :rtype: TextGenerationRequest """ - raise NotImplementedError() def stop(self): """ @@ -144,16 +141,15 @@ def _populate_queue(self): Populate the request queue in the background. """ while not self._stop_event.is_set(): - try: + with contextlib.suppress(Full): if self._queue.qsize() < self._async_queue_size: item = self.create_item() - self._queue.put(item, timeout=STANDARD_SLEEP_INTERVAL) + self._queue.put(item, timeout=0.1) logger.debug( "Item added to queue. Current queue size: {}", self._queue.qsize(), ) else: - time.sleep(STANDARD_SLEEP_INTERVAL) - except Full: - continue + time.sleep(0.1) + logger.info("RequestGenerator stopped populating queue") diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c6630d3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,33 @@ +import os +from typing import Callable, Optional + +import pytest +from loguru import logger + +from guidellm.backend import Backend, BackendEngine, OpenAIBackend + + +def pytest_configure() -> None: + logger.disable("guidellm") + + +@pytest.fixture +def openai_backend_factory() -> Callable[..., OpenAIBackend]: + """ + OpenAI Backend factory method. + Calling it without arguments returns the default Backend service. + """ + + def inner_wrapper(*_, base_url: Optional[str] = None, **kwargs) -> OpenAIBackend: + defaults = { + "backend_type": BackendEngine.OPENAI_SERVER, + "openai_api_key": "required but not used", + "internal_callback_url": base_url + or os.getenv("OPENAI_BASE_URL", "http://localhost:8080"), + } + + defaults.update(kwargs) + + return Backend.create(**defaults) + + return inner_wrapper diff --git a/tests/dummy/__init__.py b/tests/dummy/__init__.py new file mode 100644 index 0000000..350cfdb --- /dev/null +++ b/tests/dummy/__init__.py @@ -0,0 +1,10 @@ +""" +The tests.dummy package represents dummy data factories and test services.
+tests.dummy.data.OpenAIModel - openai.Model test factory +tests.dummy.data.OpenAICompletion - openai.Completion test factory +tests.dummy.data.OpenAICompletionChoice - openai.CompletionChoice test factory +tests.dummy.services.TestRequestGenerator - RequestGenerator that is used + for testing purposes +""" + +from . import data, services # noqa: F401 diff --git a/tests/dummy/data/__init__.py b/tests/dummy/data/__init__.py new file mode 100644 index 0000000..95a2c94 --- /dev/null +++ b/tests/dummy/data/__init__.py @@ -0,0 +1,3 @@ +from .openai import openai_completion_factory, openai_model_factory + +__all__ = ["openai_completion_factory", "openai_model_factory"] diff --git a/tests/dummy/data/openai.py b/tests/dummy/data/openai.py new file mode 100644 index 0000000..1e45d8a --- /dev/null +++ b/tests/dummy/data/openai.py @@ -0,0 +1,53 @@ +""" +This module includes data model factories for the third-party `openai` package. +""" + +import random +import string +import time +import uuid +from typing import Generator + +from openai.types import Completion, Model + + +def words(n: int = 1) -> Generator[str, None, None]: + for _ in range(n): + yield "".join( + (random.choice(string.ascii_letters) for _ in range(random.randint(3, 10))) + ) + + +def openai_completion_factory( + n: int = 3, **kwargs ) -> Generator[Completion, None, None]: + """ + The factory that yields openai Completion instances. + """ + + for i in range(1, n + 1): + payload = { + "id": str(uuid.uuid4()), + "choices": [], + "stop": False if i < n else True, + "content": " ".join(words(random.randint(3, 10))) if i < n else "", + "object": "text_completion", + "model": "mock-model", + "created": int(time.time()), + } + payload.update(kwargs) + + yield Completion(**payload) + + +def openai_model_factory(n: int = 3) -> Generator[Model, None, None]: + """ + The factory that yields random openai Model instances. + """ + for _ in range(n): + yield Model( + id=str(uuid.uuid4()), + created=int(time.time()), + object="model", + owned_by="neuralmagic", + ) diff --git a/tests/dummy/services/__init__.py b/tests/dummy/services/__init__.py new file mode 100644 index 0000000..8c63c5c --- /dev/null +++ b/tests/dummy/services/__init__.py @@ -0,0 +1,5 @@ +from .requests import TestRequestGenerator + +__all__ = [ + "TestRequestGenerator", +] diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py new file mode 100644 index 0000000..3bb8152 --- /dev/null +++ b/tests/dummy/services/requests.py @@ -0,0 +1,12 @@ +from guidellm.core import TextGenerationRequest +from guidellm.request import RequestGenerator + + +class TestRequestGenerator(RequestGenerator): + """ + This class represents the Testing Request Generator. + Its purpose is to be used for testing. + """ + + def create_item(self) -> TextGenerationRequest: + return TextGenerationRequest(prompt="Test prompt") diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index e69de29..c43cd8d 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -0,0 +1,7 @@ +""" +Integration tests verify the functionality of two or more dedicated modules working together. +In our case, some integration tests may hit external infrastructure components. + +For example, the backend integration test may hit +a mocked OpenAI-compatible server for better testing.
+""" diff --git a/tests/integration/backend/test_openai_backend_submit.py b/tests/integration/backend/test_openai_backend_submit.py new file mode 100644 index 0000000..bb164a4 --- /dev/null +++ b/tests/integration/backend/test_openai_backend_submit.py @@ -0,0 +1,69 @@ +import os +import time +from typing import Callable + +import pytest +import requests +from openai.pagination import SyncPage +from openai.types import Model + +from guidellm.backend import OpenAIBackend +from guidellm.core import TextGenerationRequest, TextGenerationResult + + +@pytest.fixture(scope="session", autouse=True) +def openai_server_healthcheck(): + """ + Check if the openai server is running + """ + + if not (openai_server := os.getenv("OPENAI_BASE_URL", None)): + raise ValueError( + "Integration backend tests can't be run without OPENAI_BASE_URL specified" + ) + + try: + requests.get(openai_server) + except requests.ConnectionError: + raise SystemExit( + "Integration backend tests can't be run without " + f"OpenAI compatible server running. Please check the {openai_server}" + ) + + +@pytest.mark.integration +def test_openai_submit_request( + mocker, openai_backend_factory: Callable[..., OpenAIBackend] +): + """ + Check the OpenAI making request and checking the results. + + Check if the total time that is stored in the TextGenerationResult corresponds + to the real execution time + """ + + openai_resources_models_list_patch = mocker.patch( + "openai.resources.models.Models.list", + return_value=SyncPage( + object="list", + data=[ + Model( + id="d69244a8-3f30-4f08-a432-8c83d5f254ad", + created=1719814049, + object="model", + owned_by="guidellm", + ) + ], + ), + ) + backend: OpenAIBackend = openai_backend_factory() + request = TextGenerationRequest(prompt="Say this is a test") + + start_time = time.perf_counter() + result: TextGenerationResult = backend.submit(request=request) + total_for_submit = time.perf_counter() - start_time + + assert result.start_time is not None + assert result.end_time is not None + assert openai_resources_models_list_patch.call_count == 1 + assert abs((result.end_time - result.start_time) - total_for_submit) < 1 diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py new file mode 100644 index 0000000..1c3e12f --- /dev/null +++ b/tests/unit/backend/test_base.py @@ -0,0 +1,44 @@ +from typing import Iterator, List, Optional + +import pytest + +from guidellm.backend import Backend, BackendEngine, GenerativeResponse, OpenAIBackend +from guidellm.core import TextGenerationRequest + + +@Backend.register(backend_type=BackendEngine.TEST) +class TestBackend(Backend): + """ + The test implementation of a LLM Backend. + """ + + def __init__(self, target: str, model: str = "test"): + self.target: str = target + self.model: str = model + + def make_request( + self, request: TextGenerationRequest + ) -> Iterator[GenerativeResponse]: + raise NotImplementedError + + def available_models(self) -> List[str]: + raise NotImplementedError + + @property + def default_model(self) -> str: + raise NotImplementedError + + def model_tokenizer(self, model: str) -> Optional[str]: + raise NotImplementedError + + +@pytest.mark.smoke +def test_backend_registry(): + """ + Ensure that all registered classes exist in the Backend._registry. 
+ """ + + assert Backend._registry == { + BackendEngine.TEST: TestBackend, + BackendEngine.OPENAI_SERVER: OpenAIBackend, + } diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py new file mode 100644 index 0000000..e75297b --- /dev/null +++ b/tests/unit/backend/test_openai_backend.py @@ -0,0 +1,101 @@ +""" +This module includes unit tests for the OpenAI Backend Service. +""" + +from typing import Callable, List, Optional + +import pytest +from openai.types import Completion + +from guidellm.backend import Backend, BackendEngine, OpenAIBackend +from guidellm.core import TextGenerationRequest +from tests.dummy.services import TestRequestGenerator + + +@pytest.mark.smoke +def test_openai_backend_creation_with_default_model(openai_backend_factory: Callable): + """ + Test whether the OpenAI Backend service is created correctly + with all default parameters. + Also checks whether the `default_models` parameter does not abuse the OpenAI API. + """ + + backend_service = openai_backend_factory() + assert isinstance(backend_service, OpenAIBackend) + assert backend_service.default_model == backend_service.available_models()[0] + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "extra_kwargs", + [ + {"openai_api_key": "dummy"}, + {"internal_callback_url": "dummy"}, + ], +) +def test_openai_backend_creation_required_arguments(mocker, extra_kwargs: dict): + """ + Both OpenAI key & internal callback URL are required to work with OpenAI Backend. + """ + + # Ignore environment variables + mocker.patch("os.getenv", return_value=None) + + with pytest.raises(ValueError): + Backend.create( + backend_type=BackendEngine.OPENAI_SERVER, + **extra_kwargs, + ) + + +@pytest.mark.smoke +def test_model_tokenizer(openai_backend_factory): + backend_service = openai_backend_factory() + assert backend_service.model_tokenizer("bert-base-uncased") + + +@pytest.mark.smoke +def test_model_tokenizer_no_model(openai_backend_factory): + backend_service = openai_backend_factory() + tokenizer = backend_service.model_tokenizer("invalid") + assert tokenizer is None + + +@pytest.mark.smoke +def test_make_request( + openai_backend_factory, openai_completion_create_patch: List[Completion] +): + """ + Test `OpenAIBackend.make_request()` workflow. 
+ + Notes: + * The output token count is not used without the `TextGenerationResult.start()` + and `TextGenerationResult.end()` + """ + + request: TextGenerationRequest = TestRequestGenerator().create_item() + backend_service: OpenAIBackend = openai_backend_factory() + total_generative_responses = 0 + + for generative_response, patched_completion in zip( + backend_service.make_request(request=request), + openai_completion_create_patch, + ): + + total_generative_responses += 1 + expected_token: Optional[str] = getattr(patched_completion, "content") or None + + assert generative_response.add_token == expected_token + assert generative_response.type_ == ( + "final" + if getattr(patched_completion, "stop") is True + else "token_iter" + ) + if expected_token is not None: + assert generative_response.prompt_token_count is None + assert generative_response.output_token_count is None + else: + assert generative_response.prompt_token_count == 2 + assert generative_response.output_token_count == 0 + + assert total_generative_responses == 3 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..3f53aa4 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,36 @@ +from typing import List, cast + +import openai +import pytest +from openai.pagination import SyncPage + +from tests import dummy + + +@pytest.fixture(autouse=True) +def openai_completion_create_patch(mocker) -> openai.Stream[openai.types.Completion]: + """ + Mock the completions create call to avoid a real OpenAI API request. + """ + + items = [item for item in dummy.data.openai_completion_factory()] + mocker.patch("openai.resources.completions.Completions.create", return_value=items) + + return cast(openai.Stream[openai.types.Completion], items) + + +@pytest.fixture(autouse=True) +def openai_models_list_patch(mocker) -> List[openai.types.Model]: + """ + Mock available models function to avoid OpenAI API call.
+ """ + + items: list[openai.types.Model] = [ + item for item in dummy.data.openai_model_factory() + ] + mocker.patch( + "openai.resources.models.Models.list", + return_value=SyncPage(object="list", data=items), + ) + + return items diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py index 2b066d4..672597c 100644 --- a/tests/unit/core/test_request.py +++ b/tests/unit/core/test_request.py @@ -20,7 +20,10 @@ def test_text_generation_request_initialization_with_params(): generated_token_count = 100 params = {"temperature": 0.7} request = TextGenerationRequest( - prompt, prompt_token_count, generated_token_count, params + prompt=prompt, + prompt_token_count=prompt_token_count, + generated_token_count=generated_token_count, + params=params, ) assert request.prompt == prompt assert request.prompt_token_count == prompt_token_count @@ -35,7 +38,10 @@ def test_text_generation_request_repr(): generated_token_count = 100 params = {"temperature": 0.7} request = TextGenerationRequest( - prompt, prompt_token_count, generated_token_count, params + prompt=prompt, + prompt_token_count=prompt_token_count, + generated_token_count=generated_token_count, + params=params, ) assert repr(request) == ( f"TextGenerationRequest(id={request.id}, prompt={prompt}, " diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py index 8cedc59..3eeb752 100644 --- a/tests/unit/core/test_result.py +++ b/tests/unit/core/test_result.py @@ -49,8 +49,8 @@ def test_text_generation_result_repr(): def test_text_generation_result_end(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request) - result.end("The end") - assert result.output == "The end" + result.end() + assert result.output == "" assert result.end_time is not None diff --git a/tests/unit/request/test_base.py b/tests/unit/request/test_base.py index 1871d1c..ae83497 100644 --- a/tests/unit/request/test_base.py +++ b/tests/unit/request/test_base.py @@ -1,15 +1,9 @@ -import time from unittest.mock import Mock, patch import pytest -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator - - -class TestRequestGenerator(RequestGenerator): - def create_item(self) -> TextGenerationRequest: - return TextGenerationRequest(prompt="Test prompt") +from guidellm.core import TextGenerationRequest +from tests.dummy.services import TestRequestGenerator @pytest.mark.smoke @@ -77,24 +71,6 @@ def test_request_generator_repr(): ) -@pytest.mark.regression -def test_request_generator_create_item_not_implemented(): - with pytest.raises(TypeError): - - class IncompleteRequestGenerator(RequestGenerator): - pass - - IncompleteRequestGenerator() - - class IncompleteCreateItemGenerator(RequestGenerator): - def create_item(self): - super().create_item() - - generator = IncompleteCreateItemGenerator() - with pytest.raises(NotImplementedError): - generator.create_item() - - @pytest.mark.regression def test_request_generator_iter_calls_create_item(): generator = TestRequestGenerator(mode="sync") @@ -108,7 +84,7 @@ def test_request_generator_iter_calls_create_item(): if len(items) == 5: break - assert generator._queue.qsize() == 0 + assert len(items) == 5 generator.create_item.assert_called() @@ -126,7 +102,5 @@ def test_request_generator_async_iter_calls_create_item(): break generator.stop() - stop_size = generator._queue.qsize() - time.sleep(0.1) - assert generator._queue.qsize() == stop_size + assert len(items) == 5 generator.create_item.assert_called()
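For reference, a minimal usage sketch of the reworked `Backend.create` registry API introduced above. It is illustrative only and not part of the patch; it assumes an OpenAI-compatible server is reachable at `OPENAI_BASE_URL`, and the prompt text is an arbitrary example.

```python
# Illustrative sketch (not part of the patch): create the OpenAI backend through
# the renamed registry factory and submit a single text generation request.
import os

from guidellm.backend import Backend, BackendEngine
from guidellm.core import TextGenerationRequest

# Assumes an OpenAI-compatible server is reachable at OPENAI_BASE_URL.
backend = Backend.create(
    backend_type=BackendEngine.OPENAI_SERVER,
    openai_api_key=os.getenv("OPENAI_API_KEY", "invalid"),
    internal_callback_url=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:8080"),
)

result = backend.submit(request=TextGenerationRequest(prompt="Say this is a test"))
print(result.output)
```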