From 222cc4441c5b84f2e5b0f683612dfe985f3b2682 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Tue, 2 Jul 2024 21:59:00 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20WIP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 + Makefile | 7 ++- setup.py | 51 +++++++++-------- src/guidellm/backend/__init__.py | 4 +- src/guidellm/backend/base.py | 37 +++++++++---- src/guidellm/backend/openai.py | 77 ++++++++++++++------------ src/guidellm/main.py | 21 ++++++- src/guidellm/request/test.py | 13 +++++ tests/conftest.py | 1 + tests/integration/request/test_base.py | 7 +-- tests/unit/backend/__init__.py | 0 tests/unit/backend/conftest.py | 37 +++++++++++++ tests/unit/backend/test_base.py | 58 +++++++++++++++++++ tests/unit/backend/test_openai.py | 65 ++++++++++++++++++++++ 14 files changed, 299 insertions(+), 82 deletions(-) create mode 100644 src/guidellm/request/test.py create mode 100644 tests/conftest.py create mode 100644 tests/unit/backend/__init__.py create mode 100644 tests/unit/backend/conftest.py create mode 100644 tests/unit/backend/test_base.py create mode 100644 tests/unit/backend/test_openai.py diff --git a/.gitignore b/.gitignore index 82f9275..8a86cca 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# MacOS files +.DS_Store diff --git a/Makefile b/Makefile index 53f3537..4d0be04 100644 --- a/Makefile +++ b/Makefile @@ -15,8 +15,9 @@ style: isort src tests flake8 src tests --max-line-length 88 -# test: -# pytest tests + +test: + python -m pytest --cache-clear -vvv -x -s ./tests build: python setup.py sdist bdist_wheel @@ -31,4 +32,4 @@ clean: rm -rf .mypy_cache rm -rf .pytest_cache -.PHONY: install install-dev quality style test test-unit test-integration test-e2e test-smoke test-sanity test-regression build clean +.PHONY: install install-dev quality style test build clean diff --git a/setup.py b/setup.py index 9075d84..4a3235d 100644 --- a/setup.py +++ b/setup.py @@ -1,46 +1,49 @@ -from setuptools import setup, find_packages from typing import Tuple +from setuptools import find_packages, setup + def _setup_long_description() -> Tuple[str, str]: return open("README.md", "r", encoding="utf-8").read(), "text/markdown" setup( - name='guidellm', - version='0.1.0', - author='Neuralmagic, Inc.', - description='Guidance platform for deploying and managing large language models.', + name="guidellm", + version="0.1.0", + author="Neuralmagic, Inc.", + description="Guidance platform for deploying and managing large language models.", long_description=_setup_long_description()[0], long_description_content_type=_setup_long_description()[1], license="Apache", url="https://github.com/neuralmagic/guidellm", - packages=find_packages(where='src'), - package_dir={'': 'src'}, + packages=find_packages(where="src"), + package_dir={"": "src"}, include_package_data=True, install_requires=[ - 'datasets', - 'loguru', - 'numpy', - 'openai', - 'requests', - 'transformers', + "datasets", + "loguru", + "numpy", + "openai", + "requests", + "transformers", ], extras_require={ - 'dev': [ - 'pytest', - 'sphinx', - 'ruff', - 'mypy', - 'black', - 'isort', - 'flake8', - 'pre-commit', + "dev": [ + "black", + "flake8", + "isort", + "mypy", + "pre-commit", + "pytest", + "pytest-asyncio", + "pytest-mock", + "ruff", + "sphinx", ], }, entry_points={ 
-        'console_scripts': [
-            'guidellm=guidellm.main:main',
+        "console_scripts": [
+            "guidellm=guidellm.main:main",
         ],
     },
     python_requires=">=3.8.0",
diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py
index cc5c740..ba6fe69 100644
--- a/src/guidellm/backend/__init__.py
+++ b/src/guidellm/backend/__init__.py
@@ -1,9 +1,9 @@
-from .base import Backend, BackendTypes, GenerativeResponse
+from .base import Backend, BackendEngine, GenerativeResponse
 from .openai import OpenAIBackend
 
 __all__ = [
     "Backend",
-    "BackendTypes",
+    "BackendEngine",
     "GenerativeResponse",
     "OpenAIBackend",
 ]
diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py
index 22aab80..c58857e 100644
--- a/src/guidellm/backend/base.py
+++ b/src/guidellm/backend/base.py
@@ -1,18 +1,26 @@
+import functools
 import uuid
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
-from typing import Iterator, List, Optional, Type, Union
+from typing import Generic, Iterator, List, Optional, Type, Union
 
 from loguru import logger
 
 from guidellm.core.request import TextGenerationRequest
 from guidellm.core.result import TextGenerationResult
 
-__all__ = ["Backend", "BackendTypes", "GenerativeResponse"]
+__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]
 
 
-class BackendTypes(Enum):
+class BackendEngine(str, Enum):
+    """
+    Determines the engine of the LLM backend.
+    Every backend implemented in the project is registered under an engine.
+
+    NOTE: the `TEST` engine must be used only for testing purposes.
+    """
+
     TEST = "test"
     OPENAI_SERVER = "openai_server"
 
@@ -33,18 +41,18 @@ class GenerativeResponse:
 
 class Backend(ABC):
     """
-    An abstract base class for generative AI backends.
+    An abstract base class with template methods for generative AI backends.
     """
 
     _registry = {}
 
     @staticmethod
-    def register_backend(backend_type: BackendTypes):
+    def register(backend_type: BackendEngine):
         """
         A decorator to register a backend class in the backend registry.
 
         :param backend_type: The type of backend to register.
-        :type backend_type: BackendTypes
+        :type backend_type: BackendEngine
         """
 
         def inner_wrapper(wrapped_class: Type["Backend"]):
@@ -54,21 +62,24 @@ def inner_wrapper(wrapped_class: Type["Backend"]):
         return inner_wrapper
 
     @staticmethod
-    def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend":
+    def create(backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
         """
         Factory method to create a backend based on the backend type.
 
         :param backend_type: The type of backend to create.
-        :type backend_type: BackendTypes
+        :type backend_type: BackendEngine
         :param kwargs: Additional arguments for backend initialization.
         :type kwargs: dict
         :return: An instance of a subclass of Backend.
         :rtype: Backend
         """
+        logger.info(f"Creating backend of type {backend_type}")
+
         if backend_type not in Backend._registry:
             logger.error(f"Unsupported backend type: {backend_type}")
             raise ValueError(f"Unsupported backend type: {backend_type}")
+
         return Backend._registry[backend_type](**kwargs)
 
     def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
@@ -121,8 +132,10 @@ def available_models(self) -> List[str]:
         :return: A list of available models.
         :rtype: List[str]
         """
-        raise NotImplementedError()
+
+        pass
 
+    @property
     @abstractmethod
     def default_model(self) -> str:
         """
@@ -131,7 +144,8 @@ def default_model(self) -> str:
 
         :return: The default model.
         :rtype: str
         """
-        raise NotImplementedError()
+
+        pass
 
     @abstractmethod
     def model_tokenizer(self, model: str) -> Optional[str]:
         """
@@ -143,4 +157,5 @@ def model_tokenizer(self, model: str) -> Optional[str]:
         :return: The tokenizer for the model, or None if it cannot be created.
         :rtype: Optional[str]
         """
-        raise NotImplementedError()
+
+        pass
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
index ce9f6c2..6194b5d 100644
--- a/src/guidellm/backend/openai.py
+++ b/src/guidellm/backend/openai.py
@@ -1,16 +1,18 @@
-from typing import Any, Iterator, List, Optional
+import functools
+import os
+from typing import Any, Dict, Iterator, List, Optional
 
-import openai
 from loguru import logger
+from openai import OpenAI
 from transformers import AutoTokenizer
 
-from guidellm.backend import Backend, BackendTypes, GenerativeResponse
+from guidellm.backend import Backend, BackendEngine, GenerativeResponse
 from guidellm.core.request import TextGenerationRequest
 
 __all__ = ["OpenAIBackend"]
 
 
-@Backend.register_backend(BackendTypes.OPENAI_SERVER)
+@Backend.register(BackendEngine.OPENAI_SERVER)
 class OpenAIBackend(Backend):
     """
     An OpenAI backend implementation for the generative AI result.
@@ -33,34 +35,35 @@ class OpenAIBackend(Backend):
 
     def __init__(
         self,
-        target: Optional[str] = None,
-        host: Optional[str] = None,
-        port: Optional[int] = None,
-        path: Optional[str] = None,
+        openai_api_key: Optional[str] = None,
+        internal_callback_url: Optional[str] = None,
         model: Optional[str] = None,
-        api_key: Optional[str] = None,
-        **request_args,
+        **request_args: Any,
     ):
-        self.target = target
-        self.model = model
-        self.request_args = request_args
-
-        if not self.target:
-            if not host:
-                raise ValueError("Host is required if target is not provided.")
-
-            port_incl = f":{port}" if port else ""
-            path_incl = path if path else ""
-            self.target = f"http://{host}{port_incl}{path_incl}"
+        """
+        Initialize the OpenAI client.
+        """
 
-        openai.api_base = self.target
-        openai.api_key = api_key
+        self.request_args = request_args
 
-        if not model:
-            self.model = self.default_model()
+        if not (_api_key := (openai_api_key or os.getenv("OPENAI_API_KEY", None))):
+            raise ValueError(
+                "`OPENAI_API_KEY` environment variable or --openai-api-key CLI parameter "
+                "must be specified for the OpenAI backend"
+            )
+
+        if not (
+            _base_url := (internal_callback_url or os.getenv("OPENAI_BASE_URL", None))
+        ):
+            raise ValueError(
+                "`OPENAI_BASE_URL` environment variable or --openai-base-url CLI parameter "
+                "must be specified for the OpenAI backend"
+            )
 
+        self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)
+        self.model = model or self.default_model
 
         logger.info(
-            f"Initialized OpenAIBackend with target: {self.target} "
+            f"Initialized OpenAIBackend with callback URL: {internal_callback_url} "
             f"and model: {self.model}"
         )
 
@@ -75,11 +78,10 @@ def make_request(
 
         :return: An iterator over the generative responses.
         :rtype: Iterator[GenerativeResponse]
         """
+        logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}")
 
         num_gen_tokens = request.params.get("generated_tokens", None)
-        request_args = {
-            "n": 1,
-        }
+        request_args: Dict = {"n": 1}
 
         if num_gen_tokens:
             request_args["max_tokens"] = num_gen_tokens
@@ -88,8 +90,8 @@ def make_request(
         if self.request_args:
             request_args.update(self.request_args)
 
-        response = openai.Completion.create(
-            engine=self.model,
+        response = self.openai_client.completions.create(
+            model=self.model,
             prompt=request.prompt,
             stream=True,
             **request_args,
@@ -129,10 +131,16 @@ def available_models(self) -> List[str]:
         :return: A list of available models.
         :rtype: List[str]
         """
-        models = [model["id"] for model in openai.Engine.list()["data"]]
+
+        models: List[str] = [
+            model.id for model in self.openai_client.models.list().data
+        ]
         logger.info(f"Available models: {models}")
+
         return models
 
+    # Cached so repeated lookups do not re-query the models endpoint.
+    @functools.cached_property
     def default_model(self) -> str:
         """
         Get the default model for the backend.
@@ -140,10 +148,11 @@ def default_model(self) -> str:
 
         :return: The default model.
         :rtype: str
         """
-        models = self.available_models()
-        if models:
+
+        if models := self.available_models():
             logger.info(f"Default model: {models[0]}")
             return models[0]
+
         logger.error("No models available.")
         raise ValueError("No models available.")
diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index adf0aa9..ece858c 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -3,6 +3,7 @@
 import click
 
 from guidellm.backend import Backend
+from guidellm.backend.base import BackendEngine
 from guidellm.core import TextGenerationBenchmarkReport
 from guidellm.executor import Executor
 from guidellm.request import (
@@ -23,7 +24,19 @@
 @click.option("--port", type=str, help="Port for benchmarking")
 @click.option("--path", type=str, help="Path for benchmarking")
 @click.option(
-    "--backend", type=str, default="openai_server", help="Backend type for benchmarking"
+    "--backend",
+    type=BackendEngine,
+    default=BackendEngine.OPENAI_SERVER,
+    help="Backend type for benchmarking",
+)
+@click.option(
+    "--internal-callback-url", type=str, default=None, help="Internal callback URL"
+)
+@click.option(
+    "--openai-api-key",
+    type=str,
+    default=None,
+    help="OpenAI API Key. Required if `backend=openai_server`",
 )
 @click.option("--model", type=str, default=None, help="Model to use for benchmarking")
 @click.option("--task", type=str, default=None, help="Task to use for benchmarking")
@@ -66,7 +79,9 @@ def main(
     host,
     port,
     path,
+    internal_callback_url,
     backend,
+    openai_api_key,
     model,
     task,
     data,
@@ -78,13 +93,15 @@ def main(
     num_requests,
 ):
     # Create backend
-    Backend.create_backend(
+    Backend.create(
         backend_type=backend,
         target=target,
         host=host,
         port=port,
         path=path,
         model=model,
+        openai_api_key=openai_api_key,
+        internal_callback_url=internal_callback_url,
     )
 
     # Create request generator
diff --git a/src/guidellm/request/test.py b/src/guidellm/request/test.py
new file mode 100644
index 0000000..bc74706
--- /dev/null
+++ b/src/guidellm/request/test.py
@@ -0,0 +1,13 @@
+from guidellm.core import TextGenerationRequest
+
+from .base import RequestGenerator
+
+
+class TestRequestGenerator(RequestGenerator):
+    """
+    This class represents a request generator that exists
+    exclusively for testing purposes.
+    """
+
+    def create_item(self) -> TextGenerationRequest:
+        return TextGenerationRequest(prompt="Test prompt")
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1 @@
+
diff --git a/tests/integration/request/test_base.py b/tests/integration/request/test_base.py
index a631909..30cd32f 100644
--- a/tests/integration/request/test_base.py
+++ b/tests/integration/request/test_base.py
@@ -1,12 +1,7 @@
 import pytest
 from transformers import AutoTokenizer, PreTrainedTokenizerBase
 
-from guidellm.core.request import TextGenerationRequest
-from guidellm.request.base import RequestGenerator
-
-
-class TestRequestGenerator(RequestGenerator):
-    def create_item(self) -> TextGenerationRequest:
-        return TextGenerationRequest(prompt="Test prompt")
+from guidellm.request.test import TestRequestGenerator
 
 
 @pytest.mark.smoke
diff --git a/tests/unit/backend/__init__.py b/tests/unit/backend/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/unit/backend/conftest.py b/tests/unit/backend/conftest.py
new file mode 100644
index 0000000..c4f7f8b
--- /dev/null
+++ b/tests/unit/backend/conftest.py
@@ -0,0 +1,37 @@
+import pytest
+from openai.pagination import SyncPage
+from openai.types import Model
+
+
+@pytest.fixture(autouse=True)
+def openai_models_list_patch(mocker):
+    """
+    Mock the models list endpoint so tests never hit the real OpenAI API.
+    """
+
+    return mocker.patch(
+        "openai.resources.models.Models.list",
+        return_value=SyncPage(
+            object="list",
+            data=[
+                Model(
+                    id="model-id-1",
+                    object="model",
+                    created=1686935002,
+                    owned_by="openai",
+                ),
+                Model(
+                    id="model-id-2",
+                    object="model",
+                    created=1686935003,
+                    owned_by="openai",
+                ),
+                Model(
+                    id="model-id-3",
+                    object="model",
+                    created=1686935004,
+                    owned_by="openai",
+                ),
+            ],
+        ),
+    )
diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py
new file mode 100644
index 0000000..30b8538
--- /dev/null
+++ b/tests/unit/backend/test_base.py
@@ -0,0 +1,58 @@
+from typing import Iterator, List, Optional
+
+import pytest
+
+from guidellm.backend import Backend, BackendEngine, GenerativeResponse, OpenAIBackend
+from guidellm.core import TextGenerationRequest
+
+
+@Backend.register(backend_type=BackendEngine.TEST)
+class TestBackend(Backend):
+    """
+    A test implementation of an LLM backend.
+    """
+
+    def __init__(self, target: str, model: str = "test"):
+        self.target: str = target
+        self.model: str = model
+
+    def make_request(
+        self, request: TextGenerationRequest
+    ) -> Iterator[GenerativeResponse]:
+        raise NotImplementedError
+
+    def available_models(self) -> List[str]:
+        raise NotImplementedError
+
+    def default_model(self) -> str:
+        raise NotImplementedError
+
+    def model_tokenizer(self, model: str) -> Optional[str]:
+        raise NotImplementedError
+
+
+@pytest.mark.smoke
+def test_backend_registry():
+    """
+    Ensure that all registered classes exist in the registry
+    after they have been registered.
+    """
+
+    assert Backend._registry == {
+        BackendEngine.TEST: TestBackend,
+        BackendEngine.OPENAI_SERVER: OpenAIBackend,
+    }
+
+
+def test_backend_service_factory():
+    """
+    Ensure that the Backend factory method works as expected.
+    """
+
+    backend_service: Backend = Backend.create(
+        backend_type=BackendEngine.TEST,
+        target="http://localhost:8000/completions",
+        model="test",
+    )
+
+    assert isinstance(backend_service, TestBackend)
diff --git a/tests/unit/backend/test_openai.py b/tests/unit/backend/test_openai.py
new file mode 100644
index 0000000..cd86683
--- /dev/null
+++ b/tests/unit/backend/test_openai.py
@@ -0,0 +1,65 @@
+"""
+This module includes unit tests for the OpenAI Backend Service.
+"""
+
+import pytest
+
+from guidellm.backend import Backend, BackendEngine, OpenAIBackend
+from guidellm.request.test import TestRequestGenerator
+
+
+@pytest.mark.sanity
+def test_openai_backend_creation_with_default_model():
+    """
+    Test whether the OpenAI Backend service is created correctly
+    with all default parameters.
+
+    Also checks whether resolving `default_model` avoids extra OpenAI API calls.
+    """
+
+    backend_service = Backend.create(
+        backend_type=BackendEngine.OPENAI_SERVER,
+        openai_api_key="dummy api key",
+        internal_callback_url="http://localhost:8000",
+    )
+
+    assert isinstance(backend_service, OpenAIBackend)
+    assert backend_service.default_model == "model-id-1"
+
+
+@pytest.mark.smoke
+@pytest.mark.parametrize(
+    "extra_kwargs",
+    [
+        {"openai_api_key": "dummy"},
+        {"openai_base_url": "dummy"},
+    ],
+)
+def test_openai_backend_creation_required_arguments(extra_kwargs: dict):
+    with pytest.raises(ValueError):
+        Backend.create(
+            backend_type=BackendEngine.OPENAI_SERVER,
+            target="https://dummy.com",
+            **extra_kwargs,
+        )
+
+
+def test_model_tokenizer():
+    backend_service = Backend.create(
+        backend_type=BackendEngine.OPENAI_SERVER,
+        openai_api_key="dummy api key",
+        internal_callback_url="http://localhost:8000",
+    )
+
+    assert backend_service.model_tokenizer("bert-base-uncased")
+
+
+def test_model_tokenizer_no_model():
+    backend_service = Backend.create(
+        backend_type=BackendEngine.OPENAI_SERVER,
+        openai_api_key="dummy api key",
+        internal_callback_url="http://localhost:8000",
+    )
+    tokenizer = backend_service.model_tokenizer("invalid")
+
+    assert tokenizer is None
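
--

A minimal standalone sketch (not part of the diff) of the renamed registry and
factory API introduced above. `EchoBackend` is a hypothetical class made up for
illustration, with `make_request` stubbed out; everything else comes from the
modules changed in this patch:

    from typing import Iterator, List, Optional

    from guidellm.backend import Backend, BackendEngine, GenerativeResponse
    from guidellm.core import TextGenerationRequest


    # Registering under the TEST engine adds the class to Backend._registry.
    @Backend.register(backend_type=BackendEngine.TEST)
    class EchoBackend(Backend):
        def __init__(self, target: str, model: str = "echo"):
            self.target = target
            self.model = model

        def make_request(
            self, request: TextGenerationRequest
        ) -> Iterator[GenerativeResponse]:
            raise NotImplementedError  # omitted in this sketch

        def available_models(self) -> List[str]:
            return [self.model]

        @property
        def default_model(self) -> str:
            return self.model

        def model_tokenizer(self, model: str) -> Optional[str]:
            return None


    # The factory resolves the engine to the registered class and forwards
    # the remaining keyword arguments to its constructor.
    backend = Backend.create(
        backend_type=BackendEngine.TEST,
        target="http://localhost:8000/completions",
    )
    assert isinstance(backend, EchoBackend)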
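
A corresponding sketch for the OpenAI backend wiring, assuming an
OpenAI-compatible server is reachable at the placeholder URL below; the
`OPENAI_API_KEY` and `OPENAI_BASE_URL` environment variables act as fallbacks
for the explicit arguments, as implemented in `OpenAIBackend.__init__`:

    import os

    from guidellm.backend import Backend, BackendEngine

    # Placeholder credentials and URL, for illustration only.
    os.environ.setdefault("OPENAI_API_KEY", "dummy-api-key")

    backend = Backend.create(
        backend_type=BackendEngine.OPENAI_SERVER,
        internal_callback_url="http://localhost:8000",
    )

    # `default_model` is cached, so only the first access queries the
    # server's model list; it resolves to the first model returned.
    print(backend.default_model)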