From c3abc8db8aa830bd7af5e2b1b8dae7eecf1d9bf4 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 22 Aug 2024 12:48:55 +0300 Subject: [PATCH 01/22] WIP --- src/guidellm/backend/deepsparse.py | 150 +++++++++++++++++++++++++++++ src/guidellm/config.py | 15 ++- src/guidellm/main.py | 2 +- 3 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 src/guidellm/backend/deepsparse.py diff --git a/src/guidellm/backend/deepsparse.py b/src/guidellm/backend/deepsparse.py new file mode 100644 index 0000000..7d66664 --- /dev/null +++ b/src/guidellm/backend/deepsparse.py @@ -0,0 +1,150 @@ +from typing import Any, Dict, Generator, List, Optional + +import openai +from loguru import logger +from openai import OpenAI, Stream +from openai.types import Completion +from transformers import AutoTokenizer + +from guidellm.backend import Backend, BackendEngine, GenerativeResponse +from guidellm.config import settings +from guidellm.core import TextGenerationRequest + +__all__ = ["DeepsparseBackend"] + + +@Backend.register(BackendEngine.DEEPSPARSE) +class DeepsparseBackend(Backend): + """ + An Deepsparse backend implementation for the generative AI result. + + :param XXX: Description + :type XXX: str + """ + + def __init__( + self, + target: Optional[str] = None, + host: Optional[str] = None, + port: Optional[int] = None, + model: Optional[str] = None, + **request_args, + ): + """ + Initialize an OpenAI Client + """ + + self.request_args = request_args + + + breakpoint() # TODO: remove + if target is not None: + base_url = target + elif host and port: + base_url = f"{host}:{port}" + elif settings.openai.base_url is not None: + base_url = settings.openai.base_url + else: + raise ValueError( + "`GUIDELLM__DEEPSPARSE__BASE_URL` environment variable " + "or --target CLI parameter must be specified for the OpenAI backend." + ) + + self.openai_client = OpenAI(api_key=_api_key, base_url=base_url) + self.model = model or self.default_model + + logger.info("OpenAI {} Backend listening on {}", self.model, target) + + def make_request( + self, + request: TextGenerationRequest, + ) -> Generator[GenerativeResponse, None, None]: + """ + Make a request to the OpenAI backend. + + :param request: The result request to submit. + :type request: TextGenerationRequest + :return: An iterator over the generative responses. 
+ :rtype: Iterator[GenerativeResponse] + """ + + logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}") + + # How many completions to generate for each prompt + request_args: Dict = {"n": 1} + + num_gen_tokens: int = ( + request.params.get("generated_tokens", None) + or settings.openai.max_gen_tokens + ) + request_args.update({"max_tokens": num_gen_tokens, "stop": None}) + + if self.request_args: + request_args.update(self.request_args) + + response: Stream[Completion] = self.openai_client.completions.create( + model=self.model, + prompt=request.prompt, + stream=True, + **request_args, + ) + + for chunk in response: + chunk_content: str = getattr(chunk, "content", "") + + if getattr(chunk, "stop", True) is True: + logger.debug("Received final response from OpenAI backend") + + yield GenerativeResponse( + type_="final", + prompt=getattr(chunk, "prompt", request.prompt), + prompt_token_count=( + request.prompt_token_count or self._token_count(request.prompt) + ), + output_token_count=(self._token_count(chunk_content)), + ) + else: + logger.debug("Received token from OpenAI backend") + yield GenerativeResponse(type_="token_iter", add_token=chunk_content) + + def available_models(self) -> List[str]: + """ + Get the available models for the backend. + + :return: A list of available models. + :rtype: List[str] + """ + + try: + models: List[str] = [ + model.id for model in self.openai_client.models.list().data + ] + except openai.NotFoundError as error: + logger.error("No available models for OpenAI Backend") + raise error + else: + logger.info(f"Available models: {models}") + return models + + def model_tokenizer(self, model: str) -> Optional[Any]: + """ + Get the tokenizer for a model. + + :param model: The model to get the tokenizer for. + :type model: str + :return: The tokenizer for the model, or None if it cannot be created. 
+ :rtype: Optional[Any] + """ + try: + tokenizer = AutoTokenizer.from_pretrained(model) + logger.info(f"Tokenizer created for model: {model}") + return tokenizer + except Exception as err: # noqa: BLE001 + logger.warning(f"Could not create tokenizer for model {model}: {err}") + return None + + def _token_count(self, text: str) -> int: + token_count = len(text.split()) + logger.debug(f"Token count for text '{text}': {token_count}") + return token_count + diff --git a/src/guidellm/config.py b/src/guidellm/config.py index fc15eff..fe2f4e1 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -107,6 +107,18 @@ class OpenAISettings(BaseModel): max_gen_tokens: int = 4096 +class DeepsparseSettings(BaseModel): + """ + Deepsparse settings for the application to connect to the API + for Deepsparse server based pathways + """ + + # NOTE: The default value is default address of deepsparse.server + base_url: str = "http://localhost:5543" + + max_gen_tokens: int = 4096 + + class ReportGenerationSettings(BaseModel): """ Report generation settings for the application @@ -151,8 +163,7 @@ class Settings(BaseSettings): # Request settings openai: OpenAISettings = OpenAISettings() - - # Report settings + deepsprase: DeepsparseSettings = DeepsparseSettings() report_generation: ReportGenerationSettings = ReportGenerationSettings() @model_validator(mode="after") diff --git a/src/guidellm/main.py b/src/guidellm/main.py index d754d6a..a337115 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -26,7 +26,7 @@ @click.option("--port", type=str, default=None, help="Port for benchmarking") @click.option( "--backend", - type=click.Choice(["test", "openai_server"]), + type=click.Choice(["test", "openai_server", "deepsparse"]), default="openai_server", help="Backend type for benchmarking", ) From a6d9a05ebe813967f3697220b6970dfa41dfe598 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 22 Aug 2024 13:46:00 +0300 Subject: [PATCH 02/22] =?UTF-8?q?=E2=9C=85=20Tests=20are=20fixed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/conftest.py | 7 ++----- tests/unit/core/test_distribution.py | 6 +++--- tests/unit/request/test_emulated.py | 2 ++ tests/unit/test_config.py | 12 +++++++++++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index ae2cc34..9c35e99 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -22,12 +22,9 @@ def _fake_tokenize(text: str) -> List[int]: @pytest.fixture() def mock_requests_pride_and_prejudice(): - text_path = Path(__file__).parent / "dummy" / "data" / "pride_and_prejudice.txt" + text_path = Path(__file__).parent.parent / "dummy/data/pride_and_prejudice.txt" text_content = text_path.read_text() with requests_mock.Mocker() as mock: - mock.get( - "https://www.gutenberg.org/files/1342/1342-0.txt", - text=text_content, - ) + mock.get("https://www.gutenberg.org/files/1342/1342-0.txt", text=text_content) yield mock diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py index 95b7e92..128a1f1 100644 --- a/tests/unit/core/test_distribution.py +++ b/tests/unit/core/test_distribution.py @@ -73,9 +73,9 @@ def test_distribution_str(): "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" in str(dist) ) - assert "'min': 1" in str(dist) - assert "'max': 5" in str(dist) - assert "'range': 4" in str(dist) + assert "'min': np.float64(1.0)" in str(dist) + assert "'max': np.float64(5.0)" in 
str(dist) + assert "'range': np.float64(4.0)" in str(dist) @pytest.mark.regression() diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py index 699b1d6..cd89d54 100644 --- a/tests/unit/request/test_emulated.py +++ b/tests/unit/request/test_emulated.py @@ -347,6 +347,8 @@ def test_emulated_request_generator_lifecycle( generator = EmulatedRequestGenerator( str(file_path) if config_type == "file_str" else file_path ) + else: + raise for _ in range(5): request = generator.create_item() diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 13e1699..c79ba88 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,4 +1,5 @@ import pytest +from pydantic_settings import BaseSettings, SettingsConfigDict from guidellm.config import ( Environment, @@ -9,9 +10,18 @@ ) +class DefaultSettings(Settings, BaseSettings): + """ + This class overrides the original `Settings` class with another `model_config` + to ignore local environment variables for each runtime. + """ + + model_config = SettingsConfigDict() + + @pytest.mark.smoke() def test_default_settings(): - settings = Settings() + settings = DefaultSettings() assert settings.env == Environment.PROD assert settings.logging == LoggingSettings() assert settings.openai == OpenAISettings() From d116c0c8cd892a8a14309ef49b982e9ecda77fab Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 22 Aug 2024 18:43:05 +0300 Subject: [PATCH 03/22] =?UTF-8?q?=F0=9F=93=8C=20deepsparse=20is=20added=20?= =?UTF-8?q?to=20dependencies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4d54edc..ecd1e56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "pyyaml>=6.0.0", "requests", "transformers", + "deepsparse[llm]>=1.8.0", ] [project.optional-dependencies] From c000dbfa99f9ddabcb1dde52e02956e6f860f2c1 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 22 Aug 2024 18:43:33 +0300 Subject: [PATCH 04/22] =?UTF-8?q?=E2=9C=A8=20deepsparse=20backend=20integr?= =?UTF-8?q?ation=20is=20added?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/__init__.py | 2 + src/guidellm/backend/base.py | 2 +- src/guidellm/backend/deepsparse.py | 126 +++++++++-------------------- src/guidellm/config.py | 8 +- 4 files changed, 42 insertions(+), 96 deletions(-) diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index ba6fe69..869d9f8 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,4 +1,5 @@ from .base import Backend, BackendEngine, GenerativeResponse +from .deepsparse import DeepsparseBackend from .openai import OpenAIBackend __all__ = [ @@ -6,4 +7,5 @@ "BackendEngine", "GenerativeResponse", "OpenAIBackend", + "DeepsparseBackend", ] diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index a5fc35e..13776fb 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -10,7 +10,7 @@ __all__ = ["Backend", "BackendEngine", "GenerativeResponse"] -BackendEngine = Literal["test", "openai_server"] +BackendEngine = Literal["test", "openai_server", "deepsparse"] class GenerativeResponse(BaseModel): diff --git a/src/guidellm/backend/deepsparse.py b/src/guidellm/backend/deepsparse.py index 7d66664..9514051 100644 --- a/src/guidellm/backend/deepsparse.py +++ 
b/src/guidellm/backend/deepsparse.py @@ -1,66 +1,33 @@ -from typing import Any, Dict, Generator, List, Optional +from typing import Any, AsyncGenerator, List, Optional -import openai +from deepsparse import Pipeline from loguru import logger -from openai import OpenAI, Stream -from openai.types import Completion from transformers import AutoTokenizer -from guidellm.backend import Backend, BackendEngine, GenerativeResponse -from guidellm.config import settings +from guidellm.backend import Backend, GenerativeResponse from guidellm.core import TextGenerationRequest __all__ = ["DeepsparseBackend"] -@Backend.register(BackendEngine.DEEPSPARSE) +@Backend.register("deepsparse") class DeepsparseBackend(Backend): """ An Deepsparse backend implementation for the generative AI result. - - :param XXX: Description - :type XXX: str """ - def __init__( - self, - target: Optional[str] = None, - host: Optional[str] = None, - port: Optional[int] = None, - model: Optional[str] = None, - **request_args, - ): - """ - Initialize an OpenAI Client - """ - + def __init__(self, model: Optional[str] = None, **request_args): self.request_args = request_args + self.pipeline: Pipeline = Pipeline.create( + task="sentiment-analysis", + model_path=model or self.default_model, + ) - - breakpoint() # TODO: remove - if target is not None: - base_url = target - elif host and port: - base_url = f"{host}:{port}" - elif settings.openai.base_url is not None: - base_url = settings.openai.base_url - else: - raise ValueError( - "`GUIDELLM__DEEPSPARSE__BASE_URL` environment variable " - "or --target CLI parameter must be specified for the OpenAI backend." - ) - - self.openai_client = OpenAI(api_key=_api_key, base_url=base_url) - self.model = model or self.default_model - - logger.info("OpenAI {} Backend listening on {}", self.model, target) - - def make_request( - self, - request: TextGenerationRequest, - ) -> Generator[GenerativeResponse, None, None]: + async def make_request( + self, request: TextGenerationRequest + ) -> AsyncGenerator[GenerativeResponse, None]: """ - Make a request to the OpenAI backend. + Make a request to the Deepsparse Python API client. :param request: The result request to submit. 
:type request: TextGenerationRequest @@ -68,44 +35,30 @@ def make_request( :rtype: Iterator[GenerativeResponse] """ - logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}") - - # How many completions to generate for each prompt - request_args: Dict = {"n": 1} - - num_gen_tokens: int = ( - request.params.get("generated_tokens", None) - or settings.openai.max_gen_tokens - ) - request_args.update({"max_tokens": num_gen_tokens, "stop": None}) - - if self.request_args: - request_args.update(self.request_args) - - response: Stream[Completion] = self.openai_client.completions.create( - model=self.model, - prompt=request.prompt, - stream=True, - **request_args, + logger.debug( + f"Making request to Deepsparse backend with prompt: {request.prompt}" ) - for chunk in response: - chunk_content: str = getattr(chunk, "content", "") - - if getattr(chunk, "stop", True) is True: - logger.debug("Received final response from OpenAI backend") - + token_count = 0 + for response in self.pipeline.generations: + if not (token := response.text): yield GenerativeResponse( type_="final", - prompt=getattr(chunk, "prompt", request.prompt), - prompt_token_count=( - request.prompt_token_count or self._token_count(request.prompt) - ), - output_token_count=(self._token_count(chunk_content)), + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, ) + break else: - logger.debug("Received token from OpenAI backend") - yield GenerativeResponse(type_="token_iter", add_token=chunk_content) + token_count += 1 + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + def available_models(self) -> List[str]: """ @@ -115,16 +68,11 @@ def available_models(self) -> List[str]: :rtype: List[str] """ - try: - models: List[str] = [ - model.id for model in self.openai_client.models.list().data - ] - except openai.NotFoundError as error: - logger.error("No available models for OpenAI Backend") - raise error - else: - logger.info(f"Available models: {models}") - return models + # WARNING: The default model from the documentation is defined here + + return [ + "zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none" + ] def model_tokenizer(self, model: str) -> Optional[Any]: """ @@ -135,6 +83,7 @@ def model_tokenizer(self, model: str) -> Optional[Any]: :return: The tokenizer for the model, or None if it cannot be created. 
:rtype: Optional[Any] """ + try: tokenizer = AutoTokenizer.from_pretrained(model) logger.info(f"Tokenizer created for model: {model}") @@ -147,4 +96,3 @@ def _token_count(self, text: str) -> int: token_count = len(text.split()) logger.debug(f"Token count for text '{text}': {token_count}") return token_count - diff --git a/src/guidellm/config.py b/src/guidellm/config.py index fe2f4e1..10e4817 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -109,14 +109,10 @@ class OpenAISettings(BaseModel): class DeepsparseSettings(BaseModel): """ - Deepsparse settings for the application to connect to the API - for Deepsparse server based pathways + Deepsparse settings for the Python API library """ - # NOTE: The default value is default address of deepsparse.server - base_url: str = "http://localhost:5543" - - max_gen_tokens: int = 4096 + model: str = "zoo:mpt-7b-dolly_mpt_pretrain-pruned50_quantized" class ReportGenerationSettings(BaseModel): From 52e1d3b2b886970e57cb8537a12806fe1c6eb2f2 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Mon, 26 Aug 2024 14:28:46 +0300 Subject: [PATCH 05/22] deepsparse package limitations are applied --- DEVELOPING.md | 39 +++++++++++----- pyproject.toml | 4 +- src/guidellm/backend/deepsparse/__init__.py | 28 +++++++++++ .../{deepsparse.py => deepsparse/backend.py} | 5 +- src/guidellm/utils/__init__.py | 3 ++ src/guidellm/utils/dependencies.py | 46 +++++++++++++++++++ 6 files changed, 108 insertions(+), 17 deletions(-) create mode 100644 src/guidellm/backend/deepsparse/__init__.py rename src/guidellm/backend/{deepsparse.py => deepsparse/backend.py} (97%) create mode 100644 src/guidellm/utils/dependencies.py diff --git a/DEVELOPING.md b/DEVELOPING.md index 01b6cc8..2bae63d 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -15,19 +15,33 @@ Before you begin, ensure you have the following installed: - `pip` (Python package installer) - `git` (version control system) -### Installation +### Clone the repository: -1. Clone the repository: +```sh +git clone https://github.com/neuralmagic/guidellm.git +cd guidellm +``` - ```bash - git clone https://github.com/neuralmagic/guidellm.git - cd guidellm - ``` +### Install dependencies: -2. Install the required dependencies: - ```bash - pip install -e .[dev] - ``` +All the dependencies are specified in `pyproject.toml` file. There is an option to install only required dependencies and optional dependencies + +Install required dependencies along with optional `dev` dependencies. + +```sh +pip install -e .[dev] +# or pip install -e '.[dev]' +``` + +#### Working with **Not-Default** backends + +If you work with `deepsparse` backend, etc it has some other software limitations. +In order to install dependencies for the specific backend, run: + +```sh +pip install -e .[deepsparse] +# or pip install -e '.[deepsparse]' +``` ## Project Structure @@ -46,8 +60,9 @@ guidellm/ └── README.md ``` -- **src/guidellm/**: Main source code for the project. -- **tests/**: Test cases categorized into unit, integration, and end-to-end tests. +- `pyproject.toml`: Project metadata +- `**src/guidellm/**`: Main source code for the project. +- `**tests/**`: Test cases categorized into unit, integration, and end-to-end tests. 
## Development Environment Setup diff --git a/pyproject.toml b/pyproject.toml index ecd1e56..e2e9ad6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ dependencies = [ "pyyaml>=6.0.0", "requests", "transformers", - "deepsparse[llm]>=1.8.0", ] [project.optional-dependencies] @@ -65,6 +64,9 @@ dev = [ "types-requests~=2.32.0", "types-toml", ] +deepsparse = [ + "deepsparse", +] [project.entry-points.console_scripts] diff --git a/src/guidellm/backend/deepsparse/__init__.py b/src/guidellm/backend/deepsparse/__init__.py new file mode 100644 index 0000000..99f89b3 --- /dev/null +++ b/src/guidellm/backend/deepsparse/__init__.py @@ -0,0 +1,28 @@ +""" +This package encapsulates the "Deepsparse Backend" implementation. +ref: https://github.com/neuralmagic/deepsparse + +The `deepsparse` package supports Python3.6..Python3.11, +when the `guidellm` start from Python3.8. + +Safe range of versions is Python3.8..Python3.11 +for the Deepsparse Backend implementation. +""" + +from guidellm.utils import check_python_version, module_is_available + +# Ensure that python is in valid range +check_python_version(min_version="3.8", max_version="3.11") + +# Ensure that deepsparse is installed +module_is_available( + module="deepsparse", + helper=( + "`deepsparse` package is not available. " + "Please try `pip install -e '.[deepsparse]'`" + ), +) + +from .backend import DeepsparseBackend + +__all__ = ["DeepsparseBackend"] diff --git a/src/guidellm/backend/deepsparse.py b/src/guidellm/backend/deepsparse/backend.py similarity index 97% rename from src/guidellm/backend/deepsparse.py rename to src/guidellm/backend/deepsparse/backend.py index 9514051..60e4db7 100644 --- a/src/guidellm/backend/deepsparse.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -7,10 +7,8 @@ from guidellm.backend import Backend, GenerativeResponse from guidellm.core import TextGenerationRequest -__all__ = ["DeepsparseBackend"] - -@Backend.register("deepsparse") +@Backend.register(backend_type="deepsparse") class DeepsparseBackend(Backend): """ An Deepsparse backend implementation for the generative AI result. @@ -59,7 +57,6 @@ async def make_request( output_token_count=token_count, ) - def available_models(self) -> List[str]: """ Get the available models for the backend. 
diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 60668cf..4776848 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,3 +1,4 @@ +from .dependencies import check_python_version, module_is_available from .injector import create_report, inject_data from .text import ( clean_text, @@ -35,4 +36,6 @@ "resolve_transformers_dataset_split", "split_lines_by_punctuation", "split_text", + "check_python_version", + "module_is_available", ] diff --git a/src/guidellm/utils/dependencies.py b/src/guidellm/utils/dependencies.py new file mode 100644 index 0000000..2d28f57 --- /dev/null +++ b/src/guidellm/utils/dependencies.py @@ -0,0 +1,46 @@ +import importlib +import sys +from typing import Tuple + + +def _extract_python_version(data: str) -> Tuple[int, ...]: + """Extract '3.12' -> (3, 12).""" + + if len(items := data.split(".")) > 2: + raise ValueError("Python version format: MAJOR.MINOR") + + if not all((item.isnumeric() for item in items)): + raise ValueError("Python version must include only numbers") + + return tuple(int(item) for item in items) + + +def check_python_version(min_version: str, max_version: str) -> None: + """Raises the error if the current version is not in the range.""" + + min_version_info: Tuple[int, ...] = _extract_python_version(min_version) + max_version_info: Tuple[int, ...] = _extract_python_version(max_version) + current_version_info: Tuple[int, int] = ( + sys.version_info.major, + sys.version_info.minor, + ) + + if not (min_version_info <= current_version_info <= max_version_info): + raise RuntimeError( + "This feature requires Python version " + f"to be in range: {min_version}..{max_version}." + "You are using Python {}.{}.{}".format( + sys.version_info.major, + sys.version_info.minor, + sys.version_info.micro, + ) + ) + + +def module_is_available(module: str, helper: str): + """Ensure that the module is available for other project components.""" + + try: + importlib.import_module(module) + except ImportError: + raise RuntimeError(f"Module '{module}' is not available. 
{helper}") From 72187957cfc4040ee6e7c8a67ea18cfe457d271a Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Tue, 27 Aug 2024 10:54:35 +0300 Subject: [PATCH 06/22] =?UTF-8?q?=E2=9A=B0=EF=B8=8F=20removed=20`pytest.ma?= =?UTF-8?q?rk.asyncio()`=20due=20to=20pytest-asyncio=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/backend/test_base.py | 5 ----- tests/unit/backend/test_openai_backend.py | 2 -- tests/unit/executor/test_base.py | 4 ---- tests/unit/scheduler/test_base.py | 2 -- 4 files changed, 13 deletions(-) diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py index c518f82..4c360c6 100644 --- a/tests/unit/backend/test_base.py +++ b/tests/unit/backend/test_base.py @@ -39,7 +39,6 @@ def test_generative_response_creation(): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_backend_make_request(): class MockBackend(Backend): async def make_request(self, request): @@ -79,7 +78,6 @@ def available_models(self): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_backend_submit_final(): class MockBackend(Backend): async def make_request(self, request): @@ -95,7 +93,6 @@ def available_models(self): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_backend_submit_multi(): class MockBackend(Backend): async def make_request(self, request): @@ -114,7 +111,6 @@ def available_models(self): @pytest.mark.regression() -@pytest.mark.asyncio() async def test_backend_submit_no_response(): class MockBackend(Backend): async def make_request(self, request): @@ -131,7 +127,6 @@ def available_models(self): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_backend_submit_multi_final(): class MockBackend(Backend): async def make_request(self, request): diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 80cab45..eb6e9e7 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -161,7 +161,6 @@ def test_openai_backend_models(mock_openai_client): ), ], ) -@pytest.mark.asyncio() async def test_openai_backend_make_request(req, request_args, mock_openai_client): backend = OpenAIBackend(**(request_args or {})) counter = 0 @@ -204,7 +203,6 @@ async def test_openai_backend_make_request(req, request_args, mock_openai_client @pytest.mark.sanity() -@pytest.mark.asyncio() async def test_openai_backend_submit(mock_openai_client): backend = OpenAIBackend() request = TextGenerationRequest(prompt="Test", prompt_token_count=1) diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py index 2cfa670..62bd4b3 100644 --- a/tests/unit/executor/test_base.py +++ b/tests/unit/executor/test_base.py @@ -194,7 +194,6 @@ async def _run_executor_tests( @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_executor_run_sweep(mock_scheduler): num_requests = 15 @@ -214,7 +213,6 @@ async def test_executor_run_sweep(mock_scheduler): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_executor_run_synchronous(mock_scheduler): num_requests = 15 @@ -232,7 +230,6 @@ async def test_executor_run_synchronous(mock_scheduler): @pytest.mark.smoke() -@pytest.mark.asyncio() async def test_executor_run_throughput(mock_scheduler): num_requests = 15 @@ -250,7 +247,6 @@ async def test_executor_run_throughput(mock_scheduler): @pytest.mark.smoke() -@pytest.mark.asyncio() @pytest.mark.parametrize( ("mode", "rate"), [ diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py 
index 093b57a..62e9dce 100644 --- a/tests/unit/scheduler/test_base.py +++ b/tests/unit/scheduler/test_base.py @@ -110,7 +110,6 @@ def test_scheduler_invalid_instantiation( @pytest.mark.sanity() -@pytest.mark.asyncio() @pytest.mark.parametrize( "mode", [ @@ -177,7 +176,6 @@ def _submit(req): @pytest.mark.sanity() -@pytest.mark.asyncio() @pytest.mark.parametrize( "mode", [ From a5357ca658871fe2d9143229137e196937aef656 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Tue, 27 Aug 2024 10:55:29 +0300 Subject: [PATCH 07/22] =?UTF-8?q?=F0=9F=93=9D=20fixed=20class=20example?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DEVELOPING.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DEVELOPING.md b/DEVELOPING.md index 2bae63d..6163dc9 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -254,12 +254,14 @@ The project configuration entrypoint is represented by lazy-loaded `settigns` si The project is fully configurable with environment variables. All the default values and ```py -class NestedIntoLogging(BaseModel): +class Nested(BaseModel): nested: str = "default value" class LoggingSettings(BaseModel): # ... + disabled: bool = False + nested: Nested = Nested() class Settings(BaseSettings): From 68381a5db56d3dc93ea4352e32e2a1de4b34a9df Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 28 Aug 2024 15:08:45 +0300 Subject: [PATCH 08/22] =?UTF-8?q?=F0=9F=A7=B5=20rollback=20`pytest.mark.as?= =?UTF-8?q?yncio`=20fixtures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/backend/test_base.py | 5 +++++ tests/unit/backend/test_openai_backend.py | 2 ++ tests/unit/executor/test_base.py | 8 +++++--- tests/unit/scheduler/test_base.py | 2 ++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py index 4c360c6..c518f82 100644 --- a/tests/unit/backend/test_base.py +++ b/tests/unit/backend/test_base.py @@ -39,6 +39,7 @@ def test_generative_response_creation(): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_backend_make_request(): class MockBackend(Backend): async def make_request(self, request): @@ -78,6 +79,7 @@ def available_models(self): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_backend_submit_final(): class MockBackend(Backend): async def make_request(self, request): @@ -93,6 +95,7 @@ def available_models(self): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_backend_submit_multi(): class MockBackend(Backend): async def make_request(self, request): @@ -111,6 +114,7 @@ def available_models(self): @pytest.mark.regression() +@pytest.mark.asyncio() async def test_backend_submit_no_response(): class MockBackend(Backend): async def make_request(self, request): @@ -127,6 +131,7 @@ def available_models(self): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_backend_submit_multi_final(): class MockBackend(Backend): async def make_request(self, request): diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index eb6e9e7..80cab45 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -161,6 +161,7 @@ def test_openai_backend_models(mock_openai_client): ), ], ) +@pytest.mark.asyncio() async def test_openai_backend_make_request(req, request_args, mock_openai_client): backend = OpenAIBackend(**(request_args or {})) counter = 0 @@ -203,6 +204,7 @@ async def 
test_openai_backend_make_request(req, request_args, mock_openai_client @pytest.mark.sanity() +@pytest.mark.asyncio() async def test_openai_backend_submit(mock_openai_client): backend = OpenAIBackend() request = TextGenerationRequest(prompt="Test", prompt_token_count=1) diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py index 62bd4b3..1da6a7c 100644 --- a/tests/unit/executor/test_base.py +++ b/tests/unit/executor/test_base.py @@ -5,9 +5,7 @@ from guidellm.backend import Backend from guidellm.config import settings -from guidellm.core import ( - TextGenerationBenchmarkReport, -) +from guidellm.core import TextGenerationBenchmarkReport from guidellm.executor.base import Executor, ExecutorResult from guidellm.executor.profile_generator import ProfileGenerator from guidellm.request import RequestGenerator @@ -194,6 +192,7 @@ async def _run_executor_tests( @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_executor_run_sweep(mock_scheduler): num_requests = 15 @@ -213,6 +212,7 @@ async def test_executor_run_sweep(mock_scheduler): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_executor_run_synchronous(mock_scheduler): num_requests = 15 @@ -230,6 +230,7 @@ async def test_executor_run_synchronous(mock_scheduler): @pytest.mark.smoke() +@pytest.mark.asyncio() async def test_executor_run_throughput(mock_scheduler): num_requests = 15 @@ -256,6 +257,7 @@ async def test_executor_run_throughput(mock_scheduler): ("poisson", [10, 20, 30]), ], ) +@pytest.mark.asyncio() async def test_executor_run_constant_poisson(mock_scheduler, mode, rate): num_requests = 15 diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py index 62e9dce..2b393fd 100644 --- a/tests/unit/scheduler/test_base.py +++ b/tests/unit/scheduler/test_base.py @@ -119,6 +119,7 @@ def test_scheduler_invalid_instantiation( "constant", ], ) +@pytest.mark.asyncio() async def test_scheduler_run_number(mode): rate = 10.0 max_number = 20 @@ -186,6 +187,7 @@ def _submit(req): ], ) @pytest.mark.flaky(reruns=5) +@pytest.mark.asyncio() async def test_scheduler_run_duration(mode): rate = 10 max_duration = 2 From 5acb3a8246a830f304f6f813bad213af4c37ce17 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 28 Aug 2024 15:09:08 +0300 Subject: [PATCH 09/22] =?UTF-8?q?=E2=9C=A8=20Deepsparse=20Backend=20integr?= =?UTF-8?q?ation=20first=20implementation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * settings.py::DeepsparseSettings includes all the settings * tests/unit/backend/deepsparse.py includes unit tests * `TestTextGenerationPipeline` mocks the `deepsparse.pipeline.Pipeline` --- src/guidellm/backend/__init__.py | 2 +- src/guidellm/backend/deepsparse/__init__.py | 4 - src/guidellm/backend/deepsparse/backend.py | 59 +++++-- src/guidellm/config.py | 3 +- src/guidellm/utils/__init__.py | 2 + src/guidellm/utils/text.py | 32 +++- tests/unit/backend/test_deepsparse_backend.py | 155 ++++++++++++++++++ 7 files changed, 237 insertions(+), 20 deletions(-) create mode 100644 tests/unit/backend/test_deepsparse_backend.py diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index 869d9f8..e8b0a5f 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,5 +1,5 @@ from .base import Backend, BackendEngine, GenerativeResponse -from .deepsparse import DeepsparseBackend +from .deepsparse.backend import DeepsparseBackend from .openai import OpenAIBackend __all__ = [ diff --git 
a/src/guidellm/backend/deepsparse/__init__.py b/src/guidellm/backend/deepsparse/__init__.py index 99f89b3..a36ffd6 100644 --- a/src/guidellm/backend/deepsparse/__init__.py +++ b/src/guidellm/backend/deepsparse/__init__.py @@ -22,7 +22,3 @@ "Please try `pip install -e '.[deepsparse]'`" ), ) - -from .backend import DeepsparseBackend - -__all__ = ["DeepsparseBackend"] diff --git a/src/guidellm/backend/deepsparse/backend.py b/src/guidellm/backend/deepsparse/backend.py index 60e4db7..209da81 100644 --- a/src/guidellm/backend/deepsparse/backend.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -1,10 +1,12 @@ -from typing import Any, AsyncGenerator, List, Optional +import os +from typing import Any, AsyncGenerator, Dict, List, Optional -from deepsparse import Pipeline +from deepsparse import Pipeline, TextGeneration from loguru import logger from transformers import AutoTokenizer from guidellm.backend import Backend, GenerativeResponse +from guidellm.config import settings from guidellm.core import TextGenerationRequest @@ -15,11 +17,30 @@ class DeepsparseBackend(Backend): """ def __init__(self, model: Optional[str] = None, **request_args): - self.request_args = request_args - self.pipeline: Pipeline = Pipeline.create( - task="sentiment-analysis", - model_path=model or self.default_model, - ) + self._request_args: Dict[str, Any] = request_args + self.model: str = self._get_model(model) + self.pipeline: Pipeline = TextGeneration(model=self.model) + + def _get_model(self, model_from_cli: Optional[str] = None) -> str: + """Provides the model by the next priority list: + 1. from function argument (comes from CLI) + 1. from environment variable + 2. `self.default_model` from `self.available_models` + """ + + if model_from_cli is not None: + return model_from_cli + elif settings.deepsprase.model is not None: + logger.info( + "Using Deepsparse model from environment variable: {}".format( + settings.deepsprase.model + ) + ) + return settings.deepsprase.model + + else: + logger.info(f"Using default Deepsparse model: {self.default_model}") + return self.default_model async def make_request( self, request: TextGenerationRequest @@ -38,8 +59,23 @@ async def make_request( ) token_count = 0 - for response in self.pipeline.generations: - if not (token := response.text): + request_args = { + **self._request_args, + "streaming": True, + "max_new_tokens": request.output_token_count, + } + + if not (output := self.pipeline(prompt=request.prompt, **request_args)): + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + return + + for generation in output.generations: + if not (token := generation.text): yield GenerativeResponse( type_="final", prompt=request.prompt, @@ -66,10 +102,7 @@ def available_models(self) -> List[str]: """ # WARNING: The default model from the documentation is defined here - - return [ - "zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none" - ] + return ["hf:mgoin/TinyStories-33M-quant-deepsparse"] def model_tokenizer(self, model: str) -> Optional[Any]: """ diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 10e4817..889c5d3 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -112,7 +112,7 @@ class DeepsparseSettings(BaseModel): Deepsparse settings for the Python API library """ - model: str = "zoo:mpt-7b-dolly_mpt_pretrain-pruned50_quantized" + model: Optional[str] = None class ReportGenerationSettings(BaseModel): @@ -135,6 
+135,7 @@ class Settings(BaseSettings): ```sh export GUIDELLM__LOGGING__DISABLED=true export GUIDELLM__OPENAI__API_KEY=****** + export GUIDELLM__DEEPSPARSE__MODEL=****** ``` """ diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 4776848..00b3673 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -9,6 +9,7 @@ load_text, load_text_lines, parse_text_objects, + random_strings, split_lines_by_punctuation, split_text, ) @@ -30,6 +31,7 @@ "load_text", "load_text_lines", "load_transformers_dataset", + "random_strings", "parse_text_objects", "resolve_transformers_dataset", "resolve_transformers_dataset_column", diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index 13a0dff..de68a6b 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -1,8 +1,10 @@ import csv import json +import random import re +import string from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, Generator, List, Optional, Tuple, Union from urllib.parse import urlparse import ftfy @@ -23,6 +25,7 @@ "parse_text_objects", "split_lines_by_punctuation", "split_text", + "random_strings", ] @@ -453,3 +456,30 @@ def load_text_lines( # extract the lines from the data return [row[filter_] for row in data] if filter_ else [str(row) for row in data] + + +def random_strings( + min: int, max: int, n: int = 0, dataset: Optional[str] = None +) -> Generator[str, None, None]: + """Yield random strings. + + :param min: the min number of output characters + :param max: the max number of output characters + :param n: the number of outputs. If `0` -> works for infinite + :param dataset: represents allowed characters for the operation + """ + + characters: str = dataset or string.printable + + if n < 0: + raise ValueError("'n' must be >= '0'") + elif n == 0: + while True: + yield "".join( + (random.choice(characters) for _ in range(random.randint(min, max))) + ) + else: + for _ in range(n): + yield "".join( + (random.choice(characters) for _ in range(random.randint(min, max))) + ) diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse_backend.py new file mode 100644 index 0000000..579c35a --- /dev/null +++ b/tests/unit/backend/test_deepsparse_backend.py @@ -0,0 +1,155 @@ +from typing import Any, Dict, Generator, Optional, Type + +import pytest +from pydantic import BaseModel + +from guidellm.backend import Backend, DeepsparseBackend +from guidellm.config import reload_settings +from guidellm.core import TextGenerationRequest +from guidellm.utils import random_strings + + +class TestDeepsparseTextGeneration(BaseModel): + """The representation of a deepsparse data structure.""" + + text: str + + +class TestTextGenerationPipeline: + """Deepsparse TextGeneration test interface. + + By default this class generates '10' text responses. + + This class includes an additional development information + for better testing experience. + + Method `__call__` allows to mock the result object that comes from + `deepsparse.pipeline.Pipeline()` so everything is encapsulated right here. + + :param self._generation: dynamic representation of generated responses + from deepsparse interface. 
+ """ + + def __init__(self): + self._generations: list[TestDeepsparseTextGeneration] = [] + self._prompt: Optional[str] = None + self._max_new_tokens: Optional[int] = None + + def __call__( + self, *_, prompt: str, max_new_tokens: Optional[int] = None, **kwargs + ) -> Any: + """Mocks the result from `deepsparse.pipeline.Pipeline()()`. + Set reserved request arguments on call + """ + + self._prompt = prompt + self._max_new_tokens = max_new_tokens + + return self + + @property + def generations(self) -> Generator[TestDeepsparseTextGeneration, None, None]: + for text in random_strings( + min=10, max=50, n=self._max_new_tokens if self._max_new_tokens else 10 + ): + generation = TestDeepsparseTextGeneration(text=text) + self._generations.append(generation) + yield generation + + +@pytest.fixture(autouse=True) +def mock_deepsparse_pipeline(mocker): + return mocker.patch( + "deepsparse.Pipeline.create", + return_value=TestTextGenerationPipeline(), + ) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "create_payload", + [ + {}, + {"model": "test/custom_llm"}, + ], +) +def test_backend_creation(create_payload: Dict): + """Test the "Deepspaarse Backend" class + with defaults and custom input parameters. + """ + + backends: list[DeepsparseBackend] = [ + Backend.create("deepsparse", **create_payload), + DeepsparseBackend(**create_payload), + ] + + for backend in backends: + assert getattr(backend, "pipeline") + ( + getattr(backend, "model") == custom_model + if (custom_model := create_payload.get("model")) + else getattr(backend, "default_model") + ) + + +@pytest.mark.smoke() +def test_backend_model_from_env(mocker): + mocker.patch.dict( + "os.environ", + {"GUIDELLM__DEEPSPRASE__MODEL": "test_backend_model_from_env"}, + ) + + reload_settings() + + backends: list[DeepsparseBackend] = [ + Backend.create("deepsparse"), + DeepsparseBackend(), + ] + + for backend in backends: + assert getattr(backend, "model") == "test_backend_model_from_env" + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "text_generation_request_create_payload", + [ + {"prompt": "Test prompt"}, + {"prompt": "Test prompt", "output_token_count": 20}, + ], +) +@pytest.mark.asyncio() +async def test_make_request(text_generation_request_create_payload: Dict): + backend = DeepsparseBackend() + + output_tokens: list[str] = [] + async for response in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ): + if response.add_token: + output_tokens.append(response.add_token) + assert "".join(output_tokens) == "".join( + (generation.text for generation in backend.pipeline._generations) + ) + + if max_tokens := text_generation_request_create_payload.get("output_token_count"): + assert len(backend.pipeline._generations) == max_tokens + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + "text_generation_request_create_payload,error", + [ + ({"prompt": "Test prompt", "output_token_count": -1}, ValueError), + ], +) +@pytest.mark.asyncio() +async def test_make_request_invalid_request_payload( + text_generation_request_create_payload: Dict, error: Type[Exception] +): + backend = DeepsparseBackend() + with pytest.raises(error): + async for _ in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ): + pass From 45e07d0ef8a4347dcc6969d11ea2d915e9b1105a Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 28 Aug 2024 16:36:43 +0300 Subject: [PATCH 10/22] code quality is provided --- pyproject.toml | 17 ++++-- 
src/guidellm/backend/deepsparse/backend.py | 26 +------- src/guidellm/executor/profile_generator.py | 14 +++-- src/guidellm/scheduler/base.py | 12 ++-- src/guidellm/utils/dependencies.py | 4 +- src/guidellm/utils/text.py | 8 ++- tests/e2e/cli/test_application_entrypoint.py | 2 +- tests/unit/backend/test_deepsparse_backend.py | 61 +++++++++++-------- tests/unit/core/test_distribution.py | 6 +- tests/unit/executor/test_profile_generator.py | 5 +- tests/unit/request/test_emulated.py | 2 +- tests/unit/scheduler/test_base.py | 6 +- tests/unit/utils/test_text.py | 24 ++++++++ 13 files changed, 103 insertions(+), 84 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e2e9ad6..946e6c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,10 @@ exclude = ["venv", ".tox"] # Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery follow_imports = 'silent' +[[tool.mypy.overrides]] +module = ["deepsparse.*", "transformers.*"] +ignore_missing_imports=true + [tool.ruff] line-length = 88 @@ -112,11 +116,14 @@ indent-style = "space" [tool.ruff.lint] ignore = [ "PLR0913", + "PLR2004", # allow numbers without constants definitions + "RET505", # allow `else` block after `if (condition): return value` line "TCH001", "COM812", "ISC001", "TCH002", "PLW1514", # allow Path.open without encoding + "S311", # allow standard pseudo-random generators ] select = [ @@ -172,19 +179,19 @@ select = [ "FIX", # flake8-fixme: detects FIXMEs and other temporary comments that should be resolved ] -[tool.ruff.lint.extend-per-file-ignores] -"tests/**/*.py" = [ + +[tool.ruff.lint.per-file-ignores] +"tests/*" = [ "S101", # asserts allowed in tests + "S105", # allow hardcoded passwords in tests + "S106", # allow hardcoded passwords in tests "ARG", # Unused function args allowed in tests "PLR2004", # Magic value used in comparison "TCH002", # No import only type checking in tests "SLF001", # enable private member access in tests - "S105", # allow hardcoded passwords in tests - "S311", # allow standard pseudo-random generators in tests "PT011", # allow generic exceptions in tests "N806", # allow uppercase variable names in tests "PGH003", # allow general ignores in tests - "S106", # allow hardcoded passwords in tests ] [tool.ruff.lint.isort] diff --git a/src/guidellm/backend/deepsparse/backend.py b/src/guidellm/backend/deepsparse/backend.py index 209da81..fa47b23 100644 --- a/src/guidellm/backend/deepsparse/backend.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -1,9 +1,7 @@ -import os from typing import Any, AsyncGenerator, Dict, List, Optional from deepsparse import Pipeline, TextGeneration from loguru import logger -from transformers import AutoTokenizer from guidellm.backend import Backend, GenerativeResponse from guidellm.config import settings @@ -32,12 +30,10 @@ def _get_model(self, model_from_cli: Optional[str] = None) -> str: return model_from_cli elif settings.deepsprase.model is not None: logger.info( - "Using Deepsparse model from environment variable: {}".format( - settings.deepsprase.model - ) + "Using Deepsparse model from environment variable: " + f"{settings.deepsprase.model}" ) return settings.deepsprase.model - else: logger.info(f"Using default Deepsparse model: {self.default_model}") return self.default_model @@ -104,24 +100,6 @@ def available_models(self) -> List[str]: # WARNING: The default model from the documentation is defined here return ["hf:mgoin/TinyStories-33M-quant-deepsparse"] - def model_tokenizer(self, model: str) -> Optional[Any]: - """ - Get the 
tokenizer for a model. - - :param model: The model to get the tokenizer for. - :type model: str - :return: The tokenizer for the model, or None if it cannot be created. - :rtype: Optional[Any] - """ - - try: - tokenizer = AutoTokenizer.from_pretrained(model) - logger.info(f"Tokenizer created for model: {model}") - return tokenizer - except Exception as err: # noqa: BLE001 - logger.warning(f"Could not create tokenizer for model {model}: {err}") - return None - def _token_count(self, text: str) -> int: token_count = len(text.split()) logger.debug(f"Token count for text '{text}': {token_count}") diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index 8a36db0..d8f0075 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -162,12 +162,14 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil elif self.mode == "sweep": profile = self.create_sweep_profile( self.generated_count, - sync_benchmark=current_report.benchmarks[0] - if current_report.benchmarks - else None, - throughput_benchmark=current_report.benchmarks[1] - if len(current_report.benchmarks) > 1 - else None, + sync_benchmark=( + current_report.benchmarks[0] if current_report.benchmarks else None + ), + throughput_benchmark=( + current_report.benchmarks[1] + if len(current_report.benchmarks) > 1 + else None + ), ) else: err = ValueError(f"Invalid mode: {self.mode}") diff --git a/src/guidellm/scheduler/base.py b/src/guidellm/scheduler/base.py index 1474c12..325ac7b 100644 --- a/src/guidellm/scheduler/base.py +++ b/src/guidellm/scheduler/base.py @@ -229,16 +229,16 @@ def _get_count_total(): return ( self.max_number if self.max_number - else round(self.max_duration) - if self.max_duration - else 0 + else round(self.max_duration) if self.max_duration else 0 ) def _get_count_completed(): return min( - benchmark.request_count + benchmark.error_count - if self.max_number - else round(time.time() - start_time), + ( + benchmark.request_count + benchmark.error_count + if self.max_number + else round(time.time() - start_time) + ), _get_count_total(), ) diff --git a/src/guidellm/utils/dependencies.py b/src/guidellm/utils/dependencies.py index 2d28f57..ac67688 100644 --- a/src/guidellm/utils/dependencies.py +++ b/src/guidellm/utils/dependencies.py @@ -9,7 +9,7 @@ def _extract_python_version(data: str) -> Tuple[int, ...]: if len(items := data.split(".")) > 2: raise ValueError("Python version format: MAJOR.MINOR") - if not all((item.isnumeric() for item in items)): + if not all(item.isnumeric() for item in items): raise ValueError("Python version must include only numbers") return tuple(int(item) for item in items) @@ -43,4 +43,4 @@ def module_is_available(module: str, helper: str): try: importlib.import_module(module) except ImportError: - raise RuntimeError(f"Module '{module}' is not available. {helper}") + raise RuntimeError(f"Module '{module}' is not available. {helper}") from None diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index de68a6b..1bdba67 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -459,7 +459,7 @@ def load_text_lines( def random_strings( - min: int, max: int, n: int = 0, dataset: Optional[str] = None + min_chars: int, max_chars: int, n: int = 0, dataset: Optional[str] = None ) -> Generator[str, None, None]: """Yield random strings. 
@@ -476,10 +476,12 @@ def random_strings( elif n == 0: while True: yield "".join( - (random.choice(characters) for _ in range(random.randint(min, max))) + random.choice(characters) + for _ in range(random.randint(min_chars, max_chars)) ) else: for _ in range(n): yield "".join( - (random.choice(characters) for _ in range(random.randint(min, max))) + random.choice(characters) + for _ in range(random.randint(min_chars, max_chars)) ) diff --git a/tests/e2e/cli/test_application_entrypoint.py b/tests/e2e/cli/test_application_entrypoint.py index e555bb0..461190a 100644 --- a/tests/e2e/cli/test_application_entrypoint.py +++ b/tests/e2e/cli/test_application_entrypoint.py @@ -43,7 +43,7 @@ def test_main_cli_overrided( ( b"Usage: main [OPTIONS]\nTry 'main --help' for help.\n\n" b"Error: Invalid value for '--backend': " - b"'invalid' is not one of 'test', 'openai_server'.\n" + b"'invalid' is not one of 'test', 'openai_server', 'deepsparse'.\n" ), ), ( diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse_backend.py index 579c35a..2c522d5 100644 --- a/tests/unit/backend/test_deepsparse_backend.py +++ b/tests/unit/backend/test_deepsparse_backend.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Generator, Optional, Type +from typing import Any, Dict, Generator, List, Optional, cast import pytest from pydantic import BaseModel @@ -31,7 +31,7 @@ class TestTextGenerationPipeline: """ def __init__(self): - self._generations: list[TestDeepsparseTextGeneration] = [] + self._generations: List[TestDeepsparseTextGeneration] = [] self._prompt: Optional[str] = None self._max_new_tokens: Optional[int] = None @@ -50,7 +50,9 @@ def __call__( @property def generations(self) -> Generator[TestDeepsparseTextGeneration, None, None]: for text in random_strings( - min=10, max=50, n=self._max_new_tokens if self._max_new_tokens else 10 + min_chars=10, + max_chars=50, + n=self._max_new_tokens if self._max_new_tokens else 10, ): generation = TestDeepsparseTextGeneration(text=text) self._generations.append(generation) @@ -78,17 +80,20 @@ def test_backend_creation(create_payload: Dict): with defaults and custom input parameters. 
""" - backends: list[DeepsparseBackend] = [ - Backend.create("deepsparse", **create_payload), - DeepsparseBackend(**create_payload), - ] + backends: List[DeepsparseBackend] = cast( + List[DeepsparseBackend], + [ + Backend.create("deepsparse", **create_payload), + DeepsparseBackend(**create_payload), + ], + ) for backend in backends: - assert getattr(backend, "pipeline") + assert backend.pipeline ( - getattr(backend, "model") == custom_model + backend.model == custom_model if (custom_model := create_payload.get("model")) - else getattr(backend, "default_model") + else backend.default_model ) @@ -101,13 +106,16 @@ def test_backend_model_from_env(mocker): reload_settings() - backends: list[DeepsparseBackend] = [ - Backend.create("deepsparse"), - DeepsparseBackend(), - ] + backends: List[DeepsparseBackend] = cast( + List[DeepsparseBackend], + [ + Backend.create("deepsparse"), + DeepsparseBackend(), + ], + ) for backend in backends: - assert getattr(backend, "model") == "test_backend_model_from_env" + assert backend.model == "test_backend_model_from_env" @pytest.mark.smoke() @@ -122,14 +130,14 @@ def test_backend_model_from_env(mocker): async def test_make_request(text_generation_request_create_payload: Dict): backend = DeepsparseBackend() - output_tokens: list[str] = [] + output_tokens: List[str] = [] async for response in backend.make_request( request=TextGenerationRequest(**text_generation_request_create_payload) ): if response.add_token: output_tokens.append(response.add_token) assert "".join(output_tokens) == "".join( - (generation.text for generation in backend.pipeline._generations) + generation.text for generation in backend.pipeline._generations ) if max_tokens := text_generation_request_create_payload.get("output_token_count"): @@ -138,18 +146,23 @@ async def test_make_request(text_generation_request_create_payload: Dict): @pytest.mark.smoke() @pytest.mark.parametrize( - "text_generation_request_create_payload,error", + ("text_generation_request_create_payload", "error"), [ - ({"prompt": "Test prompt", "output_token_count": -1}, ValueError), + ( + {"prompt": "Test prompt", "output_token_count": -1}, + ValueError, + ), ], ) @pytest.mark.asyncio() async def test_make_request_invalid_request_payload( - text_generation_request_create_payload: Dict, error: Type[Exception] + text_generation_request_create_payload: Dict, error ): backend = DeepsparseBackend() with pytest.raises(error): - async for _ in backend.make_request( - request=TextGenerationRequest(**text_generation_request_create_payload) - ): - pass + [ + respnose + async for respnose in backend.make_request( + request=TextGenerationRequest(**text_generation_request_create_payload) + ) + ] diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py index 128a1f1..2e2dd50 100644 --- a/tests/unit/core/test_distribution.py +++ b/tests/unit/core/test_distribution.py @@ -73,9 +73,9 @@ def test_distribution_str(): "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" in str(dist) ) - assert "'min': np.float64(1.0)" in str(dist) - assert "'max': np.float64(5.0)" in str(dist) - assert "'range': np.float64(4.0)" in str(dist) + assert "'min': 1.0" in str(dist) + assert "'max': 5.0" in str(dist) + assert "'range': 4.0" in str(dist) @pytest.mark.regression() diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py index 1389459..37064ee 100644 --- a/tests/unit/executor/test_profile_generator.py +++ 
b/tests/unit/executor/test_profile_generator.py @@ -4,10 +4,7 @@ import pytest from guidellm import settings -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationBenchmarkReport, -) +from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport from guidellm.executor import Profile, ProfileGenerationMode, ProfileGenerator diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py index cd89d54..6a225f3 100644 --- a/tests/unit/request/test_emulated.py +++ b/tests/unit/request/test_emulated.py @@ -348,7 +348,7 @@ def test_emulated_request_generator_lifecycle( str(file_path) if config_type == "file_str" else file_path ) else: - raise + raise Exception for _ in range(5): request = generator.create_item() diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py index 2b393fd..039f4c6 100644 --- a/tests/unit/scheduler/test_base.py +++ b/tests/unit/scheduler/test_base.py @@ -12,11 +12,7 @@ TextGenerationResult, ) from guidellm.request import RequestGenerator -from guidellm.scheduler import ( - LoadGenerator, - Scheduler, - SchedulerResult, -) +from guidellm.scheduler import LoadGenerator, Scheduler, SchedulerResult @pytest.mark.smoke() diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py index 1d89ee3..9173cf7 100644 --- a/tests/unit/utils/test_text.py +++ b/tests/unit/utils/test_text.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import List from unittest.mock import patch import pytest @@ -13,6 +14,7 @@ load_text, load_text_lines, parse_text_objects, + random_strings, split_lines_by_punctuation, split_text, ) @@ -392,3 +394,25 @@ def test_split_text_with_mixed_separators(): assert words == ["This", "is", "a", "test", "with", "mixed", "separators."] assert separators == ["\t", " ", " ", "\n", " ", " ", " "] assert new_lines == [0, 4] + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("min_chars", "max_chars", "n", "dataset", "total_chars_len"), + [ + (5, 5, 10, None, 50), # always 5 chars per response + (1, 10, 10, None, None), # 1..10 chars per each + ], +) +def test_random_strings_generation(min_chars, max_chars, n, dataset, total_chars_len): + results: List[str] = list( + random_strings(min_chars=min_chars, max_chars=max_chars, n=n, dataset=dataset) + ) + + # Ensure total results + assert len(results) == n + + if total_chars_len is not None: + assert sum(len(r) for r in results) == total_chars_len + else: + assert min_chars * n <= sum(len(r) for r in results) < max_chars * n From 1f1e0388bff612eaf28b8122633effcc515ce6d6 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Wed, 28 Aug 2024 17:18:53 +0300 Subject: [PATCH 11/22] fit Deepsparse Backend to work with new Backend abstraction --- src/guidellm/backend/deepsparse/backend.py | 11 +++- tests/e2e/cli/test_application_entrypoint.py | 68 -------------------- tests/unit/executor/test_base.py | 2 - 3 files changed, 9 insertions(+), 72 deletions(-) delete mode 100644 tests/e2e/cli/test_application_entrypoint.py diff --git a/src/guidellm/backend/deepsparse/backend.py b/src/guidellm/backend/deepsparse/backend.py index fa47b23..17037d0 100644 --- a/src/guidellm/backend/deepsparse/backend.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -15,9 +15,16 @@ class DeepsparseBackend(Backend): """ def __init__(self, model: Optional[str] = None, **request_args): + super().__init__( + type_="deepsparse", + model=self._get_model(model), + target="not used", + ) + self._request_args: Dict[str, Any] = request_args - 
self.model: str = self._get_model(model) - self.pipeline: Pipeline = TextGeneration(model=self.model) + self.pipeline: Pipeline = TextGeneration(model=self._model) + + logger.info("Deepsparse Backend uses model {}", self._model) def _get_model(self, model_from_cli: Optional[str] = None) -> str: """Provides the model by the next priority list: diff --git a/tests/e2e/cli/test_application_entrypoint.py b/tests/e2e/cli/test_application_entrypoint.py deleted file mode 100644 index 461190a..0000000 --- a/tests/e2e/cli/test_application_entrypoint.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import List -from unittest.mock import MagicMock - -import pytest -from click.testing import CliRunner - -from guidellm.main import main - - -def test_main_defaults_from_cli( - patch_main: MagicMock, cli_runner: CliRunner, default_main_kwargs -): - cli_runner.invoke(main) - - assert patch_main.call_count == 1 - assert patch_main.call_args.kwargs == default_main_kwargs - - -def test_main_cli_overrided( - patch_main: MagicMock, cli_runner: CliRunner, default_main_kwargs -): - cli_runner.invoke( - main, - ["--target", "localhost:9000", "--backend", "test", "--rate-type", "sweep"], - ) - default_main_kwargs.update( - { - "target": "localhost:9000", - "backend": "test", - "rate_type": "sweep", - } - ) - - assert patch_main.call_count == 1 - assert patch_main.call_args.kwargs == default_main_kwargs - - -@pytest.mark.parametrize( - ("args", "expected_stdout"), - [ - ( - ["--backend", "invalid", "--rate-type", "sweep"], - ( - b"Usage: main [OPTIONS]\nTry 'main --help' for help.\n\n" - b"Error: Invalid value for '--backend': " - b"'invalid' is not one of 'test', 'openai_server', 'deepsparse'.\n" - ), - ), - ( - ["--max-requests", "str instead of int"], - ( - b"Usage: main [OPTIONS]\nTry 'main --help' for help.\n\n" - b"Error: Invalid value for '--max-requests': " - b"'str instead of int' is not a valid integer.\n" - ), - ), - ], -) -def test_main_cli_validation_error( - patch_main: MagicMock, - cli_runner: CliRunner, - args: List[str], - expected_stdout: bytes, -): - result = cli_runner.invoke(main, args) - - assert patch_main.call_count == 0 - assert result.stdout_bytes == expected_stdout diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py index 43afa38..1eec6ef 100644 --- a/tests/unit/executor/test_base.py +++ b/tests/unit/executor/test_base.py @@ -13,8 +13,6 @@ ProfileGenerationMode, ProfileGenerator, ) -from guidellm.executor.base import Executor, ExecutorResult -from guidellm.executor.profile_generator import ProfileGenerator from guidellm.request import RequestGenerator from guidellm.scheduler import Scheduler, SchedulerResult From ce1c3ba119044ac686455098518de3a5370baa10 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 29 Aug 2024 11:12:30 +0300 Subject: [PATCH 12/22] =?UTF-8?q?=F0=9F=94=A7=20`GUIDELLM=5F=5FLLM=5FMODEL?= =?UTF-8?q?`=20shared=20across=20all=20the=20backends?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * `DeepsparseSettings` is completely removed --- src/guidellm/backend/deepsparse/backend.py | 12 ++++++++---- src/guidellm/config.py | 12 ++---------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/guidellm/backend/deepsparse/backend.py b/src/guidellm/backend/deepsparse/backend.py index 17037d0..b667ab7 100644 --- a/src/guidellm/backend/deepsparse/backend.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -24,7 +24,7 @@ def __init__(self, model: Optional[str] = None, **request_args): 
self._request_args: Dict[str, Any] = request_args self.pipeline: Pipeline = TextGeneration(model=self._model) - logger.info("Deepsparse Backend uses model {}", self._model) + logger.info(f"Deepsparse Backend uses model {self._model}") def _get_model(self, model_from_cli: Optional[str] = None) -> str: """Provides the model by the next priority list: @@ -35,14 +35,18 @@ def _get_model(self, model_from_cli: Optional[str] = None) -> str: if model_from_cli is not None: return model_from_cli - elif settings.deepsprase.model is not None: + elif settings.llm_model is not None: logger.info( "Using Deepsparse model from environment variable: " - f"{settings.deepsprase.model}" + f"{settings.llm_model}" ) - return settings.deepsprase.model + return settings.llm_model else: logger.info(f"Using default Deepsparse model: {self.default_model}") + logger.info( + "To customize the model either set the 'GUIDELLM__LLM_MODEL' " + "environment variable or set the CLI argument '--model'" + ) return self.default_model async def make_request( diff --git a/src/guidellm/config.py b/src/guidellm/config.py index de23eee..3c31f17 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -107,14 +107,6 @@ class OpenAISettings(BaseModel): max_gen_tokens: int = 4096 -class DeepsparseSettings(BaseModel): - """ - Deepsparse settings for the Python API library - """ - - model: Optional[str] = None - - class ReportGenerationSettings(BaseModel): """ Report generation settings for the application @@ -135,7 +127,7 @@ class Settings(BaseSettings): ```sh export GUIDELLM__LOGGING__DISABLED=true export GUIDELLM__OPENAI__API_KEY=****** - export GUIDELLM__DEEPSPARSE__MODEL=****** + export GUIDELLM__LLM_MODEL=****** ``` """ @@ -149,6 +141,7 @@ class Settings(BaseSettings): # general settings env: Environment = Environment.PROD + llm_model: str = "mistralai/Mistral-7B-Instruct-v0.3" request_timeout: int = 30 max_concurrency: int = 512 num_sweep_profiles: int = 9 @@ -160,7 +153,6 @@ class Settings(BaseSettings): # Request settings openai: OpenAISettings = OpenAISettings() - deepsprase: DeepsparseSettings = DeepsparseSettings() report_generation: ReportGenerationSettings = ReportGenerationSettings() @model_validator(mode="after") From 8e88bae0b30b814695cb056ea34bd1fde31df3f9 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 29 Aug 2024 16:49:08 +0300 Subject: [PATCH 13/22] Test emulated data source constant -> settings value --- tests/unit/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 9c35e99..406460b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -5,6 +5,8 @@ import pytest import requests_mock +from guidellm.config import settings + @pytest.fixture() def mock_auto_tokenizer(): @@ -26,5 +28,5 @@ def mock_requests_pride_and_prejudice(): text_content = text_path.read_text() with requests_mock.Mocker() as mock: - mock.get("https://www.gutenberg.org/files/1342/1342-0.txt", text=text_content) + mock.get(settings.emulated_data.source, text=text_content) yield mock From 75e708b475364db7d461813a6bf38d5230e36cd9 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 29 Aug 2024 17:46:04 +0300 Subject: [PATCH 14/22] =?UTF-8?q?=F0=9F=92=84=20mdformat=20is=20happy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DEVELOPING.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DEVELOPING.md b/DEVELOPING.md index 95043f4..b230366 100644 --- a/DEVELOPING.md 
+++ b/DEVELOPING.md @@ -33,8 +33,7 @@ cd guidellm pip install -e .[dev] ``` -If you work with `deepsparse` backend, etc it has some other software limitations. -In order to install dependencies for the specific backend, run: +If you work with `deepsparse` backend, etc it has some other software limitations. In order to install dependencies for the specific backend, run: ```sh pip install -e .[deepsparse] From 913253fa075e32fbbb3e1df550fe2cdade320d98 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Thu, 29 Aug 2024 18:22:10 +0300 Subject: [PATCH 15/22] =?UTF-8?q?=E2=9C=85=20Tests=20are=20fixed=20accordi?= =?UTF-8?q?ng=20to=20a=20new=20Backend=20base=20implementation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/deepsparse/backend.py | 18 +++++++++++------- tests/unit/backend/test_deepsparse_backend.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/guidellm/backend/deepsparse/backend.py b/src/guidellm/backend/deepsparse/backend.py index b667ab7..924d04e 100644 --- a/src/guidellm/backend/deepsparse/backend.py +++ b/src/guidellm/backend/deepsparse/backend.py @@ -15,15 +15,12 @@ class DeepsparseBackend(Backend): """ def __init__(self, model: Optional[str] = None, **request_args): - super().__init__( - type_="deepsparse", - model=self._get_model(model), - target="not used", - ) - self._request_args: Dict[str, Any] = request_args + self._model = self._get_model(model) self.pipeline: Pipeline = TextGeneration(model=self._model) + super().__init__(type_="deepsparse", model=self._model, target="not used") + logger.info(f"Deepsparse Backend uses model {self._model}") def _get_model(self, model_from_cli: Optional[str] = None) -> str: @@ -89,7 +86,7 @@ async def make_request( prompt_token_count=request.prompt_token_count, output_token_count=token_count, ) - break + return else: token_count += 1 yield GenerativeResponse( @@ -100,6 +97,13 @@ async def make_request( output_token_count=token_count, ) + yield GenerativeResponse( + type_="final", + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, + ) + def available_models(self) -> List[str]: """ Get the available models for the backend. 
diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse_backend.py index 2c522d5..f8916c7 100644 --- a/tests/unit/backend/test_deepsparse_backend.py +++ b/tests/unit/backend/test_deepsparse_backend.py @@ -101,7 +101,7 @@ def test_backend_creation(create_payload: Dict): def test_backend_model_from_env(mocker): mocker.patch.dict( "os.environ", - {"GUIDELLM__DEEPSPRASE__MODEL": "test_backend_model_from_env"}, + {"GUIDELLM__LLM_MODEL": "test_backend_model_from_env"}, ) reload_settings() From e376ed9225b8e19aa803aed44a3044ae114b1659 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:51:38 +0300 Subject: [PATCH 16/22] =?UTF-8?q?=F0=9F=94=A8=20tox=20tests=20include=20`d?= =?UTF-8?q?eepsparse`=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tox.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index 36e2809..40611c5 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,7 @@ env_list = py38,py39,py310,py311,py312 [testenv] description = Run all tests deps = - .[dev] + .[dev,deepsparse] commands = pytest tests/ {posargs} @@ -14,7 +14,7 @@ commands = [testenv:test-unit] description = Run unit tests deps = - .[dev] + .[dev,deepsparse] commands = python -m pytest tests/unit {posargs} @@ -22,7 +22,7 @@ commands = [testenv:test-integration] description = Run integration tests deps = - .[dev] + .[dev,deepsparse] commands = python -m pytest tests/integration {posargs} @@ -30,7 +30,7 @@ commands = [testenv:test-e2e] description = Run end-to-end tests deps = - .[dev] + .[dev,deepsparse] commands = python -m pytest tests/e2e {posargs} From 3a2c6c1293be5dc42728325b882ae9fa9c587b56 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:52:15 +0300 Subject: [PATCH 17/22] =?UTF-8?q?=F0=9F=8F=B7=EF=B8=8F=20Type=20annotation?= =?UTF-8?q?s=20are=20added?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 717b9db..010cdd2 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -117,9 +117,10 @@ def __init__(self, type_: BackendEngine, target: str, model: str): :param target: The target URL for the backend. :param model: The model used by the backend. 
""" - self._type = type_ - self._target = target - self._model = model + + self._type: BackendEngine = type_ + self._target: str = target + self._model: str = model self.test_connection() From 74a6dfd37645f6cb12bf2a16d2367c3ca59e24be Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:52:46 +0300 Subject: [PATCH 18/22] =?UTF-8?q?=F0=9F=90=9B=20Assert=20with=20config=20v?= =?UTF-8?q?alues=20instead=20of=20constants?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/backend/test_openai_backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 396eb4c..6c11081 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -2,7 +2,8 @@ import pytest -from guidellm.backend import Backend, OpenAIBackend +from guidellm.backend import Backend +from guidellm.backend.openai import OpenAIBackend from guidellm.config import reload_settings, settings from guidellm.core import TextGenerationRequest @@ -245,8 +246,8 @@ def test_openai_backend_target(mock_openai_client): assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore backend = OpenAIBackend() - assert backend._async_client.kwargs["base_url"] == "http://localhost:8000/v1" # type: ignore - assert backend._client.kwargs["base_url"] == "http://localhost:8000/v1" # type: ignore + assert backend._async_client.kwargs["base_url"] == settings.openai.base_url # type: ignore + assert backend._client.kwargs["base_url"] == settings.openai.base_url # type: ignore backend = OpenAIBackend() assert backend._async_client.kwargs["base_url"] == settings.openai.base_url # type: ignore From 1a53951cab2968e8865ebf4a4e0f895a41ea9e12 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:53:21 +0300 Subject: [PATCH 19/22] =?UTF-8?q?=F0=9F=93=8C=20.[deepsparse]=20dependency?= =?UTF-8?q?=20is=20skipped=20if=20Python>3.11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9c69de0..942c1cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dev = [ "types-toml", ] deepsparse = [ - "deepsparse", + "deepsparse; python_version < '3.12'", ] From 39ffcb314b0845f60d417b690c9f600f6ebd687e Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:57:29 +0300 Subject: [PATCH 20/22] =?UTF-8?q?=F0=9F=9A=9A=20DeepsparseBackend=20is=20m?= =?UTF-8?q?oved=20to=20a=20another=20module=20`backend.DeepsparseBackend`?= =?UTF-8?q?=20->=20`backend.deepsparse.DeepsparseBackend`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/__init__.py | 11 +---------- src/guidellm/backend/deepsparse/__init__.py | 10 +++++++--- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index 87db3bc..b6d1b9d 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -1,12 +1,3 @@ from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse -from .deepsparse.backend import DeepsparseBackend -from .openai import OpenAIBackend -__all__ = [ - "Backend", - "BackendEngine", - "BackendEnginePublic", - "GenerativeResponse", - "OpenAIBackend", - "DeepsparseBackend", 
-] +__all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"] diff --git a/src/guidellm/backend/deepsparse/__init__.py b/src/guidellm/backend/deepsparse/__init__.py index a36ffd6..31bf6e1 100644 --- a/src/guidellm/backend/deepsparse/__init__.py +++ b/src/guidellm/backend/deepsparse/__init__.py @@ -1,5 +1,6 @@ """ This package encapsulates the "Deepsparse Backend" implementation. + ref: https://github.com/neuralmagic/deepsparse The `deepsparse` package supports Python3.6..Python3.11, @@ -7,14 +8,13 @@ Safe range of versions is Python3.8..Python3.11 for the Deepsparse Backend implementation. + +In the end ensure that the `deepsparse` package is installed. """ from guidellm.utils import check_python_version, module_is_available -# Ensure that python is in valid range check_python_version(min_version="3.8", max_version="3.11") - -# Ensure that deepsparse is installed module_is_available( module="deepsparse", helper=( @@ -22,3 +22,7 @@ "Please try `pip install -e '.[deepsparse]'`" ), ) + +from .backend import DeepsparseBackend + +__all__ = ["DeepsparseBackend"] From 29e38e483c3658b4f4d535a12f7b471ef9a8e70b Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 11:58:27 +0300 Subject: [PATCH 21/22] =?UTF-8?q?=E2=9C=85=20Deepsparse=20tests=20are=20ig?= =?UTF-8?q?nored=20if=20Python>=3D3.12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/utils/dependencies.py | 35 +++++++++----- tests/unit/backend/test_deepsparse_backend.py | 48 ++++++++++++------- 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/src/guidellm/utils/dependencies.py b/src/guidellm/utils/dependencies.py index ac67688..9b862cd 100644 --- a/src/guidellm/utils/dependencies.py +++ b/src/guidellm/utils/dependencies.py @@ -1,6 +1,6 @@ import importlib import sys -from typing import Tuple +from typing import NoReturn, Tuple, Union def _extract_python_version(data: str) -> Tuple[int, ...]: @@ -15,8 +15,16 @@ def _extract_python_version(data: str) -> Tuple[int, ...]: return tuple(int(item) for item in items) -def check_python_version(min_version: str, max_version: str) -> None: - """Raises the error if the current version is not in the range.""" +def check_python_version( + min_version: str, max_version: str, raise_error=True +) -> Union[NoReturn, bool]: + """Validate Python version. + + :param min_version: the min (included) Python version in format: MAJOR.MINOR + :param max_version: the max (included) Python version in format: MAJOR.MINOR + :param raise_error: set to False if you don't want to raise the RuntimeError in + case the validation is failed + """ min_version_info: Tuple[int, ...] = _extract_python_version(min_version) max_version_info: Tuple[int, ...] = _extract_python_version(max_version) @@ -26,15 +34,20 @@ def check_python_version(min_version: str, max_version: str) -> None: ) if not (min_version_info <= current_version_info <= max_version_info): - raise RuntimeError( - "This feature requires Python version " - f"to be in range: {min_version}..{max_version}." - "You are using Python {}.{}.{}".format( - sys.version_info.major, - sys.version_info.minor, - sys.version_info.micro, + if raise_error: + raise RuntimeError( + "This feature requires Python version " + f"to be in range: {min_version}..{max_version}." 
+ "You are using Python {}.{}.{}".format( + sys.version_info.major, + sys.version_info.minor, + sys.version_info.micro, + ) ) - ) + else: + return False + else: + return True def module_is_available(module: str, helper: str): diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse_backend.py index f8916c7..1206a28 100644 --- a/tests/unit/backend/test_deepsparse_backend.py +++ b/tests/unit/backend/test_deepsparse_backend.py @@ -1,13 +1,25 @@ +import sys from typing import Any, Dict, Generator, List, Optional, cast import pytest from pydantic import BaseModel -from guidellm.backend import Backend, DeepsparseBackend +from guidellm.backend import Backend from guidellm.config import reload_settings from guidellm.core import TextGenerationRequest from guidellm.utils import random_strings +pytestmark = pytest.mark.skipif( + sys.version_info >= (3, 12), reason="Unsupported Python version" +) + + +@pytest.fixture(scope="module") +def backend_class(): + from guidellm.backend.deepsparse import DeepsparseBackend + + return DeepsparseBackend + class TestDeepsparseTextGeneration(BaseModel): """The representation of a deepsparse data structure.""" @@ -39,7 +51,10 @@ def __call__( self, *_, prompt: str, max_new_tokens: Optional[int] = None, **kwargs ) -> Any: """Mocks the result from `deepsparse.pipeline.Pipeline()()`. - Set reserved request arguments on call + Set reserved request arguments on call. + + Note: `**kwargs` is required since it allows to mimic + the `deepsparse.Pipeline` behavior. """ self._prompt = prompt @@ -62,8 +77,7 @@ def generations(self) -> Generator[TestDeepsparseTextGeneration, None, None]: @pytest.fixture(autouse=True) def mock_deepsparse_pipeline(mocker): return mocker.patch( - "deepsparse.Pipeline.create", - return_value=TestTextGenerationPipeline(), + "deepsparse.Pipeline.create", return_value=TestTextGenerationPipeline() ) @@ -75,16 +89,16 @@ def mock_deepsparse_pipeline(mocker): {"model": "test/custom_llm"}, ], ) -def test_backend_creation(create_payload: Dict): +def test_backend_creation(create_payload: Dict, backend_class): """Test the "Deepspaarse Backend" class with defaults and custom input parameters. 
""" - backends: List[DeepsparseBackend] = cast( - List[DeepsparseBackend], + backends: List[backend_class] = cast( + List[backend_class], [ Backend.create("deepsparse", **create_payload), - DeepsparseBackend(**create_payload), + backend_class(**create_payload), ], ) @@ -98,7 +112,7 @@ def test_backend_creation(create_payload: Dict): @pytest.mark.smoke() -def test_backend_model_from_env(mocker): +def test_backend_model_from_env(mocker, backend_class): mocker.patch.dict( "os.environ", {"GUIDELLM__LLM_MODEL": "test_backend_model_from_env"}, @@ -106,11 +120,11 @@ def test_backend_model_from_env(mocker): reload_settings() - backends: List[DeepsparseBackend] = cast( - List[DeepsparseBackend], + backends: List[backend_class] = cast( + List[backend_class], [ Backend.create("deepsparse"), - DeepsparseBackend(), + backend_class(), ], ) @@ -127,8 +141,10 @@ def test_backend_model_from_env(mocker): ], ) @pytest.mark.asyncio() -async def test_make_request(text_generation_request_create_payload: Dict): - backend = DeepsparseBackend() +async def test_make_request( + text_generation_request_create_payload: Dict, backend_class +): + backend = backend_class() output_tokens: List[str] = [] async for response in backend.make_request( @@ -156,9 +172,9 @@ async def test_make_request(text_generation_request_create_payload: Dict): ) @pytest.mark.asyncio() async def test_make_request_invalid_request_payload( - text_generation_request_create_payload: Dict, error + text_generation_request_create_payload: Dict, error, backend_class ): - backend = DeepsparseBackend() + backend = backend_class() with pytest.raises(error): [ respnose From 4b3b4b540821658d29ec192e984f8da15626ca51 Mon Sep 17 00:00:00 2001 From: Dmytro Parfeniuk Date: Fri, 30 Aug 2024 13:07:23 +0300 Subject: [PATCH 22/22] =?UTF-8?q?=F0=9F=92=9A=20Linters=20are=20happy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/guidellm/backend/deepsparse/__init__.py | 2 +- src/guidellm/utils/dependencies.py | 6 +++--- tests/unit/backend/test_deepsparse_backend.py | 21 ++++++------------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/guidellm/backend/deepsparse/__init__.py b/src/guidellm/backend/deepsparse/__init__.py index 31bf6e1..c45a112 100644 --- a/src/guidellm/backend/deepsparse/__init__.py +++ b/src/guidellm/backend/deepsparse/__init__.py @@ -23,6 +23,6 @@ ), ) -from .backend import DeepsparseBackend +from .backend import DeepsparseBackend # noqa: E402 __all__ = ["DeepsparseBackend"] diff --git a/src/guidellm/utils/dependencies.py b/src/guidellm/utils/dependencies.py index 9b862cd..5022068 100644 --- a/src/guidellm/utils/dependencies.py +++ b/src/guidellm/utils/dependencies.py @@ -34,7 +34,9 @@ def check_python_version( ) if not (min_version_info <= current_version_info <= max_version_info): - if raise_error: + if raise_error is False: + return False + else: raise RuntimeError( "This feature requires Python version " f"to be in range: {min_version}..{max_version}." 
@@ -44,8 +46,6 @@ def check_python_version( sys.version_info.micro, ) ) - else: - return False else: return True diff --git a/tests/unit/backend/test_deepsparse_backend.py b/tests/unit/backend/test_deepsparse_backend.py index 1206a28..58e5761 100644 --- a/tests/unit/backend/test_deepsparse_backend.py +++ b/tests/unit/backend/test_deepsparse_backend.py @@ -1,5 +1,5 @@ import sys -from typing import Any, Dict, Generator, List, Optional, cast +from typing import Any, Dict, Generator, List, Optional import pytest from pydantic import BaseModel @@ -94,13 +94,10 @@ def test_backend_creation(create_payload: Dict, backend_class): with defaults and custom input parameters. """ - backends: List[backend_class] = cast( - List[backend_class], - [ - Backend.create("deepsparse", **create_payload), - backend_class(**create_payload), - ], - ) + backends = [ + Backend.create("deepsparse", **create_payload), + backend_class(**create_payload), + ] for backend in backends: assert backend.pipeline @@ -120,13 +117,7 @@ def test_backend_model_from_env(mocker, backend_class): reload_settings() - backends: List[backend_class] = cast( - List[backend_class], - [ - Backend.create("deepsparse"), - backend_class(), - ], - ) + backends = [Backend.create("deepsparse"), backend_class()] for backend in backends: assert backend.model == "test_backend_model_from_env"
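
---

For reviewers, a minimal usage sketch (illustrative only, not part of any commit in this series) of how the pieces above are intended to fit together: the non-raising `raise_error=False` mode of `check_python_version`, the guarded `deepsparse` import from `guidellm/backend/deepsparse/__init__.py`, and model resolution via `--model`, the shared `GUIDELLM__LLM_MODEL` environment variable, or the backend default. The guard mirrors what the package `__init__` does; the trailing `DeepsparseBackend()` call is hypothetical and assumes a resolvable Deepsparse model is available locally.

```python
# Sketch only: guarding optional Deepsparse support with the utilities changed above.
from guidellm.utils import check_python_version, module_is_available

# raise_error=False is the new non-raising mode: returns True/False instead of
# raising RuntimeError when the interpreter is outside the supported range.
if check_python_version(min_version="3.8", max_version="3.11", raise_error=False):
    # deepsparse only supports Python 3.8..3.11, so the import stays behind the guard.
    module_is_available(
        module="deepsparse",
        helper=(
            "Deepsparse package is not available. "
            "Please try `pip install -e '.[deepsparse]'`"
        ),
    )
    # Importing the subpackage makes Backend.create("deepsparse") usable,
    # as the unit tests do via their backend_class fixture.
    from guidellm.backend.deepsparse import DeepsparseBackend

    # Hypothetical call: the model is resolved from the explicit argument,
    # then GUIDELLM__LLM_MODEL, then the backend's default_model.
    backend = DeepsparseBackend()
else:
    # On Python >= 3.12 the deepsparse extra is skipped entirely (see pyproject.toml
    # marker and the pytest skipif in tests/unit/backend/test_deepsparse_backend.py).
    backend = None
```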