diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3f484d5..1b2d8e6 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -57,6 +57,9 @@ jobs: publish: needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: nightly + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -68,4 +71,3 @@ jobs: with: publish_pypi: true publish_pypi_internal: true - build_type: nightly diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cc227aa..ee77922 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -59,6 +59,9 @@ jobs: name: Build & Publish the distribution needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: release + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -70,4 +73,3 @@ jobs: with: publish_pypi: true publish_pypi_internal: true - build_type: release diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index 7922337..daf4e30 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -58,6 +58,9 @@ jobs: name: Build & Publish the distribution needs: [unit-tests, integration-tests, e2e-tests] runs-on: ubuntu-latest + env: + GUIDELLM_BUILD_TYPE: release + GUIDELLM_BUILD_NUMBER: ${{ github.event.pull_request.number || github.run_number }} steps: - uses: actions/checkout@v4 - name: Set up Python @@ -69,4 +72,3 @@ jobs: with: publish_pypi: false publish_pypi_internal: true - build_type: release diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7be4d9..2a085bb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,7 @@ repos: # main dependencies click, datasets, + ftfy, loguru, numpy, openai, @@ -25,11 +26,13 @@ repos: pydantic_settings, pyyaml, requests, + rich, transformers, # dev dependencies pytest, pydantic_settings, + requests-mock, # types types-click, diff --git a/pyproject.toml b/pyproject.toml index 52cc17f..4e295aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ urls = { homepage = "https://github.com/neuralmagic/guidellm" } dependencies = [ "click", "datasets", + "ftfy>=6.0.0", "loguru", "numpy", "openai", @@ -34,6 +35,7 @@ dependencies = [ "pydantic-settings>=2.0.0", "pyyaml>=6.0.0", "requests", + "rich", "transformers", ] @@ -41,13 +43,16 @@ dependencies = [ dev = [ # general and configurations "pre-commit~=3.5.0", + "scipy~=1.10", "sphinx~=7.1.2", "tox~=4.16.0", # testing "pytest~=8.2.2", + "pytest-asyncio~=0.23.8", "pytest-cov~=5.0.0", "pytest-mock~=3.14.0", + "pytest-rerunfailures~=14.0", "requests-mock~=1.12.1", # code quality @@ -83,7 +88,7 @@ profile = "black" files = ["src/guidellm", "tests"] python_version = '3.8' warn_redundant_casts = true -warn_unused_ignores = true +warn_unused_ignores = false show_error_codes = true namespace_packages = true exclude = ["venv", ".tox"] @@ -92,22 +97,27 @@ exclude = ["venv", ".tox"] # Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery follow_imports = 'silent' -[[tool.mypy.overrides]] -module = ["transformers.*", "datasets.*"] -ignore_missing_imports=true - [tool.ruff] line-length = 88 +indent-width = 4 exclude = ["build", "dist", "env", ".venv"] -lint.ignore = [ + +[tool.ruff.format] +quote-style = "double" 
+indent-style = "space" + +[tool.ruff.lint] +ignore = [ "PLR0913", "TCH001", "COM812", "ISC001", "TCH002", + "PLW1514", # allow Path.open without encoding + ] -lint.select = [ +select = [ # Rules reference: https://docs.astral.sh/ruff/rules/ # Code Style / Formatting @@ -127,11 +137,11 @@ lint.select = [ "Q", # flake8-quotes: enforces consistent use of single or double quotes "TCH", # flake8-type-checking: enforces type checking practices and standards "TID", # flake8-tidy-imports: enforces tidy and well-organized imports + "RUF022", # flake8-ruff: enforce sorting of __all__ in modules # Code Structure / Complexity "C4", # flake8-comprehensions: improves readability and performance of list, set, and dict comprehensions "C90", # mccabe: checks for overly complex code using cyclomatic complexity - "FBT", # flake8-boolean-trap: prevents the use of boolean traps in function arguments and calls "ISC", # flake8-implicit-str-concat: prevents implicit string concatenation "PIE", # flake8-pie: identifies and corrects common code inefficiencies and mistakes "R", # Refactor: suggests improvements to code structure and readability @@ -164,7 +174,6 @@ lint.select = [ "tests/**/*.py" = [ "S101", # asserts allowed in tests "ARG", # Unused function args allowed in tests - "FBT", # Booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize() "PLR2004", # Magic value used in comparison "TCH002", # No import only type checking in tests "SLF001", # enable private member access in tests @@ -173,8 +182,13 @@ lint.select = [ "PT011", # allow generic exceptions in tests "N806", # allow uppercase variable names in tests "PGH003", # allow general ignores in tests + "S106", # allow hardcoded passwords in tests + "PLR0915", # allow complex statements in tests ] +[tool.ruff.lint.isort] +known-first-party = ["guidellm", "tests"] + [tool.pytest.ini_options] addopts = '-s -vvv --cache-clear' diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index 25747ff..fc660fc 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -3,4 +3,15 @@ evaluating and benchmarking large language models (LLMs). """ -from .logger import configure_logger, logger # noqa: F401 +import os + +import transformers # type: ignore # noqa: PGH003 + +from .config import settings +from .logger import configure_logger, logger + +__all__ = ["configure_logger", "logger", "settings"] + + +os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers +transformers.logging.set_verbosity_error() # Silence warnings for transformers diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 0c11e5b..c22b477 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -1,35 +1,39 @@ import functools from abc import ABC, abstractmethod -from dataclasses import dataclass -from enum import Enum -from typing import Dict, Iterator, List, Optional, Type +from typing import AsyncGenerator, Dict, List, Literal, Optional, Type from loguru import logger +from pydantic import BaseModel from guidellm.core import TextGenerationRequest, TextGenerationResult __all__ = ["Backend", "BackendEngine", "GenerativeResponse"] -class BackendEngine(str, Enum): - """ - Determines the Engine of the LLM Backend. - All the implemented backends in the project have the engine. - - NOTE: the `TEST` engine has to be used only for testing purposes.
- """ +BackendEngine = Literal["test", "openai_server"] - TEST = "test" - OPENAI_SERVER = "openai_server" - -@dataclass -class GenerativeResponse: +class GenerativeResponse(BaseModel): """ - A dataclass to represent a response from a generative AI backend. + A model representing a response from a generative AI backend. + + :param type_: The type of response, either 'token_iter' for intermediate + token output or 'final' for the final result. + :type type_: Literal["token_iter", "final"] + :param add_token: The token to add to the output + (only applicable if type_ is 'token_iter'). + :type add_token: Optional[str] + :param prompt: The original prompt sent to the backend. + :type prompt: Optional[str] + :param output: The final generated output (only applicable if type_ is 'final'). + :type output: Optional[str] + :param prompt_token_count: The number of tokens in the prompt. + :type prompt_token_count: Optional[int] + :param output_token_count: The number of tokens in the output. + :type output_token_count: Optional[int] """ - type_: str # One of 'token_iter', 'final' + type_: Literal["token_iter", "final"] add_token: Optional[str] = None prompt: Optional[str] = None output: Optional[str] = None @@ -39,7 +43,20 @@ class GenerativeResponse: class Backend(ABC): """ - An abstract base class with template methods for generative AI backends. + Abstract base class for generative AI backends. + + This class provides a common interface for creating and interacting with different + generative AI backends. Subclasses should implement the abstract methods to + define specific backend behavior. + + :cvar _registry: A dictionary that maps BackendEngine types to backend classes. + :type _registry: Dict[BackendEngine, Type[Backend]] + :param type_: The type of the backend. + :type type_: BackendEngine + :param target: The target URL for the backend. + :type target: str + :param model: The model used by the backend. + :type model: str """ _registry: Dict[BackendEngine, "Type[Backend]"] = {} @@ -50,11 +67,14 @@ def register(cls, backend_type: BackendEngine): A decorator to register a backend class in the backend registry. :param backend_type: The type of backend to register. - :type backend_type: BackendType + :type backend_type: BackendEngine + :return: The decorated backend class. + :rtype: Type[Backend] """ def inner_wrapper(wrapped_class: Type["Backend"]): cls._registry[backend_type] = wrapped_class + logger.info("Registered backend type: {}", backend_type) return wrapped_class return inner_wrapper @@ -62,24 +82,31 @@ def inner_wrapper(wrapped_class: Type["Backend"]): @classmethod def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend": """ - Factory method to create a backend based on the backend type. + Factory method to create a backend instance based on the backend type. :param backend_type: The type of backend to create. - :type backend_type: BackendType + :type backend_type: BackendEngine :param kwargs: Additional arguments for backend initialization. :type kwargs: dict :return: An instance of a subclass of Backend. :rtype: Backend + :raises ValueError: If the backend type is not registered. 
""" - logger.info(f"Creating backend of type {backend_type}") + logger.info("Creating backend of type {}", backend_type) if backend_type not in cls._registry: - logger.error(f"Unsupported backend type: {backend_type}") - raise ValueError(f"Unsupported backend type: {backend_type}") + err = ValueError(f"Unsupported backend type: {backend_type}") + logger.error("{}", err) + raise err return Backend._registry[backend_type](**kwargs) + def __init__(self, type_: BackendEngine, target: str, model: str): + self._type = type_ + self._target = target + self._model = model + @property def default_model(self) -> str: """ @@ -87,82 +114,149 @@ def default_model(self) -> str: :return: The default model. :rtype: str + :raises ValueError: If no models are available. """ return _cachable_default_model(self) - def submit(self, request: TextGenerationRequest) -> TextGenerationResult: + @property + def type_(self) -> BackendEngine: + """ + Get the type of the backend. + + :return: The type of the backend. + :rtype: BackendEngine + """ + return self._type + + @property + def target(self) -> str: + """ + Get the target URL for the backend. + + :return: The target URL. + :rtype: str + """ + return self._target + + @property + def model(self) -> str: + """ + Get the model used by the backend. + + :return: The model name. + :rtype: str """ - Submit a result request and populate the BenchmarkResult. + return self._model - :param request: The result request to submit. + async def submit(self, request: TextGenerationRequest) -> TextGenerationResult: + """ + Submit a text generation request and return the result. + + This method handles the request submission to the backend and processes + the response in a streaming fashion if applicable. + + :param request: The request object containing the prompt + and other configurations. :type request: TextGenerationRequest - :return: The populated result result. + :return: The result of the text generation request. :rtype: TextGenerationResult + :raises ValueError: If no response is received from the backend. """ - logger.info(f"Submitting request with prompt: {request.prompt}") + logger.debug("Submitting request with prompt: {}", request.prompt) - result = TextGenerationResult( - request=TextGenerationRequest(prompt=request.prompt), - ) + result = TextGenerationResult(request=request) result.start(request.prompt) + received_final = False - for response in self.make_request(request): # GenerativeResponse - if response.type_ == "token_iter" and response.add_token: - result.output_token(response.add_token) + async for response in self.make_request(request): + logger.debug("Received response: {}", response) + if response.type_ == "token_iter": + result.output_token(response.add_token if response.add_token else "") elif response.type_ == "final": + if received_final: + err = ValueError( + "Received multiple final responses from the backend." 
+ ) + logger.error(err) + raise err + result.end( + output=response.output, prompt_token_count=response.prompt_token_count, output_token_count=response.output_token_count, ) + received_final = True + else: + err = ValueError( + f"Invalid response received from the backend of type: " + f"{response.type_} for {response}" + ) + logger.error(err) + raise err - logger.info(f"Request completed with output: {result.output}") + if not received_final: + err = ValueError("No final response received from the backend.") + logger.error(err) + raise err + + logger.info("Request completed with output: {}", result.output) return result @abstractmethod - def make_request( + async def make_request( self, request: TextGenerationRequest, - ) -> Iterator[GenerativeResponse]: + ) -> AsyncGenerator[GenerativeResponse, None]: """ Abstract method to make a request to the backend. - :param request: The result request to submit. + Subclasses must implement this method to define how requests are handled + by the backend. + + :param request: The request object containing the prompt and + other configurations. :type request: TextGenerationRequest - :return: An iterator over the generative responses. - :rtype: Iterator[GenerativeResponse] + :yield: A generator yielding responses from the backend. + :rtype: AsyncGenerator[GenerativeResponse, None] """ - raise NotImplementedError + yield None # type: ignore # noqa: PGH003 @abstractmethod def available_models(self) -> List[str]: """ Abstract method to get the available models for the backend. + Subclasses must implement this method to provide the list of models + supported by the backend. + :return: A list of available models. :rtype: List[str] - """ - raise NotImplementedError - - @abstractmethod - def model_tokenizer(self, model: str) -> Optional[str]: - """ - Abstract method to get the tokenizer for a model. - - :param model: The model to get the tokenizer for. - :type model: str - :return: The tokenizer for the model, or None if it cannot be created. - :rtype: Optional[str] + :raises NotImplementedError: If the method is not implemented by a subclass. """ raise NotImplementedError @functools.lru_cache(maxsize=1) def _cachable_default_model(backend: Backend) -> str: - if models := backend.available_models(): - logger.debug(f"Default model: {models[0]}") + """ + Get the default model for a backend using LRU caching. + + This function caches the default model to optimize repeated lookups. + + :param backend: The backend instance for which to get the default model. + :type backend: Backend + :return: The default model. + :rtype: str + :raises ValueError: If no models are available. 
+ """ + logger.debug("Getting default model for backend: {}", backend) + models = backend.available_models() + if models: + logger.debug("Default model: {}", models[0]) return models[0] - logger.error("No models available.") - raise ValueError("No models available.") + err = ValueError("No models available.") + logger.error(err) + raise err diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index af91709..2a12882 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -1,36 +1,35 @@ -from typing import Any, Dict, Generator, List, Optional +from typing import AsyncGenerator, Dict, List, Optional -import openai from loguru import logger -from openai import OpenAI, Stream -from openai.types import Completion -from transformers import AutoTokenizer +from openai import AsyncOpenAI, OpenAI -from guidellm.backend import Backend, BackendEngine, GenerativeResponse +from guidellm.backend.base import Backend, GenerativeResponse from guidellm.config import settings from guidellm.core import TextGenerationRequest __all__ = ["OpenAIBackend"] -@Backend.register(BackendEngine.OPENAI_SERVER) +@Backend.register("openai_server") class OpenAIBackend(Backend): """ - An OpenAI backend implementation for the generative AI result. + An OpenAI backend implementation for generative AI results. + This class provides an interface to communicate with the + OpenAI server for generating responses based on given prompts. + + :param openai_api_key: The API key for OpenAI. + If not provided, it will default to the key from settings. + :type openai_api_key: Optional[str] :param target: The target URL string for the OpenAI server. - :type target: str + :type target: Optional[str] :param host: Optional host for the OpenAI server. :type host: Optional[str] :param port: Optional port for the OpenAI server. :type port: Optional[int] - :param path: Optional path for the OpenAI server. - :type path: Optional[str] :param model: The OpenAI model to use, defaults to the first available model. :type model: Optional[str] - :param api_key: The OpenAI API key to use. - :type api_key: Optional[str] - :param request_args: Optional arguments for the OpenAI request. + :param request_args: Additional arguments for the OpenAI request. :type request_args: Dict[str, Any] """ @@ -43,125 +42,137 @@ def __init__( model: Optional[str] = None, **request_args, ): - """ - Initialize an OpenAI Client - """ - - self.request_args = request_args - - if not (_api_key := (openai_api_key or settings.openai.api_key)): - raise ValueError( - "`GUIDELLM__OPENAI__API_KEY` environment variable " - "or --openai-api-key CLI parameter " - "must be specify for the OpenAI backend", + self._request_args: Dict = request_args + api_key: str = openai_api_key or settings.openai.api_key + + if not api_key: + err = ValueError( + "`GUIDELLM__OPENAI__API_KEY` environment variable or " + "--openai-api-key CLI parameter must be specified for the " + "OpenAI backend." ) + logger.error("{}", err) + raise err - if target is not None: + if target: base_url = target elif host and port: - base_url = f"{host}:{port}" - elif settings.openai.base_url is not None: + base_url = f"{host}:{port}/v1" + elif settings.openai.base_url: base_url = settings.openai.base_url else: - raise ValueError( - "`GUIDELLM__OPENAI__BASE_URL` environment variable " - "or --target CLI parameter must be specified for the OpenAI backend." 
+ err = ValueError( + "`GUIDELLM__OPENAI__BASE_URL` environment variable or " + "--target CLI parameter must be specified for the OpenAI backend." ) + logger.error("{}", err) + raise err - self.openai_client = OpenAI(api_key=_api_key, base_url=base_url) - self.model = model or self.default_model + self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url) + self._client = OpenAI(api_key=api_key, base_url=base_url) + self._model = model or self.default_model - logger.info("OpenAI {} Backend listening on {}", self.model, target) + super().__init__(type_="openai_server", target=base_url, model=self._model) + logger.info("OpenAI {} Backend listening on {}", self._model, base_url) - def make_request( + async def make_request( self, request: TextGenerationRequest, - ) -> Generator[GenerativeResponse, None, None]: + ) -> AsyncGenerator[GenerativeResponse, None]: """ Make a request to the OpenAI backend. - :param request: The result request to submit. + This method sends a prompt to the OpenAI backend and streams + the response tokens back. + + :param request: The text generation request to submit. :type request: TextGenerationRequest - :return: An iterator over the generative responses. - :rtype: Iterator[GenerativeResponse] + :yield: A stream of GenerativeResponse objects. + :rtype: AsyncGenerator[GenerativeResponse, None] """ - logger.debug(f"Making request to OpenAI backend with prompt: {request.prompt}") + logger.debug("Making request to OpenAI backend with prompt: {}", request.prompt) - # How many completions to generate for each prompt - request_args: Dict = {"n": 1} + request_args: Dict = { + "n": 1, # Number of completions for each prompt + } - num_gen_tokens: int = ( - request.params.get("generated_tokens", None) - or settings.openai.max_gen_tokens - ) - request_args.update({"max_tokens": num_gen_tokens, "stop": None}) + if request.output_token_count is not None: + request_args.update( + { + "max_tokens": request.output_token_count, + "stop": None, + } + ) + elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0: + request_args.update( + { + "max_tokens": settings.openai.max_gen_tokens, + } + ) - if self.request_args: - request_args.update(self.request_args) + request_args.update(self._request_args) - response: Stream[Completion] = self.openai_client.completions.create( + stream = await self._async_client.chat.completions.create( model=self.model, - prompt=request.prompt, + messages=[ + {"role": "system", "content": request.prompt}, + ], stream=True, **request_args, ) + token_count = 0 + async for chunk in stream: + choice = chunk.choices[0] + token = choice.delta.content or "" - for chunk in response: - chunk_content: str = getattr(chunk, "content", "") - - if getattr(chunk, "stop", True) is True: - logger.debug("Received final response from OpenAI backend") - + if choice.finish_reason is not None: yield GenerativeResponse( type_="final", - prompt=getattr(chunk, "prompt", request.prompt), - prompt_token_count=( - request.prompt_token_count or self._token_count(request.prompt) - ), - output_token_count=(self._token_count(chunk_content)), + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + output_token_count=token_count, ) - else: - logger.debug("Received token from OpenAI backend") - yield GenerativeResponse(type_="token_iter", add_token=chunk_content) + break + + token_count += 1 + yield GenerativeResponse( + type_="token_iter", + add_token=token, + prompt=request.prompt, + prompt_token_count=request.prompt_token_count, + 
output_token_count=token_count, + ) def available_models(self) -> List[str]: """ Get the available models for the backend. + This method queries the OpenAI API to retrieve a list of available models. + :return: A list of available models. :rtype: List[str] + :raises openai.OpenAIError: If an error occurs while retrieving models. """ try: - models: List[str] = [ - model.id for model in self.openai_client.models.list().data - ] - except openai.NotFoundError as error: - logger.error("No available models for OpenAI Backend") + return [model.id for model in self._client.models.list().data] + except Exception as error: + logger.error("Failed to retrieve available models: {}", error) raise error - else: - logger.info(f"Available models: {models}") - return models - def model_tokenizer(self, model: str) -> Optional[Any]: + def validate_connection(self): """ - Get the tokenizer for a model. + Validate the connection to the OpenAI backend. + + This method checks that the OpenAI backend is reachable and + the API key is valid. - :param model: The model to get the tokenizer for. - :type model: str - :return: The tokenizer for the model, or None if it cannot be created. - :rtype: Optional[Any] + :raises openai.OpenAIError: If the connection is invalid. """ + try: - tokenizer = AutoTokenizer.from_pretrained(model) - logger.info(f"Tokenizer created for model: {model}") - return tokenizer - except Exception as err: # noqa: BLE001 - logger.warning(f"Could not create tokenizer for model {model}: {err}") - return None - - def _token_count(self, text: str) -> int: - token_count = len(text.split()) - logger.debug(f"Token count for text '{text}': {token_count}") - return token_count + self._client.models.list() + except Exception as error: + logger.error("Failed to validate OpenAI connection: {}", error) + raise error diff --git a/src/guidellm/config/base.py b/src/guidellm/config.py similarity index 54% rename from src/guidellm/config/base.py rename to src/guidellm/config.py index 0cfab32..d6acfd4 100644 --- a/src/guidellm/config/base.py +++ b/src/guidellm/config.py @@ -1,16 +1,19 @@ from enum import Enum -from typing import Optional +from typing import Dict, List, Optional -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict __all__ = [ - "settings", - "Settings", + "DatasetSettings", + "EmulatedDataSettings", "Environment", "LoggingSettings", "OpenAISettings", "ReportGenerationSettings", + "Settings", + "reload_settings", + "settings", ] @@ -40,11 +43,54 @@ class LoggingSettings(BaseModel): disabled: bool = False clear_loggers: bool = True - console_log_level: str = "INFO" + console_log_level: str = "WARNING" log_file: Optional[str] = None log_file_level: Optional[str] = None +class DatasetSettings(BaseModel): + """ + Dataset settings for the application + """ + + preferred_data_columns: List[str] = Field( + default_factory=lambda: [ + "prompt", + "instruction", + "input", + "inputs", + "question", + "context", + "text", + "content", + "body", + "data", + ] + ) + preferred_data_splits: List[str] = Field( + default_factory=lambda: ["test", "tst", "validation", "val", "train"] + ) + default_tokenizer: str = "neuralmagic/Meta-Llama-3.1-8B-FP8" + + +class EmulatedDataSettings(BaseModel): + """ + Emulated data settings for the application to use + """ + + source: str = "https://www.gutenberg.org/files/1342/1342-0.txt" + filter_start: str = "It is a truth universally acknowledged, that a" + filter_end: 
str = "CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO." + clean_text_args: Dict[str, bool] = Field( + default_factory=lambda: { + "fix_encoding": True, + "clean_whitespace": True, + "remove_empty_lines": True, + "force_new_line_punctuation": True, + } + ) + + class OpenAISettings(BaseModel): """ OpenAI settings for the application to connect to the API @@ -52,7 +98,7 @@ class OpenAISettings(BaseModel): """ # OpenAI API key. - api_key: str = "invalid" + api_key: str = "invalid_token" # OpenAI-compatible server URL # NOTE: The default value is default address of llama.cpp web server @@ -62,7 +108,13 @@ class OpenAISettings(BaseModel): class ReportGenerationSettings(BaseModel): + """ + Report generation settings for the application + """ + source: str = "" + report_html_match: str = "window.report_data = {};" + report_html_placeholder: str = "{}" class Settings(BaseSettings): @@ -76,7 +128,6 @@ class Settings(BaseSettings): export GUIDELLM__LOGGING__DISABLED=true export GUIDELLM__OPENAI__API_KEY=****** ``` - """ model_config = SettingsConfigDict( @@ -87,11 +138,21 @@ class Settings(BaseSettings): env_file=".env", ) + # general settings env: Environment = Environment.PROD request_timeout: int = 30 - + max_concurrency: int = 512 + num_sweep_profiles: int = 9 logging: LoggingSettings = LoggingSettings() + + # Data settings + dataset: DatasetSettings = DatasetSettings() + emulated_data: EmulatedDataSettings = EmulatedDataSettings() + + # Request settings openai: OpenAISettings = OpenAISettings() + + # Report settings report_generation: ReportGenerationSettings = ReportGenerationSettings() @model_validator(mode="after") @@ -104,3 +165,11 @@ def set_default_source(cls, values): settings = Settings() + + +def reload_settings(): + """ + Reload the settings from the environment variables + """ + new_settings = Settings() + settings.__dict__.update(new_settings.__dict__) diff --git a/src/guidellm/config/__init__.py b/src/guidellm/config/__init__.py deleted file mode 100644 index 47544ef..0000000 --- a/src/guidellm/config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .base import settings - -__all__ = ["settings"] diff --git a/src/guidellm/core/__init__.py b/src/guidellm/core/__init__.py index 9ada6c8..e738aa7 100644 --- a/src/guidellm/core/__init__.py +++ b/src/guidellm/core/__init__.py @@ -8,16 +8,17 @@ TextGenerationError, TextGenerationResult, ) -from .serializable import Serializable +from .serializable import Serializable, SerializableFileType __all__ = [ "Distribution", - "TextGenerationRequest", - "TextGenerationResult", - "TextGenerationError", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", + "GuidanceReport", "RequestConcurrencyMeasurement", "Serializable", - "GuidanceReport", + "SerializableFileType", + "TextGenerationBenchmark", + "TextGenerationBenchmarkReport", + "TextGenerationError", + "TextGenerationRequest", + "TextGenerationResult", ] diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py index d2fc075..fb9b12c 100644 --- a/src/guidellm/core/distribution.py +++ b/src/guidellm/core/distribution.py @@ -23,6 +23,9 @@ class Distribution(Serializable): def __str__(self): return f"Distribution({self.describe()})" + def __len__(self): + return len(self.data) + @property def mean(self) -> float: """ @@ -103,7 +106,7 @@ def percentiles(self, percentiles: List[float]) -> List[float]: logger.warning("No data points available to calculate percentiles.") return [0.0] * len(percentiles) - percentiles_values = np.percentile(self.data, percentiles).tolist() 
+ percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist() # type: ignore # noqa: PGH003 logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}") return percentiles_values @@ -117,7 +120,7 @@ def min(self) -> float: logger.warning("No data points available to calculate minimum.") return 0.0 - min_value = np.min(self.data) + min_value: float = np.min(self.data) logger.debug(f"Calculated min: {min_value}") return min_value @@ -131,7 +134,7 @@ def max(self) -> float: logger.warning("No data points available to calculate maximum.") return 0.0 - max_value = np.max(self.data) + max_value: float = np.max(self.data) logger.debug(f"Calculated max: {max_value}") return max_value diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py index 12375fc..b6791e4 100644 --- a/src/guidellm/core/report.py +++ b/src/guidellm/core/report.py @@ -1,21 +1,321 @@ -from typing import List +import time +from datetime import datetime +from typing import List, Optional +from loguru import logger from pydantic import Field +from rich.console import Console, Group +from rich.live import Live +from rich.panel import Panel +from rich.table import Table -from guidellm.core.result import TextGenerationBenchmarkReport +from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport from guidellm.core.serializable import Serializable -__all__ = [ - "GuidanceReport", -] +__all__ = ["GuidanceReport"] + + +def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str: + """ + Create a detailed string representation of a benchmark report. + + :param report: The benchmark report to generate details for. + :type report: TextGenerationBenchmarkReport + :return: A string containing the backend, data, rate, and limits of + the benchmark report. + :rtype: str + """ + backend = ( + f"Backend(type={report.args.get('backend_type', 'N/A')}, " + f"target={report.args.get('target', 'N/A')}, " + f"model={report.args.get('model', 'N/A')})" + ) + data = ( + f"Data(type={report.args.get('data_type', 'N/A')}, " + f"source={report.args.get('data', 'N/A')}, " + f"tokenizer={report.args.get('tokenizer', 'N/A')})" + ) + rate = ( + f"Rate(type={report.args.get('mode', 'N/A')}, " + f"rate={report.args.get('rate', 'N/A')})" + ) + limits = ( + f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, " + f"max_duration={report.args.get('max_duration', 'N/A')} sec)" + ) + + logger.debug( + "Created benchmark report details for backend={}, data={}, rate={}, limits={}", + backend, + data, + rate, + limits, + ) + + return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n" + + +def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str: + """ + Generate a string identifier for a benchmark rate. + + :param benchmark: The benchmark for which to generate the rate ID. + :type benchmark: TextGenerationBenchmark + :return: A string representing the benchmark rate ID. + :rtype: str + """ + rate_id = ( + f"{benchmark.mode}@{benchmark.rate:.2f} req/sec" + if benchmark.rate + else f"{benchmark.mode}" + ) + logger.debug("Generated benchmark rate ID: {}", rate_id) + return rate_id + + +def _create_benchmark_report_requests_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing the requests of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the requests. 
+ :rtype: Table + """ + table = Table( + "Benchmark", + "Requests Completed", + "Request Failed", + "Duration", + "Start Time", + "End Time", + title="[magenta]Requests Data by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + start_time_str = ( + datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S") + if benchmark.start_time + else "N/A" + ) + end_time_str = ( + datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S") + if benchmark.end_time + else "N/A" + ) + + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.request_count}/{benchmark.total_count}", + f"{benchmark.error_count}/{benchmark.total_count}", + f"{benchmark.duration:.2f} sec", + f"{start_time_str}", + f"{end_time_str}", + ) + logger.debug("Created requests summary table for the report.") + return table + + +def _create_benchmark_report_data_tokens_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing data tokens of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the data tokens. + :rtype: Table + """ + table = Table( + "Benchmark", + "Prompt", + "Prompt (1%, 5%, 50%, 95%, 99%)", + "Output", + "Output (1%, 5%, 50%, 95%, 99%)", + title="[magenta]Tokens Data by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.prompt_token_distribution.mean:.2f}", + ", ".join( + f"{percentile:.1f}" + for percentile in benchmark.prompt_token_distribution.percentiles( + [1, 5, 50, 95, 99] + ) + ), + f"{benchmark.output_token_distribution.mean:.2f}", + ", ".join( + f"{percentile:.1f}" + for percentile in benchmark.output_token_distribution.percentiles( + [1, 5, 50, 95, 99] + ) + ), + ) + logger.debug("Created data tokens summary table for the report.") + return table + + +def _create_benchmark_report_dist_perf_summary( + report: TextGenerationBenchmarkReport, +) -> Table: + """ + Create a table summarizing distribution performance of a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing the performance statistics. 
+ :rtype: Table + """ + table = Table( + "Benchmark", + "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", + "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", + "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", + title="[magenta]Performance Stats by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + ", ".join( + f"{percentile:.2f}" + for percentile in benchmark.request_latency_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ", ".join( + f"{percentile * 1000:.1f}" + for percentile in benchmark.ttft_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ", ".join( + f"{percentile * 1000:.1f}" + for percentile in benchmark.itl_distribution.percentiles( + [1, 5, 10, 50, 90, 95, 99] + ) + ), + ) + logger.debug("Created distribution performance summary table for the report.") + return table + + +def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table: + """ + Create a summary table for a benchmark report. + + :param report: The benchmark report to summarize. + :type report: TextGenerationBenchmarkReport + :return: A rich Table object summarizing overall performance. + :rtype: Table + """ + table = Table( + "Benchmark", + "Requests per Second", + "Request Latency", + "Time to First Token", + "Inter Token Latency", + "Output Token Throughput", + title="[magenta]Performance Summary by Benchmark[/magenta]", + title_style="bold", + title_justify="left", + show_header=True, + ) + + for benchmark in report.benchmarks_sorted: + table.add_row( + _benchmark_rate_id(benchmark), + f"{benchmark.completed_request_rate:.2f} req/sec", + f"{benchmark.request_latency:.2f} sec", + f"{benchmark.time_to_first_token:.2f} ms", + f"{benchmark.inter_token_latency:.2f} ms", + f"{benchmark.output_token_throughput:.2f} tokens/sec", + ) + logger.debug("Created overall performance summary table for the report.") + return table class GuidanceReport(Serializable): """ A class to manage the guidance reports that include the benchmarking details, potentially across multiple runs, for saving and loading from disk. + + :param benchmarks: The list of benchmarking reports. + :type benchmarks: List[TextGenerationBenchmarkReport] """ benchmarks: List[TextGenerationBenchmarkReport] = Field( default_factory=list, description="The list of benchmark reports." ) + + def print( + self, save_path: Optional[str] = None, continual_refresh: bool = False + ) -> None: + """ + Print the guidance report to the console. + + :param save_path: Optional path to save the report to disk. + :type save_path: Optional[str] + :param continual_refresh: Whether to continually refresh the report.
+ :type continual_refresh: bool + :return: None + """ + logger.info("Printing guidance report to console with save_path={}", save_path) + report_viz = Panel( + Group( + *[ + Panel( + Group( + _create_benchmark_report_details(benchmark), + "", + _create_benchmark_report_requests_summary(benchmark), + "", + _create_benchmark_report_data_tokens_summary(benchmark), + "", + _create_benchmark_report_dist_perf_summary(benchmark), + "", + _create_benchmark_report_summary(benchmark), + ), + title=( + f"[bold magenta]Benchmark Report " + f"{index + 1}[/bold magenta]" + ), + expand=True, + title_align="left", + ) + for index, benchmark in enumerate(self.benchmarks) + ], + ), + title=( + "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]" + f"({save_path})[/italic]" + ), + expand=True, + title_align="left", + ) + console = Console() + + if continual_refresh: + logger.info("Starting live report with continual refresh.") + with Live(report_viz, refresh_per_second=1, console=console) as live: + while True: + live.update(report_viz) + time.sleep(1) + else: + console.print(report_viz) + + logger.info("Guidance report printing completed.") diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 83cfaca..133d12e 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -20,7 +20,7 @@ class TextGenerationRequest(Serializable): default=None, description="The number of tokens in the input prompt.", ) - generate_token_count: Optional[int] = Field( + output_token_count: Optional[int] = Field( default=None, description="The number of tokens to generate.", ) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index 0743f0a..f218784 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -1,5 +1,5 @@ from time import time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union from loguru import logger from pydantic import Field @@ -9,11 +9,11 @@ from guidellm.core.serializable import Serializable __all__ = [ - "TextGenerationResult", - "TextGenerationError", + "RequestConcurrencyMeasurement", "TextGenerationBenchmark", "TextGenerationBenchmarkReport", - "RequestConcurrencyMeasurement", + "TextGenerationError", + "TextGenerationResult", ] @@ -98,10 +98,15 @@ def output_token(self, token: str): :param token: The decoded token. :type token: str """ - current_counter = time() + self._check_recording_started() - if not self.last_time: - raise ValueError("Last time is not specified to get the output token.") + if self.last_time is None: + raise ValueError( + "last time is not specified. " + "Did you call `text_generation_benchmark.start()`?" + ) + + current_counter = time() if not self.first_token_set: self.first_token_time = current_counter - self.last_time @@ -113,7 +118,7 @@ def output_token(self, token: str): logger.debug(f"Token '{token}' decoded in {decode_time} seconds") self.last_time = current_counter - self.output += f"{token} " + self.output += token logger.debug("Added token {} to output", token) def end( @@ -134,6 +139,7 @@ def end( defaults to word count. :type output_token_count: Optional[int] """ + self._check_recording_started() self.end_time = time() if output: @@ -147,26 +153,13 @@ def end( def _check_recording_started( self, - raise_exception: bool = True, # noqa: FBT001, FBT002 - ) -> bool: - """ - Ensure that the benchmark text generation recording is started. 
- - We can assume that if the `self._start_time` exist, - then the `start()` has been called. - """ - - if self.start_time is not None: - return True - - if raise_exception is True: + ): + if self.start_time is None: raise ValueError( "start time is not specified. " "Did you make the `text_generation_benchmark.start()`?", ) - return False - class TextGenerationError(Serializable): """ @@ -181,9 +174,6 @@ class TextGenerationError(Serializable): description="The error message that occurred during text generation.", ) - def model_post_init(self, _: Any): - logger.error(f"Text generation error occurred: {self.message}") - class RequestConcurrencyMeasurement(Serializable): """ @@ -198,12 +188,14 @@ class RequestConcurrencyMeasurement(Serializable): class TextGenerationBenchmark(Serializable): """ - A class to represent a benchmark of text generation requests + A class to represent a report of text generation requests (results and errors) for generative AI workloads. This is a set of results and errors for a specific mode and rate. """ - mode: str = Field(description="The generation mode, either 'async' or 'sync'.") + mode: Literal["asynchronous", "synchronous", "throughput"] = Field( + description="The generation mode, one of 'asynchronous', 'synchronous', or 'throughput'." + ) rate: Optional[float] = Field( default=None, description="The requested rate of requests per second.", @@ -249,6 +241,55 @@ def error_count(self) -> int: """ return len(self.errors) + @property + def total_count(self) -> int: + """ + Get the total number of requests in the result. + + :return: The total number of requests. + :rtype: int + """ + return self.request_count + self.error_count + + @property + def start_time(self) -> Optional[float]: + """ + Get the start time of the first request in the result. + + :return: The start time of the first request. + :rtype: Optional[float] + """ + if not self.results: + return None + + return self.results[0].start_time + + @property + def end_time(self) -> Optional[float]: + """ + Get the end time of the last request in the result. + + :return: The end time of the last request. + :rtype: Optional[float] + """ + if not self.results: + return None + + return self.results[-1].end_time + + @property + def duration(self) -> float: + """ + Get the duration of the result in seconds. + + :return: The duration of the result. + :rtype: float + """ + if not self.results or not self.start_time or not self.end_time: + return 0.0 + + return self.end_time - self.start_time + @property def completed_request_rate(self) -> float: """ @@ -257,44 +298,161 @@ def completed_request_rate(self) -> float: :return: The rate of requests per second. :rtype: float """ + if not self.results or not self.duration: + return 0.0 + + return len(self.results) / self.duration + + @property + def request_latency(self) -> float: + """ + Get the average request latency in seconds. + + :return: The average request latency in seconds. + :rtype: float + """ if not self.results: return 0.0 - if not self.results[0].start_time or not self.results[-1].end_time: - raise ValueError("Start time and End time are not defined") + return self.request_latency_distribution.mean + + @property + def request_latency_distribution(self) -> Distribution: + """ + Get the distribution of request latencies. - return self.request_count / ( - self.results[-1].end_time - self.results[0].start_time + :return: The distribution of request latencies.
+ :rtype: Distribution + """ + return Distribution( + data=[ + result.end_time - result.start_time + for result in self.results + if result.end_time is not None and result.start_time is not None + ] ) @property - def overloaded(self) -> bool: - if not self.results or not self.concurrencies: - raise ValueError("No results or concurrencies to check for overload.") + def time_to_first_token(self) -> float: + """ + Get the time taken to decode the first token in milliseconds. + + :return: The time taken to decode the first token in milliseconds. + :rtype: float + """ + if not self.results: + return 0.0 - if self.rate is None or len(self.concurrencies) < 2: # noqa: PLR2004 + return 1000 * self.ttft_distribution.mean + + @property + def ttft_distribution(self) -> Distribution: + """ + Get the distribution of time taken to decode the first token. + + :return: The distribution of time taken to decode the first token. + :rtype: Distribution + """ + return Distribution( + data=[ + result.first_token_time + for result in self.results + if result.first_token_time is not None + ] + ) + + @property + def inter_token_latency(self) -> float: + """ + Get the average time between tokens in milliseconds. + + :return: The average time between tokens. + :rtype: float + """ + if not self.results: + return 0.0 + + return 1000 * self.itl_distribution.mean + + @property + def itl_distribution(self) -> Distribution: + """ + Get the distribution of time between tokens. + + :return: The distribution of time between tokens. + :rtype: Distribution + """ + return Distribution( + data=[ + decode for result in self.results for decode in result.decode_times.data + ] + ) + + @property + def output_token_throughput(self) -> float: + """ + Get the average token throughput in tokens per second. + + :return: The average token throughput. + :rtype: float + """ + if not self.results or not self.duration: + return 0.0 + + total_tokens = sum(result.output_token_count for result in self.results) + + return total_tokens / self.duration + + @property + def prompt_token_distribution(self) -> Distribution: + """ + Get the distribution of prompt token counts. + + :return: The distribution of prompt token counts. + :rtype: Distribution + """ + return Distribution(data=[result.prompt_token_count for result in self.results]) + + @property + def output_token_distribution(self) -> Distribution: + """ + Get the distribution of output token counts. + + :return: The distribution of output token counts. + :rtype: Distribution + """ + return Distribution(data=[result.output_token_count for result in self.results]) + + @property + def overloaded(self) -> bool: + if ( + self.rate is None + or not self.results + or not self.concurrencies + or len(self.concurrencies) < 2 # noqa: PLR2004 + ): # if rate was not set, sync mode is assumed, # or we have less than 2 data points, # then we cannot be overloaded by definition return False - # if the calculated rate is less than 60% of the requested rate, + # if the calculated rate is less than 75% of the requested rate, # safe to assume the system is overloaded - return self.completed_request_rate < 0.60 * self.rate + return self.completed_request_rate < 0.75 * self.rate def request_started(self): """ Record the start of a generation request. 
""" if not self.concurrencies: - self.concurrencies.append( + self.concurrencies = [ RequestConcurrencyMeasurement( time=time(), completed=0, errored=0, processing=1, ), - ) + ] else: last = self.concurrencies[-1] self.concurrencies.append( @@ -318,33 +476,34 @@ def request_completed( :param result: The completed result or error. :type result: Union[TextGenerationResult, TextGenerationError] """ + if not self.concurrencies: + raise ValueError("Request completed without starting") + if isinstance(result, TextGenerationError): + is_error = True self.errors.append(result) - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed, - errored=last.errored + 1, - processing=last.processing - 1, - ), - ) - logger.warning( - f"Text generation request resulted in error: {result.message}", + logger.info( + "Text generation request resulted in error: {}", + result.message, ) else: + if not result.start_time or not result.end_time: + raise ValueError("Start time and End time are not defined") + + is_error = False self.results.append(result) - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed + 1, - errored=last.errored, - processing=last.processing - 1, - ), - ) logger.info("Text generation request completed successfully: {}", result) + last = self.concurrencies[-1] + self.concurrencies.append( + RequestConcurrencyMeasurement( + time=time(), + completed=last.completed + (not is_error), + errored=last.errored + is_error, + processing=last.processing - 1, + ) + ) + class TextGenerationBenchmarkReport(Serializable): """ @@ -357,8 +516,8 @@ class TextGenerationBenchmarkReport(Serializable): default_factory=list, description="The benchmarks of text generation requests.", ) - args: List[Dict[str, Any]] = Field( - default_factory=list, + args: Dict[str, Any] = Field( + default_factory=dict, description="The arguments used for the benchmarks.", ) diff --git a/src/guidellm/core/serializable.py b/src/guidellm/core/serializable.py index 2d81ebb..1e6b294 100644 --- a/src/guidellm/core/serializable.py +++ b/src/guidellm/core/serializable.py @@ -1,6 +1,5 @@ -from enum import Enum from pathlib import Path -from typing import Any, Union +from typing import Any, Literal, Union, get_args import yaml from loguru import logger @@ -9,13 +8,7 @@ __all__ = ["Serializable", "SerializableFileType"] -class SerializableFileType(Enum): - """ - Enum class for file types supported by Serializable. - """ - - YAML = "yaml" - JSON = "json" +SerializableFileType = Literal["yaml", "json"] class Serializable(BaseModel): @@ -86,7 +79,7 @@ def from_json(cls, data: str): def save_file( self, path: Union[str, Path], - type_: SerializableFileType = SerializableFileType.YAML, + type_: SerializableFileType = "yaml", ) -> str: """ Save the model to a file in either YAML or JSON format. @@ -107,28 +100,32 @@ def save_file( if path.suffix: # is a file - ext = path.suffix[1:].upper() - if ext not in SerializableFileType.__members__: + ext = path.suffix[1:].lower() + if type_ not in get_args(SerializableFileType): raise ValueError( - f"Unsupported file extension: {ext}. " - f"Expected one of {', '.join(SerializableFileType.__members__)}) " + f"Unsupported file extension: {type_}. 
" + f"Expected one of {SerializableFileType} " f"for {path}" ) - type_ = SerializableFileType[ext] + type_ = ext # type: ignore # noqa: PGH003 else: # is a directory - file_name = f"{self.__class__.__name__.lower()}.{type_.value.lower()}" + file_name = f"{self.__class__.__name__.lower()}.{type_}" path = path / file_name path.parent.mkdir(parents=True, exist_ok=True) with path.open("w") as file: - if type_ == SerializableFileType.YAML: + if type_ == "yaml": file.write(self.to_yaml()) - elif type_ == SerializableFileType.JSON: + elif type_ == "json": file.write(self.to_json()) else: - raise ValueError(f"Unsupported file format: {type_}") + raise ValueError( + f"Unsupported file extension: {type_}" + f"Expected one of {SerializableFileType} " + f"for {path}" + ) logger.info("Successfully saved {} to {}", self.__class__.__name__, path) @@ -153,25 +150,20 @@ def load_file(cls, path: Union[str, Path]): if not path.is_file(): raise ValueError(f"Path is not a file: {path}") - extension = path.suffix[1:].upper() - - if extension not in SerializableFileType.__members__: - raise ValueError( - f"Unsupported file extension: {extension}. " - f"Expected one of {', '.join(SerializableFileType.__members__)}) " - f"for {path}" - ) - - type_ = SerializableFileType[extension] + extension = path.suffix[1:].lower() with path.open() as file: data = file.read() - if type_ == SerializableFileType.YAML: + if extension == "yaml": obj = cls.from_yaml(data) - elif type_ == SerializableFileType.JSON: + elif extension == "json": obj = cls.from_json(data) else: - raise ValueError(f"Unsupported file format: {type_}") + raise ValueError( + f"Unsupported file extension: {extension}" + f"Expected one of {SerializableFileType} " + f"for {path}" + ) return obj diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py index 7faa2a8..d5858d0 100644 --- a/src/guidellm/executor/__init__.py +++ b/src/guidellm/executor/__init__.py @@ -1,21 +1,10 @@ -from .executor import Executor -from .profile_generator import ( - RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER, - RATE_TYPE_TO_PROFILE_MODE_MAPPER, - FixedRateProfileGenerator, - Profile, - ProfileGenerationMode, - ProfileGenerator, - SweepProfileGenerator, -) +from .base import Executor, ExecutorResult +from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator __all__ = [ - "RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER", - "RATE_TYPE_TO_PROFILE_MODE_MAPPER", "Executor", - "ProfileGenerationMode", + "ExecutorResult", "Profile", + "ProfileGenerationMode", "ProfileGenerator", - "FixedRateProfileGenerator", - "SweepProfileGenerator", ] diff --git a/src/guidellm/executor/base.py b/src/guidellm/executor/base.py new file mode 100644 index 0000000..865ab30 --- /dev/null +++ b/src/guidellm/executor/base.py @@ -0,0 +1,213 @@ +from dataclasses import dataclass +from typing import AsyncGenerator, Optional, Sequence, Union + +from loguru import logger + +from guidellm.backend import Backend +from guidellm.core import TextGenerationBenchmarkReport +from guidellm.executor.profile_generator import ( + Profile, + ProfileGenerationMode, + ProfileGenerator, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler import Scheduler, SchedulerResult + +__all__ = ["Executor", "ExecutorResult"] + + +@dataclass +class ExecutorResult: + """ + Data class representing the result of executing tasks in the Executor. + + :param completed: Indicates whether all tasks have completed. + :type completed: bool + :param count_total: Total number of profiles. 
+ :type count_total: int + :param count_completed: Number of completed profiles. + :type count_completed: int + :param report: A benchmark report for text generation. + :type report: TextGenerationBenchmarkReport + :param scheduler_result: Optional scheduler result for the last task. + :type scheduler_result: Optional[SchedulerResult] + """ + + completed: bool + count_total: int + count_completed: int + generation_modes: Sequence[ProfileGenerationMode] + report: TextGenerationBenchmarkReport + scheduler_result: Optional[SchedulerResult] = None + current_index: Optional[int] = None + current_profile: Optional[Profile] = None + + +class Executor: + """ + The Executor class manages the execution of tasks based on a given profile + generation mode and rate. It orchestrates the interaction between the backend, + request generator, and profile generator, and runs benchmarks accordingly. + + :param backend: The backend to run tasks against. + :type backend: Backend + :param request_generator: The generator that creates requests for execution. + :type request_generator: RequestGenerator + :param mode: The mode for profile generation (e.g., sweep, synchronous). + :type mode: ProfileGenerationMode + :param rate: The rate or list of rates for load generation, or None. + :type rate: Optional[Union[float, Sequence[float]]] + :param max_number: Maximum number of requests to generate for the scheduler + (a single report run), or None. + :type max_number: Optional[int] + :param max_duration: Maximum duration for generating requests for the scheduler + (a single report run), or None. + :type max_duration: Optional[float] + """ + + def __init__( + self, + backend: Backend, + request_generator: RequestGenerator, + mode: ProfileGenerationMode = "sweep", + rate: Optional[Union[float, Sequence[float]]] = None, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ): + self._backend = backend + self._generator = request_generator + self._max_number = max_number + self._max_duration = max_duration + self._profile_generator = ProfileGenerator(mode=mode, rate=rate) + logger.info("Executor initialized with mode: {}, rate: {}", mode, rate) + + @property + def backend(self) -> Backend: + """ + Returns the backend being used by the Executor. + + :return: Backend + :rtype: Backend + """ + return self._backend + + @property + def request_generator(self) -> RequestGenerator: + """ + Returns the request generator used by the Executor. + + :return: RequestGenerator + :rtype: RequestGenerator + """ + return self._generator + + @property + def profile_generator(self) -> ProfileGenerator: + """ + Returns the profile generator for generating profiles during execution. + + :return: ProfileGenerator + :rtype: ProfileGenerator + """ + return self._profile_generator + + @property + def max_number(self) -> Optional[int]: + """ + Returns the maximum number of requests to generate. + + :return: Maximum number of requests or None. + :rtype: Optional[int] + """ + return self._max_number + + @property + def max_duration(self) -> Optional[float]: + """ + Returns the maximum duration for generating requests. + + :return: Maximum duration in seconds or None. + :rtype: Optional[float] + """ + return self._max_duration + + async def run(self) -> AsyncGenerator[ExecutorResult, None]: + """ + Runs the Executor, generating and scheduling tasks based on the profile + generation mode. Yields results incrementally.
+ + :rtype: AsyncGenerator[ExecutorResult, None] + """ + report = TextGenerationBenchmarkReport() + report.args = { + # backend args + "backend_type": self.backend.type_, + "target": self.backend.target, + "model": self.backend.model, + # data args + "data_type": self.request_generator.type_, + "data": self.request_generator.source, + "tokenizer": self.request_generator.tokenizer.name_or_path, + # rate args + "mode": self.profile_generator.mode, + "rate": self.profile_generator.rates, + # limits args + "max_number": self.max_number, + "max_duration": self.max_duration, + } + profile_index = -1 + logger.info("Starting Executor run") + + yield ExecutorResult( + completed=False, + count_total=len(self.profile_generator), + count_completed=0, + generation_modes=self.profile_generator.profile_generation_modes, + report=report, + ) + + while profile := self.profile_generator.next(report): + logger.debug("Generated profile: {}", profile) + scheduler = Scheduler( + generator=self.request_generator, + worker=self.backend, + mode=profile.load_gen_mode, + rate=profile.load_gen_rate, + max_number=self.max_number or profile.args.get("max_number", None), + max_duration=self.max_duration, + ) + profile_index += 1 + + logger.info( + "Scheduling tasks with mode: {}, rate: {}", + profile.load_gen_mode, + profile.load_gen_rate, + ) + + async for scheduler_result in scheduler.run(): + if scheduler_result.completed: + report.add_benchmark(scheduler_result.benchmark) + logger.debug( + "Benchmark added for scheduler result: {}", + scheduler_result.benchmark, + ) + + yield ExecutorResult( + completed=False, + count_total=len(self.profile_generator), + count_completed=len(report.benchmarks), + generation_modes=self.profile_generator.profile_generation_modes, + report=report, + scheduler_result=scheduler_result, + current_index=profile_index, + current_profile=profile, + ) + + logger.info("Executor run completed") + yield ExecutorResult( + completed=True, + count_total=len(self.profile_generator), + count_completed=len(report.benchmarks), + generation_modes=self.profile_generator.profile_generation_modes, + report=report, + ) diff --git a/src/guidellm/executor/executor.py b/src/guidellm/executor/executor.py deleted file mode 100644 index e2b14c3..0000000 --- a/src/guidellm/executor/executor.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import Any, Dict, Optional - -from guidellm.backend import Backend -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler - -from .profile_generator import ProfileGenerationMode, ProfileGenerator - -__all__ = ["Executor"] - - -class Executor: - """ - The main purpose of the `class Executor` is to dispatch running tasks according - to the Profile Generation mode - """ - - def __init__( - self, - backend: Backend, - request_generator: RequestGenerator, - profile_mode: ProfileGenerationMode = ProfileGenerationMode.SWEEP, - profile_args: Optional[Dict[str, Any]] = None, - max_requests: Optional[int] = None, - max_duration: Optional[float] = None, - ): - self.request_generator = request_generator - self.backend = backend - self.profile_generator: ProfileGenerator = ProfileGenerator.create( - profile_mode, - **(profile_args or {}), - ) - self.max_requests: Optional[int] = max_requests - self.max_duration: Optional[float] = max_duration - self._scheduler: Optional[Scheduler] = None - - @property - def scheduler(self) -> Scheduler: - if self._scheduler is None: - raise 
ValueError("The scheduler is not set. Did you run the execution?") - - return self._scheduler - - def run(self) -> TextGenerationBenchmarkReport: - report = TextGenerationBenchmarkReport() - - while True: - if not (profile := self.profile_generator.next(report)): - break - - scheduler = Scheduler( - request_generator=self.request_generator, - backend=self.backend, - load_gen_mode=profile.load_gen_mode, - load_gen_rate=profile.load_gen_rate, - max_requests=self.max_requests, - max_duration=self.max_duration, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - report.add_benchmark(benchmark) - - return report diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index 275237a..703ea05 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -1,178 +1,343 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from enum import Enum -from typing import Dict, List, Optional, Type, Union +from typing import Any, Dict, Literal, Optional, Sequence, Union, get_args import numpy as np +from loguru import logger +from pydantic import Field -from guidellm.core import TextGenerationBenchmarkReport +from guidellm.config import settings +from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport +from guidellm.core.serializable import Serializable from guidellm.scheduler import LoadGenerationMode __all__ = [ - "ProfileGenerationMode", "Profile", + "ProfileGenerationMode", "ProfileGenerator", - "FixedRateProfileGenerator", - "SweepProfileGenerator", ] -RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER = { - "synchronous": LoadGenerationMode.SYNCHRONOUS, - "constant": LoadGenerationMode.CONSTANT, - "poisson": LoadGenerationMode.POISSON, -} - - -class ProfileGenerationMode(Enum): - FIXED_RATE = "fixed_rate" - SWEEP = "sweep" +ProfileGenerationMode = Literal[ + "sweep", "synchronous", "throughput", "constant", "poisson" +] -RATE_TYPE_TO_PROFILE_MODE_MAPPER = { - "synchronous": ProfileGenerationMode.FIXED_RATE, - "constant": ProfileGenerationMode.FIXED_RATE, - "poisson": ProfileGenerationMode.FIXED_RATE, - "sweep": ProfileGenerationMode.SWEEP, -} +class Profile(Serializable): + """ + A data class representing a profile for load generation. + :param load_gen_mode: The mode of load generation (e.g., constant, poisson). + :type load_gen_mode: LoadGenerationMode + :param load_gen_rate: The rate of load generation, if applicable. + :type load_gen_rate: Optional[float] + :param args: Additional arguments for the profile. 
+ :type args: Optional[Dict[str, Any]] + """ -@dataclass -class Profile: load_gen_mode: LoadGenerationMode - load_gen_rate: Optional[float] - - -class ProfileGenerator(ABC): - _registry: Dict[ProfileGenerationMode, "Type[ProfileGenerator]"] = {} - - @staticmethod - def register(mode: ProfileGenerationMode): - def inner_wrapper(wrapped_class): - ProfileGenerator._registry[mode] = wrapped_class - return wrapped_class - - return inner_wrapper - - @staticmethod - def create(mode: ProfileGenerationMode, **kwargs) -> "ProfileGenerator": - if mode not in ProfileGenerator._registry: - raise ValueError(f"Invalid profile generation mode: {mode}") + load_gen_rate: Optional[float] = None + args: Dict[str, Any] = Field(default_factory=dict) - return ProfileGenerator._registry[mode](**kwargs) - def __init__(self, mode: Union[str, ProfileGenerationMode]): - self._mode = ProfileGenerationMode(mode) - - @abstractmethod - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - """ """ +class ProfileGenerator: + """ + Generates profiles based on different load generation modes. + :param mode: The mode for profile generation (e.g., sweep, synchronous). + :type mode: ProfileGenerationMode + :param rate: The rate(s) for load generation; could be a float or list of floats. + :type rate: Optional[Union[float, Sequence[float]]] + """ -@ProfileGenerator.register(ProfileGenerationMode.FIXED_RATE) -class FixedRateProfileGenerator(ProfileGenerator): def __init__( self, - load_gen_mode: Optional[LoadGenerationMode], - rates: Optional[List[float]] = None, - **kwargs, # noqa: RET505, ARG002 + mode: ProfileGenerationMode, + rate: Optional[Union[float, Sequence[float]]] = None, ): - super().__init__(ProfileGenerationMode.FIXED_RATE) - if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0: - raise ValueError("custom rates are not supported in synchronous mode") - - self._rates: Optional[List[float]] = rates - self._load_gen_mode = load_gen_mode - self._generated: bool = False - self._rate_index: int = 0 - - def next(self, _: TextGenerationBenchmarkReport) -> Optional[Profile]: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - if self._generated: - return None - self._generated = True - return Profile( - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, + if mode not in get_args(ProfileGenerationMode): + err = ValueError( + f"{mode} is not a valid Profile Generation Mode. " + f"Valid options are {get_args(ProfileGenerationMode)}" ) - - if self._load_gen_mode in { - LoadGenerationMode.CONSTANT, - LoadGenerationMode.POISSON, - }: - if not self._rates: - raise ValueError( - "rates must be provided for constant and poisson modes" - ) - - if self._rate_index >= len(self._rates): - return None - current_rate = self._rates[self._rate_index] - self._rate_index += 1 - return Profile( - load_gen_mode=self._load_gen_mode, - load_gen_rate=current_rate, + logger.error(err) + raise err + + self._mode = mode + + if self._mode in ("sweep", "throughput", "synchronous"): + if rate is not None: + err = ValueError(f"Rates are not applicable for {self._mode} mode") + logger.error(err) + raise err + self._rates = None + else: + if not rate: + err = ValueError(f"Rates are required for {self._mode} mode") + logger.error(err) + raise err + self._rates = rate if isinstance(rate, Sequence) else [rate] + + for rt in self._rates: + if rt <= 0: + err = ValueError( + f"Rate must be > 0 for mode: {self._mode}. 
Given: {rt}" + ) + logger.error(err) + raise err + + self._generated_count = 0 + + def __len__(self) -> int: + """ + Returns the number of profiles to generate based on the mode and rates. + + :return: The number of profiles. + :rtype: int + """ + if self._mode == "sweep": + return settings.num_sweep_profiles + 2 + + if self._mode in ("throughput", "synchronous"): + return 1 + + if not self._rates: + raise ValueError(f"Rates are required for {self._mode} mode") + + return len(self._rates) + + @property + def mode(self) -> ProfileGenerationMode: + """ + Returns the current mode of profile generation. + + :return: The profile generation mode. + :rtype: ProfileGenerationMode + """ + return self._mode + + @property + def rates(self) -> Optional[Sequence[float]]: + """ + Returns the list of rates for load generation, if any. + + :return: Sequence of rates or None if not applicable. + :rtype: Optional[Sequence[float]] + """ + return self._rates + + @property + def generated_count(self) -> int: + """ + Returns the current count of generated profiles. + + :return: The current count of generated profiles. + :rtype: int + """ + return self._generated_count + + @property + def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]: + """ + Return the list of profile modes to be run in the report. + + :return: Sequence of profile modes to be run in the report. + :rtype: Sequence[ProfileGenerationMode] + """ + if self._mode == "sweep": + return ["synchronous", "throughput"] + ["constant"] * ( # type: ignore # noqa: PGH003 + settings.num_sweep_profiles ) - raise ValueError(f"Invalid rate type: {self._load_gen_mode}") + if self._mode in ["throughput", "synchronous"]: + return [self._mode] + if self._rates is None: + raise ValueError(f"Rates are required for {self._mode} mode") -@ProfileGenerator.register(ProfileGenerationMode.SWEEP) -class SweepProfileGenerator(ProfileGenerator): - def __init__( - self, - **kwargs, # noqa: RET505, ARG002 - ): - super().__init__(ProfileGenerationMode.SWEEP) - self._sync_run = False - self._max_found = False - self._pending_rates = None + if self._mode in ["constant", "poisson"]: + return [self._mode] * len(self._rates) - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - if not self._sync_run: - self._sync_run = True + raise ValueError(f"Invalid mode: {self._mode}") - return Profile( - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, + def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: + """ + Generates the next profile based on the current mode and report. + + :param current_report: The current report report. + :type current_report: TextGenerationBenchmarkReport + :return: The generated profile or None if no more profiles. 
+ :rtype: Optional[Profile] + """ + logger.debug( + "Generating the next profile with mode: {}, current report: {}", + self.mode, + current_report, + ) + + if self.mode in ["constant", "poisson"]: + if not self.rates: + err = ValueError(f"Rates are required for {self.mode} mode") + logger.error(err) + raise err + + profile = self.create_fixed_rate_profile( + self.generated_count, + self.mode, + self.rates, ) - - if not self._max_found: - # check if we've found the maximum rate based on the last result - # if not, double the rate; if so, set the flag to fill in missing data - last_benchmark = current_report.benchmarks[-1] - - if not last_benchmark.overloaded: - last_rate = ( - last_benchmark.rate - if last_benchmark.rate - else last_benchmark.completed_request_rate - ) - return Profile( - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=last_rate * 2, - ) - - self._max_found = True - first_benchmark = current_report.benchmarks[0] - - min_rate = ( - first_benchmark.rate - if first_benchmark.rate - else first_benchmark.completed_request_rate + elif self.mode == "synchronous": + profile = self.create_synchronous_profile(self.generated_count) + elif self.mode == "throughput": + profile = self.create_throughput_profile(self.generated_count) + elif self.mode == "sweep": + profile = self.create_sweep_profile( + self.generated_count, + sync_benchmark=current_report.benchmarks[0] + if current_report.benchmarks + else None, + throughput_benchmark=current_report.benchmarks[1] + if len(current_report.benchmarks) > 1 + else None, ) - max_rate = ( - last_benchmark.rate - if last_benchmark.rate - else last_benchmark.completed_request_rate + else: + err = ValueError(f"Invalid mode: {self.mode}") + logger.error(err) + raise err + + self._generated_count += 1 + logger.info( + "Generated profile: {}, total generated count: {}", + profile, + self._generated_count, + ) + return profile + + @staticmethod + def create_fixed_rate_profile( + index: int, mode: ProfileGenerationMode, rates: Sequence[float] + ) -> Optional[Profile]: + """ + Creates a profile with a fixed rate. + + :param index: The index of the rate in the list. + :type index: int + :param mode: The mode for profile generation (e.g., constant, poisson). + :type mode: ProfileGenerationMode + :param rates: The list of rates for load generation. + :type rates: Sequence[float] + :return: The generated profile or None if index is out of range. + :rtype: Optional[Profile] + """ + modes_map: Dict[str, LoadGenerationMode] = { + "constant": "constant", + "poisson": "poisson", + } + + if mode not in modes_map: + err = ValueError(f"Invalid mode: {mode}") + logger.error(err) + raise err + + profile = ( + Profile( + load_gen_mode=modes_map[mode], + load_gen_rate=rates[index], ) + if index < len(rates) + else None + ) + logger.debug("Created fixed rate profile: {}", profile) + return profile - self._pending_rates = list(np.linspace(min_rate, max_rate, 10)) + @staticmethod + def create_synchronous_profile(index: int) -> Optional[Profile]: + """ + Creates a profile with synchronous mode. + + :param index: The index of the profile to create. + :type index: int + :return: The generated profile or None if index is out of range. 
+        :rtype: Optional[Profile]
+        """
+        profile = (
+            Profile(
+                load_gen_mode="synchronous",
+                load_gen_rate=None,
+            )
+            if index < 1
+            else None
+        )
+        logger.debug("Created synchronous profile: {}", profile)
+        return profile
-        if self._pending_rates:
-            rate = self._pending_rates.pop(0)
-            return Profile(
-                load_gen_mode=LoadGenerationMode.CONSTANT,
-                load_gen_rate=rate,
+    @staticmethod
+    def create_throughput_profile(index: int) -> Optional[Profile]:
+        """
+        Creates a profile with throughput mode.
+
+        :param index: The index of the profile to create.
+        :type index: int
+        :return: The generated profile or None if index is out of range.
+        :rtype: Optional[Profile]
+        """
+        profile = (
+            Profile(
+                load_gen_mode="throughput",
+                load_gen_rate=None,
+            )
+            if index < 1
+            else None
+        )
+        logger.debug("Created throughput profile: {}", profile)
+        return profile
-        return None
+    @staticmethod
+    def create_sweep_profile(
+        index: int,
+        sync_benchmark: Optional[TextGenerationBenchmark],
+        throughput_benchmark: Optional[TextGenerationBenchmark],
+    ) -> Optional[Profile]:
+        """
+        Creates a profile with sweep mode, generating profiles between
+        synchronous and throughput benchmarks.
+
+        :param index: The index of the profile to create.
+        :type index: int
+        :param sync_benchmark: The synchronous benchmark data.
+        :type sync_benchmark: Optional[TextGenerationBenchmark]
+        :param throughput_benchmark: The throughput benchmark data.
+        :type throughput_benchmark: Optional[TextGenerationBenchmark]
+        :return: The generated profile or None if index is out of range.
+        :rtype: Optional[Profile]
+        """
+        if index < 0 or index >= settings.num_sweep_profiles + 2:
+            return None
+
+        if index == 0:
+            return ProfileGenerator.create_synchronous_profile(0)
+
+        if not sync_benchmark:
+            err = ValueError("Synchronous benchmark is required for sweep mode")
+            logger.error(err)
+            raise err
+
+        if index == 1:
+            throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0)  # type: ignore # noqa: PGH003
+            # set the max number of requests to 5 times the synchronous request count
+            # in case it is not otherwise set, to limit the number of requests for the sweep
+            throughput_profile.args = {"max_number": sync_benchmark.request_count * 5}
+            return throughput_profile
+
+        if not throughput_benchmark:
+            err = ValueError("Throughput benchmark is required for sweep mode")
+            logger.error(err)
+            raise err
+
+        min_rate = sync_benchmark.completed_request_rate
+        max_rate = throughput_benchmark.completed_request_rate
+        intermediate_rates = list(
+            np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1)
+        )[1:]
+
+        return Profile(
+            load_gen_mode="constant",
+            load_gen_rate=intermediate_rates[index - 2],
+        )
diff --git a/src/guidellm/logger.py b/src/guidellm/logger.py
index fb79f9f..f26966c 100644
--- a/src/guidellm/logger.py
+++ b/src/guidellm/logger.py
@@ -38,8 +38,7 @@
 from loguru import logger
-from guidellm.config import settings
-from guidellm.config.base import LoggingSettings
+from guidellm.config import LoggingSettings, settings
 __all__ = ["configure_logger", "logger"]
diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index 72085c1..ddb9a4c 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -1,13 +1,12 @@
+import asyncio
+from typing import get_args
+
 import click
 from loguru import logger
 from guidellm.backend import Backend
-from guidellm.core import GuidanceReport
-from guidellm.executor import (
-    RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
-    RATE_TYPE_TO_PROFILE_MODE_MAPPER,
-    Executor,
-)
+from guidellm.core import GuidanceReport,
TextGenerationBenchmarkReport +from guidellm.executor import Executor, ProfileGenerationMode from guidellm.logger import configure_logger from guidellm.request import ( EmulatedRequestGenerator, @@ -15,6 +14,7 @@ TransformersDatasetRequestGenerator, ) from guidellm.request.base import RequestGenerator +from guidellm.utils import BenchmarkReportProgress @click.command() @@ -28,7 +28,7 @@ @click.option("--port", type=str, default=None, help="Port for benchmarking") @click.option( "--backend", - type=click.Choice(["test", "openai_server"]), + type=click.Choice(["openai_server"]), default="openai_server", help="Backend type for benchmarking", ) @@ -48,8 +48,8 @@ ) @click.option( "--rate-type", - type=click.Choice(["sweep", "synchronous", "constant", "poisson"]), - default="synchronous", + type=click.Choice(get_args(ProfileGenerationMode)), + default="sweep", help="Type of rate generation for benchmarking", ) @click.option( @@ -75,7 +75,7 @@ "--output-path", type=str, default="benchmark_report.json", - help="Path to save benchmark report to", + help="Path to save report report to", ) def main( target, @@ -94,7 +94,7 @@ def main( output_path, ): # Create backend - _backend = Backend.create( + backend = Backend.create( backend_type=backend, target=target, host=host, @@ -113,7 +113,7 @@ def main( config=data, tokenizer=tokenizer ) elif data_type == "file": - request_generator = FileRequestGenerator(file_path=data, tokenizer=tokenizer) + request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer) elif data_type == "transformers": request_generator = TransformersDatasetRequestGenerator( dataset=data, tokenizer=tokenizer @@ -121,32 +121,69 @@ def main( else: raise ValueError(f"Unknown data type: {data_type}") - profile_mode = RATE_TYPE_TO_PROFILE_MODE_MAPPER.get(rate_type) - load_gen_mode = RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER.get(rate_type) - - if not profile_mode or not load_gen_mode: - raise ValueError("Invalid rate type") - - # Create executor executor = Executor( + backend=backend, request_generator=request_generator, - backend=_backend, - profile_mode=profile_mode, - profile_args={"load_gen_mode": load_gen_mode, "rates": rate}, - max_requests=max_requests, + mode=rate_type, + rate=rate if rate_type in ("constant", "poisson") else None, + max_number=max_requests, max_duration=max_seconds, ) - - logger.debug("Running the executor") - report = executor.run() + logger.debug( + "Running executor with args: {}", + { + "backend": backend, + "request_generator": request_generator, + "mode": rate_type, + "rate": rate, + "max_number": max_requests, + "max_duration": max_seconds, + }, + ) + report = asyncio.run(_run_executor_for_result(executor)) # Save or print results guidance_report = GuidanceReport() guidance_report.benchmarks.append(report) guidance_report.save_file(output_path) + guidance_report.print(output_path, continual_refresh=True) + + +async def _run_executor_for_result(executor: Executor) -> TextGenerationBenchmarkReport: + report = None + progress = BenchmarkReportProgress() + started = False + + async for result in executor.run(): + if not started: + progress.start(result.generation_modes) # type: ignore # noqa: PGH003 + started = True + + if result.current_index is not None: + description = f"{result.current_profile.load_gen_mode}" # type: ignore # noqa: PGH003 + if result.current_profile.load_gen_mode in ("constant", "poisson"): # type: ignore # noqa: PGH003 + description += f"@{result.current_profile.load_gen_rate:.2f} req/s" # type: ignore # noqa: PGH003 + + 
progress.update_benchmark( + index=result.current_index, + description=description, + completed=result.scheduler_result.completed, # type: ignore # noqa: PGH003 + completed_count=result.scheduler_result.count_completed, # type: ignore # noqa: PGH003 + completed_total=result.scheduler_result.count_total, # type: ignore # noqa: PGH003 + start_time=result.scheduler_result.benchmark.start_time, # type: ignore # noqa: PGH003 + req_per_sec=result.scheduler_result.benchmark.completed_request_rate, # type: ignore # noqa: PGH003 + ) + + if result.completed: + report = result.report + break + + progress.finish() + + if not report: + raise ValueError("No report generated by executor") - print("Guidance Report Complete:") # noqa: T201 - print(guidance_report) # noqa: T201 + return report if __name__ == "__main__": diff --git a/src/guidellm/request/__init__.py b/src/guidellm/request/__init__.py index ae46041..4feca91 100644 --- a/src/guidellm/request/__init__.py +++ b/src/guidellm/request/__init__.py @@ -1,12 +1,13 @@ -from .base import RequestGenerator +from .base import GenerationMode, RequestGenerator from .emulated import EmulatedConfig, EmulatedRequestGenerator from .file import FileRequestGenerator from .transformers import TransformersDatasetRequestGenerator __all__ = [ - "RequestGenerator", "EmulatedConfig", "EmulatedRequestGenerator", "FileRequestGenerator", + "GenerationMode", + "RequestGenerator", "TransformersDatasetRequestGenerator", ] diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py index 48e90ff..52935b7 100644 --- a/src/guidellm/request/base.py +++ b/src/guidellm/request/base.py @@ -3,48 +3,69 @@ import time from abc import ABC, abstractmethod from queue import Empty, Full, Queue -from typing import Iterator, Optional, Union +from typing import Iterator, Literal, Optional, Union from loguru import logger -from transformers import AutoTokenizer, PreTrainedTokenizer +from transformers import ( # type: ignore # noqa: PGH003 + AutoTokenizer, + PreTrainedTokenizer, +) +from guidellm.config import settings from guidellm.core.request import TextGenerationRequest +__all__ = ["GenerationMode", "RequestGenerator"] + + +GenerationMode = Literal["async", "sync"] + class RequestGenerator(ABC): """ A base class for request generators that generate result requests. + :param type_: The type of the request generator. + :type type_: str + :param source: The data source for the request generator. + :type source: str :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. :type tokenizer: Union[str, PreTrainedTokenizer] :param mode: The generation mode, either 'async' or 'sync'. - :type mode: str + :type mode: GenerationMode :param async_queue_size: The size of the request queue. 
:type async_queue_size: int """ def __init__( self, + type_: str, + source: str, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): + self._type = type_ + self._source = source self._async_queue_size: int = async_queue_size self._mode: str = mode self._queue: Queue = Queue(maxsize=async_queue_size) self._stop_event: threading.Event = threading.Event() - if tokenizer is not None: + if not tokenizer: + self._tokenizer = AutoTokenizer.from_pretrained( + settings.dataset.default_tokenizer + ) + logger.info("Initialized fake tokenizer for request generation") + else: self._tokenizer = ( AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer ) - logger.info("Tokenizer initialized: {}", self._tokenizer) - else: - self._tokenizer = None - logger.debug("No tokenizer provided") + logger.info( + "Tokenizer initialized for request generation: {}", self._tokenizer + ) if self._mode == "async": self._thread = threading.Thread(target=self._populate_queue, daemon=True) @@ -82,18 +103,39 @@ def __iter__(self) -> Iterator[TextGenerationRequest]: self._queue.task_done() yield item except Empty: + time.sleep(0.01) continue else: while not self._stop_event.is_set(): yield self.create_item() @property - def tokenizer(self) -> Optional[PreTrainedTokenizer]: + def type_(self) -> str: + """ + Get the type of the request generator. + + :return: The type of the request generator. + :rtype: str + """ + return self._type + + @property + def source(self) -> str: + """ + Get the data source for the request generator. + + :return: The data source. + :rtype: str + """ + return self._source + + @property + def tokenizer(self) -> PreTrainedTokenizer: """ Get the tokenizer instance. :return: The tokenizer instance. - :rtype: Optional[PreTrainedTokenizer] + :rtype: PreTrainedTokenizer """ return self._tokenizer diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py index 24bb27e..c85e9d7 100644 --- a/src/guidellm/request/emulated.py +++ b/src/guidellm/request/emulated.py @@ -1,28 +1,96 @@ import json -import re -import unicodedata +import math from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple, Union import numpy as np -import requests from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 from guidellm.config import settings from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import clean_text, filter_text, load_text, split_text -__all__ = ["EmulatedConfig", "EmulatedRequestGenerator"] +__all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] @dataclass class EmulatedConfig: """ - A dataclass to represent the configuration for emulated requests. + Configuration for emulated text generation requests. + + Args: + prompt_tokens (int): Number of prompt tokens. + prompt_tokens_variance (Optional[int]): Variance for prompt tokens. + prompt_tokens_min (Optional[int]): Minimum number of prompt tokens. + prompt_tokens_max (Optional[int]): Maximum number of prompt tokens. + generated_tokens (Optional[int]): Number of generated tokens. + generated_tokens_variance (Optional[int]): Variance for generated tokens. 
+ generated_tokens_min (Optional[int]): Minimum number of generated tokens. + generated_tokens_max (Optional[int]): Maximum number of generated tokens. """ + @staticmethod + def create_config(config: Union[str, Path, Dict]) -> "EmulatedConfig": + """ + Create an EmulatedConfig instance from a configuration source. + + :param config: Configuration source, can be a dictionary, JSON string, + key=value string, or file path. + :type config: Union[str, Path, Dict] + :return: An instance of EmulatedConfig. + :rtype: EmulatedConfig + :raises FileNotFoundError: If the configuration file is not found. + :raises ValueError: If the configuration format is invalid. + """ + if isinstance(config, dict): + logger.debug("Loading configuration from dict: {}", config) + return EmulatedConfig(**config) + + if isinstance(config, Path) or ( + isinstance(config, str) and (config.endswith(".json") or "{" in config) + ): + logger.debug("Loading configuration from json: {}", config) + + if isinstance(config, str) and "{" in config: + json_text = config.strip() + else: + if isinstance(config, str): + config = Path(config) + + if not config.exists(): + raise FileNotFoundError(f"Configuration file not found: {config}") + + json_text = config.read_text(encoding="utf-8") + + json_dict = json.loads(json_text) + + return EmulatedConfig(**json_dict) + + if isinstance(config, str) and "=" in config: + logger.debug("Loading configuration from csv string: {}", config) + items = config.split(",") + config_dict = {} + for item in items: + key_value = item.strip().split("=") + if len(key_value) != 2: # noqa: PLR2004 + raise ValueError(f"Unexpected format for item: {item}") + key = key_value[0].strip() + value = ( + int(key_value[1].strip()) + if key_value[1].isnumeric() + else key_value[1] + ) + config_dict[key] = value + + return EmulatedConfig(**config_dict) # type: ignore # noqa: PGH003 + + raise ValueError( + f"Invalid configuration given for creation of EmulatedConfig: {config}" + ) + prompt_tokens: int prompt_tokens_variance: Optional[int] = None prompt_tokens_min: Optional[int] = None @@ -33,177 +101,287 @@ class EmulatedConfig: generated_tokens_min: Optional[int] = None generated_tokens_max: Optional[int] = None + @property + def prompt_tokens_range(self) -> Tuple[int, int]: + """ + Get the range (min, max) of prompt tokens to generate. + + :return: The range of prompt tokens. + :rtype: Tuple[int, int] + """ + return self._token_range( + self.prompt_tokens, + self.prompt_tokens_variance, + self.prompt_tokens_min, + self.prompt_tokens_max, + ) + + @property + def output_tokens_range(self) -> Tuple[int, int]: + """ + Get the range (min, max) of output tokens to generate. + + :return: The range of generated tokens. + :rtype: Tuple[int, int] + """ + if not self.generated_tokens: + return 0, 0 + + return self._token_range( + self.generated_tokens, + self.generated_tokens_variance, + self.generated_tokens_min, + self.generated_tokens_max, + ) + + def sample_prompt_tokens(self, rng: np.random.Generator) -> int: + """ + Sample the number of prompt tokens to generate. + + :param rng: The random number generator to use. + :type rng: np.random.Generator + :return: The number of prompt tokens to create. + :rtype: int + """ + return self._sample_tokens( + self.prompt_tokens, + self.prompt_tokens_variance, + self.prompt_tokens_min, + self.prompt_tokens_max, + rng, + ) + + def sample_output_tokens(self, rng: np.random.Generator) -> Optional[int]: + """ + Sample the number of output tokens to generate. 
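+
+        Illustrative sketch (the config values are examples only):
+
+            config = EmulatedConfig(prompt_tokens=128, generated_tokens=256, generated_tokens_variance=16)
+            rng = np.random.default_rng(42)
+            config.sample_output_tokens(rng)  # int near 256; None only when generated_tokens is unset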
+ + :param rng: The random number generator to use. + :type rng: np.random.Generator + :return: The number of output tokens to generate. + :rtype: Optional[int] + """ + if not self.generated_tokens: + return None + + return self._sample_tokens( + self.generated_tokens, + self.generated_tokens_variance, + self.generated_tokens_min, + self.generated_tokens_max, + rng, + ) + + @staticmethod + def _sample_tokens( + base: int, + variance: Optional[int], + min_tokens: Optional[int], + max_tokens: Optional[int], + rng: np.random.Generator, + ) -> int: + min_tokens, max_tokens = EmulatedConfig._token_range( + base, variance, min_tokens, max_tokens + ) + + if min_tokens == max_tokens: + return min_tokens + + if not variance: + return rng.integers(min_tokens, max_tokens + 1) + + rand = rng.normal(base, math.sqrt(variance)) + + return int(min(max(rand, min_tokens), max_tokens)) + + @staticmethod + def _token_range( + base: int, + variance: Optional[int], + min_tokens: Optional[int], + max_tokens: Optional[int], + ) -> Tuple[int, int]: + if not variance: + return ( + min_tokens or base, + max_tokens or base, + ) + + min_tokens = min_tokens if min_tokens and min_tokens > 0 else 1 + max_tokens = ( + max_tokens if max_tokens and max_tokens > base else base + 5 * variance + ) + + return min_tokens, max_tokens + + +class EndlessTokens(List[str]): + """ + A list subclass that allows for endless data generation. + """ + + def __init__( + self, + data: Union[str, Path], + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, + clean_text_args: Optional[Dict[str, bool]] = None, + ): + """ + Initialize EndlessDataWords with data. + + :param data: Source text data. + :type data: str + """ + logger.debug("Loading data from: {}", data) + data = load_text(data) + data = filter_text(data, filter_start, filter_end) + data = ( + clean_text(data) + if not clean_text_args + else clean_text(data, **clean_text_args) + ) + self._tokens, self._token_separators, self._line_indices = split_text(data) + + super().__init__(self._tokens) + + @property + def line_indices(self) -> List[int]: + """ + Get the list of start indices for lines. + + :return: List of start indices. + :rtype: List[int] + """ + return self._line_indices + + def create_text(self, start: int, length: int) -> str: + """ + Create a text snippet from the specified range. + + :param start: Start index. + :type start: int + :param length: Length of the snippet. + :type length: int + :return: Text snippet. + :rtype: str + """ + start = start % len(self) + text = "" + buff_token_sep = "" + + for counter in range(length): + index = (start + counter) % len(self) + text += buff_token_sep + self[index] + buff_token_sep = self._token_separators[index] + + return text + class EmulatedRequestGenerator(RequestGenerator): """ A request generator that generates emulated requests based on a configuration. - :param config: The configuration string or file. - :type config: Union[str, Dict] + :param config: The configuration string, file path, or dictionary. + :type config: Union[str, Dict, Path] :param random_seed: The random seed to use for generating requests. :type random_seed: Optional[int] :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] + :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] :param mode: The generation mode, either 'async' or 'sync'. 
- :type mode: str + :type mode: GenerationMode :param async_queue_size: The size of the request queue. :type async_queue_size: int """ def __init__( self, - config: Union[str, Dict], + config: Union[str, Path, Dict], random_seed: Optional[int] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): - super().__init__(tokenizer, mode, async_queue_size) - self._config = self._load_config(config) - self._data = self._load_emulated_data() + """ + Initialize EmulatedRequestGenerator with configuration and tokenizer. + + :param config: Configuration source, can be a dictionary, + JSON string, or file path. + :type config: Union[str, Path, Dict] + :param random_seed: Optional seed for random number generator. + :type random_seed: Optional[int] + :param tokenizer: Tokenizer instance or configuration for tokenizing prompts. + :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] + :param mode: Mode of request generation, either 'async' or 'sync'. + :type mode: str + :param async_queue_size: Size of the asynchronous queue. + :type async_queue_size: int + """ + self._config = EmulatedConfig.create_config(config) + self._tokens = EndlessTokens( + settings.emulated_data.source, + settings.emulated_data.filter_start, + settings.emulated_data.filter_end, + ) self._rng = np.random.default_rng(random_seed) + # NOTE: Must be after all the parameters since the queue population + # function requires attributes above + super().__init__( + type_="emulated", + source=str(config), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) + def create_item(self) -> TextGenerationRequest: """ - Create a new result request item from the data. + Create a new text generation request item from the data. - :return: A new result request. + :return: A new text generation request. 
:rtype: TextGenerationRequest """ - prompt, prompt_token_count = self._sample_prompt() - generated_token_count = self._sample_generated() - - request = TextGenerationRequest( + logger.debug("Creating new text generation request") + target_prompt_token_count = self._config.sample_prompt_tokens(self._rng) + prompt = self.sample_prompt(target_prompt_token_count) + prompt_token_count = len(self.tokenizer.tokenize(prompt)) + output_token_count = self._config.sample_output_tokens(self._rng) + logger.debug("Generated prompt: {}", prompt) + + return TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, + output_token_count=output_token_count, ) - if generated_token_count: - request.params["generated_tokens"] = generated_token_count - - return request - - def _load_config(self, config: Union[str, Dict]) -> EmulatedConfig: - # load the config file from a dict, string (json or csv), or file path - if isinstance(config, dict): - config_dict = config - logger.info("Loaded configuration from dict: {}", config) - elif isinstance(config, str) and config.endswith(".json"): - with Path(config).open(encoding="utf-8") as file: - config_dict = json.load(file) - - logger.info("Loaded configuration from file: {}", config) - elif isinstance(config, str) and (config.index("{") > -1): - config_dict = json.loads(config.strip()) - logger.info("Loaded configuration from string: {}", config) - elif isinstance(config, str) and (config.index(",") > -1): - items = config.split(",") - config_dict = {} - for item in items: - key_value = item.split("=") - if len(key_value) != 2: # noqa: PLR2004 - raise ValueError(f"Unexpected format for item: {item}") - key, value = key_value - config_dict[key] = value - logger.info("Loaded configuration from csv string: {}", config) - else: - raise ValueError( - f"Invalid configuration given for EmulatedRequestGenerator: {config}" - ) - - # map the config to the EmulatedConfig dataclass - return EmulatedConfig(**config_dict or {}) - - def _load_emulated_data(self) -> List[str]: - url = "https://www.gutenberg.org/files/1342/1342-0.txt" - logger.info(f"Downloading text corpus from {url}") - response = requests.get(url, timeout=settings.request_timeout) - response.raise_for_status() - - content = response.text - start = content.index( - "It is a truth universally acknowledged, that a single man in possession" - ) - end = content.index("CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO.") - content = content[start:end] - - cleaned_content = ( - content.replace("\r\n", " ").replace("\r", " ").replace("\n", " ") - ) - cleaned_content = unicodedata.normalize("NFKD", cleaned_content) - cleaned_content = re.sub(r"\s+", " ", cleaned_content).strip() - - # break lines according to punctuation - lines_text = ( - cleaned_content.replace(". ", ".\n") - .replace("! ", "!\n") - .replace("? ", "?\n") - ) - lines: List[str] = lines_text.split("\n") - - return [line.strip() for line in lines if line.strip()] - - def _token_count(self, text: str) -> int: - return ( - len(self.tokenizer.tokenize(text)) if self.tokenizer else len(text.split()) - ) - - def _sample_prompt(self) -> Tuple[str, int]: - prompt_token_count = self._sample_tokens( - self._config.prompt_tokens, - self._config.prompt_tokens_variance, - self._config.prompt_tokens_min, - self._config.prompt_tokens_max, - ) - - prompt = self._data[self._rng.integers(0, len(self._data))] + def sample_prompt(self, tokens: int) -> str: + """ + Sample a prompt with the specified number of tokens. 
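+
+        Illustrative sketch (assumes a generator built with the default emulated
+        data source and tokenizer; the token count is an example value):
+
+            generator = EmulatedRequestGenerator(config={"prompt_tokens": 64}, mode="sync")
+            prompt = generator.sample_prompt(64)  # text measuring roughly 64 tokens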
- while self._token_count(prompt) < prompt_token_count: - prompt += " " + self._data[self._rng.integers(0, len(self._data))] + :param tokens: Number of tokens for the prompt. + :type tokens: int + :return: Sampled prompt text. + :rtype: str + """ + start_line_index = self._rng.integers(0, len(self._tokens.line_indices)) - # truncate the prompt to the desired token count - words = prompt.split() + # binary search to find the proper number of tokens for the prompt + # this is because tokenizers differ in tokenization behavior left = 0 - right = len(words) + right = left + 5 * tokens + while left < right: mid = (left + right) // 2 - if self._token_count(" ".join(words[:mid])) < prompt_token_count: + prompt = self._tokens.create_text(start_line_index, mid) + token_count = len(self.tokenizer.tokenize(prompt)) + + if token_count == tokens: + return prompt + + if token_count < tokens: left = mid + 1 else: right = mid - prompt = " ".join(words[:left]) - - return prompt, prompt_token_count - def _sample_generated(self) -> Optional[int]: - if not self._config.generated_tokens: - return None - - return self._sample_tokens( - self._config.generated_tokens, - self._config.generated_tokens_variance, - self._config.generated_tokens_min, - self._config.generated_tokens_max, - ) - - def _sample_tokens( - self, - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - ) -> int: - variance = variance or 0 - min_tokens = max(1, min_tokens or 1) - max_tokens = max( - min_tokens, max_tokens or base + 5 * variance if variance else 10000 - ) - - return max( - min( - base + self._rng.integers(-variance, variance + 1), - max_tokens, - ), - min_tokens, - ) + return self._tokens.create_text(start_line_index, left) diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py index 9d46fbe..8460bd2 100644 --- a/src/guidellm/request/file.py +++ b/src/guidellm/request/file.py @@ -1,14 +1,13 @@ -import csv -import json from pathlib import Path -from typing import List, Optional, Union +from typing import Optional, Union from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 +from guidellm.config import settings from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from guidellm.utils import PREFERRED_DATA_COLUMNS +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import load_text_lines __all__ = ["FileRequestGenerator"] @@ -17,8 +16,8 @@ class FileRequestGenerator(RequestGenerator): """ A request generator implementation for files. - :param file_path: The path to the file containing the data. - :type file_path: str + :param path: The path to the file containing the data. + :type path: Union[str, Path] :param tokenizer: The tokenizer instance or the name/config to use for tokenizing prompts. 
:type tokenizer: Union[str, PreTrainedTokenizer] @@ -30,16 +29,28 @@ class FileRequestGenerator(RequestGenerator): def __init__( self, - file_path: str, + path: Union[str, Path], tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, ): - super().__init__(tokenizer, mode, async_queue_size) - self._file_path = file_path - self._data = self._load_file() + self._path = path + self._data = load_text_lines( + path, + filters=settings.dataset.preferred_data_columns, + ) self._iterator = iter(self._data) + # NOTE: Must be after all the parameters since the queue population + # function requires attributes above + super().__init__( + type_="file", + source=str(path), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) + def create_item(self) -> TextGenerationRequest: """ Create a new result request item from the data. @@ -47,89 +58,16 @@ def create_item(self) -> TextGenerationRequest: :return: A new result request. :rtype: TextGenerationRequest """ + logger.debug("Creating new request item from file data") + try: data = next(self._iterator) except StopIteration: self._iterator = iter(self._data) data = next(self._iterator) - token_count = ( - self.tokenizer(data)["input_ids"].shape[0] if self.tokenizer else None - ) + token_count = len(self.tokenizer.tokenize(data)) request = TextGenerationRequest(prompt=data, prompt_token_count=token_count) - logger.debug(f"Created new TextGenerationRequest: {request}") + logger.debug("Created new TextGenerationRequest: {}", request) return request - - def _load_file(self) -> List[str]: - if self._file_path.endswith(".txt"): - data = self._load_text_file() - elif self._file_path.endswith(".csv"): - data = self._load_csv_file() - elif self._file_path.endswith(".jsonl"): - data = self._load_jsonl_file() - elif self._file_path.endswith(".json"): - data = self._load_json_file() - else: - raise ValueError("Unsupported file type") - - return [line.strip() for line in data if line and line.strip()] - - def _load_text_file(self) -> List[str]: - with Path(self._file_path).open(encoding="utf-8") as file: - return file.readlines() - - def _load_csv_file(self) -> List[str]: - data = [] - with Path(self._file_path).open(encoding="utf-8") as file: - reader = csv.DictReader(file) - columns = reader.fieldnames - if not columns: - raise ValueError("Invalid empty value for columns") - for row in reader: - # convert the row to a dictionary - obj = {col: row[col] for col in columns} - data.append(obj) - - return self._extract_prompts(data) - - def _load_jsonl_file(self) -> List[str]: - data = [] - with Path(self._file_path).open(encoding="utf-8") as file: - for line in file: - obj = json.loads(line) - data.append(obj) - - return self._extract_prompts(data) - - def _load_json_file(self) -> List[str]: - with Path(self._file_path).open(encoding="utf-8") as file: - obj = json.load(file) - data = None - - if isinstance(obj, list): - data = obj - elif isinstance(obj, dict): - for value in obj.values(): - if isinstance(value, list): - data = value - break - - if data is None: - raise ValueError( - f"Unsupported JSON structure, " - f"expected a list or a dictionary with a list. 
Given: {obj}", - ) - - return self._extract_prompts(data) - - def _extract_prompts(self, objects: List[dict]) -> List[str]: - data = [] - for obj in objects: - for col in PREFERRED_DATA_COLUMNS: - if col in obj: - data.append(obj[col]) - break - else: - data.append(next(iter(obj.values()))) - return data diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py index 814937c..eaab862 100644 --- a/src/guidellm/request/transformers.py +++ b/src/guidellm/request/transformers.py @@ -1,18 +1,21 @@ +from pathlib import Path from typing import Optional, Union -from datasets import ( +from datasets import ( # type: ignore # noqa: PGH003 Dataset, DatasetDict, IterableDataset, IterableDatasetDict, - load_dataset, ) from loguru import logger -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from guidellm.utils import PREFERRED_DATA_COLUMNS, PREFERRED_DATA_SPLITS +from guidellm.request.base import GenerationMode, RequestGenerator +from guidellm.utils import ( + load_transformers_dataset, + resolve_transformers_dataset_column, +) __all__ = ["TransformersDatasetRequestGenerator"] @@ -39,11 +42,13 @@ class TransformersDatasetRequestGenerator(RequestGenerator): def __init__( self, - dataset: str, + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], split: Optional[str] = None, column: Optional[str] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: str = "async", + mode: GenerationMode = "async", async_queue_size: int = 50, **kwargs, ): @@ -51,12 +56,22 @@ def __init__( self._split = split self._column = column self._kwargs = kwargs - self._hf_dataset = self._load_dataset() - self._iterator = iter(self._hf_dataset) + + self._hf_dataset = load_transformers_dataset(dataset, split=split, **kwargs) + self._hf_column = resolve_transformers_dataset_column( + self._hf_dataset, column=column + ) + self._hf_dataset_iterator = iter(self._hf_dataset) # NOTE: Must be after all the parameters since the queue population # function requires attributes above - super().__init__(tokenizer, mode, async_queue_size) + super().__init__( + type_="transformers_dataset", + source=str(dataset), + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) def create_item(self) -> TextGenerationRequest: """ @@ -66,12 +81,16 @@ def create_item(self) -> TextGenerationRequest: :rtype: TextGenerationRequest """ - data = next(self._iterator) + logger.debug("Creating new request item from dataset") - prompt = data[self._column] if self._column in data else str(data) - token_count = ( - self._tokenizer(prompt)["input_ids"].shape[0] if self._tokenizer else None - ) + try: + data = next(self._hf_dataset_iterator) + except StopIteration: + self._hf_dataset_iterator = iter(self._hf_dataset) + data = next(self._hf_dataset_iterator) + + prompt = data[self._hf_column] + token_count = len(self.tokenizer.tokenize(prompt)) request = TextGenerationRequest( prompt=prompt, prompt_token_count=token_count, @@ -79,76 +98,3 @@ def create_item(self) -> TextGenerationRequest: logger.debug(f"Created new TextGenerationRequest: {request}") return request - - def _load_dataset(self) -> Dataset: - dataset = self._load_hf_dataset() - - if isinstance(dataset, (DatasetDict, IterableDatasetDict)): - split = self._load_data_split(dataset) - - if split not in dataset: - raise 
ValueError(f"Split '{split}' not found in dataset") - - dataset = dataset[split] - else: - self._split = str(dataset.split) if dataset else None - - column = self._load_data_column(dataset) - - if column not in dataset.column_names: - raise ValueError(f"Column '{column}' not found in dataset") - - logger.info( - f"Loaded dataset {self._dataset} with split: {self._split} " - f"and column: {self._column}", - ) - - return dataset - - def _load_hf_dataset( - self, - ) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: - if self._dataset.endswith(".csv") or self._dataset.endswith(".json"): - logger.debug(f"Loading dataset from local path: {self._dataset}") - extension = self._dataset.split(".")[-1] - - return load_dataset(extension, data_files=self._dataset, **self._kwargs) - - if self._dataset.endswith(".py"): - logger.debug(f"Loading dataset from local script: {self._dataset}") - - return load_dataset(self._dataset, **self._kwargs) - - logger.debug(f"Loading dataset: {self._dataset}") - - return load_dataset(self._dataset, **self._kwargs) - - def _load_data_split(self, dataset: Union[DatasetDict, IterableDatasetDict]) -> str: - if self._split: - return self._split - - for split in PREFERRED_DATA_SPLITS: - if split in dataset: - self._split = split - break - if self._split is None: - self._split = list(dataset)[0] - - logger.info(f"Inferred split to use: {self._split}") - - return self._split - - def _load_data_column(self, dataset: Union[Dataset, IterableDataset]) -> str: - if self._column: - return self._column - - for col in PREFERRED_DATA_COLUMNS: - if col in dataset.column_names: - self._column = col - break - if self._column is None: - self._column = list(dataset.column_names)[0] - - logger.info(f"Inferred column to use for prompts: {self._column}") - - return self._column diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py index 4b425ab..b3b4ac5 100644 --- a/src/guidellm/scheduler/__init__.py +++ b/src/guidellm/scheduler/__init__.py @@ -1,4 +1,4 @@ +from .base import Scheduler, SchedulerResult from .load_generator import LoadGenerationMode, LoadGenerator -from .scheduler import Scheduler -__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler"] +__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler", "SchedulerResult"] diff --git a/src/guidellm/scheduler/base.py b/src/guidellm/scheduler/base.py new file mode 100644 index 0000000..602166b --- /dev/null +++ b/src/guidellm/scheduler/base.py @@ -0,0 +1,374 @@ +import asyncio +import math +import time +from dataclasses import dataclass +from typing import AsyncGenerator, Literal, Optional, Union, get_args + +from loguru import logger + +from guidellm.backend import Backend +from guidellm.config import settings +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationError, + TextGenerationRequest, + TextGenerationResult, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator + +__all__ = ["Scheduler", "SchedulerResult"] + + +@dataclass +class SchedulerResult: + """ + Represents the result of a single task execution within the Scheduler. + + :param completed: Indicates if the task is completed. + :type completed: bool + :param count_total: Total number of tasks to be executed. + :type count_total: int + :param count_completed: Number of tasks that have been completed so far. + :type count_completed: int + :param report: Benchmark data for the task execution. 
+ :type benchmark: TextGenerationBenchmark + :param current_result: The result of the current request, if any. + :type current_result: Optional[Union[TextGenerationResult, Exception]] + """ + + completed: bool + count_total: int + count_completed: int + benchmark: TextGenerationBenchmark + current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None + + +class Scheduler: + """ + Schedules and manages the execution of tasks for text generation requests. + + :param generator: The request generator that produces text generation requests. + :type generator: RequestGenerator + :param worker: The backend worker that processes the requests. + :type worker: Backend + :param mode: The mode of load generation (e.g., synchronous, asynchronous). + :type mode: LoadGenerationMode + :param rate: The rate at which requests are generated, if applicable. + :type rate: Optional[float] + :param max_number: Maximum number of requests to be processed. + :type max_number: Optional[int] + :param max_duration: Maximum duration in seconds for which requests + should be processed. + :type max_duration: Optional[float] + + :raises ValueError: If neither max_number nor max_duration is specified or + if they are not positive. + """ + + def __init__( + self, + generator: RequestGenerator, + worker: Backend, + mode: LoadGenerationMode = "synchronous", + rate: Optional[float] = None, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ): + logger.info( + "Scheduler initialized with params: generator={}, worker={}, mode={}, " + "rate={}, max_number={}, max_duration={}", + generator, + worker, + mode, + rate, + max_number, + max_duration, + ) + + if mode not in get_args(LoadGenerationMode): + err = ValueError( + f"{mode} is not a valid Load Generation Mode. " + f"Valid options are {get_args(LoadGenerationMode)}" + ) + logger.error(err) + raise err + + if not max_number and not max_duration: + err = ValueError("Either max_number or max_duration must be specified") + logger.error(err) + raise err + + if max_number and max_number <= 0: + err = ValueError(f"max_number must be > 0, given: {max_number}") + logger.error(err) + raise err + + if max_duration and max_duration <= 0: + err = ValueError(f"max_duration must be > 0, given: {max_duration}") + logger.error(err) + raise err + + if mode in ["constant", "poisson"] and not rate: + err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") + logger.error(err) + raise err + + self._generator = generator + self._worker = worker + self._mode = mode + self._rate = rate + self._max_number = max_number + self._max_duration = max_duration + + self._load_generator = LoadGenerator(mode, rate) + + @property + def generator(self) -> RequestGenerator: + """ + The request generator that produces text generation requests. + + :return: The request generator instance. + :rtype: RequestGenerator + """ + return self._generator + + @property + def worker(self) -> Backend: + """ + The backend worker that processes the requests. + + :return: The backend worker instance. + :rtype: Backend + """ + return self._worker + + @property + def mode(self) -> LoadGenerationMode: + """ + The mode of load generation (e.g., synchronous, asynchronous). + + :return: The load generation mode. + :rtype: LoadGenerationMode + """ + return self._mode + + @property + def rate(self) -> Optional[float]: + """ + The rate at which requests are generated, if applicable. + + :return: The rate of request generation. 
+ :rtype: Optional[float] + """ + return self._rate + + @property + def max_number(self) -> Optional[int]: + """ + Maximum number of requests to be processed. + + :return: The maximum number of requests. + :rtype: Optional[int] + """ + return self._max_number + + @property + def max_duration(self) -> Optional[float]: + """ + Maximum duration in seconds for which requests should be processed. + + :return: The maximum duration in seconds. + :rtype: Optional[float] + """ + return self._max_duration + + @property + def load_generator(self) -> LoadGenerator: + """ + The load generator responsible for generating load based on mode and rate. + + :return: The load generator instance. + :rtype: LoadGenerator + """ + return self._load_generator + + @property + def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]: + """ + The report mode for the scheduler. + + :return: The report mode. + :rtype: Literal["asynchronous", "synchronous", "throughput"] + """ + if self._mode == "synchronous": + return "synchronous" + + if self._mode == "throughput": + return "throughput" + + return "asynchronous" + + async def run(self) -> AsyncGenerator[SchedulerResult, None]: + """ + Run the scheduler to process requests based on the configured mode, rate, + maximum number, and maximum duration. + + :yield: The result of each task executed by the scheduler. + :rtype: Generator[SchedulerResult, None, None] + """ + logger.info("Starting Scheduler run") + + benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate) + start_time = time.time() + end_time = start_time + self.max_duration if self.max_duration else math.inf + max_number = float(self.max_number) if self.max_number else math.inf + runner = self._run_sync if self._mode == "synchronous" else self._run_async + count_total = ( + self.max_number + if self.max_number + else round(self.max_duration) + if self.max_duration + else 0 + ) + + # yield initial result for progress tracking + yield SchedulerResult( + completed=False, + count_total=count_total, + count_completed=0, + benchmark=benchmark, + ) + + run_count = 0 + async for res in runner(benchmark, end_time, max_number): + run_count += 1 + count_completed = ( + min(run_count, self.max_number) + if self.max_number + else round(time.time() - start_time) + if self.max_duration + else 0 + ) + + yield SchedulerResult( + completed=False, + count_total=count_total, + count_completed=count_completed, + benchmark=benchmark, + current_result=res, + ) + + logger.info("Scheduler run completed") + + yield SchedulerResult( + completed=True, + count_total=count_total, + count_completed=( + benchmark.request_count + benchmark.error_count + if self.max_number + else round(time.time() - start_time) + if self.max_duration + else 0 + ), + benchmark=benchmark, + ) + + async def _run_sync( + self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float + ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: + for index, (request, submit_at) in enumerate( + zip(self.generator, self.load_generator.times()) + ): + if index >= max_number or time.time() >= end_time: + break + + logger.debug( + "Running synchronous request={} at submit_at={}", + request, + submit_at, + ) + benchmark.request_started() + result = await self._submit_task_coroutine(request, submit_at, end_time) + if result is not None: + benchmark.request_completed(result) + logger.debug("Request completed with output: {}", result) + yield result + + async def _run_async( + self, benchmark: 
TextGenerationBenchmark, end_time: float, max_number: float + ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: + tasks = [] + completed = 0 + + for index, (request, submit_at) in enumerate( + zip(self.generator, self.load_generator.times()) + ): + while (index + 1 - completed) >= settings.max_concurrency: + await asyncio.sleep(0.1) + + if index >= max_number or time.time() >= end_time or submit_at >= end_time: + break + + logger.debug( + "Running asynchronous request={} at submit_at={}", + request, + submit_at, + ) + + def _completed(_task: asyncio.Task) -> None: + nonlocal completed + completed += 1 + _res = _task.result() + + if _res: + benchmark.request_completed(_res) + logger.debug("Request completed: {}", _res) + + benchmark.request_started() + task = asyncio.create_task( + self._submit_task_coroutine(request, submit_at, end_time) + ) + task.add_done_callback(_completed) + tasks.append(task) + + # release control to the event loop for other tasks + await asyncio.sleep(0.001) + + for compl_task in asyncio.as_completed(tasks): + task_res = await compl_task + if task_res is not None: + yield task_res + + async def _submit_task_coroutine( + self, request: TextGenerationRequest, submit_at: float, end_time: float + ) -> Optional[Union[TextGenerationResult, TextGenerationError]]: + try: + if submit_at > end_time: + logger.info( + "Request {} submission time {} is greater than end time {}", + request, + submit_at, + end_time, + ) + raise asyncio.TimeoutError( + f"Request submission time {submit_at} " + f"is greater than end time {end_time}" + ) + + if submit_at > time.time(): + await asyncio.sleep(submit_at - time.time()) + + timeout = ( + end_time - time.time() if end_time and end_time < math.inf else None + ) + + return await asyncio.wait_for(self._worker.submit(request), timeout=timeout) + except asyncio.TimeoutError as exc: + logger.info("Request {} timed out: {}", request, exc) + + return None + except Exception as exc: # noqa: BLE001 + logger.warning("Request {} failed: {}", request, exc) + + return TextGenerationError(request=request, message=str(exc)) diff --git a/src/guidellm/scheduler/load_generator.py b/src/guidellm/scheduler/load_generator.py index a363761..f629752 100644 --- a/src/guidellm/scheduler/load_generator.py +++ b/src/guidellm/scheduler/load_generator.py @@ -1,61 +1,196 @@ import time -from enum import Enum -from typing import Generator +from typing import Generator, Literal, Optional, get_args import numpy as np +from loguru import logger __all__ = ["LoadGenerationMode", "LoadGenerator"] +LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"] -class LoadGenerationMode(str, Enum): - """ - Available values: - * SYNCHRONOUS - * CONSTANT (async) - * POISSON (async) +class LoadGenerator: """ + Load Generator class that generates timestamps for load generation. - SYNCHRONOUS = "synchronous" - CONSTANT = "constant" - POISSON = "poisson" + This class supports multiple load generation modes: "constant", "poisson", + "throughput", and "synchronous". Each mode has its own method for generating + timestamps based on the rate provided during initialization. + :param mode: The mode of load generation. Valid options are "constant", + "poisson", "throughput", and "synchronous". + :type mode: LoadGenerationMode + :param rate: The rate at which to generate timestamps. This value is + interpreted differently depending on the mode. 
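For context, a minimal sketch of how the new Scheduler.run() async generator is meant to be consumed; request_generator and backend stand in for already-constructed RequestGenerator and Backend instances, whose setup is outside this diff:

import asyncio

from guidellm.scheduler import Scheduler


async def run_benchmark(request_generator, backend):
    # request_generator / backend are assumed pre-built instances (not shown here)
    scheduler = Scheduler(
        generator=request_generator,
        worker=backend,
        mode="constant",    # "synchronous", "constant", "poisson", or "throughput"
        rate=2.0,           # required for "constant" and "poisson"
        max_duration=30.0,  # stop after 30 s; max_number is the other stop condition
    )

    async for result in scheduler.run():
        if result.completed:
            return result.benchmark  # final TextGenerationBenchmark
        # result.count_completed / result.count_total can drive a progress display

# asyncio.run(run_benchmark(request_generator, backend))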
+ :type rate: float -class LoadGenerator: - def __init__(self, mode: LoadGenerationMode, rate: float): - if mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("Synchronous mode not supported by LoadGenerator") + :raises ValueError: If an invalid mode is provided. + """ + + def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None): + """ + Initialize the Load Generator with the mode and rate. + + :param mode: The mode of load generation ("constant", "poisson", "throughput", + or "synchronous"). + :type mode: LoadGenerationMode + :param rate: The rate at which to generate timestamps. In the "constant" + mode, this represents the frequency of events. In the "poisson" mode, + it represents the average frequency. + :type rate: Optional[float] + """ + if mode not in get_args(LoadGenerationMode): + error = ValueError( + f"{mode} is not a valid Load Generation Mode. " + f"Valid options are {get_args(LoadGenerationMode)}" + ) + logger.error(error) + raise error + + if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0): + error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") + logger.error(error) + raise error self._mode = mode self._rate = rate + logger.debug( + "Initialized LoadGenerator with mode: {mode}, rate: {rate}", + mode=mode, + rate=rate, + ) + + @property + def mode(self) -> LoadGenerationMode: + """ + Get the mode of load generation. + + :return: The mode of load generation. + :rtype: LoadGenerationMode + """ + return self._mode + + @property + def rate(self) -> Optional[float]: + """ + Get the rate of load generation. + + :return: The rate of load generation. + :rtype: Optional[float] + """ + return self._rate def times(self) -> Generator[float, None, None]: - if self._mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("Synchronous mode not supported by LoadGenerator") + """ + Generate timestamps for load generation based on the selected mode. + + :return: A generator that yields timestamps at which each load + should be initiated. + :rtype: Generator[float, None, None] + + :raises ValueError: If the mode is invalid. + """ + logger.debug(f"Generating timestamps using mode: {self._mode}") + + if self._mode == "throughput": + yield from self.throughput_times() + elif self._mode == "constant": + yield from self.constant_times() + elif self._mode == "poisson": + yield from self.poisson_times() + elif self._mode == "synchronous": + yield from self.synchronous_times() + else: + logger.error(f"Invalid mode encountered: {self._mode}") + raise ValueError(f"Invalid mode: {self._mode}") - elif self._mode == LoadGenerationMode.CONSTANT: - yield from self._constant_times() + def synchronous_times(self) -> Generator[float, None, None]: + """ + Generate invalid timestamps for the "synchronous" mode. - elif self._mode == LoadGenerationMode.POISSON: - yield from self._poisson_times() - else: - raise NotImplementedError( - f"{self._mode} is not supported Load Generation Mode", + :return: A generator that yields a constant invalid timestamp (-1.0). + :rtype: Generator[float, None, None] + """ + logger.debug("Generating invalid timestamps for synchronous mode") + while True: + yield -1.0 + + def throughput_times(self) -> Generator[float, None, None]: + """ + Generate timestamps at the maximum rate possible, returning the current time. + + :return: A generator that yields the current time in seconds. 
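A quick sketch of pulling timestamps out of LoadGenerator.times() for the different modes (the "constant" and "poisson" generators are defined just below):

from itertools import islice

from guidellm.scheduler import LoadGenerator

constant = LoadGenerator(mode="constant", rate=2.0)  # one request every 0.5 s
poisson = LoadGenerator(mode="poisson", rate=2.0)    # ~2 requests/s on average

print(list(islice(constant.times(), 3)))  # evenly spaced wall-clock timestamps
print(list(islice(poisson.times(), 3)))   # exponentially distributed gaps

# "synchronous" needs no rate and yields the sentinel -1.0 forever
print(next(LoadGenerator(mode="synchronous").times()))  # -1.0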
+ :rtype: Generator[float, None, None] + """ + logger.debug("Generating timestamps at throughput rate") + while True: + yield time.time() + + def constant_times(self) -> Generator[float, None, None]: + """ + Generate timestamps at a constant rate based on the specified rate. + + :return: A generator that yields timestamps incremented by 1/rate seconds. + :rtype: Generator[float, None, None] + """ + logger.debug("Generating constant rate timestamps with rate: {}", self._rate) + + if self._rate is None or self._rate == 0: + raise ValueError( + "Rate must be > 0 for constant mode, given: {}", self._rate ) - def _constant_times(self) -> Generator[float, None, None]: start_time = time.time() time_increment = 1.0 / self._rate counter = 0 while True: - yield start_time + time_increment * counter + yield_time = start_time + time_increment * counter + logger.debug(f"Yielding timestamp: {yield_time}") + yield yield_time counter += 1 - def _poisson_times(self) -> Generator[float, None, None]: + def poisson_times(self) -> Generator[float, None, None]: + """ + Generate timestamps based on a Poisson process, where the number + of requests to be sent per second is drawn from a Poisson distribution. + The inter arrival time between requests is exponentially distributed. + + :return: A generator that yields timestamps based on a Poisson distribution. + :rtype: Generator[float, None, None] + """ + logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate) + + if self._rate is None or self._rate == 0: + raise ValueError("Rate must be > 0 for poisson mode, given: {}", self._rate) + time_tracker = time.time() rng = np.random.default_rng() + time_increment = 1.0 while True: - yield time_tracker - time_tracker += rng.poisson(1.0 / self._rate) + num_requests = rng.poisson(self._rate) + + if num_requests == 0: + yield time_tracker + time_increment + else: + inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests) + logger.debug( + "Calculated new inter-arrival times for poisson process: {}", + inter_arrival_times, + ) + arrival_time_tracker = time_tracker + + for arrival_time in inter_arrival_times: + arrival_time_tracker += arrival_time + + if arrival_time_tracker > time_tracker + time_increment: + logger.debug( + "Arrival time tracker: {} is greater than current time", + arrival_time_tracker, + ) + break + + yield arrival_time_tracker + + time_tracker += time_increment # Move on to the next time period diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py deleted file mode 100644 index 129dbaa..0000000 --- a/src/guidellm/scheduler/scheduler.py +++ /dev/null @@ -1,236 +0,0 @@ -import asyncio -import functools -import time -from typing import Callable, Generator, Iterable, List, Optional, Tuple - -from loguru import logger - -from guidellm.backend import Backend -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationError, - TextGenerationResult, -) -from guidellm.core.request import TextGenerationRequest -from guidellm.request import RequestGenerator - -from .load_generator import LoadGenerationMode, LoadGenerator - -__all__ = ["Scheduler"] - - -class Scheduler: - """ - The scheduler class is responsible for handling tasks and running - """ - - def __init__( - self, - request_generator: RequestGenerator, - backend: Backend, - load_gen_mode: LoadGenerationMode = LoadGenerationMode.SYNCHRONOUS, - load_gen_rate: Optional[float] = None, - max_requests: Optional[int] = None, - max_duration: Optional[float] = None, - ): - if max_requests 
is None and max_duration is None: - raise ValueError("Either num_requests or duration must be specified") - - if (max_requests is not None and max_requests <= 0) or ( - max_duration is not None and max_duration <= 0 - ): - raise ValueError("max_requests anx max_duration must be > 0") - - if load_gen_mode != LoadGenerationMode.SYNCHRONOUS and load_gen_rate is None: - raise ValueError( - "Rate must be specified for non-synchronous load generation modes", - ) - - self._request_generator = request_generator - self._backend = backend - self._load_gen_mode = load_gen_mode - self._load_gen_rate = load_gen_rate - self._max_requests = max_requests - self._max_duration = max_duration - - def run(self) -> TextGenerationBenchmark: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - report: TextGenerationBenchmark = self._run_sync() - else: - report = self._event_loop.run_until_complete(self._run_async()) - - return report - - @property - def load_generator(self) -> LoadGenerator: - if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - raise ValueError("LoadGenerator can't be used is synchronous mode") - - if not self._load_gen_rate: - raise ValueError("Invalid empty value for self._load_gen_rate") - - return LoadGenerator(self._load_gen_mode, self._load_gen_rate) - - def _cancel_running_tasks( - self, - tasks: Iterable[Tuple[TextGenerationRequest, asyncio.Task]], - benchmark: TextGenerationBenchmark, - ) -> None: - """ - Cancel all the running tasks for the scheduler and augment the - benchmark with error reports. - - :param tasks: The `tasks` iterable batch. Where the batch includes - the asyncio.Task and the signature context of that task. - """ - - for request, task in tasks: - if not task.done(): - logger.debug(f"Cancelling running task {task}") - task.cancel() - benchmark.errors.append( - TextGenerationError( - request=request, - message=str(asyncio.CancelledError()), - ), - ) - - def _run_sync(self) -> TextGenerationBenchmark: - benchmark = TextGenerationBenchmark(mode=self._load_gen_mode.value, rate=None) - start_time = time.time() - - logger.debug("Running scheduler in sync mode") - - for requests_counter, callback in enumerate(self._sync_tasks()): - if ( - self._max_requests is not None - and requests_counter >= self._max_requests - ) or ( - self._max_duration is not None - and time.time() - start_time >= self._max_duration - ): - break - - benchmark.request_started() - res = callback() - benchmark.request_completed(res) - - return benchmark - - async def _run_async(self) -> TextGenerationBenchmark: - """ - Running in async mode determines next steps: - * Iterate through all the tasks with load attached - * Check the execution time does not go over the max duration - * Check the number of requests is not greater than max requests - - If the max duration is not specified for the scheduler - check only - max requests and just break the loop without cancelling tasks. 
- """ - - benchmark: TextGenerationBenchmark = TextGenerationBenchmark( - mode=self._load_gen_mode.value, - rate=self._load_gen_rate, - ) - tasks: List[Tuple[TextGenerationRequest, asyncio.Task]] = [] - start_time: float = time.time() - - for requests_counter, (_task_package, task_start_time) in enumerate( - zip( - self._async_tasks(benchmark), - self.load_generator.times(), - ) - ): - request, task = _task_package - tasks.append((request, task)) - - if ( - self._max_duration is not None - and time.time() - start_time >= self._max_duration - ): - self._cancel_running_tasks(tasks=tasks, benchmark=benchmark) - break - - if ( - self._max_requests is not None - and requests_counter >= self._max_requests - 1 - ): - break - - if (pending_time := task_start_time - time.time()) > 0: - await asyncio.sleep(pending_time) - - if self._max_duration is None: - await asyncio.gather(*(t for _, t in tasks)) - else: - try: - # Set the timeout if the max duration is specified - await asyncio.wait_for( - asyncio.gather(*(t for _, t in tasks), return_exceptions=True), - self._max_duration, - ) - except asyncio.TimeoutError: - self._cancel_running_tasks(tasks=tasks, benchmark=benchmark) - - return benchmark - - def _sync_tasks(self) -> Generator[Callable[..., TextGenerationResult], None, None]: - """ - Iterate through `Backend.submit()` sync callbacks. - """ - - for request in self._request_generator: - yield functools.partial(self._backend.submit, request=request) - - def _async_tasks( - self, - benchmark: TextGenerationBenchmark, - ) -> Generator[Tuple[TextGenerationRequest, asyncio.Task], None, None]: - """ - Iterate through `Backend.submit()` async tasks. - """ - - for request in self._request_generator: - task: asyncio.Task = asyncio.create_task( - self._run_task_async(benchmark=benchmark, request=request), - name=f"Backend.submit({request.prompt})", - ) - - yield request, task - - async def _run_task_async( - self, - benchmark: TextGenerationBenchmark, - request: TextGenerationRequest, - ): - benchmark.request_started() - try: - res = await self._event_loop.run_in_executor( - None, - functools.partial(self._backend.submit, request=request), - ) - except asyncio.CancelledError as err: - benchmark.errors.append( - TextGenerationError( - request=request, - message=str(err), - ), - ) - else: - benchmark.request_completed(res) - - @property - def _event_loop(self) -> asyncio.AbstractEventLoop: - """ - The scheduler internal event loop. - - All the tasks should run in that Event Loop to be managed - from the Scheduler's level. 
- """ - - try: - loop = asyncio.get_running_loop() - except RuntimeError: - return asyncio.get_event_loop() - else: - return loop diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 549eee5..2fdd8ca 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,17 +1,40 @@ -from .constants import ( - PREFERRED_DATA_COLUMNS, - PREFERRED_DATA_SPLITS, - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, +from .injector import create_report, inject_data +from .progress import BenchmarkReportProgress +from .text import ( + clean_text, + filter_text, + is_path, + is_path_like, + is_url, + load_text, + load_text_lines, + parse_text_objects, + split_lines_by_punctuation, + split_text, +) +from .transformers import ( + load_transformers_dataset, + resolve_transformers_dataset, + resolve_transformers_dataset_column, + resolve_transformers_dataset_split, ) -from .injector import create_report, inject_data, load_html_file __all__ = [ - "PREFERRED_DATA_COLUMNS", - "PREFERRED_DATA_SPLITS", - "REPORT_HTML_MATCH", - "REPORT_HTML_PLACEHOLDER", + "BenchmarkReportProgress", + "clean_text", "create_report", + "filter_text", "inject_data", - "load_html_file", + "is_path", + "is_path_like", + "is_url", + "load_text", + "load_text_lines", + "load_transformers_dataset", + "parse_text_objects", + "resolve_transformers_dataset", + "resolve_transformers_dataset_column", + "resolve_transformers_dataset_split", + "split_lines_by_punctuation", + "split_text", ] diff --git a/src/guidellm/utils/constants.py b/src/guidellm/utils/constants.py deleted file mode 100644 index 1ccb02e..0000000 --- a/src/guidellm/utils/constants.py +++ /dev/null @@ -1,26 +0,0 @@ -__all__ = [ - "PREFERRED_DATA_COLUMNS", - "PREFERRED_DATA_SPLITS", - "REPORT_HTML_MATCH", - "REPORT_HTML_PLACEHOLDER", -] - - -PREFERRED_DATA_COLUMNS = [ - "prompt", - "instruction", - "input", - "inputs", - "question", - "context", - "text", - "content", - "body", - "data", -] - -PREFERRED_DATA_SPLITS = ["test", "validation", "train"] - -REPORT_HTML_MATCH = "window.report_data = {};" - -REPORT_HTML_PLACEHOLDER = "{}" diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py index 70f2150..fb5216a 100644 --- a/src/guidellm/utils/injector.py +++ b/src/guidellm/utils/injector.py @@ -1,16 +1,12 @@ from pathlib import Path from typing import Union -import requests from pydantic import BaseModel from guidellm.config import settings -from guidellm.utils.constants import ( - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, -) +from guidellm.utils.text import load_text -__all__ = ["create_report", "inject_data", "load_html_file"] +__all__ = ["create_report", "inject_data"] def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: @@ -29,9 +25,12 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: if not isinstance(output_path, Path): output_path = Path(output_path) - html_content = load_html_file(settings.report_generation.source) + html_content = load_text(settings.report_generation.source) report_content = inject_data( - model, html_content, REPORT_HTML_MATCH, REPORT_HTML_PLACEHOLDER + model, + html_content, + settings.report_generation.report_html_match, + settings.report_generation.report_html_placeholder, ) if not output_path.suffix: @@ -69,26 +68,3 @@ def inject_data( inject_str = match.replace(placeholder, model_str) return html.replace(match, inject_str) - - -def load_html_file(path_or_url: str) -> str: - """ - Load an HTML file from a path or URL - - :param 
path_or_url: the path or URL to load the HTML file from - :type path_or_url: str - :return: the HTML content - :rtype: str - """ - if path_or_url.startswith("http"): - response = requests.get(path_or_url, timeout=settings.request_timeout) - response.raise_for_status() - - return response.text - - path = Path(path_or_url) - - if not path.exists(): - raise FileNotFoundError(f"File not found: {path_or_url}") - - return path.read_text() diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py new file mode 100644 index 0000000..5c7a845 --- /dev/null +++ b/src/guidellm/utils/progress.py @@ -0,0 +1,196 @@ +from datetime import datetime +from typing import List + +from loguru import logger +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TaskID, + TaskProgressColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, +) + +__all__ = ["BenchmarkReportProgress"] + + +class BenchmarkReportProgress: + """ + Manages the progress display for benchmarks and report generation using Rich. + + This class provides a visual representation of the benchmarking process + and report generation using Rich's progress bars and panels. + """ + + def __init__(self): + """ + Initialize the BenchmarkReportProgress with default settings. + + This method sets up the progress displays for both individual benchmarks + and the overall report, as well as initializing internal task management + structures. + """ + logger.info("Initializing BenchmarkReportProgress instance") + + self.benchmarks_progress = Progress( + TextColumn("[{task.fields[start_time_str]}]"), + SpinnerColumn(), + TaskProgressColumn(), + TextColumn("{task.description}"), + TextColumn(" "), + TextColumn( + "[bold cyan]({task.fields[req_per_sec]} req/sec avg)[/bold cyan]" + ), + ) + self.benchmarks_panel = Panel( + self.benchmarks_progress, + title="Benchmarks", + title_align="left", + expand=True, + ) + self.report_progress = Progress( + SpinnerColumn(), + TextColumn("Generating report..."), + BarColumn(bar_width=None), + TextColumn( + "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})" + ), + TextColumn("["), + TimeElapsedColumn(), + TextColumn("<"), + TimeRemainingColumn(), + TextColumn("]"), + ) + self.render_group = Group(self.benchmarks_panel, self.report_progress) + self.live = Live(self.render_group, redirect_stdout=True, redirect_stderr=True) + + self.report_task: TaskID = None # type: ignore # noqa: PGH003 + self.benchmark_tasks: List[TaskID] = [] + self.benchmark_tasks_started: List[bool] = [] + self.benchmark_tasks_completed: List[bool] = [] + self.benchmark_tasks_progress: List[float] = [] + + def start(self, task_descriptions: List[str]) -> None: + """ + Starts the live progress display and initializes benchmark tasks. + + :param task_descriptions: List of descriptions for each benchmark task. 
+ :type task_descriptions: List[str] + """ + logger.info( + "Starting BenchmarkReportProgress with task descriptions: {}", + task_descriptions, + ) + self.live.start() + + for task_description in task_descriptions: + logger.debug("Adding task with description: {}", task_description) + task_id = self.benchmarks_progress.add_task( + task_description, + start=False, + total=None, + start_time_str="--:--:--", + req_per_sec="#.##", + ) + self.benchmark_tasks.append(task_id) + self.benchmark_tasks_started.append(False) + self.benchmark_tasks_completed.append(False) + self.benchmark_tasks_progress.append(0) + + self.report_task = self.report_progress.add_task( + "", + total=len(self.benchmark_tasks) * 100, # 100 points per report + completed_benchmarks=0, + total_benchmarks=len(task_descriptions), + ) + logger.info("Initialized {} benchmark tasks", len(task_descriptions)) + + def update_benchmark( + self, + index: int, + description: str, + completed: bool, + completed_count: int, + completed_total: int, + start_time: float, + req_per_sec: float, + ) -> None: + """ + Updates the progress of a specific benchmark task. + + :param index: Index of the benchmark task to update. + :type index: int + :param description: Description of the current benchmark task. + :type description: str + :param completed: Flag indicating if the benchmark is completed. + :type completed: bool + :param completed_count: Number of completed operations for the task. + :type completed_count: int + :param completed_total: Total number of operations for the task. + :type completed_total: int + :param start_time: Start time of the benchmark in timestamp format. + :type start_time: float + :param req_per_sec: Average requests per second. + :type req_per_sec: float + :raises ValueError: If trying to update a completed benchmark. + """ + if self.benchmark_tasks_completed[index]: + err = ValueError(f"Benchmark {index} already completed") + logger.error("Error updating benchmark: {}", err) + raise err + + if not self.benchmark_tasks_started[index]: + self.benchmark_tasks_started[index] = True + self.benchmarks_progress.start_task(self.benchmark_tasks[index]) + logger.info("Starting benchmark task at index {}", index) + + if completed: + self.benchmark_tasks_completed[index] = True + self.benchmark_tasks_progress[index] = 100 + self.benchmarks_progress.stop_task(self.benchmark_tasks[index]) + logger.info("Completed benchmark task at index {}", index) + + self.benchmark_tasks_progress[index] = completed_count / completed_total * 100 + self.benchmarks_progress.update( + self.benchmark_tasks[index], + description=description, + total=completed_total, + completed=completed_count if not completed else completed_total, + req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"), + start_time_str=datetime.fromtimestamp(start_time).strftime("%H:%M:%S") + if start_time + else "--:--:--", + ) + logger.debug( + "Updated benchmark task at index {}: {}% complete", + index, + self.benchmark_tasks_progress[index], + ) + self.report_progress.update( + self.report_task, + total=len(self.benchmark_tasks) * 100, + completed=sum(self.benchmark_tasks_progress), + completed_benchmarks=sum(self.benchmark_tasks_completed), + total_benchmarks=len(self.benchmark_tasks), + ) + + def finish(self) -> None: + """ + Marks the overall report task as finished and stops the live display. 
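A short sketch of driving BenchmarkReportProgress by hand, outside the Scheduler, to show the expected call order (start, repeated update_benchmark, finish); the two row labels are arbitrary:

import time

from guidellm.utils import BenchmarkReportProgress

progress = BenchmarkReportProgress()
progress.start(["synchronous", "constant@2 req/s"])  # one row per benchmark

start = time.time()
for count in range(1, 11):
    progress.update_benchmark(
        index=0,
        description="synchronous",
        completed=(count == 10),   # final call marks the row as done
        completed_count=count,
        completed_total=10,
        start_time=start,
        req_per_sec=count / max(time.time() - start, 1e-3),
    )
    time.sleep(0.1)

progress.finish()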
+ """ + logger.info("Finishing BenchmarkReportProgress") + self.report_progress.update( + self.report_task, + total=len(self.benchmark_tasks) * 100, + completed=len(self.benchmark_tasks) * 100, + completed_benchmarks=len(self.benchmark_tasks), + total_benchmarks=len(self.benchmark_tasks), + ) + self.report_progress.stop_task(self.report_task) + self.live.stop() + logger.info("BenchmarkReportProgress finished and live display stopped") diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py new file mode 100644 index 0000000..13a0dff --- /dev/null +++ b/src/guidellm/utils/text.py @@ -0,0 +1,455 @@ +import csv +import json +import re +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.parse import urlparse + +import ftfy +import requests +import yaml +from loguru import logger + +from guidellm.config import settings + +__all__ = [ + "clean_text", + "filter_text", + "is_path", + "is_path_like", + "is_url", + "load_text", + "load_text_lines", + "parse_text_objects", + "split_lines_by_punctuation", + "split_text", +] + + +NAME_TITLES = [ + "Mr.", + "Mrs.", + "Ms.", + "Dr.", + "Prof.", + "Jr.", + "Sr.", + "St.", + "Lt.", + "Col.", + "Gen.", + "Rep.", + "Sen.", + "Gov.", + "Pres.", +] +SENTENCE_REGEX = r'[^.!?]*[.!?]["\']?\s*(?=[A-Z])' +MAX_EXTENSION_LENGTH = 8 +MAX_PATH_LENGTH = 4096 +EXTENSION_TYPES = { + "csv": "csv", + "jsonl": "jsonl", + "json": "json", + "yaml": "yaml", + "yml": "yaml", + "txt": "txt", + "text": "txt", +} + + +def filter_text( + text: str, + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, +) -> str: + """ + Filter text by start and end strings or indices + + :param text: the text to filter + :param filter_start: the start string or index to filter from + :param filter_end: the end string or index to filter to + :return: the filtered text + """ + filter_start_index = -1 + filter_end_index = -1 + + if filter_start and isinstance(filter_start, str): + filter_start_index = text.index(filter_start) + elif filter_start: + if not isinstance(filter_start, int): + raise ValueError(f"Invalid filter start index: {filter_start}") + filter_start_index = filter_start + + if filter_end and isinstance(filter_end, str): + filter_end_index = text.index(filter_end) + elif filter_end: + if not isinstance(filter_end, int): + raise ValueError(f"Invalid filter end index: {filter_end}") + filter_end_index = filter_end + + if filter_start_index > -1: + text = text[filter_start_index:] + if filter_end_index > -1: + text = text[:filter_end_index] + + return text + + +def clean_text( + text: str, + fix_encoding: bool = True, + clean_whitespace: bool = False, + remove_empty_lines: bool = False, + force_new_line_punctuation: bool = False, +) -> str: + """ + Clean text by fixing encoding, cleaning whitespace, removing empty lines, + and forcing new line punctuation + + :param text: the text to clean + :param fix_encoding: True to fix the encoding of the text, False to leave as is + :param clean_whitespace: True to clean the whitespace in the text + (remove extra spaces, tabs, etc), False to leave as is + :param remove_empty_lines: True to remove empty lines from the text + (lines with only whitespace), False to leave as is + :param force_new_line_punctuation: True to force new lines at punctuation + (line ends in a period, exclamation point, or question mark), + False to leave as is + :return: The cleaned text + """ + + if fix_encoding: + text = ftfy.fix_text(text) + + if clean_whitespace: + text = 
"\n".join( + [re.sub(r"\s+", " ", line).strip() for line in text.splitlines()] + ) + + if remove_empty_lines: + text = "\n".join([line for line in text.splitlines() if line.strip()]) + + if force_new_line_punctuation: + # first remove any existing new lines + text = " ".join(line for line in text.splitlines() if line.strip()) + lines = split_lines_by_punctuation(text) + text = "\n".join(lines) + + return text + + +def split_lines_by_punctuation(text: str) -> List[str]: + """ + Split text into lines based on punctuation + + :param text: the text to split + :return: the list of lines + """ + + lines = [] + current_line = "" + skip_next = False + + for index, char in enumerate(text): + if skip_next: + skip_next = False + continue + + current_line += char + + if char not in [".", "!", "?"]: + # must match end of sentence punctuation + continue + + # if this is the character for a title, don't split + if any(current_line.endswith(title) for title in NAME_TITLES): + continue + + char_next_1 = text[index + 1] if index + 1 < len(text) else None + char_next_2 = text[index + 2] if index + 2 < len(text) else None + char_next_3 = text[index + 3] if index + 3 < len(text) else None + + next_is_space = char_next_1 and char_next_1.isspace() + next_is_quote_and_space = char_next_1 in ["'", '"'] and char_next_2 == " " + + # next character must be a space or a quote, otherwise skip + if not next_is_space and not next_is_quote_and_space: + continue + + # after this, next character must be an upper case letter + upper_char = char_next_3 if next_is_quote_and_space else char_next_2 + next_is_upper = upper_char and ( + upper_char.isupper() or upper_char in ["'", '"'] + ) + + if not next_is_upper: + continue + + # if next char is a quote, add it and skip next + if next_is_quote_and_space: + current_line += text[index + 1] + skip_next = True + + lines.append(current_line.strip()) + current_line = "" + + if current_line: + lines.append(current_line.strip()) + + return lines + + +def is_url(url: str) -> bool: + """ + Check if a string is a URL + + :param url: the string to check + :return: True if the string is a URL, False if not + """ + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except Exception: # noqa: BLE001 + return False + + +def is_path(path: Any) -> bool: + """ + Check if a string is a path + + :param path: the string to check + :return: True if the string is a path, False if not + """ + if not isinstance(path, (str, Path)): + return False + + if isinstance(path, str): + path = Path(path) + + return path.exists() + + +def is_path_like(path: Any, enforce_file: bool = False) -> bool: + """ + Check if a string has a path like structure where it doesn't need to exist + + :param path: the string to check + :param enforce_file: True if the path should be a file, False if not + :return: True if the string is path like, False if not + """ + # if path isn't a str or Path, it's not a path + if not isinstance(path, (str, Path)): + return False + + if isinstance(path, Path): + path = str(path) + + # if text is too long, it's not a path (4096 for most linux setups) + if len(path) > MAX_PATH_LENGTH: + return False + + # if it starts with a URL scheme, it's not a path + if path.startswith(("http", "ftp")): + return False + + test_path = Path(path) + + # if it's supposed to be a file and there's no extension or + # the extension is too long, it's not a path + return not enforce_file or ( + bool(test_path.suffix) and len(test_path.suffix) <= MAX_EXTENSION_LENGTH + ) + + +def split_text(text: 
str) -> Tuple[List[str], List[str], List[int]]: + """ + Split text into words / tokens, the white space separators between words, + and the indices for each new line + + :param text: the text to split + :return: the words, the white space separators, and the new line indices + """ + if not text or not text.strip(): + return [], [], [] + + text = text.strip() + tokens = [] # type: List[str] + separators = [] # type: List[str] + new_lines = [0] + buffer = text[0] + is_token = not text[0].isspace() + + for char in text[1:]: + char_whitespace = char.isspace() + + if char == "\n": + new_lines.append(len(tokens) + 1) + + if char_whitespace and is_token: + tokens.append(buffer) + buffer = char + is_token = False + elif char_whitespace: + buffer += char + elif not char_whitespace and not is_token: + separators.append(buffer) + buffer = char + is_token = True + else: + buffer += char + + if buffer and is_token: + tokens.append(buffer) + separators.append(" ") + elif buffer: + separators.append(buffer) + + return tokens, separators, new_lines + + +def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: + """ + Load an HTML file from a path or URL + + :param data: the path or URL to load the HTML file from + :type data: Union[str, Path] + :param encoding: the encoding to use when reading the file + :type encoding: str + :return: the HTML content + :rtype: str + """ + logger.debug("Loading text: {}", data) + + if not data: + return "" + + # check URLs + if isinstance(data, str) and data.startswith("http"): + response = requests.get(data, timeout=settings.request_timeout) + response.raise_for_status() + return response.text + + # check raw text + if isinstance(data, str) and not is_path_like(data, enforce_file=True): + return data + + # assume local file + if not isinstance(data, Path): + data = Path(data) + + if not data.exists(): + raise FileNotFoundError(f"File not found: {data}") + + if not data.is_file(): + raise IsADirectoryError(f"Path is a directory: {data}") + + return data.read_text(encoding=encoding) + + +def parse_text_objects(data: str, format_: str = "txt") -> List[Dict]: + """ + Parse text data into a list of dictionaries based on the format given + (csv, jsonl, json, yaml, txt). 
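The text helpers compose naturally; a small sketch using only functions defined in this file:

from guidellm.utils import (
    clean_text,
    is_url,
    parse_text_objects,
    split_lines_by_punctuation,
)

raw = "Dr.  Smith   ran the test. It passed! Ship it?"
text = clean_text(raw, clean_whitespace=True)
print(split_lines_by_punctuation(text))
# ['Dr. Smith ran the test.', 'It passed!', 'Ship it?']  (no split after the "Dr." title)

print(is_url("https://example.com/report"), is_url("not a url"))  # True False

print(parse_text_objects("alpha\nbeta\n", format_="txt"))
# [{'text': 'alpha'}, {'text': 'beta'}]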
+ + :param data: the text data to parse + :param format_: the format of the data to parse: + 'csv', 'jsonl', 'json', 'yaml', 'txt' + :return: the list of dictionaries parsed from the data, if text + then each line is a dictionary with a single key 'text' + """ + if not isinstance(data, str): + raise ValueError(f"Unsupported data given of type: {type(data)}") + + if format_ == "csv": + reader = csv.DictReader(data.splitlines()) + columns = reader.fieldnames + return [{col: row[col] for col in columns} for row in reader] # type: ignore # noqa: PGH003 + + if format_ == "jsonl": + return [json.loads(line) for line in data.splitlines() if line] + + if format_ in ("json", "yaml"): + data = json.loads(data) if format_ == "json" else yaml.safe_load(data) + + if not data: + return [] + + if isinstance(data, dict) and len(data) == 1: + logger.debug("Getting first value from JSON/YAML object: {}", data) + data = list(data.values())[0] + elif isinstance(data, dict): + logger.debug("Converting JSON/YAML object to list: {}", data) + data = list(data.values()) + + if not isinstance(data, list) or not isinstance(data[0], dict): + raise ValueError(f"Unsupported data structure given: {data}") + + return data + + if format_ == "txt": + return [{"text": line} for line in data.splitlines() if line] + + raise ValueError(f"Unsupported format given: {format_}") + + +def load_text_lines( + data: Union[str, Path, List[Dict]], + format_: Optional[str] = None, + filters: Optional[List[str]] = None, + encoding: Optional[str] = None, +) -> List[str]: + """ + Load text lines from a file or data object with optional filtering and formatting. + + + :param data: the data to load the text lines from + :param format_: the format of the data to load, if not provided will be inferred. + Supported formats: 'csv', 'jsonl', 'json', 'yaml', 'txt' + :param filters: the keys to filter the data by when loading in order of preference. + If not provided, will use the first key in the data object. + :param encoding: the encoding to use when reading the file + :return: the list of text lines + """ + logger.debug( + "Loading text lines with format {}, filters {}, encoding {} for data: {}", + format_, + filters, + encoding, + data, + ) + + if not data: + return [] + + if not format_ and isinstance(data, (str, Path)) and "." 
in str(data): + extension = str(data).split(".")[-1] + format_ = EXTENSION_TYPES.get(extension, "txt") + elif not format_: + format_ = "txt" + + # load the data if it's a path or URL + if isinstance(data, Path) or (isinstance(data, str) and data.startswith("http")): + data = load_text(data, encoding=encoding) + data = clean_text(data) + + # parse the data into a list of dictionaries based on the format + if isinstance(data, str): + data = parse_text_objects(data, format_) + + if not isinstance(data, list): + raise ValueError(f"Unsupported data given of type: {type(data)}") + + if not isinstance(data[0], dict): + raise ValueError(f"Unsupported data item type given: {type(data[0])}") + + # grab the first available filter key to use if preference order as provided + filter_ = list(data[0].keys())[0] + for filt in filters or []: + if filt not in data[0]: + continue + + filter_ = filt + break + + # extract the lines from the data + return [row[filter_] for row in data] if filter_ else [str(row) for row in data] diff --git a/src/guidellm/utils/transformers.py b/src/guidellm/utils/transformers.py new file mode 100644 index 0000000..5405729 --- /dev/null +++ b/src/guidellm/utils/transformers.py @@ -0,0 +1,151 @@ +from pathlib import Path +from typing import List, Optional, Union + +from datasets import ( # type: ignore # noqa: PGH003 + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + load_dataset, +) +from loguru import logger + +from guidellm.config import settings + +__all__ = [ + "load_transformers_dataset", + "resolve_transformers_dataset", + "resolve_transformers_dataset_column", + "resolve_transformers_dataset_split", +] + + +def load_transformers_dataset( + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], + split: Optional[str] = None, + preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, + **kwargs, +) -> Union[Dataset, IterableDataset]: + """ + Load a dataset from a file or a script and resolve the preferred split. + + :param dataset: the dataset file or script to load + :param split: the dataset split to use + (overrides preferred_splits, must be in dataset) + :param preferred_splits: the preferred dataset splits to use + :param kwargs: additional keyword arguments to pass to the dataset loader + :return: the loaded dataset + """ + dataset = resolve_transformers_dataset(dataset, **kwargs) + + return resolve_transformers_dataset_split(dataset, split, preferred_splits) + + +def resolve_transformers_dataset( + dataset: Union[ + str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset + ], + **kwargs, +) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: + """ + Resolve the dataset from a file (csv, json, script) or a dataset name. 
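And a sketch of load_text_lines resolving the prompt field from structured data; the dataset id in the commented line is purely hypothetical:

from guidellm.utils import load_text_lines, load_transformers_dataset

jsonl = '{"prompt": "hello", "label": 1}\n{"prompt": "world", "label": 2}'
print(load_text_lines(jsonl, format_="jsonl", filters=["prompt"]))
# ['hello', 'world']

# Hypothetical Hugging Face dataset id; a preferred split is resolved automatically.
# dataset = load_transformers_dataset("org/some-dataset", split="validation")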
+ + :param dataset: the dataset file or script to load + :param kwargs: additional keyword arguments to pass to the dataset loader + :return: the loaded dataset + """ + if isinstance( + dataset, (DatasetDict, Dataset, IterableDatasetDict, IterableDataset) + ): + return dataset + + if not isinstance(dataset, (str, Path)): + raise ValueError(f"Invalid dataset type: {type(dataset)}") + + dataset = str(dataset) + + if dataset.endswith((".csv", ".json")): + logger.debug("Loading dataset from local path: {}", dataset) + extension = dataset.split(".")[-1] + + return load_dataset(extension, data_files=dataset, **kwargs) + + if dataset.endswith(".py"): + logger.debug("Loading dataset from local script: {}", dataset) + + return load_dataset(dataset, **kwargs) + + logger.debug("Loading dataset: {}", dataset) + + return load_dataset(dataset, **kwargs) + + +def resolve_transformers_dataset_split( + dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], + split: Optional[str] = None, + preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, +) -> Union[Dataset, IterableDataset]: + """ + Resolve the preferred split from a dataset dictionary. + + :param dataset: the dataset to resolve the split from + :param split: the dataset split to use + (overrides preferred_splits, must be in dataset) + :param preferred_splits: the preferred dataset splits to use + :return: the resolved dataset split + """ + if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): + logger.debug("Dataset is not a dictionary, using default split") + return dataset + + if split: + if split not in dataset: + raise ValueError(f"Split '{split}' not found in dataset") + + return dataset[split] + + if preferred_splits: + for spl in preferred_splits: + if spl not in dataset: + continue + return dataset[spl] + + return list(dataset.values())[0] + + +def resolve_transformers_dataset_column( + dataset: Union[Dataset, IterableDataset], + column: Optional[str] = None, + preferred_columns: Optional[List[str]] = settings.dataset.preferred_data_columns, +) -> str: + """ + Resolve the preferred column from a dataset. + + :param dataset: the dataset to resolve the column from + :param column: the dataset column to use + (overrides preferred_columns, must be in dataset) + :param preferred_columns: the preferred dataset columns to use + :return: the resolved dataset column + """ + column_names = dataset.column_names + + if not column_names: + # grab from the first item + first_item = next(iter(dataset)) + column_names = list(first_item.keys()) + + if column: + if column not in column_names: + raise ValueError(f"Column '{column}' not found in dataset") + + return column + + if preferred_columns: + for col in preferred_columns: + if col not in column_names: + continue + return col + + return list(column_names)[0] diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 3fbe8a6..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Callable, Optional - -import pytest -from guidellm.backend import Backend, BackendEngine, OpenAIBackend -from guidellm.config import settings -from loguru import logger - - -def pytest_configure() -> None: - logger.disable("guidellm") - - -@pytest.fixture() -def openai_backend_factory() -> Callable[..., OpenAIBackend]: - """ - OpenAI Backend factory method. - Call without provided arguments returns default Backend service. 
- """ - - def inner_wrapper(*_, base_url: Optional[str] = None, **kwargs) -> OpenAIBackend: - defaults = { - "backend_type": BackendEngine.OPENAI_SERVER, - "openai_api_key": "required but not used", - "target": base_url or settings.openai.base_url, - } - - defaults.update(kwargs) - - return Backend.create(**defaults) # type: ignore - - return inner_wrapper diff --git a/tests/dummy/data/pride_and_prejudice.txt b/tests/dummy/data/pride_and_prejudice.txt new file mode 100644 index 0000000..3b93b50 --- /dev/null +++ b/tests/dummy/data/pride_and_prejudice.txt @@ -0,0 +1,2015 @@ +*** START OF THE PROJECT GUTENBERG EBOOK 1342 *** + + PAGE + +Frontispiece iv + +Title-page v + +Dedication vii + +Heading to Preface ix + +Heading to List of Illustrations xxv + +Heading to Chapter I. 1 + +“He came down to see the place” 2 + +Mr. and Mrs. Bennet 5 + +“I hope Mr. Bingley will like it” 6 + +“I’m the tallest” 9 + +“He rode a black horse” 10 + +“When the party entered” 12 + +“She is tolerable” 15 + +Heading to Chapter IV. 18 + +Heading to Chapter V. 22 + +“Without once opening his lips” 24 + +Tailpiece to Chapter V. 26 + +Heading to Chapter VI. 27 + +“The entreaties of several” 31 + +“A note for Miss Bennet” 36 + +“Cheerful prognostics” 40 + +“The apothecary came” 43 + +“Covering a screen” 45 + +“Mrs. Bennet and her two youngest girls” 53 + +Heading to Chapter X. 60 + +“No, no; stay where you are” 67 + +“Piling up the fire” 69 + +Heading to Chapter XII. 75 + +Heading to Chapter XIII. 78 + +Heading to Chapter XIV. 84 + +“Protested that he never read novels” 87 + +Heading to Chapter XV. 89 + +Heading to Chapter XVI. 95 + +“The officers of the ----shire” 97 + +“Delighted to see their dear friend again” 108 + +Heading to Chapter XVIII. 113 + +“Such very superior dancing is not often seen” 118 + +“To assure you in the most animated language” 132 + +Heading to Chapter XX. 139 + +“They entered the breakfast-room” 143 + +Heading to Chapter XXI. 146 + +“Walked back with them” 148 + +Heading to Chapter XXII. 154 + +“So much love and eloquence” 156 + +“Protested he must be entirely mistaken” 161 + +“Whenever she spoke in a low voice” 166 + +Heading to Chapter XXIV. 168 + +Heading to Chapter XXV. 175 + +“Offended two or three young ladies” 177 + +“Will you come and see me?” 181 + +“On the stairs” 189 + +“At the door” 194 + +“In conversation with the ladies” 198 + +“Lady Catherine,” said she, “you have given me a treasure” 200 + +Heading to Chapter XXX. 209 + +“He never failed to inform them” 211 + +“The gentlemen accompanied him” 213 + +Heading to Chapter XXXI. 215 + +Heading to Chapter XXXII. 221 + +“Accompanied by their aunt” 225 + +“On looking up” 228 + +Heading to Chapter XXXIV. 235 + +“Hearing herself called” 243 + +Heading to Chapter XXXVI. 253 + +“Meeting accidentally in town” 256 + +“His parting obeisance” 261 + +“Dawson” 263 + +“The elevation of his feelings” 267 + +“They had forgotten to leave any message” 270 + +“How nicely we are crammed in!” 272 + +Heading to Chapter XL. 278 + +“I am determined never to speak of it again” 283 + +“When Colonel Miller’s regiment went away” 285 + +“Tenderly flirting” 290 + +The arrival of the Gardiners 294 + +“Conjecturing as to the date” 301 + +Heading to Chapter XLIV. 318 + +“To make herself agreeable to all” 321 + +“Engaged by the river” 327 + +Heading to Chapter XLVI. 334 + +“I have not an instant to lose” 339 + +“The first pleasing earnest of their welcome” 345 + +The Post 359 + +“To whom I have related the affair” 363 + +Heading to Chapter XLIX. 
368 + +“But perhaps you would like to read it” 370 + +“The spiteful old ladies” 377 + +“With an affectionate smile” 385 + +“I am sure she did not listen” 393 + +“Mr. Darcy with him” 404 + +“Jane happened to look round” 415 + +“Mrs. Long and her nieces” 420 + +“Lizzy, my dear, I want to speak to you” 422 + +Heading to Chapter LVI. 431 + +“After a short survey” 434 + +“But now it comes out” 442 + +“The efforts of his aunt” 448 + +“Unable to utter a syllable” 457 + +“The obsequious civility” 466 + +Heading to Chapter LXI. 472 + +The End 476 + + + + +[Illustration: ·PRIDE AND PREJUDICE· + + + + +Chapter I.] + + +It is a truth universally acknowledged, that a single man in possession +of a good fortune must be in want of a wife. + +However little known the feelings or views of such a man may be on his +first entering a neighbourhood, this truth is so well fixed in the minds +of the surrounding families, that he is considered as the rightful +property of some one or other of their daughters. + +“My dear Mr. Bennet,” said his lady to him one day, “have you heard that +Netherfield Park is let at last?” + +Mr. Bennet replied that he had not. + +“But it is,” returned she; “for Mrs. Long has just been here, and she +told me all about it.” + +Mr. Bennet made no answer. + +“Do not you want to know who has taken it?” cried his wife, impatiently. + +“_You_ want to tell me, and I have no objection to hearing it.” + +[Illustration: + +“He came down to see the place” + +[_Copyright 1894 by George Allen._]] + +This was invitation enough. + +“Why, my dear, you must know, Mrs. Long says that Netherfield is taken +by a young man of large fortune from the north of England; that he came +down on Monday in a chaise and four to see the place, and was so much +delighted with it that he agreed with Mr. Morris immediately; that he is +to take possession before Michaelmas, and some of his servants are to be +in the house by the end of next week.” + +“What is his name?” + +“Bingley.” + +“Is he married or single?” + +“Oh, single, my dear, to be sure! A single man of large fortune; four or +five thousand a year. What a fine thing for our girls!” + +“How so? how can it affect them?” + +“My dear Mr. Bennet,” replied his wife, “how can you be so tiresome? You +must know that I am thinking of his marrying one of them.” + +“Is that his design in settling here?” + +“Design? Nonsense, how can you talk so! But it is very likely that he +_may_ fall in love with one of them, and therefore you must visit him as +soon as he comes.” + +“I see no occasion for that. You and the girls may go--or you may send +them by themselves, which perhaps will be still better; for as you are +as handsome as any of them, Mr. Bingley might like you the best of the +party.” + +“My dear, you flatter me. I certainly _have_ had my share of beauty, but +I do not pretend to be anything extraordinary now. When a woman has five +grown-up daughters, she ought to give over thinking of her own beauty.” + +“In such cases, a woman has not often much beauty to think of.” + +“But, my dear, you must indeed go and see Mr. Bingley when he comes into +the neighbourhood.” + +“It is more than I engage for, I assure you.” + +“But consider your daughters. Only think what an establishment it would +be for one of them. Sir William and Lady Lucas are determined to go, +merely on that account; for in general, you know, they visit no new +comers. Indeed you must go, for it will be impossible for _us_ to visit +him, if you do not.” + +“You are over scrupulous, surely. 
I dare say Mr. Bingley will be very +glad to see you; and I will send a few lines by you to assure him of my +hearty consent to his marrying whichever he chooses of the girls--though +I must throw in a good word for my little Lizzy.” + +“I desire you will do no such thing. Lizzy is not a bit better than the +others: and I am sure she is not half so handsome as Jane, nor half so +good-humoured as Lydia. But you are always giving _her_ the preference.” + +“They have none of them much to recommend them,” replied he: “they are +all silly and ignorant like other girls; but Lizzy has something more of +quickness than her sisters.” + +“Mr. Bennet, how can you abuse your own children in such a way? You take +delight in vexing me. You have no compassion on my poor nerves.” + +“You mistake me, my dear. I have a high respect for your nerves. They +are my old friends. I have heard you mention them with consideration +these twenty years at least.” + +“Ah, you do not know what I suffer.” + +“But I hope you will get over it, and live to see many young men of four +thousand a year come into the neighbourhood.” + +“It will be no use to us, if twenty such should come, since you will not +visit them.” + +“Depend upon it, my dear, that when there are twenty, I will visit them +all.” + +Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, +reserve, and caprice, that the experience of three-and-twenty years had +been insufficient to make his wife understand his character. _Her_ mind +was less difficult to develope. She was a woman of mean understanding, +little information, and uncertain temper. When she was discontented, she +fancied herself nervous. The business of her life was to get her +daughters married: its solace was visiting and news. + +[Illustration: M^{r.} & M^{rs.} Bennet + +[_Copyright 1894 by George Allen._]] + + + + +[Illustration: + +“I hope Mr. Bingley will like it” + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER II. + + +[Illustration] + +Mr. Bennet was among the earliest of those who waited on Mr. Bingley. He +had always intended to visit him, though to the last always assuring his +wife that he should not go; and till the evening after the visit was +paid she had no knowledge of it. It was then disclosed in the following +manner. Observing his second daughter employed in trimming a hat, he +suddenly addressed her with,-- + +“I hope Mr. Bingley will like it, Lizzy.” + +“We are not in a way to know _what_ Mr. Bingley likes,” said her mother, +resentfully, “since we are not to visit.” + +“But you forget, mamma,” said Elizabeth, “that we shall meet him at the +assemblies, and that Mrs. Long has promised to introduce him.” + +“I do not believe Mrs. Long will do any such thing. She has two nieces +of her own. She is a selfish, hypocritical woman, and I have no opinion +of her.” + +“No more have I,” said Mr. Bennet; “and I am glad to find that you do +not depend on her serving you.” + +Mrs. Bennet deigned not to make any reply; but, unable to contain +herself, began scolding one of her daughters. + +“Don’t keep coughing so, Kitty, for heaven’s sake! Have a little +compassion on my nerves. You tear them to pieces.” + +“Kitty has no discretion in her coughs,” said her father; “she times +them ill.” + +“I do not cough for my own amusement,” replied Kitty, fretfully. “When +is your next ball to be, Lizzy?” + +“To-morrow fortnight.” + +“Ay, so it is,” cried her mother, “and Mrs. 
Long does not come back till +the day before; so, it will be impossible for her to introduce him, for +she will not know him herself.” + +“Then, my dear, you may have the advantage of your friend, and introduce +Mr. Bingley to _her_.” + +“Impossible, Mr. Bennet, impossible, when I am not acquainted with him +myself; how can you be so teasing?” + +“I honour your circumspection. A fortnight’s acquaintance is certainly +very little. One cannot know what a man really is by the end of a +fortnight. But if _we_ do not venture, somebody else will; and after +all, Mrs. Long and her nieces must stand their chance; and, therefore, +as she will think it an act of kindness, if you decline the office, I +will take it on myself.” + +The girls stared at their father. Mrs. Bennet said only, “Nonsense, +nonsense!” + +“What can be the meaning of that emphatic exclamation?” cried he. “Do +you consider the forms of introduction, and the stress that is laid on +them, as nonsense? I cannot quite agree with you _there_. What say you, +Mary? For you are a young lady of deep reflection, I know, and read +great books, and make extracts.” + +Mary wished to say something very sensible, but knew not how. + +“While Mary is adjusting her ideas,” he continued, “let us return to Mr. +Bingley.” + +“I am sick of Mr. Bingley,” cried his wife. + +“I am sorry to hear _that_; but why did you not tell me so before? If I +had known as much this morning, I certainly would not have called on +him. It is very unlucky; but as I have actually paid the visit, we +cannot escape the acquaintance now.” + +The astonishment of the ladies was just what he wished--that of Mrs. +Bennet perhaps surpassing the rest; though when the first tumult of joy +was over, she began to declare that it was what she had expected all the +while. + +“How good it was in you, my dear Mr. Bennet! But I knew I should +persuade you at last. I was sure you loved your girls too well to +neglect such an acquaintance. Well, how pleased I am! And it is such a +good joke, too, that you should have gone this morning, and never said a +word about it till now.” + +“Now, Kitty, you may cough as much as you choose,” said Mr. Bennet; and, +as he spoke, he left the room, fatigued with the raptures of his wife. + +“What an excellent father you have, girls,” said she, when the door was +shut. “I do not know how you will ever make him amends for his kindness; +or me either, for that matter. At our time of life, it is not so +pleasant, I can tell you, to be making new acquaintances every day; but +for your sakes we would do anything. Lydia, my love, though you _are_ +the youngest, I dare say Mr. Bingley will dance with you at the next +ball.” + +“Oh,” said Lydia, stoutly, “I am not afraid; for though I _am_ the +youngest, I’m the tallest.” + +The rest of the evening was spent in conjecturing how soon he would +return Mr. Bennet’s visit, and determining when they should ask him to +dinner. + +[Illustration: “I’m the tallest”] + + + + +[Illustration: + + “He rode a black horse” +] + + + + +CHAPTER III. + + +[Illustration] + +Not all that Mrs. Bennet, however, with the assistance of her five +daughters, could ask on the subject, was sufficient to draw from her +husband any satisfactory description of Mr. Bingley. They attacked him +in various ways, with barefaced questions, ingenious suppositions, and +distant surmises; but he eluded the skill of them all; and they were at +last obliged to accept the second-hand intelligence of their neighbour, +Lady Lucas. 
Her report was highly favourable. Sir William had been +delighted with him. He was quite young, wonderfully handsome, extremely +agreeable, and, to crown the whole, he meant to be at the next assembly +with a large party. Nothing could be more delightful! To be fond of +dancing was a certain step towards falling in love; and very lively +hopes of Mr. Bingley’s heart were entertained. + +“If I can but see one of my daughters happily settled at Netherfield,” +said Mrs. Bennet to her husband, “and all the others equally well +married, I shall have nothing to wish for.” + +In a few days Mr. Bingley returned Mr. Bennet’s visit, and sat about ten +minutes with him in his library. He had entertained hopes of being +admitted to a sight of the young ladies, of whose beauty he had heard +much; but he saw only the father. The ladies were somewhat more +fortunate, for they had the advantage of ascertaining, from an upper +window, that he wore a blue coat and rode a black horse. + +An invitation to dinner was soon afterwards despatched; and already had +Mrs. Bennet planned the courses that were to do credit to her +housekeeping, when an answer arrived which deferred it all. Mr. Bingley +was obliged to be in town the following day, and consequently unable to +accept the honour of their invitation, etc. Mrs. Bennet was quite +disconcerted. She could not imagine what business he could have in town +so soon after his arrival in Hertfordshire; and she began to fear that +he might always be flying about from one place to another, and never +settled at Netherfield as he ought to be. Lady Lucas quieted her fears a +little by starting the idea of his + +[Illustration: + + “When the Party entered” + +[_Copyright 1894 by George Allen._]] + +being gone to London only to get a large party for the ball; and a +report soon followed that Mr. Bingley was to bring twelve ladies and +seven gentlemen with him to the assembly. The girls grieved over such a +number of ladies; but were comforted the day before the ball by hearing +that, instead of twelve, he had brought only six with him from London, +his five sisters and a cousin. And when the party entered the +assembly-room, it consisted of only five altogether: Mr. Bingley, his +two sisters, the husband of the eldest, and another young man. + +Mr. Bingley was good-looking and gentlemanlike: he had a pleasant +countenance, and easy, unaffected manners. His sisters were fine women, +with an air of decided fashion. His brother-in-law, Mr. Hurst, merely +looked the gentleman; but his friend Mr. Darcy soon drew the attention +of the room by his fine, tall person, handsome features, noble mien, and +the report, which was in general circulation within five minutes after +his entrance, of his having ten thousand a year. The gentlemen +pronounced him to be a fine figure of a man, the ladies declared he was +much handsomer than Mr. Bingley, and he was looked at with great +admiration for about half the evening, till his manners gave a disgust +which turned the tide of his popularity; for he was discovered to be +proud, to be above his company, and above being pleased; and not all his +large estate in Derbyshire could save him from having a most forbidding, +disagreeable countenance, and being unworthy to be compared with his +friend. + +Mr. Bingley had soon made himself acquainted with all the principal +people in the room: he was lively and unreserved, danced every dance, +was angry that the ball closed so early, and talked of giving one +himself at Netherfield. 
Such amiable qualities must speak for +themselves. What a contrast between him and his friend! Mr. Darcy danced +only once with Mrs. Hurst and once with Miss Bingley, declined being +introduced to any other lady, and spent the rest of the evening in +walking about the room, speaking occasionally to one of his own party. +His character was decided. He was the proudest, most disagreeable man in +the world, and everybody hoped that he would never come there again. +Amongst the most violent against him was Mrs. Bennet, whose dislike of +his general behaviour was sharpened into particular resentment by his +having slighted one of her daughters. + +Elizabeth Bennet had been obliged, by the scarcity of gentlemen, to sit +down for two dances; and during part of that time, Mr. Darcy had been +standing near enough for her to overhear a conversation between him and +Mr. Bingley, who came from the dance for a few minutes to press his +friend to join it. + +“Come, Darcy,” said he, “I must have you dance. I hate to see you +standing about by yourself in this stupid manner. You had much better +dance.” + +“I certainly shall not. You know how I detest it, unless I am +particularly acquainted with my partner. At such an assembly as this, it +would be insupportable. Your sisters are engaged, and there is not +another woman in the room whom it would not be a punishment to me to +stand up with.” + +“I would not be so fastidious as you are,” cried Bingley, “for a +kingdom! Upon my honour, I never met with so many pleasant girls in my +life as I have this evening; and there are several of them, you see, +uncommonly pretty.” + +“_You_ are dancing with the only handsome girl in the room,” said Mr. +Darcy, looking at the eldest Miss Bennet. + +“Oh, she is the most beautiful creature I ever beheld! But there is one +of her sisters sitting down just behind you, who is very pretty, and I +dare say very agreeable. Do let me ask my partner to introduce you.” + +[Illustration: + +“She is tolerable” + +[_Copyright 1894 by George Allen._]] + +“Which do you mean?” and turning round, he looked for a moment at +Elizabeth, till, catching her eye, he withdrew his own, and coldly said, +“She is tolerable: but not handsome enough to tempt _me_; and I am in no +humour at present to give consequence to young ladies who are slighted +by other men. You had better return to your partner and enjoy her +smiles, for you are wasting your time with me.” + +Mr. Bingley followed his advice. Mr. Darcy walked off; and Elizabeth +remained with no very cordial feelings towards him. She told the story, +however, with great spirit among her friends; for she had a lively, +playful disposition, which delighted in anything ridiculous. + +The evening altogether passed off pleasantly to the whole family. Mrs. +Bennet had seen her eldest daughter much admired by the Netherfield +party. Mr. Bingley had danced with her twice, and she had been +distinguished by his sisters. Jane was as much gratified by this as her +mother could be, though in a quieter way. Elizabeth felt Jane’s +pleasure. Mary had heard herself mentioned to Miss Bingley as the most +accomplished girl in the neighbourhood; and Catherine and Lydia had been +fortunate enough to be never without partners, which was all that they +had yet learnt to care for at a ball. They returned, therefore, in good +spirits to Longbourn, the village where they lived, and of which they +were the principal inhabitants. They found Mr. Bennet still up. 
With a +book, he was regardless of time; and on the present occasion he had a +good deal of curiosity as to the event of an evening which had raised +such splendid expectations. He had rather hoped that all his wife’s +views on the stranger would be disappointed; but he soon found that he +had a very different story to hear. + +“Oh, my dear Mr. Bennet,” as she entered the room, “we have had a most +delightful evening, a most excellent ball. I wish you had been there. +Jane was so admired, nothing could be like it. Everybody said how well +she looked; and Mr. Bingley thought her quite beautiful, and danced with +her twice. Only think of _that_, my dear: he actually danced with her +twice; and she was the only creature in the room that he asked a second +time. First of all, he asked Miss Lucas. I was so vexed to see him stand +up with her; but, however, he did not admire her at all; indeed, nobody +can, you know; and he seemed quite struck with Jane as she was going +down the dance. So he inquired who she was, and got introduced, and +asked her for the two next. Then, the two third he danced with Miss +King, and the two fourth with Maria Lucas, and the two fifth with Jane +again, and the two sixth with Lizzy, and the _Boulanger_----” + +“If he had had any compassion for _me_,” cried her husband impatiently, +“he would not have danced half so much! For God’s sake, say no more of +his partners. O that he had sprained his ancle in the first dance!” + +“Oh, my dear,” continued Mrs. Bennet, “I am quite delighted with him. He +is so excessively handsome! and his sisters are charming women. I never +in my life saw anything more elegant than their dresses. I dare say the +lace upon Mrs. Hurst’s gown----” + +Here she was interrupted again. Mr. Bennet protested against any +description of finery. She was therefore obliged to seek another branch +of the subject, and related, with much bitterness of spirit, and some +exaggeration, the shocking rudeness of Mr. Darcy. + +“But I can assure you,” she added, “that Lizzy does not lose much by not +suiting _his_ fancy; for he is a most disagreeable, horrid man, not at +all worth pleasing. So high and so conceited, that there was no enduring +him! He walked here, and he walked there, fancying himself so very +great! Not handsome enough to dance with! I wish you had been there, my +dear, to have given him one of your set-downs. I quite detest the man.” + + + + +[Illustration] + + + + +CHAPTER IV. + + +[Illustration] + +When Jane and Elizabeth were alone, the former, who had been cautious in +her praise of Mr. Bingley before, expressed to her sister how very much +she admired him. + +“He is just what a young-man ought to be,” said she, “sensible, +good-humoured, lively; and I never saw such happy manners! so much ease, +with such perfect good breeding!” + +“He is also handsome,” replied Elizabeth, “which a young man ought +likewise to be if he possibly can. His character is thereby complete.” + +“I was very much flattered by his asking me to dance a second time. I +did not expect such a compliment.” + +“Did not you? _I_ did for you. But that is one great difference between +us. Compliments always take _you_ by surprise, and _me_ never. What +could be more natural than his asking you again? He could not help +seeing that you were about five times as pretty as every other woman in +the room. No thanks to his gallantry for that. Well, he certainly is +very agreeable, and I give you leave to like him. 
You have liked many a +stupider person.” + +“Dear Lizzy!” + +“Oh, you are a great deal too apt, you know, to like people in general. +You never see a fault in anybody. All the world are good and agreeable +in your eyes. I never heard you speak ill of a human being in my life.” + +“I would wish not to be hasty in censuring anyone; but I always speak +what I think.” + +“I know you do: and it is _that_ which makes the wonder. With _your_ +good sense, to be so honestly blind to the follies and nonsense of +others! Affectation of candour is common enough; one meets with it +everywhere. But to be candid without ostentation or design,--to take the +good of everybody’s character and make it still better, and say nothing +of the bad,--belongs to you alone. And so, you like this man’s sisters, +too, do you? Their manners are not equal to his.” + +“Certainly not, at first; but they are very pleasing women when you +converse with them. Miss Bingley is to live with her brother, and keep +his house; and I am much mistaken if we shall not find a very charming +neighbour in her.” + +Elizabeth listened in silence, but was not convinced: their behaviour at +the assembly had not been calculated to please in general; and with more +quickness of observation and less pliancy of temper than her sister, and +with a judgment, too, unassailed by any attention to herself, she was +very little disposed to approve them. They were, in fact, very fine +ladies; not deficient in good-humour when they were pleased, nor in the +power of being agreeable where they chose it; but proud and conceited. +They were rather handsome; had been educated in one of the first private +seminaries in town; had a fortune of twenty thousand pounds; were in the +habit of spending more than they ought, and of associating with people +of rank; and were, therefore, in every respect entitled to think well of +themselves and meanly of others. They were of a respectable family in +the north of England; a circumstance more deeply impressed on their +memories than that their brother’s fortune and their own had been +acquired by trade. + +Mr. Bingley inherited property to the amount of nearly a hundred +thousand pounds from his father, who had intended to purchase an estate, +but did not live to do it. Mr. Bingley intended it likewise, and +sometimes made choice of his county; but, as he was now provided with a +good house and the liberty of a manor, it was doubtful to many of those +who best knew the easiness of his temper, whether he might not spend the +remainder of his days at Netherfield, and leave the next generation to +purchase. + +His sisters were very anxious for his having an estate of his own; but +though he was now established only as a tenant, Miss Bingley was by no +means unwilling to preside at his table; nor was Mrs. Hurst, who had +married a man of more fashion than fortune, less disposed to consider +his house as her home when it suited her. Mr. Bingley had not been of +age two years when he was tempted, by an accidental recommendation, to +look at Netherfield House. He did look at it, and into it, for half an +hour; was pleased with the situation and the principal rooms, satisfied +with what the owner said in its praise, and took it immediately. + +Between him and Darcy there was a very steady friendship, in spite of a +great opposition of character. 
Bingley was endeared to Darcy by the +easiness, openness, and ductility of his temper, though no disposition +could offer a greater contrast to his own, and though with his own he +never appeared dissatisfied. On the strength of Darcy’s regard, Bingley +had the firmest reliance, and of his judgment the highest opinion. In +understanding, Darcy was the superior. Bingley was by no means +deficient; but Darcy was clever. He was at the same time haughty, +reserved, and fastidious; and his manners, though well bred, were not +inviting. In that respect his friend had greatly the advantage. Bingley +was sure of being liked wherever he appeared; Darcy was continually +giving offence. + +The manner in which they spoke of the Meryton assembly was sufficiently +characteristic. Bingley had never met with pleasanter people or prettier +girls in his life; everybody had been most kind and attentive to him; +there had been no formality, no stiffness; he had soon felt acquainted +with all the room; and as to Miss Bennet, he could not conceive an angel +more beautiful. Darcy, on the contrary, had seen a collection of people +in whom there was little beauty and no fashion, for none of whom he had +felt the smallest interest, and from none received either attention or +pleasure. Miss Bennet he acknowledged to be pretty; but she smiled too +much. + +Mrs. Hurst and her sister allowed it to be so; but still they admired +her and liked her, and pronounced her to be a sweet girl, and one whom +they should not object to know more of. Miss Bennet was therefore +established as a sweet girl; and their brother felt authorized by such +commendation to think of her as he chose. + + + + +[Illustration: [_Copyright 1894 by George Allen._]] + + + + +CHAPTER V. + + +[Illustration] + +Within a short walk of Longbourn lived a family with whom the Bennets +were particularly intimate. Sir William Lucas had been formerly in trade +in Meryton, where he had made a tolerable fortune, and risen to the +honour of knighthood by an address to the king during his mayoralty. The +distinction had, perhaps, been felt too strongly. It had given him a +disgust to his business and to his residence in a small market town; +and, quitting them both, he had removed with his family to a house about +a mile from Meryton, denominated from that period Lucas Lodge; where he +could think with pleasure of his own importance, and, unshackled by +business, occupy himself solely in being civil to all the world. For, +though elated by his rank, it did not render him supercilious; on the +contrary, he was all attention to everybody. By nature inoffensive, +friendly, and obliging, his presentation at St. James’s had made him +courteous. + +Lady Lucas was a very good kind of woman, not too clever to be a +valuable neighbour to Mrs. Bennet. They had several children. The eldest +of them, a sensible, intelligent young woman, about twenty-seven, was +Elizabeth’s intimate friend. + +That the Miss Lucases and the Miss Bennets should meet to talk over a +ball was absolutely necessary; and the morning after the assembly +brought the former to Longbourn to hear and to communicate. + +“_You_ began the evening well, Charlotte,” said Mrs. Bennet, with civil +self-command, to Miss Lucas. “_You_ were Mr. Bingley’s first choice.” + +“Yes; but he seemed to like his second better.” + +“Oh, you mean Jane, I suppose, because he danced with her twice. 
To be +sure that _did_ seem as if he admired her--indeed, I rather believe he +_did_--I heard something about it--but I hardly know what--something +about Mr. Robinson.” + +“Perhaps you mean what I overheard between him and Mr. Robinson: did not +I mention it to you? Mr. Robinson’s asking him how he liked our Meryton +assemblies, and whether he did not think there were a great many pretty +women in the room, and _which_ he thought the prettiest? and his +answering immediately to the last question, ‘Oh, the eldest Miss Bennet, +beyond a doubt: there cannot be two opinions on that point.’” + +“Upon my word! Well, that was very decided, indeed--that does seem as +if--but, however, it may all come to nothing, you know.” + +“_My_ overhearings were more to the purpose than _yours_, Eliza,” said +Charlotte. “Mr. Darcy is not so well worth listening to as his friend, +is he? Poor Eliza! to be only just _tolerable_.” + +“I beg you will not put it into Lizzy’s head to be vexed by his +ill-treatment, for he is such a disagreeable man that it would be quite +a misfortune to be liked by him. Mrs. Long told me last night that he +sat close to her for half an hour without once opening his lips.” + +[Illustration: “Without once opening his lips” + +[_Copyright 1894 by George Allen._]] + +“Are you quite sure, ma’am? Is not there a little mistake?” said Jane. +“I certainly saw Mr. Darcy speaking to her.” + +“Ay, because she asked him at last how he liked Netherfield, and he +could not help answering her; but she said he seemed very angry at being +spoke to.” + +“Miss Bingley told me,” said Jane, “that he never speaks much unless +among his intimate acquaintance. With _them_ he is remarkably +agreeable.” + +“I do not believe a word of it, my dear. If he had been so very +agreeable, he would have talked to Mrs. Long. But I can guess how it +was; everybody says that he is eat up with pride, and I dare say he had +heard somehow that Mrs. Long does not keep a carriage, and had to come +to the ball in a hack chaise.” + +“I do not mind his not talking to Mrs. Long,” said Miss Lucas, “but I +wish he had danced with Eliza.” + +“Another time, Lizzy,” said her mother, “I would not dance with _him_, +if I were you.” + +“I believe, ma’am, I may safely promise you _never_ to dance with him.” + +“His pride,” said Miss Lucas, “does not offend _me_ so much as pride +often does, because there is an excuse for it. One cannot wonder that so +very fine a young man, with family, fortune, everything in his favour, +should think highly of himself. If I may so express it, he has a _right_ +to be proud.” + +“That is very true,” replied Elizabeth, “and I could easily forgive +_his_ pride, if he had not mortified _mine_.” + +“Pride,” observed Mary, who piqued herself upon the solidity of her +reflections, “is a very common failing, I believe. By all that I have +ever read, I am convinced that it is very common indeed; that human +nature is particularly prone to it, and that there are very few of us +who do not cherish a feeling of self-complacency on the score of some +quality or other, real or imaginary. Vanity and pride are different +things, though the words are often used synonymously. A person may be +proud without being vain. Pride relates more to our opinion of +ourselves; vanity to what we would have others think of us.” + +“If I were as rich as Mr. Darcy,” cried a young Lucas, who came with his +sisters, “I should not care how proud I was. 
I would keep a pack of +foxhounds, and drink a bottle of wine every day.” + +“Then you would drink a great deal more than you ought,” said Mrs. +Bennet; “and if I were to see you at it, I should take away your bottle +directly.” + +The boy protested that she should not; she continued to declare that she +would; and the argument ended only with the visit. + +[Illustration] + + + + +[Illustration] + + + + +CHAPTER VI. + + +[Illustration] + +The ladies of Longbourn soon waited on those of Netherfield. The visit +was returned in due form. Miss Bennet’s pleasing manners grew on the +good-will of Mrs. Hurst and Miss Bingley; and though the mother was +found to be intolerable, and the younger sisters not worth speaking to, +a wish of being better acquainted with _them_ was expressed towards the +two eldest. By Jane this attention was received with the greatest +pleasure; but Elizabeth still saw superciliousness in their treatment of +everybody, hardly excepting even her sister, and could not like them; +though their kindness to Jane, such as it was, had a value, as arising, +in all probability, from the influence of their brother’s admiration. It +was generally evident, whenever they met, that he _did_ admire her; and +to _her_ it was equally evident that Jane was yielding to the preference +which she had begun to entertain for him from the first, and was in a +way to be very much in love; but she considered with pleasure that it +was not likely to be discovered by the world in general, since Jane +united with great strength of feeling, a composure of temper and an +uniform cheerfulness of manner, which would guard her from the +suspicions of the impertinent. She mentioned this to her friend, Miss +Lucas. + +“It may, perhaps, be pleasant,” replied Charlotte, “to be able to impose +on the public in such a case; but it is sometimes a disadvantage to be +so very guarded. If a woman conceals her affection with the same skill +from the object of it, she may lose the opportunity of fixing him; and +it will then be but poor consolation to believe the world equally in the +dark. There is so much of gratitude or vanity in almost every +attachment, that it is not safe to leave any to itself. We can all +_begin_ freely--a slight preference is natural enough; but there are +very few of us who have heart enough to be really in love without +encouragement. In nine cases out of ten, a woman had better show _more_ +affection than she feels. Bingley likes your sister undoubtedly; but he +may never do more than like her, if she does not help him on.” + +“But she does help him on, as much as her nature will allow. If _I_ can +perceive her regard for him, he must be a simpleton indeed not to +discover it too.” + +“Remember, Eliza, that he does not know Jane’s disposition as you do.” + +“But if a woman is partial to a man, and does not endeavor to conceal +it, he must find it out.” + +“Perhaps he must, if he sees enough of her. But though Bingley and Jane +meet tolerably often, it is never for many hours together; and as they +always see each other in large mixed parties, it is impossible that +every moment should be employed in conversing together. Jane should +therefore make the most of every half hour in which she can command his +attention. 
When she is secure of him, there will be leisure for falling +in love as much as she chooses.” + +“Your plan is a good one,” replied Elizabeth, “where nothing is in +question but the desire of being well married; and if I were determined +to get a rich husband, or any husband, I dare say I should adopt it. But +these are not Jane’s feelings; she is not acting by design. As yet she +cannot even be certain of the degree of her own regard, nor of its +reasonableness. She has known him only a fortnight. She danced four +dances with him at Meryton; she saw him one morning at his own house, +and has since dined in company with him four times. This is not quite +enough to make her understand his character.” + +“Not as you represent it. Had she merely _dined_ with him, she might +only have discovered whether he had a good appetite; but you must +remember that four evenings have been also spent together--and four +evenings may do a great deal.” + +“Yes: these four evenings have enabled them to ascertain that they both +like Vingt-un better than Commerce, but with respect to any other +leading characteristic, I do not imagine that much has been unfolded.” + +“Well,” said Charlotte, “I wish Jane success with all my heart; and if +she were married to him to-morrow, I should think she had as good a +chance of happiness as if she were to be studying his character for a +twelvemonth. Happiness in marriage is entirely a matter of chance. If +the dispositions of the parties are ever so well known to each other, or +ever so similar beforehand, it does not advance their felicity in the +least. They always continue to grow sufficiently unlike afterwards to +have their share of vexation; and it is better to know as little as +possible of the defects of the person with whom you are to pass your +life.” + +“You make me laugh, Charlotte; but it is not sound. You know it is not +sound, and that you would never act in this way yourself.” + +Occupied in observing Mr. Bingley’s attention to her sister, Elizabeth +was far from suspecting that she was herself becoming an object of some +interest in the eyes of his friend. Mr. Darcy had at first scarcely +allowed her to be pretty: he had looked at her without admiration at the +ball; and when they next met, he looked at her only to criticise. But no +sooner had he made it clear to himself and his friends that she had +hardly a good feature in her face, than he began to find it was rendered +uncommonly intelligent by the beautiful expression of her dark eyes. To +this discovery succeeded some others equally mortifying. Though he had +detected with a critical eye more than one failure of perfect symmetry +in her form, he was forced to acknowledge her figure to be light and +pleasing; and in spite of his asserting that her manners were not those +of the fashionable world, he was caught by their easy playfulness. Of +this she was perfectly unaware: to her he was only the man who made +himself agreeable nowhere, and who had not thought her handsome enough +to dance with. + +He began to wish to know more of her; and, as a step towards conversing +with her himself, attended to her conversation with others. His doing so +drew her notice. It was at Sir William Lucas’s, where a large party were +assembled. + +“What does Mr. Darcy mean,” said she to Charlotte, “by listening to my +conversation with Colonel Forster?” + +“That is a question which Mr. Darcy only can answer.” + +“But if he does it any more, I shall certainly let him know that I see +what he is about. 
He has a very satirical eye, and if I do not begin by +being impertinent myself, I shall soon grow afraid of him.” + +[Illustration: “The entreaties of several” [_Copyright 1894 by George +Allen._]] + +On his approaching them soon afterwards, though without seeming to have +any intention of speaking, Miss Lucas defied her friend to mention such +a subject to him, which immediately provoking Elizabeth to do it, she +turned to him and said,-- + +“Did not you think, Mr. Darcy, that I expressed myself uncommonly well +just now, when I was teasing Colonel Forster to give us a ball at +Meryton?” + +“With great energy; but it is a subject which always makes a lady +energetic.” + +“You are severe on us.” + +“It will be _her_ turn soon to be teased,” said Miss Lucas. “I am going +to open the instrument, Eliza, and you know what follows.” + +“You are a very strange creature by way of a friend!--always wanting me +to play and sing before anybody and everybody! If my vanity had taken a +musical turn, you would have been invaluable; but as it is, I would +really rather not sit down before those who must be in the habit of +hearing the very best performers.” On Miss Lucas’s persevering, however, +she added, “Very well; if it must be so, it must.” And gravely glancing +at Mr. Darcy, “There is a very fine old saying, which everybody here is +of course familiar with--‘Keep your breath to cool your porridge,’--and +I shall keep mine to swell my song.” + +Her performance was pleasing, though by no means capital. After a song +or two, and before she could reply to the entreaties of several that she +would sing again, she was eagerly succeeded at the instrument by her +sister Mary, who having, in consequence of being the only plain one in +the family, worked hard for knowledge and accomplishments, was always +impatient for display. + +Mary had neither genius nor taste; and though vanity had given her +application, it had given her likewise a pedantic air and conceited +manner, which would have injured a higher degree of excellence than she +had reached. Elizabeth, easy and unaffected, had been listened to with +much more pleasure, though not playing half so well; and Mary, at the +end of a long concerto, was glad to purchase praise and gratitude by +Scotch and Irish airs, at the request of her younger sisters, who with +some of the Lucases, and two or three officers, joined eagerly in +dancing at one end of the room. + +Mr. Darcy stood near them in silent indignation at such a mode of +passing the evening, to the exclusion of all conversation, and was too +much engrossed by his own thoughts to perceive that Sir William Lucas +was his neighbour, till Sir William thus began:-- + +“What a charming amusement for young people this is, Mr. Darcy! There is +nothing like dancing, after all. I consider it as one of the first +refinements of polished societies.” + +“Certainly, sir; and it has the advantage also of being in vogue amongst +the less polished societies of the world: every savage can dance.” + +Sir William only smiled. “Your friend performs delightfully,” he +continued, after a pause, on seeing Bingley join the group; “and I doubt +not that you are an adept in the science yourself, Mr. Darcy.” + +“You saw me dance at Meryton, I believe, sir.” + +“Yes, indeed, and received no inconsiderable pleasure from the sight. Do +you often dance at St. 
James’s?” + +“Never, sir.” + +“Do you not think it would be a proper compliment to the place?” + +“It is a compliment which I never pay to any place if I can avoid it.” + +“You have a house in town, I conclude?” + +Mr. Darcy bowed. + +“I had once some thoughts of fixing in town myself, for I am fond of +superior society; but I did not feel quite certain that the air of +London would agree with Lady Lucas.” + +He paused in hopes of an answer: but his companion was not disposed to +make any; and Elizabeth at that instant moving towards them, he was +struck with the notion of doing a very gallant thing, and called out to +her,-- + +“My dear Miss Eliza, why are not you dancing? Mr. Darcy, you must allow +me to present this young lady to you as a very desirable partner. You +cannot refuse to dance, I am sure, when so much beauty is before you.” +And, taking her hand, he would have given it to Mr. Darcy, who, though +extremely surprised, was not unwilling to receive it, when she instantly +drew back, and said with some discomposure to Sir William,-- + +“Indeed, sir, I have not the least intention of dancing. I entreat you +not to suppose that I moved this way in order to beg for a partner.” + +Mr. Darcy, with grave propriety, requested to be allowed the honour of +her hand, but in vain. Elizabeth was determined; nor did Sir William at +all shake her purpose by his attempt at persuasion. + +“You excel so much in the dance, Miss Eliza, that it is cruel to deny me +the happiness of seeing you; and though this gentleman dislikes the +amusement in general, he can have no objection, I am sure, to oblige us +for one half hour.” + +“Mr. Darcy is all politeness,” said Elizabeth, smiling. + +“He is, indeed: but considering the inducement, my dear Miss Eliza, we +cannot wonder at his complaisance; for who would object to such a +partner?” + +Elizabeth looked archly, and turned away. Her resistance had not injured +her with the gentleman, and he was thinking of her with some +complacency, when thus accosted by Miss Bingley,-- + +“I can guess the subject of your reverie.” + +“I should imagine not.” + +“You are considering how insupportable it would be to pass many +evenings in this manner,--in such society; and, indeed, I am quite of +your opinion. I was never more annoyed! The insipidity, and yet the +noise--the nothingness, and yet the self-importance, of all these +people! What would I give to hear your strictures on them!” + +“Your conjecture is totally wrong, I assure you. My mind was more +agreeably engaged. I have been meditating on the very great pleasure +which a pair of fine eyes in the face of a pretty woman can bestow.” + +Miss Bingley immediately fixed her eyes on his face, and desired he +would tell her what lady had the credit of inspiring such reflections. +Mr. Darcy replied, with great intrepidity,-- + +“Miss Elizabeth Bennet.” + +“Miss Elizabeth Bennet!” repeated Miss Bingley. “I am all astonishment. +How long has she been such a favourite? and pray when am I to wish you +joy?” + +“That is exactly the question which I expected you to ask. A lady’s +imagination is very rapid; it jumps from admiration to love, from love +to matrimony, in a moment. I knew you would be wishing me joy.” + +“Nay, if you are so serious about it, I shall consider the matter as +absolutely settled. 
You will have a charming mother-in-law, indeed, and +of course she will be always at Pemberley with you.” + +He listened to her with perfect indifference, while she chose to +entertain herself in this manner; and as his composure convinced her +that all was safe, her wit flowed along. + + + + +[Illustration: + + “A note for Miss Bennet” + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER VII. + + +[Illustration] + +Mr. Bennet’s property consisted almost entirely in an estate of two +thousand a year, which, unfortunately for his daughters, was entailed, +in default of heirs male, on a distant relation; and their mother’s +fortune, though ample for her situation in life, could but ill supply +the deficiency of his. Her father had been an attorney in Meryton, and +had left her four thousand pounds. + +She had a sister married to a Mr. Philips, who had been a clerk to their +father and succeeded him in the business, and a brother settled in +London in a respectable line of trade. + +The village of Longbourn was only one mile from Meryton; a most +convenient distance for the young ladies, who were usually tempted +thither three or four times a week, to pay their duty to their aunt, and +to a milliner’s shop just over the way. The two youngest of the family, +Catherine and Lydia, were particularly frequent in these attentions: +their minds were more vacant than their sisters’, and when nothing +better offered, a walk to Meryton was necessary to amuse their morning +hours and furnish conversation for the evening; and, however bare of +news the country in general might be, they always contrived to learn +some from their aunt. At present, indeed, they were well supplied both +with news and happiness by the recent arrival of a militia regiment in +the neighbourhood; it was to remain the whole winter, and Meryton was +the head-quarters. + +Their visits to Mrs. Philips were now productive of the most interesting +intelligence. Every day added something to their knowledge of the +officers’ names and connections. Their lodgings were not long a secret, +and at length they began to know the officers themselves. Mr. Philips +visited them all, and this opened to his nieces a source of felicity +unknown before. They could talk of nothing but officers; and Mr. +Bingley’s large fortune, the mention of which gave animation to their +mother, was worthless in their eyes when opposed to the regimentals of +an ensign. + +After listening one morning to their effusions on this subject, Mr. +Bennet coolly observed,-- + +“From all that I can collect by your manner of talking, you must be two +of the silliest girls in the country. I have suspected it some time, but +I am now convinced.” + +Catherine was disconcerted, and made no answer; but Lydia, with perfect +indifference, continued to express her admiration of Captain Carter, and +her hope of seeing him in the course of the day, as he was going the +next morning to London. + +“I am astonished, my dear,” said Mrs. Bennet, “that you should be so +ready to think your own children silly. If I wished to think slightingly +of anybody’s children, it should not be of my own, however.” + +“If my children are silly, I must hope to be always sensible of it.” + +“Yes; but as it happens, they are all of them very clever.” + +“This is the only point, I flatter myself, on which we do not agree. I +had hoped that our sentiments coincided in every particular, but I must +so far differ from you as to think our two youngest daughters uncommonly +foolish.” + +“My dear Mr. 
Bennet, you must not expect such girls to have the sense of +their father and mother. When they get to our age, I dare say they will +not think about officers any more than we do. I remember the time when I +liked a red coat myself very well--and, indeed, so I do still at my +heart; and if a smart young colonel, with five or six thousand a year, +should want one of my girls, I shall not say nay to him; and I thought +Colonel Forster looked very becoming the other night at Sir William’s in +his regimentals.” + +“Mamma,” cried Lydia, “my aunt says that Colonel Forster and Captain +Carter do not go so often to Miss Watson’s as they did when they first +came; she sees them now very often standing in Clarke’s library.” + +Mrs. Bennet was prevented replying by the entrance of the footman with a +note for Miss Bennet; it came from Netherfield, and the servant waited +for an answer. Mrs. Bennet’s eyes sparkled with pleasure, and she was +eagerly calling out, while her daughter read,-- + +“Well, Jane, who is it from? What is it about? What does he say? Well, +Jane, make haste and tell us; make haste, my love.” + +“It is from Miss Bingley,” said Jane, and then read it aloud. + + /* NIND “My dear friend, */ + + “If you are not so compassionate as to dine to-day with Louisa and + me, we shall be in danger of hating each other for the rest of our + lives; for a whole day’s _tête-à -tête_ between two women can never + end without a quarrel. Come as soon as you can on the receipt of + this. My brother and the gentlemen are to dine with the officers. + Yours ever, + +“CAROLINE BINGLEY.” + +“With the officers!” cried Lydia: “I wonder my aunt did not tell us of +_that_.” + +“Dining out,” said Mrs. Bennet; “that is very unlucky.” + +“Can I have the carriage?” said Jane. + +“No, my dear, you had better go on horseback, because it seems likely to +rain; and then you must stay all night.” + +“That would be a good scheme,” said Elizabeth, “if you were sure that +they would not offer to send her home.” + +“Oh, but the gentlemen will have Mr. Bingley’s chaise to go to Meryton; +and the Hursts have no horses to theirs.” + +“I had much rather go in the coach.” + +“But, my dear, your father cannot spare the horses, I am sure. They are +wanted in the farm, Mr. Bennet, are not they?” + +[Illustration: Cheerful prognostics] + +“They are wanted in the farm much oftener than I can get them.” + +“But if you have got them to-day,” said Elizabeth, “my mother’s purpose +will be answered.” + +She did at last extort from her father an acknowledgment that the horses +were engaged; Jane was therefore obliged to go on horseback, and her +mother attended her to the door with many cheerful prognostics of a bad +day. Her hopes were answered; Jane had not been gone long before it +rained hard. Her sisters were uneasy for her, but her mother was +delighted. The rain continued the whole evening without intermission; +Jane certainly could not come back. + +“This was a lucky idea of mine, indeed!” said Mrs. Bennet, more than +once, as if the credit of making it rain were all her own. Till the next +morning, however, she was not aware of all the felicity of her +contrivance. Breakfast was scarcely over when a servant from Netherfield +brought the following note for Elizabeth:-- + + /* NIND “My dearest Lizzie, */ + + “I find myself very unwell this morning, which, I suppose, is to be + imputed to my getting wet through yesterday. My kind friends will + not hear of my returning home till I am better. They insist also on + my seeing Mr. 
Jones--therefore do not be alarmed if you should hear + of his having been to me--and, excepting a sore throat and a + headache, there is not much the matter with me. + +“Yours, etc.” + +“Well, my dear,” said Mr. Bennet, when Elizabeth had read the note +aloud, “if your daughter should have a dangerous fit of illness--if she +should die--it would be a comfort to know that it was all in pursuit of +Mr. Bingley, and under your orders.” + +“Oh, I am not at all afraid of her dying. People do not die of little +trifling colds. She will be taken good care of. As long as she stays +there, it is all very well. I would go and see her if I could have the +carriage.” + +Elizabeth, feeling really anxious, determined to go to her, though the +carriage was not to be had: and as she was no horsewoman, walking was +her only alternative. She declared her resolution. + +“How can you be so silly,” cried her mother, “as to think of such a +thing, in all this dirt! You will not be fit to be seen when you get +there.” + +“I shall be very fit to see Jane--which is all I want.” + +“Is this a hint to me, Lizzy,” said her father, “to send for the +horses?” + +“No, indeed. I do not wish to avoid the walk. The distance is nothing, +when one has a motive; only three miles. I shall be back by dinner.” + +“I admire the activity of your benevolence,” observed Mary, “but every +impulse of feeling should be guided by reason; and, in my opinion, +exertion should always be in proportion to what is required.” + +“We will go as far as Meryton with you,” said Catherine and Lydia. +Elizabeth accepted their company, and the three young ladies set off +together. + +“If we make haste,” said Lydia, as they walked along, “perhaps we may +see something of Captain Carter, before he goes.” + +In Meryton they parted: the two youngest repaired to the lodgings of one +of the officers’ wives, and Elizabeth continued her walk alone, crossing +field after field at a quick pace, jumping over stiles and springing +over puddles, with impatient activity, and finding herself at last +within view of the house, with weary ancles, dirty stockings, and a face +glowing with the warmth of exercise. + +She was shown into the breakfast parlour, where all but Jane were +assembled, and where her appearance created a great deal of surprise. +That she should have walked three miles so early in the day in such +dirty weather, and by herself, was almost incredible to Mrs. Hurst and +Miss Bingley; and Elizabeth was convinced that they held her in contempt +for it. She was received, however, very politely by them; and in their +brother’s manners there was something better than politeness--there was +good-humour and kindness. Mr. Darcy said very little, and Mr. Hurst +nothing at all. The former was divided between admiration of the +brilliancy which exercise had given to her complexion and doubt as to +the occasion’s justifying her coming so far alone. The latter was +thinking only of his breakfast. + +Her inquiries after her sister were not very favourably answered. Miss +Bennet had slept ill, and though up, was very feverish, and not well +enough to leave her room. Elizabeth was glad to be taken to her +immediately; and Jane, who had only been withheld by the fear of giving +alarm or inconvenience, from expressing in her note how much she longed +for such a visit, was delighted at her entrance. 
She was not equal, +however, to much conversation; and when Miss Bingley left them together, +could attempt little beside expressions of gratitude for the +extraordinary kindness she was treated with. Elizabeth silently attended +her. + +When breakfast was over, they were joined by the sisters; and Elizabeth +began to like them herself, when she saw how much affection and +solicitude they showed for Jane. The apothecary came; and having +examined his patient, said, as might be supposed, that she had caught a +violent cold, and that they must endeavour to get the better of it; +advised her to return to bed, and promised her some draughts. The advice +was followed readily, for the feverish symptoms increased, and her head +ached acutely. Elizabeth did not quit her room for a moment, nor were +the other ladies often absent; the gentlemen being out, they had in fact +nothing to do elsewhere. + +When the clock struck three, Elizabeth felt that she must go, and very +unwillingly said so. Miss Bingley offered her the carriage, and she only +wanted a little pressing to accept it, when Jane testified such concern +at parting with her that Miss Bingley was obliged to convert the offer +of the chaise into an invitation to remain at Netherfield for the +present. Elizabeth most thankfully consented, and a servant was +despatched to Longbourn, to acquaint the family with her stay, and bring +back a supply of clothes. + +[Illustration: + +“The Apothecary came” +] + + + + +[Illustration: + +“covering a screen” +] + + + + +CHAPTER VIII. + + +[Illustration] + +At five o’clock the two ladies retired to dress, and at half-past six +Elizabeth was summoned to dinner. To the civil inquiries which then +poured in, and amongst which she had the pleasure of distinguishing the +much superior solicitude of Mr. Bingley, she could not make a very +favourable answer. Jane was by no means better. The sisters, on hearing +this, repeated three or four times how much they were grieved, how +shocking it was to have a bad cold, and how excessively they disliked +being ill themselves; and then thought no more of the matter: and their +indifference towards Jane, when not immediately before them, restored +Elizabeth to the enjoyment of all her original dislike. + +Their brother, indeed, was the only one of the party whom she could +regard with any complacency. His anxiety for Jane was evident, and his +attentions to herself most pleasing; and they prevented her feeling +herself so much an intruder as she believed she was considered by the +others. She had very little notice from any but him. Miss Bingley was +engrossed by Mr. Darcy, her sister scarcely less so; and as for Mr. +Hurst, by whom Elizabeth sat, he was an indolent man, who lived only to +eat, drink, and play at cards, who, when he found her prefer a plain +dish to a ragout, had nothing to say to her. + +When dinner was over, she returned directly to Jane, and Miss Bingley +began abusing her as soon as she was out of the room. Her manners were +pronounced to be very bad indeed,--a mixture of pride and impertinence: +she had no conversation, no style, no taste, no beauty. Mrs. Hurst +thought the same, and added,-- + +“She has nothing, in short, to recommend her, but being an excellent +walker. I shall never forget her appearance this morning. She really +looked almost wild.” + +“She did indeed, Louisa. I could hardly keep my countenance. Very +nonsensical to come at all! Why must _she_ be scampering about the +country, because her sister had a cold? 
Her hair so untidy, so blowzy!” + +“Yes, and her petticoat; I hope you saw her petticoat, six inches deep +in mud, I am absolutely certain, and the gown which had been let down to +hide it not doing its office.” + +“Your picture may be very exact, Louisa,” said Bingley; “but this was +all lost upon me. I thought Miss Elizabeth Bennet looked remarkably well +when she came into the room this morning. Her dirty petticoat quite +escaped my notice.” + +“_You_ observed it, Mr. Darcy, I am sure,” said Miss Bingley; “and I am +inclined to think that you would not wish to see _your sister_ make such +an exhibition.” + +“Certainly not.” + +“To walk three miles, or four miles, or five miles, or whatever it is, +above her ancles in dirt, and alone, quite alone! what could she mean by +it? It seems to me to show an abominable sort of conceited independence, +a most country-town indifference to decorum.” + +“It shows an affection for her sister that is very pleasing,” said +Bingley. + +“I am afraid, Mr. Darcy,” observed Miss Bingley, in a half whisper, +“that this adventure has rather affected your admiration of her fine +eyes.” + +“Not at all,” he replied: “they were brightened by the exercise.” A +short pause followed this speech, and Mrs. Hurst began again,-- + +“I have an excessive regard for Jane Bennet,--she is really a very sweet +girl,--and I wish with all my heart she were well settled. But with such +a father and mother, and such low connections, I am afraid there is no +chance of it.” + +“I think I have heard you say that their uncle is an attorney in +Meryton?” + +“Yes; and they have another, who lives somewhere near Cheapside.” + +“That is capital,” added her sister; and they both laughed heartily. + +“If they had uncles enough to fill _all_ Cheapside,” cried Bingley, “it +would not make them one jot less agreeable.” + +“But it must very materially lessen their chance of marrying men of any +consideration in the world,” replied Darcy. + +To this speech Bingley made no answer; but his sisters gave it their +hearty assent, and indulged their mirth for some time at the expense of +their dear friend’s vulgar relations. + +With a renewal of tenderness, however, they repaired to her room on +leaving the dining-parlour, and sat with her till summoned to coffee. +She was still very poorly, and Elizabeth would not quit her at all, till +late in the evening, when she had the comfort of seeing her asleep, and +when it appeared to her rather right than pleasant that she should go +down stairs herself. On entering the drawing-room, she found the whole +party at loo, and was immediately invited to join them; but suspecting +them to be playing high, she declined it, and making her sister the +excuse, said she would amuse herself, for the short time she could stay +below, with a book. Mr. Hurst looked at her with astonishment. + +“Do you prefer reading to cards?” said he; “that is rather singular.” + +“Miss Eliza Bennet,” said Miss Bingley, “despises cards. She is a great +reader, and has no pleasure in anything else.” + +“I deserve neither such praise nor such censure,” cried Elizabeth; “I +am _not_ a great reader, and I have pleasure in many things.” + +“In nursing your sister I am sure you have pleasure,” said Bingley; “and +I hope it will soon be increased by seeing her quite well.” + +Elizabeth thanked him from her heart, and then walked towards a table +where a few books were lying. He immediately offered to fetch her +others; all that his library afforded. 
+ +“And I wish my collection were larger for your benefit and my own +credit; but I am an idle fellow; and though I have not many, I have more +than I ever looked into.” + +Elizabeth assured him that she could suit herself perfectly with those +in the room. + +“I am astonished,” said Miss Bingley, “that my father should have left +so small a collection of books. What a delightful library you have at +Pemberley, Mr. Darcy!” + +“It ought to be good,” he replied: “it has been the work of many +generations.” + +“And then you have added so much to it yourself--you are always buying +books.” + +“I cannot comprehend the neglect of a family library in such days as +these.” + +“Neglect! I am sure you neglect nothing that can add to the beauties of +that noble place. Charles, when you build _your_ house, I wish it may be +half as delightful as Pemberley.” + +“I wish it may.” + +“But I would really advise you to make your purchase in that +neighbourhood, and take Pemberley for a kind of model. There is not a +finer county in England than Derbyshire.” + +“With all my heart: I will buy Pemberley itself, if Darcy will sell it.” + +“I am talking of possibilities, Charles.” + +“Upon my word, Caroline, I should think it more possible to get +Pemberley by purchase than by imitation.” + +Elizabeth was so much caught by what passed, as to leave her very little +attention for her book; and, soon laying it wholly aside, she drew near +the card-table, and stationed herself between Mr. Bingley and his eldest +sister, to observe the game. + +“Is Miss Darcy much grown since the spring?” said Miss Bingley: “will +she be as tall as I am?” + +“I think she will. She is now about Miss Elizabeth Bennet’s height, or +rather taller.” + +“How I long to see her again! I never met with anybody who delighted me +so much. Such a countenance, such manners, and so extremely accomplished +for her age! Her performance on the pianoforte is exquisite.” + +“It is amazing to me,” said Bingley, “how young ladies can have patience +to be so very accomplished as they all are.” + +“All young ladies accomplished! My dear Charles, what do you mean?” + +“Yes, all of them, I think. They all paint tables, cover screens, and +net purses. I scarcely know any one who cannot do all this; and I am +sure I never heard a young lady spoken of for the first time, without +being informed that she was very accomplished.” + +“Your list of the common extent of accomplishments,” said Darcy, “has +too much truth. The word is applied to many a woman who deserves it no +otherwise than by netting a purse or covering a screen; but I am very +far from agreeing with you in your estimation of ladies in general. I +cannot boast of knowing more than half-a-dozen in the whole range of my +acquaintance that are really accomplished.” + +“Nor I, I am sure,” said Miss Bingley. + +“Then,” observed Elizabeth, “you must comprehend a great deal in your +idea of an accomplished woman.” + +“Yes; I do comprehend a great deal in it.” + +“Oh, certainly,” cried his faithful assistant, “no one can be really +esteemed accomplished who does not greatly surpass what is usually met +with. 
A woman must have a thorough knowledge of music, singing, drawing, +dancing, and the modern languages, to deserve the word; and, besides all +this, she must possess a certain something in her air and manner of +walking, the tone of her voice, her address and expressions, or the word +will be but half deserved.” + +“All this she must possess,” added Darcy; “and to all she must yet add +something more substantial in the improvement of her mind by extensive +reading.” + +“I am no longer surprised at your knowing _only_ six accomplished women. +I rather wonder now at your knowing _any_.” + +“Are you so severe upon your own sex as to doubt the possibility of all +this?” + +“_I_ never saw such a woman. _I_ never saw such capacity, and taste, and +application, and elegance, as you describe, united.” + +Mrs. Hurst and Miss Bingley both cried out against the injustice of her +implied doubt, and were both protesting that they knew many women who +answered this description, when Mr. Hurst called them to order, with +bitter complaints of their inattention to what was going forward. As all +conversation was thereby at an end, Elizabeth soon afterwards left the +room. + +“Eliza Bennet,” said Miss Bingley, when the door was closed on her, “is +one of those young ladies who seek to recommend themselves to the other +sex by undervaluing their own; and with many men, I daresay, it +succeeds; but, in my opinion, it is a paltry device, a very mean art.” + +“Undoubtedly,” replied Darcy, to whom this remark was chiefly addressed, +“there is meanness in _all_ the arts which ladies sometimes condescend +to employ for captivation. Whatever bears affinity to cunning is +despicable.” + +Miss Bingley was not so entirely satisfied with this reply as to +continue the subject. + +Elizabeth joined them again only to say that her sister was worse, and +that she could not leave her. Bingley urged Mr. Jones’s being sent for +immediately; while his sisters, convinced that no country advice could +be of any service, recommended an express to town for one of the most +eminent physicians. This she would not hear of; but she was not so +unwilling to comply with their brother’s proposal; and it was settled +that Mr. Jones should be sent for early in the morning, if Miss Bennet +were not decidedly better. Bingley was quite uncomfortable; his sisters +declared that they were miserable. They solaced their wretchedness, +however, by duets after supper; while he could find no better relief to +his feelings than by giving his housekeeper directions that every +possible attention might be paid to the sick lady and her sister. + + + + +[Illustration: + +M^{rs} Bennet and her two youngest girls + +[_Copyright 1894 by George Allen._]] + + + + +CHAPTER IX. + + +[Illustration] + +Elizabeth passed the chief of the night in her sister’s room, and in the +morning had the pleasure of being able to send a tolerable answer to the +inquiries which she very early received from Mr. Bingley by a housemaid, +and some time afterwards from the two elegant ladies who waited on his +sisters. In spite of this amendment, however, she requested to have a +note sent to Longbourn, desiring her mother to visit Jane, and form her +own judgment of her situation. The note was immediately despatched, and +its contents as quickly complied with. Mrs. Bennet, accompanied by her +two youngest girls, reached Netherfield soon after the family breakfast. + +Had she found Jane in any apparent danger, Mrs. 
Bennet would have been +very miserable; but being satisfied on seeing her that her illness was +not alarming, she had no wish of her recovering immediately, as her +restoration to health would probably remove her from Netherfield. She +would not listen, therefore, to her daughter’s proposal of being carried +home; neither did the apothecary, who arrived about the same time, think +it at all advisable. After sitting a little while with Jane, on Miss +Bingley’s appearance and invitation, the mother and three daughters all +attended her into the breakfast parlour. Bingley met them with hopes +that Mrs. Bennet had not found Miss Bennet worse than she expected. + +“Indeed I have, sir,” was her answer. “She is a great deal too ill to be +moved. Mr. Jones says we must not think of moving her. We must trespass +a little longer on your kindness.” + +“Removed!” cried Bingley. “It must not be thought of. My sister, I am +sure, will not hear of her removal.” + +“You may depend upon it, madam,” said Miss Bingley, with cold civility, +“that Miss Bennet shall receive every possible attention while she +remains with us.” + +Mrs. Bennet was profuse in her acknowledgments. + +“I am sure,” she added, “if it was not for such good friends, I do not +know what would become of her, for she is very ill indeed, and suffers a +vast deal, though with the greatest patience in the world, which is +always the way with her, for she has, without exception, the sweetest +temper I ever met with. I often tell my other girls they are nothing to +_her_. You have a sweet room here, Mr. Bingley, and a charming prospect +over that gravel walk. I do not know a place in the country that is +equal to Netherfield. You will not think of quitting it in a hurry, I +hope, though you have but a short lease.” + +“Whatever I do is done in a hurry,” replied he; “and therefore if I +should resolve to quit Netherfield, I should probably be off in five +minutes. At present, however, I consider myself as quite fixed here.” + +“That is exactly what I should have supposed of you,” said Elizabeth. + +“You begin to comprehend me, do you?” cried he, turning towards her. + +“Oh yes--I understand you perfectly.” + +“I wish I might take this for a compliment; but to be so easily seen +through, I am afraid, is pitiful.” + +“That is as it happens. It does not necessarily follow that a deep, +intricate character is more or less estimable than such a one as yours.” + +“Lizzy,” cried her mother, “remember where you are, and do not run on in +the wild manner that you are suffered to do at home.” + +“I did not know before,” continued Bingley, immediately, “that you were +a studier of character. It must be an amusing study.” + +“Yes; but intricate characters are the _most_ amusing. They have at +least that advantage.” + +“The country,” said Darcy, “can in general supply but few subjects for +such a study. In a country neighbourhood you move in a very confined and +unvarying society.” + +“But people themselves alter so much, that there is something new to be +observed in them for ever.” + +“Yes, indeed,” cried Mrs. Bennet, offended by his manner of mentioning a +country neighbourhood. “I assure you there is quite as much of _that_ +going on in the country as in town.” + +Everybody was surprised; and Darcy, after looking at her for a moment, +turned silently away. Mrs. 
Bennet, who fancied she had gained a complete +victory over him, continued her triumph,-- + +“I cannot see that London has any great advantage over the country, for +my part, except the shops and public places. The country is a vast deal +pleasanter, is not it, Mr. Bingley?” + +“When I am in the country,” he replied, “I never wish to leave it; and +when I am in town, it is pretty much the same. They have each their +advantages, and I can be equally happy in either.” + +“Ay, that is because you have the right disposition. But that +gentleman,” looking at Darcy, “seemed to think the country was nothing +at all.” + +“Indeed, mamma, you are mistaken,” said Elizabeth, blushing for her +mother. “You quite mistook Mr. Darcy. He only meant that there was not +such a variety of people to be met with in the country as in town, which +you must acknowledge to be true.” + +“Certainly, my dear, nobody said there were; but as to not meeting with +many people in this neighbourhood, I believe there are few +neighbourhoods larger. I know we dine with four-and-twenty families.” + +Nothing but concern for Elizabeth could enable Bingley to keep his +countenance. His sister was less delicate, and directed her eye towards +Mr. Darcy with a very expressive smile. Elizabeth, for the sake of +saying something that might turn her mother’s thoughts, now asked her if +Charlotte Lucas had been at Longbourn since _her_ coming away. + +“Yes, she called yesterday with her father. What an agreeable man Sir +William is, Mr. Bingley--is not he? so much the man of fashion! so +genteel and so easy! He has always something to say to everybody. _That_ +is my idea of good breeding; and those persons who fancy themselves very +important and never open their mouths quite mistake the matter.” + +“Did Charlotte dine with you?” + +“No, she would go home. I fancy she was wanted about the mince-pies. For +my part, Mr. Bingley, _I_ always keep servants that can do their own +work; _my_ daughters are brought up differently. But everybody is to +judge for themselves, and the Lucases are a very good sort of girls, I +assure you. It is a pity they are not handsome! Not that _I_ think +Charlotte so _very_ plain; but then she is our particular friend.” + +“She seems a very pleasant young woman,” said Bingley. + +“Oh dear, yes; but you must own she is very plain. Lady Lucas herself +has often said so, and envied me Jane’s beauty. I do not like to boast +of my own child; but to be sure, Jane--one does not often see anybody +better looking. It is what everybody says. I do not trust my own +partiality. When she was only fifteen there was a gentleman at my +brother Gardiner’s in town so much in love with her, that my +sister-in-law was sure he would make her an offer before we came away. +But, however, he did not. Perhaps he thought her too young. However, he +wrote some verses on her, and very pretty they were.” + +“And so ended his affection,” said Elizabeth, impatiently. “There has +been many a one, I fancy, overcome in the same way. I wonder who first +discovered the efficacy of poetry in driving away love!” + +“I have been used to consider poetry as the _food_ of love,” said Darcy. + +“Of a fine, stout, healthy love it may. Everything nourishes what is +strong already. But if it be only a slight, thin sort of inclination, I +am convinced that one good sonnet will starve it entirely away.” + +Darcy only smiled; and the general pause which ensued made Elizabeth +tremble lest her mother should be exposing herself again. 
She longed to +speak, but could think of nothing to say; and after a short silence Mrs. +Bennet began repeating her thanks to Mr. Bingley for his kindness to +Jane, with an apology for troubling him also with Lizzy. Mr. Bingley was +unaffectedly civil in his answer, and forced his younger sister to be +civil also, and say what the occasion required. She performed her part, +indeed, without much graciousness, but Mrs. Bennet was satisfied, and +soon afterwards ordered her carriage. Upon this signal, the youngest of +her daughters put herself forward. The two girls had been whispering to +each other during the whole visit; and the result of it was, that the +youngest should tax Mr. Bingley with having promised on his first coming +into the country to give a ball at Netherfield. + +Lydia was a stout, well-grown girl of fifteen, with a fine complexion +and good-humoured countenance; a favourite with her mother, whose +affection had brought her into public at an early age. She had high +animal spirits, and a sort of natural self-consequence, which the +attentions of the officers, to whom her uncle’s good dinners and her +own easy manners recommended her, had increased into assurance. She was +very equal, therefore, to address Mr. Bingley on the subject of the +ball, and abruptly reminded him of his promise; adding, that it would be +the most shameful thing in the world if he did not keep it. His answer +to this sudden attack was delightful to her mother’s ear. + +“I am perfectly ready, I assure you, to keep my engagement; and, when +your sister is recovered, you shall, if you please, name the very day of +the ball. But you would not wish to be dancing while she is ill?” + +Lydia declared herself satisfied. “Oh yes--it would be much better to +wait till Jane was well; and by that time, most likely, Captain Carter +would be at Meryton again. And when you have given _your_ ball,” she +added, “I shall insist on their giving one also. I shall tell Colonel +Forster it will be quite a shame if he does not.” + +Mrs. Bennet and her daughters then departed, and Elizabeth returned +instantly to Jane, leaving her own and her relations’ behaviour to the +remarks of the two ladies and Mr. Darcy; the latter of whom, however, +could not be prevailed on to join in their censure of _her_, in spite of +all Miss Bingley’s witticisms on _fine eyes_. + + + + +[Illustration] + + + + +CHAPTER X. + + +[Illustration] + +The day passed much as the day before had done. Mrs. Hurst and Miss +Bingley had spent some hours of the morning with the invalid, who +continued, though slowly, to mend; and, in the evening, Elizabeth joined +their party in the drawing-room. The loo table, however, did not appear. +Mr. Darcy was writing, and Miss Bingley, seated near him, was watching +the progress of his letter, and repeatedly calling off his attention by +messages to his sister. Mr. Hurst and Mr. Bingley were at piquet, and +Mrs. Hurst was observing their game. + +Elizabeth took up some needlework, and was sufficiently amused in +attending to what passed between Darcy and his companion. The perpetual +commendations of the lady either on his hand-writing, or on the evenness +of his lines, or on the length of his letter, with the perfect unconcern +with which her praises were received, formed a curious dialogue, and was +exactly in unison with her opinion of each. + +“How delighted Miss Darcy will be to receive such a letter!” + +He made no answer. + +“You write uncommonly fast.” + +“You are mistaken. 
I write rather slowly.” + +“How many letters you must have occasion to write in the course of a +year! Letters of business, too! How odious I should think them!” + +“It is fortunate, then, that they fall to my lot instead of to yours.” + +“Pray tell your sister that I long to see her.” + +“I have already told her so once, by your desire.” + +“I am afraid you do not like your pen. Let me mend it for you. I mend +pens remarkably well.” + +“Thank you--but I always mend my own.” + +“How can you contrive to write so even?” + +He was silent. + +“Tell your sister I am delighted to hear of her improvement on the harp, +and pray let her know that I am quite in raptures with her beautiful +little design for a table, and I think it infinitely superior to Miss +Grantley’s.” + +“Will you give me leave to defer your raptures till I write again? At +present I have not room to do them justice.” + +“Oh, it is of no consequence. I shall see her in January. But do you +always write such charming long letters to her, Mr. Darcy?” + +“They are generally long; but whether always charming, it is not for me +to determine.” + +“It is a rule with me, that a person who can write a long letter with +ease cannot write ill.” + +“That will not do for a compliment to Darcy, Caroline,” cried her +brother, “because he does _not_ write with ease. He studies too much +for words of four syllables. Do not you, Darcy?” + +“My style of writing is very different from yours.” + +“Oh,” cried Miss Bingley, “Charles writes in the most careless way +imaginable. He leaves out half his words, and blots the rest.” + +“My ideas flow so rapidly that I have not time to express them; by which +means my letters sometimes convey no ideas at all to my correspondents.” + +“Your humility, Mr. Bingley,” said Elizabeth, “must disarm reproof.” + +“Nothing is more deceitful,” said Darcy, “than the appearance of +humility. It is often only carelessness of opinion, and sometimes an +indirect boast.” + +“And which of the two do you call _my_ little recent piece of modesty?” + +“The indirect boast; for you are really proud of your defects in +writing, because you consider them as proceeding from a rapidity of +thought and carelessness of execution, which, if not estimable, you +think at least highly interesting. The power of doing anything with +quickness is always much prized by the possessor, and often without any +attention to the imperfection of the performance. When you told Mrs. +Bennet this morning, that if you ever resolved on quitting Netherfield +you should be gone in five minutes, you meant it to be a sort of +panegyric, of compliment to yourself; and yet what is there so very +laudable in a precipitance which must leave very necessary business +undone, and can be of no real advantage to yourself or anyone else?” + + + CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO. + TOOKS COURT, CHANCERY LANE, LONDON. 
+ + +*** END OF THE PROJECT GUTENBERG EBOOK 1342 *** diff --git a/tests/dummy/data/transformers.py b/tests/dummy/data/transformers.py new file mode 100644 index 0000000..7d8911b --- /dev/null +++ b/tests/dummy/data/transformers.py @@ -0,0 +1,50 @@ +from typing import Iterable + +from datasets import ( # type: ignore + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) + + +def create_sample_dataset( + column: str = "text", pattern: str = "sample text {}" +) -> Dataset: + return Dataset.from_dict({column: [pattern.format(ind) for ind in range(1, 4)]}) + + +def create_sample_iterable_dataset( + column: str = "text", pattern: str = "sample text {}" +) -> IterableDataset: + def _generator(): + for ind in range(1, 4): + yield {column: pattern.format(ind)} + + return IterableDataset.from_generator(_generator) + + +def create_sample_dataset_dict( + splits: Iterable[str] = ("train", "test"), + column: str = "text", + pattern: str = "sample text {}", +): + return DatasetDict( + { + split: create_sample_dataset(column=column, pattern=pattern) + for split in splits + } + ) + + +def create_sample_iterable_dataset_dict( + splits: Iterable[str] = ("train", "test"), + column: str = "text", + pattern: str = "sample text {}", +): + return IterableDatasetDict( + { + split: create_sample_iterable_dataset(column=column, pattern=pattern) + for split in splits + } + ) diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py index 3bb8152..c502318 100644 --- a/tests/dummy/services/requests.py +++ b/tests/dummy/services/requests.py @@ -1,5 +1,7 @@ +from typing import Optional + from guidellm.core import TextGenerationRequest -from guidellm.request import RequestGenerator +from guidellm.request import GenerationMode, RequestGenerator class TestRequestGenerator(RequestGenerator): @@ -8,5 +10,19 @@ class TestRequestGenerator(RequestGenerator): The purpose - to be used for testing. 
""" + def __init__( + self, + tokenizer: Optional[str] = None, + mode: GenerationMode = "async", + async_queue_size: int = 50, + ): + super().__init__( + type_="test", + source="test", + tokenizer=tokenizer, + mode=mode, + async_queue_size=async_queue_size, + ) + def create_item(self) -> TextGenerationRequest: return TextGenerationRequest(prompt="Test prompt") diff --git a/tests/e2e/core/__init__.py b/tests/e2e/cli/__init__.py similarity index 100% rename from tests/e2e/core/__init__.py rename to tests/e2e/cli/__init__.py diff --git a/tests/unit/cli/conftest.py b/tests/e2e/cli/conftest.py similarity index 100% rename from tests/unit/cli/conftest.py rename to tests/e2e/cli/conftest.py diff --git a/tests/unit/cli/test_application_entrypoint.py b/tests/e2e/cli/test_application_entrypoint.py similarity index 93% rename from tests/unit/cli/test_application_entrypoint.py rename to tests/e2e/cli/test_application_entrypoint.py index 1ac3bc1..e555bb0 100644 --- a/tests/unit/cli/test_application_entrypoint.py +++ b/tests/e2e/cli/test_application_entrypoint.py @@ -3,6 +3,7 @@ import pytest from click.testing import CliRunner + from guidellm.main import main @@ -23,7 +24,11 @@ def test_main_cli_overrided( ["--target", "localhost:9000", "--backend", "test", "--rate-type", "sweep"], ) default_main_kwargs.update( - {"target": "localhost:9000", "backend": "test", "rate_type": "sweep"} + { + "target": "localhost:9000", + "backend": "test", + "rate_type": "sweep", + } ) assert patch_main.call_count == 1 diff --git a/tests/unit/cli/test_main_validation.py b/tests/e2e/cli/test_main_validation.py similarity index 99% rename from tests/unit/cli/test_main_validation.py rename to tests/e2e/cli/test_main_validation.py index 73196c2..db3d49b 100644 --- a/tests/unit/cli/test_main_validation.py +++ b/tests/e2e/cli/test_main_validation.py @@ -1,4 +1,5 @@ import pytest + from guidellm.main import main diff --git a/tests/integration/backend/__init__.py b/tests/integration/backend/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/backend/test_openai_backend_submit.py b/tests/integration/backend/test_openai_backend_submit.py deleted file mode 100644 index 77c20c1..0000000 --- a/tests/integration/backend/test_openai_backend_submit.py +++ /dev/null @@ -1,71 +0,0 @@ -import time -from typing import Callable - -import pytest -import requests -from guidellm.backend import OpenAIBackend -from guidellm.config import settings -from guidellm.core import TextGenerationRequest, TextGenerationResult -from openai.pagination import SyncPage -from openai.types import Model - - -@pytest.fixture(scope="session", autouse=True) -def _openai_server_healthcheck(): - """ - Check if the openai server is running - """ - - if not (openai_server := settings.openai.base_url): - raise ValueError( - "Integration backend tests can't be run without " - "GUIDELLM__OPENAI__BASE_URL specified", - ) - - try: - requests.get(openai_server, timeout=10) - except requests.ConnectionError: - raise SystemExit( - "Integration backend tests can't be run without " - f"OpenAI compatible server running. Please check the {openai_server}", - ) from None - - -@pytest.mark.skip("OpenAI compatible service is not deployed yet") -@pytest.mark.sanity() -def test_openai_submit_request( - mocker, - openai_backend_factory: Callable[..., OpenAIBackend], -): - """ - Check the OpenAI making request and checking the results. 
- - Check if the total time that is stored in the TextGenerationResult corresponds - to the real execution time - """ - - openai_resources_models_list_patch = mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage( - object="list", - data=[ - Model( - id="d69244a8-3f30-4f08-a432-8c83d5f254ad", - created=1719814049, - object="model", - owned_by="guidellm", - ), - ], - ), - ) - backend: OpenAIBackend = openai_backend_factory() - request = TextGenerationRequest(prompt="Generate numbers from 1 to 10") - - start_time = time.perf_counter() - result: TextGenerationResult = backend.submit(request=request) - total_for_submit = time.perf_counter() - start_time - - assert result.start_time is not None - assert result.end_time is not None - assert openai_resources_models_list_patch.call_count == 1 - assert abs((result.end_time - result.start_time) - total_for_submit) < 1 diff --git a/tests/integration/executor/__init__.py b/tests/integration/executor/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/executor/conftest.py b/tests/integration/executor/conftest.py deleted file mode 100644 index e6dbbe1..0000000 --- a/tests/integration/executor/conftest.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import List, cast - -import openai -import pytest -from openai.pagination import SyncPage - -from tests import dummy - - -@pytest.fixture(autouse=True) -def openai_completion_create_patch( - mocker, -) -> openai.Stream[openai.types.Completion]: - """ - Mock available models function to avoid OpenAI API call. - """ - - items = list(dummy.data.openai_completion_factory()) - mocker.patch("openai.resources.completions.Completions.create", return_value=items) - - return cast(openai.Stream[openai.types.Completion], items) - - -@pytest.fixture(autouse=True) -def openai_models_list_patch(mocker) -> List[openai.types.Model]: - """ - Mock available models function to avoid OpenAI API call. 
- """ - - items: List[openai.types.Model] = list(dummy.data.openai_model_factory()) - mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage(object="list", data=items), - ) - - return items diff --git a/tests/integration/executor/test_report_generation.py b/tests/integration/executor/test_report_generation.py deleted file mode 100644 index 9aa0fb3..0000000 --- a/tests/integration/executor/test_report_generation.py +++ /dev/null @@ -1,179 +0,0 @@ -import time - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_sync_mode( - openai_backend_factory, - openai_completion_create_patch, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.SYNCHRONOUS, - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=2, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].results) == 1 - assert report.benchmarks[0].results[0].output == " ".join( - item.content for item in openai_completion_create_patch - ) - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_infinite( - openai_backend_factory, -): - """ - Test without max duration defined. - - Does not matter how many requests is specified, - the execution DOES NOT have any duration limitations. - """ - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=2, - max_duration=None, # not specified for no limitations - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) == 0 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_limited( - openai_backend_factory, -): - """ - Test with max duration defined. 
- """ - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=2, - max_duration=3, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].results) == 2 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_failed( - mocker, - openai_backend_factory, -): - """ - Test max duration immediate tasks iteration break up - because of the `time.time() - start_time >= self._max_duration`. - """ - - mocker.patch("guidellm.backend.Backend.submit", side_effect=Exception) - - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=3, - max_duration=None, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) == 3 - - -@pytest.mark.sanity() -def test_executor_openai_single_report_generation_constant_mode_cancelled_reports( - openai_backend_factory, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generation_mode = ProfileGenerationMode.FIXED_RATE - profile_generator_kwargs = { - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0], - } - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=profile_generation_mode, - profile_args=profile_generator_kwargs, - max_requests=5, - max_duration=3, - ) - - start_time: float = time.perf_counter() - report: TextGenerationBenchmarkReport = executor.run() - end_time: float = time.perf_counter() - start_time - - assert isinstance(executor.backend, OpenAIBackend) - assert len(report.benchmarks) == 1 - assert len(report.benchmarks[0].errors) > 0 - assert round(end_time) == 3 diff --git a/tests/integration/request/__init__.py b/tests/integration/request/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/integration/request/test_base.py b/tests/integration/request/test_base.py deleted file mode 100644 index 5df6fe7..0000000 --- a/tests/integration/request/test_base.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import RequestGenerator -from transformers import AutoTokenizer, PreTrainedTokenizerBase - - -class TestRequestGenerator(RequestGenerator): - def create_item(self) -> TextGenerationRequest: - return TextGenerationRequest(prompt="Test prompt") - - -@pytest.mark.smoke() -def test_request_generator_with_hf_tokenizer(): - tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") - generator = TestRequestGenerator(tokenizer=tokenizer) - 
assert generator.tokenizer == tokenizer - - -@pytest.mark.smoke() -def test_request_generator_with_string_tokenizer(): - generator = TestRequestGenerator(tokenizer="bert-base-uncased") - assert isinstance(generator.tokenizer, PreTrainedTokenizerBase) - assert generator.tokenizer.name_or_path == "bert-base-uncased" diff --git a/tests/integration/test_guidellm.py b/tests/integration/test_guidellm.py new file mode 100644 index 0000000..75ab221 --- /dev/null +++ b/tests/integration/test_guidellm.py @@ -0,0 +1,8 @@ +import pytest + +from guidellm.config import settings + + +@pytest.mark.smoke() +def test_import(): + assert settings diff --git a/tests/unit/backend/test_base.py b/tests/unit/backend/test_base.py index dfe6259..9247eb1 100644 --- a/tests/unit/backend/test_base.py +++ b/tests/unit/backend/test_base.py @@ -1,44 +1,201 @@ -from typing import Iterator, List, Optional - import pytest -from guidellm.backend import Backend, BackendEngine, GenerativeResponse, OpenAIBackend -from guidellm.core import TextGenerationRequest + +from guidellm.backend import Backend, GenerativeResponse +from guidellm.core import TextGenerationRequest, TextGenerationResult -@Backend.register(backend_type=BackendEngine.TEST) -class TestBackend(Backend): - """ - The test implementation of a LLM Backend. - """ +@pytest.mark.smoke() +def test_backend_registry(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") - def __init__(self, target: str, model: str = "test"): - self.target: str = target - self.model: str = model + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") - def make_request( - self, - request: TextGenerationRequest, - ) -> Iterator[GenerativeResponse]: - raise NotImplementedError + def available_models(self): + return ["mock-model"] - def available_models(self) -> List[str]: - raise NotImplementedError + backend_type = "test" + Backend.register(backend_type)(MockBackend) # type: ignore + assert Backend._registry[backend_type] is MockBackend # type: ignore - @property - def default_model(self) -> str: - raise NotImplementedError + backend_instance = Backend.create(backend_type) # type: ignore + assert isinstance(backend_instance, MockBackend) - def model_tokenizer(self, model: str) -> Optional[str]: - raise NotImplementedError + with pytest.raises(ValueError): + Backend.create("invalid_type") # type: ignore @pytest.mark.smoke() -def test_backend_registry(): - """ - Ensure that all registered classes exist in the Backend._registry. 
- """ - - assert Backend._registry == { - BackendEngine.TEST: TestBackend, - BackendEngine.OPENAI_SERVER: OpenAIBackend, - } +def test_generative_response_creation(): + response = GenerativeResponse(type_="final", output="Test Output") + assert response.type_ == "final" + assert response.output == "Test Output" + assert response.add_token is None + assert response.prompt is None + + response = GenerativeResponse(type_="token_iter", add_token="token") + assert response.type_ == "token_iter" + assert response.add_token == "token" + assert response.output is None + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_make_request(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + yield GenerativeResponse( + type_="token_iter", + add_token="Token", + prompt="Hello, world!", + prompt_token_count=5, + ) + yield GenerativeResponse( + type_="final", + output="This is a final response.", + prompt="Hello, world!", + prompt_token_count=5, + output_token_count=10, + ) + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + index = 0 + + async for response in backend.make_request(TextGenerationRequest(prompt="Test")): + if index == 0: + assert response.type_ == "token_iter" + assert response.add_token == "Token" + assert response.prompt == "Hello, world!" + assert response.prompt_token_count == 5 + else: + assert response.type_ == "final" + assert response.output == "This is a final response." + assert response.prompt == "Hello, world!" + assert response.prompt_token_count == 5 + assert response.output_token_count == 10 + index += 1 + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_submit_final(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + result = await backend.submit(TextGenerationRequest(prompt="Test")) + assert isinstance(result, TextGenerationResult) + assert result.output == "Test" + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_backend_submit_multi(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + yield GenerativeResponse(type_="token_iter", add_token="Token") + yield GenerativeResponse(type_="token_iter", add_token=" ") + yield GenerativeResponse(type_="token_iter", add_token="Test") + yield GenerativeResponse(type_="final") + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + result = await backend.submit(TextGenerationRequest(prompt="Test")) + assert isinstance(result, TextGenerationResult) + assert result.output == "Token Test" + + +@pytest.mark.regression() +@pytest.mark.asyncio() +async def test_backend_submit_no_response(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + if False: # simulate no yield + yield + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + + with pytest.raises(ValueError): + await backend.submit(TextGenerationRequest(prompt="Test")) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def 
test_backend_submit_multi_final(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + yield GenerativeResponse(type_="token_iter", add_token="Token") + yield GenerativeResponse(type_="token_iter", add_token=" ") + yield GenerativeResponse(type_="token_iter", add_token="Test") + yield GenerativeResponse(type_="final") + yield GenerativeResponse(type_="final") + + def available_models(self): + return ["mock-model"] + + backend = MockBackend() + + with pytest.raises(ValueError): + await backend.submit(TextGenerationRequest(prompt="Test")) + + +@pytest.mark.smoke() +def test_backend_models(): + class MockBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + def available_models(self): + return ["mock-model", "mock-model-2"] + + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="") + + backend = MockBackend() + assert backend.available_models() == ["mock-model", "mock-model-2"] + assert backend.default_model == "mock-model" + + +@pytest.mark.regression() +def test_backend_abstract_methods(): + with pytest.raises(TypeError): + Backend() # type: ignore + + class IncompleteBackend(Backend): + def __init__(self): + super().__init__("test", "http://localhost:8000", "mock-model") + + async def make_request(self, request): + yield GenerativeResponse(type_="final", output="Test") + + with pytest.raises(TypeError): + IncompleteBackend() # type: ignore diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 7777bcd..73afa94 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -1,75 +1,293 @@ -""" -This module includes unit tests for the OpenAI Backend Service. -""" - -from typing import Callable, Optional +from unittest.mock import AsyncMock, Mock, patch import pytest -from guidellm.backend import OpenAIBackend + +from guidellm.backend import Backend, OpenAIBackend +from guidellm.config import reload_settings, settings from guidellm.core import TextGenerationRequest -from tests.dummy.services import TestRequestGenerator +@pytest.fixture() +def mock_openai_client(): + with patch("guidellm.backend.openai.AsyncOpenAI") as mock_async_const, patch( + "guidellm.backend.openai.OpenAI" + ) as mock_sync_const: + mock_model = Mock() + mock_model.id = "mock-model" + mock_model_2 = Mock() + mock_model_2.id = "mock-model-2" + mock_model_data = Mock() + mock_model_data.data = [mock_model, mock_model_2] -@pytest.mark.smoke() -def test_openai_backend_creation_with_default_model(openai_backend_factory: Callable): - """ - Test whether the OpenAI Backend service is created correctly - with all default parameters. - Also checks whether the `default_models` parameter does not abuse the OpenAI API. 
- """ + def create_async_create(inst): + async def stream(): + for ind in range(3): + choice = Mock() + choice.delta.content = f"token{ind}" if ind % 2 == 0 else " " + choice.finish_reason = None + chunk = Mock() + chunk.choices = [choice] + + yield chunk + + choice = Mock() + choice.finish_reason = "stop" + chunk = Mock() + chunk.choices = [choice] + yield chunk - backend_service = openai_backend_factory() + async def create(*args, **kwargs): + inst.create_args = args + inst.create_kwargs = kwargs + return stream() - assert isinstance(backend_service, OpenAIBackend) - assert backend_service.default_model == backend_service.available_models()[0] + return create + + def async_constructor(*args, **kwargs): + mock_async_instance = AsyncMock() + mock_async_instance.models.list.return_value = mock_model_data + mock_async_instance.args = args + mock_async_instance.kwargs = kwargs + mock_async_instance.chat.completions.create.side_effect = ( + create_async_create(mock_async_instance) + ) + + return mock_async_instance + + def sync_constructor(*args, **kwargs): + mock_sync_instance = Mock() + mock_sync_instance.models.list.return_value = mock_model_data + mock_sync_instance.args = args + mock_sync_instance.kwargs = kwargs + return mock_sync_instance + + mock_async_const.side_effect = async_constructor + mock_sync_const.side_effect = sync_constructor + yield mock_async_const, mock_sync_const @pytest.mark.smoke() -def test_model_tokenizer(openai_backend_factory): - backend_service = openai_backend_factory() - assert backend_service.model_tokenizer("bert-base-uncased") +@pytest.mark.parametrize( + ( + "openai_api_key", + "target", + "host", + "port", + "model", + "request_args", + "expected_base_url", + ), + [ + ( + "test_key", + "http://test-target", + None, + None, + "test-model", + {"arg1": "value1"}, + "http://test-target", + ), + ("test_key", None, "localhost", 8000, "test-model", {}, "localhost:8000/v1"), + (None, None, None, None, None, {}, settings.openai.base_url), + ], +) +def test_openai_backend_create( + openai_api_key, + target, + host, + port, + model, + request_args, + expected_base_url, + mock_openai_client, +): + backends = [ + Backend.create( + "openai_server", + openai_api_key=openai_api_key, + target=target, + host=host, + port=port, + model=model, + **request_args, + ), + OpenAIBackend( + openai_api_key=openai_api_key, + target=target, + host=host, + port=port, + model=model, + **request_args, + ), + ] + + for backend in backends: + assert backend._async_client.kwargs["api_key"] == ( # type: ignore + openai_api_key or settings.openai.api_key + ) + assert backend._async_client.kwargs["base_url"] == expected_base_url # type: ignore + assert backend._client.kwargs["api_key"] == ( # type: ignore + openai_api_key or settings.openai.api_key + ) + assert backend._client.kwargs["base_url"] == expected_base_url # type: ignore + if model: + assert backend._model == model # type: ignore @pytest.mark.smoke() -def test_model_tokenizer_no_model(openai_backend_factory): - backend_service = openai_backend_factory() - tokenizer = backend_service.model_tokenizer("invalid") - assert tokenizer is None +def test_openai_backend_models(mock_openai_client): + backend = OpenAIBackend() + assert backend.available_models() == ["mock-model", "mock-model-2"] + assert backend.default_model == "mock-model" + assert backend.model == "mock-model" @pytest.mark.smoke() -def test_make_request(openai_backend_factory, openai_completion_create_patch): - """ - Test `OpenAIBackend.make_request()` workflow. 
- - Notes: - * The output token count is not used without the `TextGenerationResult.start()` - and `TextGenerationResult.start()` - """ - - request: TextGenerationRequest = TestRequestGenerator().create_item() - backend_service: OpenAIBackend = openai_backend_factory() - total_generative_responses = 0 - - for generative_response, completion_patch in zip( - backend_service.make_request(request=request), - openai_completion_create_patch, - ): - total_generative_responses += 1 - expected_token: Optional[str] = completion_patch.content or None - - assert generative_response.add_token == expected_token +@pytest.mark.parametrize( + ("req", "request_args"), + [ + (TextGenerationRequest(prompt="Test"), None), + ( + TextGenerationRequest(prompt="Test", params={"generated_tokens": 10}), + None, + ), + ( + TextGenerationRequest(prompt="Test", params={"generated_tokens": 10}), + {"max_tokens": 10}, + ), + ( + TextGenerationRequest(prompt="Test"), + {"max_tokens": 10, "stop": "stop"}, + ), + ], +) +@pytest.mark.asyncio() +async def test_openai_backend_make_request(req, request_args, mock_openai_client): + backend = OpenAIBackend(**(request_args or {})) + counter = 0 + + async for response in backend.make_request(req): + if counter < 3: + assert response.type_ == "token_iter" + assert response.add_token == f"token{counter}" if counter % 2 == 0 else " " + elif counter == 3: + assert response.type_ == "final" + else: + raise ValueError("Too many responses received from the backend") + + counter += 1 + + # check the kwargs passed to the openai client + # now that the generator has been consumed + assert backend._async_client.create_args == () # type: ignore + assert backend._async_client.create_kwargs["model"] == "mock-model" # type: ignore + assert backend._async_client.create_kwargs["messages"] == [ # type: ignore + {"role": "system", "content": req.prompt} + ] + assert backend._async_client.create_kwargs["stream"] # type: ignore + assert backend._async_client.create_kwargs["n"] == 1 # type: ignore + + if req.output_token_count is not None: assert ( - generative_response.type_ == "final" - if completion_patch.stop is True - else "token_iter" + backend._async_client.create_kwargs["max_tokens"] == req.output_token_count # type: ignore ) - if expected_token is not None: - assert generative_response.prompt_token_count is None - assert generative_response.output_token_count is None - else: - assert generative_response.prompt_token_count == 2 - assert generative_response.output_token_count == 0 + assert backend._async_client.create_kwargs["stop"] is None # type: ignore + elif request_args is not None and "max_tokens" not in request_args: + assert ( + backend._async_client.create_kwargs["max_tokens"] # type: ignore + == settings.openai.max_gen_tokens + ) + + if request_args: + for key, value in request_args.items(): + assert backend._async_client.create_kwargs[key] == value # type: ignore + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +async def test_openai_backend_submit(mock_openai_client): + backend = OpenAIBackend() + request = TextGenerationRequest(prompt="Test", prompt_token_count=1) + result = await backend.submit(request) + + assert result.request == request + assert result.prompt == request.prompt + assert result.prompt_token_count == 1 + assert result.output == "token0 token2" + assert result.output_token_count == 3 + assert result.last_time is not None + assert result.first_token_set + assert result.start_time is not None + assert result.first_token_time is not None + assert result.end_time is 
not None + assert len(result.decode_times) == 2 + + +@pytest.mark.sanity() +def test_openai_backend_api_key(mock_openai_client): + backend = OpenAIBackend() + assert backend._async_client.kwargs["api_key"] == settings.openai.api_key # type: ignore + assert backend._client.kwargs["api_key"] == settings.openai.api_key # type: ignore + + backend = OpenAIBackend(openai_api_key="test_key") + assert backend._async_client.kwargs["api_key"] == "test_key" # type: ignore + assert backend._client.kwargs["api_key"] == "test_key" # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_api_key_env(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__API_KEY": "test_key", + }, + ) + reload_settings() + + backend = OpenAIBackend() + assert backend._async_client.kwargs["api_key"] == "test_key" # type: ignore + assert backend._client.kwargs["api_key"] == "test_key" # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_target(mock_openai_client): + backend = OpenAIBackend(target="http://test-target") + assert backend._async_client.kwargs["base_url"] == "http://test-target" # type: ignore + assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore + + backend = OpenAIBackend(host="localhost", port=8000) + assert backend._async_client.kwargs["base_url"] == "localhost:8000/v1" # type: ignore + assert backend._client.kwargs["base_url"] == "localhost:8000/v1" # type: ignore + + backend = OpenAIBackend() + assert backend._async_client.kwargs["base_url"] == settings.openai.base_url # type: ignore + assert backend._client.kwargs["base_url"] == settings.openai.base_url # type: ignore + + +@pytest.mark.sanity() +def test_openai_backend_target_env(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__BASE_URL": "http://test-target", + }, + ) + reload_settings() + + backend = OpenAIBackend() + assert backend._async_client.kwargs["base_url"] == "http://test-target" # type: ignore + assert backend._client.kwargs["base_url"] == "http://test-target" # type: ignore + + +@pytest.mark.regression() +def test_openai_backend_target_none_error(mock_openai_client, mocker): + mocker.patch.dict( + "os.environ", + { + "GUIDELLM__OPENAI__BASE_URL": "", + }, + ) + reload_settings() - assert total_generative_responses == 3 + with pytest.raises(ValueError): + OpenAIBackend(target=None, host=None, port=None) diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/config/__init__.py b/tests/unit/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index e6dbbe1..3257a8d 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,36 +1,35 @@ -from typing import List, cast +from pathlib import Path +from typing import List +from unittest.mock import MagicMock, patch -import openai import pytest -from openai.pagination import SyncPage +import requests_mock -from tests import dummy +@pytest.fixture() +def mock_auto_tokenizer(): + with patch("transformers.AutoTokenizer.from_pretrained") as mock_from_pretrained: -@pytest.fixture(autouse=True) -def openai_completion_create_patch( - mocker, -) -> openai.Stream[openai.types.Completion]: - """ - Mock available models function to avoid OpenAI API call. 
- """ + def _fake_tokenize(text: str) -> List[int]: + tokens = text.split() + return [0] * len(tokens) - items = list(dummy.data.openai_completion_factory()) - mocker.patch("openai.resources.completions.Completions.create", return_value=items) + mock_tokenizer = MagicMock() + mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize) + mock_from_pretrained.return_value = mock_tokenizer + yield mock_tokenizer - return cast(openai.Stream[openai.types.Completion], items) - -@pytest.fixture(autouse=True) -def openai_models_list_patch(mocker) -> List[openai.types.Model]: - """ - Mock available models function to avoid OpenAI API call. - """ - - items: List[openai.types.Model] = list(dummy.data.openai_model_factory()) - mocker.patch( - "openai.resources.models.Models.list", - return_value=SyncPage(object="list", data=items), +@pytest.fixture() +def mock_requests_pride_and_prejudice(): + text_path = ( + Path(__file__).parent.parent / "dummy" / "data" / "pride_and_prejudice.txt" ) - - return items + text_content = text_path.read_text() + + with requests_mock.Mocker() as mock: + mock.get( + "https://www.gutenberg.org/files/1342/1342-0.txt", + text=text_content, + ) + yield mock diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py index 1525107..95b7e92 100644 --- a/tests/unit/core/test_distribution.py +++ b/tests/unit/core/test_distribution.py @@ -1,4 +1,5 @@ import pytest + from guidellm.core import Distribution @@ -20,6 +21,22 @@ def test_distribution_statistics(): assert dist.min == 1 assert dist.max == 5 assert dist.range == 4 + assert dist.percentile(50) == 3.0 + assert dist.percentiles([25, 50, 75]) == pytest.approx([2.0, 3.0, 4.0]) + + +@pytest.mark.smoke() +def test_distribution_no_data(): + dist = Distribution(data=[]) + assert dist.mean == 0.0 + assert dist.median == 0.0 + assert dist.variance == 0.0 + assert dist.std_deviation == 0.0 + assert dist.min == 0.0 + assert dist.max == 0.0 + assert dist.range == 0.0 + assert dist.percentile(50) == 0.0 + assert dist.percentiles([25, 50, 75]) == [0.0, 0.0, 0.0] @pytest.mark.sanity() @@ -41,49 +58,50 @@ def test_distribution_remove_data(): assert dist.data == [1, 3, 5] -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_str(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) - assert str(dist) == ( - "Distribution({'mean': 3.0, 'median': 3.0, " - "'variance': 2.0, 'std_deviation': 1.4142135623730951, " - "'percentile_indices': " - "[10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], " - "'percentile_values': " - "[1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96], " - "'min': 1, 'max': 5, 'range': 4})" + assert "Distribution({" in str(dist) + assert "'mean': 3.0" in str(dist) + assert "'median': 3.0" in str(dist) + assert "'variance': 2.0" in str(dist) + assert "'percentile_indices': [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]" in str( + dist + ) + assert ( + "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" + in str(dist) ) + assert "'min': 1" in str(dist) + assert "'max': 5" in str(dist) + assert "'range': 4" in str(dist) -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_repr(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) - assert repr(dist) == f"Distribution(data={data})" + assert repr(dist) == f"Distribution(data={dist.data})" -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_json(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) json_str = dist.to_json() - assert 
'"data":[1,2,3,4,5]' in json_str + assert f'"data":[{dist.data[0]}' in json_str dist_restored = Distribution.from_json(json_str) assert dist_restored.data == data -@pytest.mark.skip("fix me") @pytest.mark.regression() def test_distribution_yaml(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) yaml_str = dist.to_yaml() - assert "data:\n- 1\n- 2\n- 3\n- 4\n- 5\n" in yaml_str + assert f"data:\n- {dist.data[0]}" in yaml_str dist_restored = Distribution.from_yaml(yaml_str) assert dist_restored.data == data diff --git a/tests/unit/core/test_report.py b/tests/unit/core/test_report.py index 713aea5..5d56c25 100644 --- a/tests/unit/core/test_report.py +++ b/tests/unit/core/test_report.py @@ -2,12 +2,12 @@ from pathlib import Path import pytest + from guidellm.core import ( Distribution, GuidanceReport, TextGenerationBenchmark, TextGenerationBenchmarkReport, - TextGenerationError, TextGenerationRequest, TextGenerationResult, ) @@ -32,21 +32,20 @@ def sample_benchmark_report() -> TextGenerationBenchmarkReport: first_token_time=None, decode_times=sample_distribution, ) - sample_error = TextGenerationError(request=sample_request, message="sample error") sample_benchmark = TextGenerationBenchmark( - mode="async", + mode="asynchronous", rate=1.0, results=[sample_result], - errors=[sample_error], + errors=[], concurrencies=[], ) return TextGenerationBenchmarkReport( - benchmarks=[sample_benchmark], args=[{"arg1": "value1"}] + benchmarks=[sample_benchmark], args={"arg1": "value1"} ) def compare_guidance_reports(report1: GuidanceReport, report2: GuidanceReport) -> bool: - return report1 == report2 + return report1.benchmarks == report2.benchmarks @pytest.mark.smoke() @@ -61,14 +60,10 @@ def test_guidance_report_initialization_with_params(sample_benchmark_report): assert report.benchmarks == [sample_benchmark_report] -@pytest.mark.smoke() -def test_guidance_report_file(sample_benchmark_report): +@pytest.mark.sanity() +def test_guidance_report_print(sample_benchmark_report): report = GuidanceReport(benchmarks=[sample_benchmark_report]) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "report.yaml" - report.save_file(file_path) - loaded_report = GuidanceReport.load_file(file_path) - assert compare_guidance_reports(report, loaded_report) + report.print() # This will output to the console @pytest.mark.regression() @@ -85,3 +80,34 @@ def test_guidance_report_yaml(sample_benchmark_report): yaml_str = report.to_yaml() loaded_report = GuidanceReport.from_yaml(yaml_str) assert compare_guidance_reports(report, loaded_report) + + +@pytest.mark.regression() +def test_guidance_report_save_load_file(sample_benchmark_report): + report = GuidanceReport(benchmarks=[sample_benchmark_report]) + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "report.yaml" + report.save_file(file_path) + loaded_report = GuidanceReport.load_file(file_path) + assert compare_guidance_reports(report, loaded_report) + + +@pytest.mark.regression() +def test_empty_guidance_report(): + report = GuidanceReport() + assert len(report.benchmarks) == 0 + report.print() # Ensure it doesn't raise error with no benchmarks + + +@pytest.mark.regression() +def test_compare_guidance_reports(sample_benchmark_report): + report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) + report2 = GuidanceReport(benchmarks=[sample_benchmark_report]) + assert compare_guidance_reports(report1, report2) + + +@pytest.mark.regression() +def 
test_compare_guidance_reports_inequality(sample_benchmark_report): + report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) + report2 = GuidanceReport(benchmarks=[]) + assert not compare_guidance_reports(report1, report2) diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py index 4d90a14..8550eb2 100644 --- a/tests/unit/core/test_request.py +++ b/tests/unit/core/test_request.py @@ -1,4 +1,5 @@ import pytest + from guidellm.core import TextGenerationRequest @@ -8,7 +9,7 @@ def test_text_generation_request_initialization(): request = TextGenerationRequest(prompt=prompt) assert request.prompt == prompt assert request.prompt_token_count is None - assert request.generate_token_count is None + assert request.output_token_count is None assert request.params == {} @@ -16,17 +17,17 @@ def test_text_generation_request_initialization(): def test_text_generation_request_initialization_with_params(): prompt = "Generate a story" prompt_token_count = 50 - generate_token_count = 100 + output_token_count = 100 params = {"temperature": 0.7} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) assert request.prompt == prompt assert request.prompt_token_count == prompt_token_count - assert request.generate_token_count == generate_token_count + assert request.output_token_count == output_token_count assert request.params == params @@ -34,12 +35,12 @@ def test_text_generation_request_initialization_with_params(): def test_request_json(): prompt = "Generate text" prompt_token_count = 10 - generate_token_count = 50 + output_token_count = 50 params = {"temperature": 0.7} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) json_str = request.to_json() @@ -50,7 +51,7 @@ def test_request_json(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.generate_token_count == generate_token_count + assert request_restored.output_token_count == output_token_count assert request_restored.params == params @@ -58,12 +59,12 @@ def test_request_json(): def test_request_yaml(): prompt = "Generate text" prompt_token_count = 15 - generate_token_count = 55 + output_token_count = 55 params = {"temperature": 0.8} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generate_token_count=generate_token_count, + output_token_count=output_token_count, params=params, ) yaml_str = request.to_yaml() @@ -74,5 +75,5 @@ def test_request_yaml(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.generate_token_count == generate_token_count + assert request_restored.output_token_count == output_token_count assert request_restored.params == params diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py index a6d942b..02232ba 100644 --- a/tests/unit/core/test_result.py +++ b/tests/unit/core/test_result.py @@ -1,4 +1,7 @@ +import time + import pytest + from guidellm.core import ( TextGenerationBenchmark, TextGenerationBenchmarkReport, @@ -17,7 +20,7 @@ def test_text_generation_result_initialization(): assert result.output == "" -@pytest.mark.sanity() 
+@pytest.mark.smoke() def test_text_generation_result_start(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) @@ -27,23 +30,25 @@ def test_text_generation_result_start(): assert result.start_time is not None -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_result_output_token(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) prompt = "Once upon a time" result.start(prompt) - token = "the" - result.output_token(token) + tokens = ["the", " ", "quick", " ", "brown", " ", "fox"] + for token in tokens: + result.output_token(token) + result.end() assert result.last_time assert result.start_time - assert result.output == f"{token} " + assert result.output == "the quick brown fox" assert result.last_time is not None assert result.last_time > result.start_time -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_result_end(): request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) @@ -57,6 +62,16 @@ def test_text_generation_result_end(): assert result.end_time > result.start_time +@pytest.mark.sanity() +def test_text_generation_result_improper_lifecycle(): + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + with pytest.raises(ValueError): + result.output_token("the") + with pytest.raises(ValueError): + result.end("The end") + + @pytest.mark.regression() def test_text_generation_result_json(): request = TextGenerationRequest(prompt="Generate a story") @@ -144,34 +159,98 @@ def test_text_generation_error_yaml(): @pytest.mark.smoke() def test_text_generation_benchmark_initialization(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) - assert benchmark.mode == "test" + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + assert benchmark.mode == "synchronous" assert benchmark.rate == 1.0 assert benchmark.request_count == 0 assert benchmark.error_count == 0 -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_benchmark_started(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + assert benchmark.completed_request_rate == 0.0 + assert not benchmark.overloaded benchmark.request_started() assert len(benchmark.concurrencies) == 1 -@pytest.mark.regression() +@pytest.mark.smoke() +def test_text_generation_benchmark_expected_rate(): + num_requests = 5 + time_per_request = 0.25 + expected_rate = 1.0 / time_per_request + + benchmark = TextGenerationBenchmark(mode="synchronous", rate=expected_rate) + + for index in range(num_requests): + request = TextGenerationRequest(prompt=f"Generate a story {index}") + benchmark.request_started() + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(time_per_request) + result.end("The end") + benchmark.request_completed(result) + + assert len(benchmark.results) == num_requests + assert len(benchmark.errors) == 0 + assert len(benchmark.concurrencies) == 10 + assert benchmark.request_count == num_requests + assert benchmark.error_count == 0 + assert benchmark.completed_request_rate == pytest.approx(expected_rate, rel=0.1) + assert not benchmark.overloaded + + +@pytest.mark.smoke() +def test_text_generation_benchmark_overloaded_rate(): + num_requests = 5 + time_per_request = 0.25 + expected_rate = 1.0 / time_per_request + + benchmark = 
TextGenerationBenchmark(mode="synchronous", rate=expected_rate * 1.5) + + for index in range(num_requests): + request = TextGenerationRequest(prompt=f"Generate a story {index}") + benchmark.request_started() + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(time_per_request) + result.end("The end") + benchmark.request_completed(result) + + assert len(benchmark.results) == num_requests + assert len(benchmark.errors) == 0 + assert len(benchmark.concurrencies) == 10 + assert benchmark.request_count == num_requests + assert benchmark.error_count == 0 + assert benchmark.completed_request_rate == pytest.approx(expected_rate, rel=0.1) + assert benchmark.overloaded + + +@pytest.mark.smoke() def test_text_generation_benchmark_completed_with_result(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + + with pytest.raises(ValueError): + benchmark.request_completed(None) # type: ignore + benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + + with pytest.raises(ValueError): + benchmark.request_completed(result) + + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) assert benchmark.request_count == 1 assert benchmark.error_count == 0 -@pytest.mark.regression() +@pytest.mark.smoke() def test_text_generation_benchmark_completed_with_error(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") error = TextGenerationError(request=request, message=str(Exception("Test error"))) @@ -180,15 +259,30 @@ def test_text_generation_benchmark_completed_with_error(): assert benchmark.error_count == 1 +@pytest.mark.regression() +def test_text_generation_benchmark_iter(): + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) + benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") + benchmark.request_completed(result) + for res in benchmark: + assert res == result + + @pytest.mark.regression() def test_text_generation_benchmark_json(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) json_str = benchmark.to_json() - assert '"mode":"test"' in json_str + assert '"mode":"synchronous"' in json_str assert '"rate":1.0' in json_str benchmark_restored = TextGenerationBenchmark.from_json(json_str) @@ -203,13 +297,15 @@ def test_text_generation_benchmark_json(): @pytest.mark.regression() def test_text_generation_benchmark_yaml(): - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) benchmark.request_started() request = TextGenerationRequest(prompt="Generate a story") result = TextGenerationResult(request=request) + result.start("Once upon a time") + result.end("The end") benchmark.request_completed(result) yaml_str = benchmark.to_yaml() - assert "mode: test" in yaml_str + assert "mode: 
synchronous" in yaml_str assert "rate: 1.0" in yaml_str benchmark_restored = TextGenerationBenchmark.from_yaml(yaml_str) @@ -229,22 +325,65 @@ def test_text_generation_benchmark_report_initialization(): assert len(report.args) == 0 -@pytest.mark.sanity() +@pytest.mark.smoke() def test_text_generation_benchmark_report_add_benchmark(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) assert len(report.benchmarks) == 1 +@pytest.mark.sanity() +def test_text_generation_benchmark_report_iter(): + report = TextGenerationBenchmarkReport() + + fast_benchmark = TextGenerationBenchmark(mode="synchronous", rate=10.0) + for _ in range(5): + fast_benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(0.1) + result.end("The end") + fast_benchmark.request_completed(result) + report.add_benchmark(fast_benchmark) + + slow_benchmark = TextGenerationBenchmark(mode="synchronous", rate=5.0) + for _ in range(5): + slow_benchmark.request_started() + request = TextGenerationRequest(prompt="Generate a story") + result = TextGenerationResult(request=request) + result.start("Once upon a time") + time.sleep(0.2) + result.end("The end") + slow_benchmark.request_completed(result) + report.add_benchmark(slow_benchmark) + + for index, benchmark in enumerate(report): + if index == 0: + assert benchmark == fast_benchmark + elif index == 1: + assert benchmark == slow_benchmark + else: + raise AssertionError("Unexpected report in report") + + for index, benchmark in enumerate(report.benchmarks_sorted): + if index == 0: + assert benchmark == slow_benchmark + elif index == 1: + assert benchmark == fast_benchmark + else: + raise AssertionError("Unexpected report in report") + + @pytest.mark.regression() def test_text_generation_benchmark_report_json(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) json_str = report.to_json() assert '"benchmarks":' in json_str - assert '"args":[]' in json_str + assert '"args":{}' in json_str report_restored = TextGenerationBenchmarkReport.from_json(json_str) assert len(report.benchmarks) == len(report_restored.benchmarks) @@ -257,11 +396,11 @@ def test_text_generation_benchmark_report_json(): @pytest.mark.regression() def test_text_generation_benchmark_report_yaml(): report = TextGenerationBenchmarkReport() - benchmark = TextGenerationBenchmark(mode="test", rate=1.0) + benchmark = TextGenerationBenchmark(mode="synchronous", rate=1.0) report.add_benchmark(benchmark) yaml_str = report.to_yaml() assert "benchmarks:" in yaml_str - assert "args: []" in yaml_str + assert "args: {}" in yaml_str report_restored = TextGenerationBenchmarkReport.from_yaml(yaml_str) assert len(report.benchmarks) == len(report_restored.benchmarks) diff --git a/tests/unit/core/test_serializable.py b/tests/unit/core/test_serializable.py index b2d238d..ce0cec8 100644 --- a/tests/unit/core/test_serializable.py +++ b/tests/unit/core/test_serializable.py @@ -2,7 +2,8 @@ from pathlib import Path import pytest -from guidellm.core.serializable import Serializable, SerializableFileType + +from guidellm.core.serializable import Serializable class ExampleModel(Serializable): @@ -11,32 +12,28 @@ class 
ExampleModel(Serializable): @pytest.mark.smoke() -def test_serializable_to_json(): +def test_serializable_json(): + # to json example = ExampleModel(name="John Doe", age=30) json_str = example.to_json() assert '"name":"John Doe"' in json_str assert '"age":30' in json_str - -@pytest.mark.smoke() -def test_serializable_from_json(): - json_str = '{"name": "John Doe", "age": 30}' + # from json example = ExampleModel.from_json(json_str) assert example.name == "John Doe" assert example.age == 30 @pytest.mark.smoke() -def test_serializable_to_yaml(): +def test_serializable_yaml(): + # to yaml example = ExampleModel(name="John Doe", age=30) yaml_str = example.to_yaml() assert "name: John Doe" in yaml_str assert "age: 30" in yaml_str - -@pytest.mark.smoke() -def test_serializable_from_yaml(): - yaml_str = "name: John Doe\nage: 30\n" + # from yaml example = ExampleModel.from_yaml(yaml_str) assert example.name == "John Doe" assert example.age == 30 @@ -47,7 +44,7 @@ def test_serializable_file_json(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "example.json" - saved_path = example.save_file(file_path, SerializableFileType.JSON) + saved_path = example.save_file(file_path, "json") assert Path(saved_path).exists() loaded_example = ExampleModel.load_file(saved_path) assert loaded_example.name == "John Doe" @@ -59,7 +56,7 @@ def test_serializable_file_yaml(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "example.yaml" - saved_path = example.save_file(file_path, SerializableFileType.YAML) + saved_path = example.save_file(file_path, "yaml") assert Path(saved_path).exists() loaded_example = ExampleModel.load_file(saved_path) assert loaded_example.name == "John Doe" @@ -78,11 +75,11 @@ def test_serializable_file_without_extension(): assert loaded_example.age == 30 -@pytest.mark.smoke() +@pytest.mark.sanity() def test_serializable_file_with_directory_json(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, SerializableFileType.JSON) + saved_path = example.save_file(temp_dir, "json") assert Path(saved_path).exists() assert saved_path.endswith(".json") loaded_example = ExampleModel.load_file(saved_path) @@ -90,11 +87,11 @@ def test_serializable_file_with_directory_json(): assert loaded_example.age == 30 -@pytest.mark.smoke() +@pytest.mark.sanity() def test_serializable_file_with_directory_yaml(): example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, SerializableFileType.YAML) + saved_path = example.save_file(temp_dir, "yaml") assert Path(saved_path).exists() assert saved_path.endswith(".yaml") loaded_example = ExampleModel.load_file(saved_path) @@ -102,45 +99,53 @@ def test_serializable_file_with_directory_yaml(): assert loaded_example.age == 30 -@pytest.mark.smoke() -def test_serializable_save_file_invalid_extension(): +@pytest.mark.sanity() +def test_serializable_file_infer_extension(): + example = ExampleModel(name="John Doe", age=30) + with tempfile.TemporaryDirectory() as temp_dir: + inferred_path = example.save_file(temp_dir, "json") + assert Path(inferred_path).exists() + assert inferred_path.endswith(".json") + loaded_example = ExampleModel.load_file(inferred_path) + assert loaded_example.name == "John Doe" + assert loaded_example.age == 30 + + +@pytest.mark.regression() 
+def test_serializable_file_invalid_extension(): + # to file example = ExampleModel(name="John Doe", age=30) with tempfile.TemporaryDirectory() as temp_dir: invalid_file_path = Path(temp_dir) / "example.txt" with pytest.raises(ValueError, match="Unsupported file extension.*"): example.save_file(invalid_file_path) + # to directory + with tempfile.TemporaryDirectory() as temp_dir: + invalid_file_path = Path(temp_dir) + with pytest.raises(ValueError, match="Unsupported file extension.*"): + example.save_file(invalid_file_path, type_="txt") # type: ignore -@pytest.mark.smoke() -def test_serializable_load_file_invalid_extension(): + # from file with tempfile.TemporaryDirectory() as temp_dir: invalid_file_path = Path(temp_dir) / "example.txt" with invalid_file_path.open("w") as file: file.write("invalid content") - with pytest.raises(ValueError, match="Unsupported file extension: TXT"): + with pytest.raises(ValueError, match="Unsupported file extension.*"): ExampleModel.load_file(invalid_file_path) -@pytest.mark.smoke() -def test_serializable_file_no_type_provided(): - example = ExampleModel(name="John Doe", age=30) +@pytest.mark.regression() +def test_serializable_load_missing_path(): with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example" - saved_path = example.save_file(file_path) - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 + invalid_file_path = Path(temp_dir) / "example.yaml" + with pytest.raises(FileNotFoundError): + ExampleModel.load_file(invalid_file_path) -@pytest.mark.smoke() -def test_serializable_file_infer_extension(): - example = ExampleModel(name="John Doe", age=30) +@pytest.mark.regression() +def test_serializable_load_non_file_path(): with tempfile.TemporaryDirectory() as temp_dir: - inferred_path = example.save_file(temp_dir, SerializableFileType.JSON) - assert Path(inferred_path).exists() - assert inferred_path.endswith(".json") - loaded_example = ExampleModel.load_file(inferred_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 + invalid_file_path = Path(temp_dir) + with pytest.raises(ValueError, match="Path is not a file.*"): + ExampleModel.load_file(invalid_file_path) diff --git a/tests/unit/executor/test_base.py b/tests/unit/executor/test_base.py new file mode 100644 index 0000000..844cf7f --- /dev/null +++ b/tests/unit/executor/test_base.py @@ -0,0 +1,542 @@ +from typing import List, Optional, Union +from unittest.mock import create_autospec, patch + +import pytest + +from guidellm.backend import Backend +from guidellm.config import settings +from guidellm.core import ( + TextGenerationBenchmarkReport, +) +from guidellm.executor import ( + Executor, + ExecutorResult, + Profile, + ProfileGenerationMode, + ProfileGenerator, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler import Scheduler, SchedulerResult + + +@pytest.fixture() +def mock_scheduler(): + with patch("guidellm.executor.base.Scheduler") as mock_scheduler: + + def scheduler_constructor(*args, **kwargs): + mock_instance = create_autospec(Scheduler, instance=True) + mock_instance.args = args + mock_instance.kwargs = kwargs + num_requests = kwargs.get("max_number", 10) + + async def run(): + benchmark = create_autospec( + TextGenerationBenchmarkReport, instance=True + ) + benchmark.completed_request_rate = kwargs.get("rate", None) + yield SchedulerResult( + 
completed=False, + count_total=10, + count_completed=0, + benchmark=benchmark, + current_result=None, + ) + + for index in range(num_requests): + yield SchedulerResult( + completed=False, + count_total=10, + count_completed=index + 1, + benchmark=benchmark, + current_result=create_autospec( + TextGenerationBenchmarkReport, instance=True + ), + ) + + yield SchedulerResult( + completed=True, + count_total=num_requests, + count_completed=num_requests, + benchmark=benchmark, + current_result=None, + ) + + mock_instance.run.side_effect = run + + return mock_instance + + mock_scheduler.side_effect = scheduler_constructor + yield mock_scheduler + + +@pytest.mark.smoke() +def test_executor_result_instantiation(): + report = create_autospec(TextGenerationBenchmarkReport, instance=True) + scheduler_result = create_autospec(SchedulerResult, instance=True) + executor_result = ExecutorResult( + completed=True, + count_total=10, + count_completed=5, + generation_modes=["synchronous", "throughput", "constant"], + report=report, + scheduler_result=scheduler_result, + ) + + assert executor_result.completed is True + assert executor_result.count_total == 10 + assert executor_result.count_completed == 5 + assert executor_result.report == report + assert executor_result.scheduler_result == scheduler_result + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("sweep", None), + ("synchronous", None), + ("throughput", None), + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 10), + ("poisson", [10, 20, 30]), + ], +) +def test_executor_instantiation(mode, rate): + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode=mode, + rate=rate, + max_number=100, + max_duration=60.0, + ) + + assert executor.backend == backend + assert executor.request_generator == request_generator + assert executor.profile_generator is not None + assert isinstance(executor.profile_generator, ProfileGenerator) + assert executor.profile_generator.mode == mode + assert ( + executor.profile_generator.rates == rate + if not rate or isinstance(rate, list) + else [rate] + ) + assert executor.max_number == 100 + assert executor.max_duration == 60.0 + + +def _check_executor_result_base( + result: ExecutorResult, + expected_completed: bool, + expected_count_total: int, + expected_count_completed: int, + expected_generation_modes: List[ProfileGenerationMode], +): + assert result.completed == expected_completed + assert result.count_total == expected_count_total + assert result.count_completed == expected_count_completed + assert result.generation_modes == expected_generation_modes + + +def _check_executor_result_report( + result: ExecutorResult, + mode: ProfileGenerationMode, + rate: Optional[Union[float, List[float]]], + max_number: Optional[int], + max_duration: Optional[float], + benchmarks_count: int, +): + assert result.report is not None + assert isinstance(result.report, TextGenerationBenchmarkReport) + + # check args + for expected in ( + "backend_type", + "target", + "model", + "data_type", + "data", + "tokenizer", + "mode", + "rate", + "max_number", + "max_duration", + ): + assert expected in result.report.args + + assert result.report.args["mode"] == mode + assert ( + result.report.args["rate"] == rate + if rate is None or not isinstance(rate, (float, int)) + else [rate] + ) + assert result.report.args["max_number"] == max_number + assert 
result.report.args["max_duration"] == max_duration + + # check benchmarks + assert len(result.report.benchmarks) == benchmarks_count + for benchmark in result.report.benchmarks: + assert isinstance(benchmark, TextGenerationBenchmarkReport) + + +def _check_executor_result_scheduler( + result: ExecutorResult, + expected_scheduler_result: bool, + expected_generation_modes: List[ProfileGenerationMode], + expected_index: Optional[int], + expected_profile_mode: Optional[ProfileGenerationMode], + expected_profile_rate: Optional[float], +): + if not expected_scheduler_result: + assert result.scheduler_result is None + assert result.current_index is None + assert result.current_profile is None + + return + + assert result.scheduler_result is not None + assert isinstance(result.scheduler_result, SchedulerResult) + assert result.current_index == expected_index + assert result.current_profile is not None + assert isinstance(result.current_profile, Profile) + assert result.current_profile.load_gen_mode == expected_profile_mode + assert result.current_profile.load_gen_rate == expected_profile_rate + assert ( + result.current_profile.load_gen_mode + == expected_generation_modes[expected_index] # type: ignore + ) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +async def test_executor_run_sweep(mock_scheduler): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode="sweep", + rate=None, + max_number=num_requests, + ) + + num_profiles = 2 + settings.num_sweep_profiles + generation_modes = ["synchronous", "throughput"] + [ + "constant" + ] * settings.num_sweep_profiles + generation_rates = [None, None] + list(range(2, settings.num_sweep_profiles + 2)) + output_rates = [1, settings.num_sweep_profiles + 1] + list( + range(2, settings.num_sweep_profiles + 2) + ) + + iterator = executor.run() + + # Check start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=0, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, # type: ignore + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + for scheduler_index in range(num_profiles): + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=generation_modes, # type: ignore + expected_index=scheduler_index, + expected_profile_mode=generation_modes[scheduler_index], # type: ignore + 
expected_profile_rate=generation_rates[scheduler_index], + ) + # set the rate for the benchmark for sweep profile generation + result.report.benchmarks[-1].completed_request_rate = output_rates[ # type: ignore + scheduler_index + ] + result.report.benchmarks[-1].request_count = num_requests # type: ignore + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=num_profiles, + expected_count_completed=num_profiles, + expected_generation_modes=generation_modes, # type: ignore + ) + _check_executor_result_report( + result=result, + mode="sweep", + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=num_profiles, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, # type: ignore + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "throughput", + ], +) +async def test_executor_run_non_rate_modes(mock_scheduler, mode): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode=mode, + rate=None, + max_number=num_requests, + ) + + iterator = executor.run() + + # Check start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=1, + expected_count_completed=0, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=[mode], + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=1, + expected_count_completed=0 if request_index < num_requests + 1 else 1, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=0 if request_index < num_requests + 1 else 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=[mode], + expected_index=0, + expected_profile_mode=mode, + expected_profile_rate=None, + ) + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=1, + expected_count_completed=1, + expected_generation_modes=[mode], + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=None, + max_number=num_requests, + max_duration=None, + benchmarks_count=1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=[mode], + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + +@pytest.mark.smoke() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 
10), + ("poisson", [10, 20, 30]), + ], +) +async def test_executor_run_rate_modes(mock_scheduler, mode, rate): + num_requests = 15 + + backend = create_autospec(Backend, instance=True) + request_generator = create_autospec(RequestGenerator, instance=True) + executor = Executor( + backend=backend, + request_generator=request_generator, + mode=mode, + rate=rate, + max_number=num_requests, + ) + + num_profiles = len(rate) if isinstance(rate, list) else 1 + generation_modes = [mode] * num_profiles + generation_rates = rate if isinstance(rate, list) else [rate] + + iterator = executor.run() + + # Check start result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=0, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=0, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) + + for scheduler_index in range(num_profiles): + for request_index in range(num_requests + 2): + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=False, + expected_count_total=num_profiles, + expected_count_completed=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=scheduler_index + if request_index < num_requests + 1 + else scheduler_index + 1, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=True, + expected_generation_modes=generation_modes, + expected_index=scheduler_index, + expected_profile_mode=generation_modes[scheduler_index], + expected_profile_rate=generation_rates[scheduler_index], + ) + + # Check end result + result = await iterator.__anext__() + _check_executor_result_base( + result=result, + expected_completed=True, + expected_count_total=num_profiles, + expected_count_completed=num_profiles, + expected_generation_modes=generation_modes, + ) + _check_executor_result_report( + result=result, + mode=mode, + rate=rate, + max_number=num_requests, + max_duration=None, + benchmarks_count=num_profiles, + ) + _check_executor_result_scheduler( + result=result, + expected_scheduler_result=False, + expected_generation_modes=generation_modes, + expected_index=None, + expected_profile_mode=None, + expected_profile_rate=None, + ) diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py deleted file mode 100644 index f0f3968..0000000 --- a/tests/unit/executor/test_executor.py +++ /dev/null @@ -1,86 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest -from guidellm.backend.base import Backend -from guidellm.executor import Executor, Profile, ProfileGenerator -from guidellm.executor.profile_generator import ProfileGenerationMode -from guidellm.request.base import RequestGenerator -from guidellm.scheduler import LoadGenerationMode - - -def test_executor_creation(): - mock_request_generator = MagicMock(spec=RequestGenerator) - mock_backend = MagicMock(spec=Backend) - profile_mode = ProfileGenerationMode.SWEEP - profile_args = None - 
max_requests = None - max_duration = None - executor = Executor( - mock_backend, - mock_request_generator, - profile_mode, - profile_args, - max_requests, - max_duration, - ) - assert executor.request_generator == mock_request_generator - assert executor.backend == mock_backend - assert executor.max_requests == max_requests - assert executor.max_duration == max_duration - - -@pytest.fixture() -def mock_request_generator(): - return MagicMock(spec=RequestGenerator) - - -@pytest.fixture() -def mock_backend(): - return MagicMock(spec=Backend) - - -@pytest.fixture() -def mock_scheduler(): - with patch("guidellm.executor.executor.Scheduler") as MockScheduler: - yield MockScheduler - - -def test_executor_run(mock_request_generator, mock_backend, mock_scheduler): - mock_profile_generator = MagicMock(spec=ProfileGenerator) - profiles = [ - Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=1.0), - Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=2.0), - None, - ] - mock_profile_generator.next.side_effect = profiles - - with patch( - "guidellm.executor.executor.ProfileGenerator.create", - return_value=mock_profile_generator, - ): - executor = Executor( - request_generator=mock_request_generator, - backend=mock_backend, - profile_mode=ProfileGenerationMode.FIXED_RATE, - profile_args={ - "load_gen_mode": LoadGenerationMode.CONSTANT, - "rates": [1.0, 2.0], - }, - max_requests=10, - max_duration=100, - ) - - mock_benchmark = MagicMock() - mock_scheduler.return_value.run.return_value = mock_benchmark - - report = executor.run() - - assert mock_scheduler.call_count == 2 - assert len(report.benchmarks) == 2 - assert report.benchmarks[0] == mock_benchmark - assert report.benchmarks[1] == mock_benchmark - calls = mock_scheduler.call_args_list - assert calls[0][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT - assert calls[0][1]["load_gen_rate"] == 1.0 - assert calls[1][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT - assert calls[1][1]["load_gen_rate"] == 2.0 diff --git a/tests/unit/executor/test_fixed_rate_profile_generation_mode.py b/tests/unit/executor/test_fixed_rate_profile_generation_mode.py deleted file mode 100644 index bdb5fc5..0000000 --- a/tests/unit/executor/test_fixed_rate_profile_generation_mode.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import List, Optional - -import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.parametrize( - "load_gen_mode", - [ - LoadGenerationMode.SYNCHRONOUS, - LoadGenerationMode.POISSON, - LoadGenerationMode.CONSTANT, - ], -) -def test_executor_single_profile_generator_benchmark_report( - mocker, - openai_backend_factory, - load_gen_mode, -): - scheduler_run_patch = mocker.patch( - "guidellm.scheduler.scheduler.Scheduler.run", - return_value=TextGenerationBenchmark(mode="test", rate=1.0), - ) - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - rates: Optional[List[float]] = [1.0] - if load_gen_mode == LoadGenerationMode.SYNCHRONOUS: - rates = None - profile_generator_kwargs = {"load_gen_mode": load_gen_mode, "rates": rates} - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=ProfileGenerationMode.FIXED_RATE, - profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=None, - ) - - report: 
TextGenerationBenchmarkReport = executor.run() - - assert scheduler_run_patch.call_count == 1 - assert len(report.benchmarks) == 1 - assert report.benchmarks[0].mode == "test" diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py index 897b8e3..9c91d57 100644 --- a/tests/unit/executor/test_profile_generator.py +++ b/tests/unit/executor/test_profile_generator.py @@ -1,154 +1,204 @@ -from unittest.mock import MagicMock +from typing import get_args +from unittest.mock import create_autospec -import numpy as np import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import ( - FixedRateProfileGenerator, - ProfileGenerationMode, - ProfileGenerator, - SweepProfileGenerator, + +from guidellm import settings +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationBenchmarkReport, +) +from guidellm.executor import Profile, ProfileGenerationMode, ProfileGenerator + + +@pytest.mark.smoke() +def test_profile_generator_mode(): + assert set(get_args(ProfileGenerationMode)) == { + "sweep", + "synchronous", + "throughput", + "constant", + "poisson", + } + + +@pytest.mark.smoke() +def test_profile_instantiation(): + profile = Profile(load_gen_mode="constant", load_gen_rate=10) + assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == 10 + assert profile.args == {} + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("sweep", None), + ("synchronous", None), + ("throughput", None), + ("constant", 10), + ("constant", [10, 20, 30]), + ("poisson", 10), + ("poisson", [10, 20, 30]), + ], ) -from guidellm.scheduler import LoadGenerationMode - -# Fixed Rate Profile Generator - - -def test_fixed_rate_profile_generator_creation(): - rates = [1.0] - load_gen_mode = LoadGenerationMode.CONSTANT - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, - ) - assert isinstance(test_profile_generator, FixedRateProfileGenerator) - assert test_profile_generator._rates == rates - assert test_profile_generator._load_gen_mode == load_gen_mode - assert test_profile_generator._rate_index == 0 - - -def test_synchronous_mode_rate_list_error(): - rates = [1.0] - load_gen_mode = LoadGenerationMode.SYNCHRONOUS - with pytest.raises( - ValueError, - match="custom rates are not supported in synchronous mode", - ): - ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, +def test_profile_generator_instantiation(mode, rate): + generator = ProfileGenerator(mode=mode, rate=rate) + assert generator.mode == mode + + if rate is None: + assert generator.rates is None + elif isinstance(rate, list): + assert generator.rates == rate + else: + assert generator.rates == [rate] + + if mode == "sweep": + assert len(generator) == settings.num_sweep_profiles + 2 + assert ( + generator.profile_generation_modes + == ["synchronous", "throughput"] + + ["constant"] * settings.num_sweep_profiles ) + elif mode in ("throughput", "synchronous"): + assert len(generator) == 1 + assert generator.profile_generation_modes == [mode] + else: + assert len(generator) == len(rate) if isinstance(rate, list) else 1 + assert generator.profile_generation_modes == [mode] * ( + len(rate) if isinstance(rate, list) else 1 + ) + + assert generator.generated_count == 0 + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + # invalid modes + 
("invalid_mode", None), + # rates supplied for non-applicable modes + ("sweep", 10), + ("sweep", [10, 20, 30]), + ("synchronous", 10), + ("synchronous", [10, 20, 30]), + ("throughput", 10), + ("throughput", [10, 20, 30]), + # invalid rates supplied for applicable modes + ("constant", None), + ("constant", -1), + ("constant", 0), + ("poisson", None), + ("poisson", -1), + ("poisson", 0), + ], +) +def test_profile_generator_invalid_instantiation(mode, rate): + with pytest.raises(ValueError): + ProfileGenerator(mode=mode, rate=rate) + + +@pytest.mark.sanity() +def test_profile_generator_next_sweep(): + generator = ProfileGenerator(mode="sweep") + current_report = TextGenerationBenchmarkReport() + + for index in range(settings.num_sweep_profiles + 2): + profile: Profile = generator.next(current_report) # type: ignore + + if index == 0: + assert profile.load_gen_mode == "synchronous" + assert profile.load_gen_rate is None + mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) + mock_benchmark.completed_request_rate = 1 + current_report.add_benchmark(mock_benchmark) + elif index == 1: + assert profile.load_gen_mode == "throughput" + assert profile.load_gen_rate is None + mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) + mock_benchmark.completed_request_rate = 10 + current_report.add_benchmark(mock_benchmark) + else: + assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == index + + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +def test_profile_generator_next_synchronous(): + generator = ProfileGenerator(mode="synchronous") + current_report = TextGenerationBenchmarkReport() + + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "synchronous" + assert profile.load_gen_rate is None + assert generator.generated_count == 1 + + for _ in range(3): + assert generator.next(current_report) is None -def test_next_with_multiple_rates(): - rates = [1.0, 2.0] - load_gen_mode = LoadGenerationMode.CONSTANT - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - rates=rates, - load_gen_mode=load_gen_mode, - ) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - for rate in rates: - current_profile = test_profile_generator.next(mock_report) - assert current_profile is not None - assert current_profile.load_gen_rate == rate - assert current_profile.load_gen_mode == LoadGenerationMode.CONSTANT - assert test_profile_generator.next(mock_report) is None - - -def test_next_with_sync_mode(): - load_gen_mode = LoadGenerationMode.SYNCHRONOUS - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.FIXED_RATE, - load_gen_mode=load_gen_mode, - ) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - current_profile = test_profile_generator.next(mock_report) - assert current_profile is not None - assert current_profile.load_gen_rate is None - assert current_profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS - assert test_profile_generator.next(mock_report) is None - - -# Sweep Profile Generator - - -def test_sweep_profile_generator_creation(): - test_profile_generator = ProfileGenerator.create( - ProfileGenerationMode.SWEEP, - ) - assert isinstance(test_profile_generator, SweepProfileGenerator) - assert not test_profile_generator._sync_run - assert not test_profile_generator._max_found - assert test_profile_generator._pending_rates is None 
- assert test_profile_generator._pending_rates is None - - -def test_first_profile_is_synchronous(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - profile = test_profile_generator.next(mock_report) - assert profile is not None +@pytest.mark.sanity() +def test_profile_generator_next_throughput(): + generator = ProfileGenerator(mode="throughput") + current_report = TextGenerationBenchmarkReport() + + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "throughput" assert profile.load_gen_rate is None - assert profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS - - -def test_rate_doubles(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - benchmarks = [mock_benchmark] - mock_report.benchmarks = benchmarks - test_profile_generator.next(mock_report) - - profile = test_profile_generator.next(mock_report) - assert profile is not None - assert profile.load_gen_rate == 4.0 - - -def test_max_found(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_overloaded_benchmark.overloaded = True - mock_overloaded_benchmark.rate = 4.0 - mock_overloaded_benchmark.request_rate = 4.0 - benchmarks = [mock_benchmark, mock_overloaded_benchmark] - mock_report.benchmarks = benchmarks - - test_profile_generator.next(mock_report) - profile = test_profile_generator.next(mock_report) - assert profile is not None - - # if benchmark wasn't overloaded, rates would have doubled to 8 - assert profile.load_gen_rate == 2.0 - - -def test_pending_rates(): - test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) - mock_report = MagicMock(spec=TextGenerationBenchmarkReport) - mock_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_benchmark.overloaded = False - mock_benchmark.rate = 2.0 - mock_benchmark.request_rate = 2.0 - mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) - mock_overloaded_benchmark.overloaded = True - mock_overloaded_benchmark.rate = 8.0 - mock_overloaded_benchmark.request_rate = 8.0 - benchmarks = [mock_benchmark, mock_overloaded_benchmark] - mock_report.benchmarks = benchmarks - profile = test_profile_generator.next(mock_report) - for expected_rate in np.linspace(2.0, 8.0, 10): - profile = test_profile_generator.next(mock_report) - assert profile is not None - assert profile.load_gen_rate == expected_rate + assert generator.generated_count == 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + "rate", + [ + 10, + [10, 20, 30], + ], +) +def test_profile_generator_next_constant(rate): + generator = ProfileGenerator(mode="constant", rate=rate) + test_rates = rate if isinstance(rate, list) else [rate] + current_report = TextGenerationBenchmarkReport() + + for index, test_rate in enumerate(test_rates): + profile: Profile = generator.next(current_report) # type: ignore + 
assert profile.load_gen_mode == "constant" + assert profile.load_gen_rate == test_rate + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + "rate", + [ + 10, + [10, 20, 30], + ], +) +def test_profile_generator_next_poisson(rate): + generator = ProfileGenerator(mode="poisson", rate=rate) + test_rates = rate if isinstance(rate, list) else [rate] + current_report = TextGenerationBenchmarkReport() + + for index, test_rate in enumerate(test_rates): + profile: Profile = generator.next(current_report) # type: ignore + assert profile.load_gen_mode == "poisson" + assert profile.load_gen_rate == test_rate + assert generator.generated_count == index + 1 + + for _ in range(3): + assert generator.next(current_report) is None diff --git a/tests/unit/executor/test_sweep_profile_generation_mode.py b/tests/unit/executor/test_sweep_profile_generation_mode.py deleted file mode 100644 index e3b9b67..0000000 --- a/tests/unit/executor/test_sweep_profile_generation_mode.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.scheduler import LoadGenerationMode - -from tests import dummy - - -@pytest.mark.skip("SweepProfileGenerator never break.") -@pytest.mark.parametrize( - "load_gen_mode", - [ - LoadGenerationMode.SYNCHRONOUS, - LoadGenerationMode.POISSON, - LoadGenerationMode.CONSTANT, - ], -) -def test_executor_sweep_profile_generator_benchmark_report( - mocker, - openai_backend_factory, - load_gen_mode, -): - scheduler_run_patch = mocker.patch( - "guidellm.scheduler.scheduler.Scheduler.run", - return_value=TextGenerationBenchmark(mode="test", rate=1.0), - ) - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - profile_generator_kwargs = {"rate_type": load_gen_mode, "rate": 1.0} - - executor = Executor( - backend=openai_backend_factory(), - request_generator=request_genrator, - profile_mode=ProfileGenerationMode.SWEEP, - profile_args=profile_generator_kwargs, - max_requests=1, - max_duration=None, - ) - - report: TextGenerationBenchmarkReport = executor.run() - - assert scheduler_run_patch.call_count == 1 - assert len(report.benchmarks) == 1 - assert report.benchmarks[0].mode == "test" diff --git a/tests/unit/request/test_base.py b/tests/unit/request/test_base.py index 8c71d02..8b75be1 100644 --- a/tests/unit/request/test_base.py +++ b/tests/unit/request/test_base.py @@ -1,30 +1,31 @@ -from unittest.mock import Mock, patch +import re +import time +from typing import List +from unittest.mock import MagicMock, Mock, patch import pytest -from guidellm.core import TextGenerationRequest +from guidellm.core import TextGenerationRequest from tests.dummy.services import TestRequestGenerator @pytest.mark.smoke() -def test_request_generator_sync_constructor(): +def test_request_generator_sync_constructor(mock_auto_tokenizer): generator = TestRequestGenerator(mode="sync") assert generator.mode == "sync" assert generator.async_queue_size == 50 # Default value - assert generator.tokenizer is None @pytest.mark.smoke() -def test_request_generator_async_constructor(): +def test_request_generator_async_constructor(mock_auto_tokenizer): generator = TestRequestGenerator(mode="async", async_queue_size=10) assert generator.mode == "async" assert generator.async_queue_size == 10 - assert generator.tokenizer is None 
generator.stop()
 
 
 @pytest.mark.smoke()
-def test_request_generator_sync_iter():
+def test_request_generator_sync_iter(mock_auto_tokenizer):
     generator = TestRequestGenerator(mode="sync")
     items = []
     for item in generator:
@@ -37,7 +38,7 @@ def test_request_generator_sync_iter():
 
 
 @pytest.mark.smoke()
-def test_request_generator_async_iter():
+def test_request_generator_async_iter(mock_auto_tokenizer):
     generator = TestRequestGenerator(mode="async")
     items = []
     for item in generator:
@@ -50,31 +51,8 @@ def test_request_generator_async_iter():
     assert items[0].prompt == "Test prompt"
 
 
-@pytest.mark.regression()
-def test_request_generator_with_mock_tokenizer():
-    mock_tokenizer = Mock()
-    generator = TestRequestGenerator(tokenizer=mock_tokenizer)
-    assert generator.tokenizer == mock_tokenizer
-
-    with patch(
-        "guidellm.request.base.AutoTokenizer",
-    ) as MockAutoTokenizer:  # noqa: N806
-        MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer
-        generator = TestRequestGenerator(tokenizer="mock-tokenizer")
-        assert generator.tokenizer == mock_tokenizer
-        MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer")
-
-
-@pytest.mark.regression()
-def test_request_generator_repr():
-    generator = TestRequestGenerator(mode="sync", async_queue_size=100)
-    assert repr(generator) == (
-        "RequestGenerator(mode=sync, async_queue_size=100, tokenizer=None)"
-    )
-
-
-@pytest.mark.regression()
-def test_request_generator_iter_calls_create_item():
+@pytest.mark.smoke()
+def test_request_generator_iter_calls_create_item(mock_auto_tokenizer):
     generator = TestRequestGenerator(mode="sync")
     generator.create_item = Mock(  # type: ignore
         return_value=TextGenerationRequest(prompt="Mock prompt"),
@@ -90,8 +68,8 @@ def test_request_generator_iter_calls_create_item():
     generator.create_item.assert_called()
 
 
-@pytest.mark.regression()
-def test_request_generator_async_iter_calls_create_item():
+@pytest.mark.smoke()
+def test_request_generator_async_iter_calls_create_item(mock_auto_tokenizer):
     generator = TestRequestGenerator(mode="sync")
     generator.create_item = Mock(  # type: ignore
         return_value=TextGenerationRequest(prompt="Mock prompt"),
@@ -106,3 +84,69 @@ def test_request_generator_async_iter_calls_create_item():
     generator.stop()
     assert len(items) == 5
     generator.create_item.assert_called()
+
+
+@pytest.mark.sanity()
+def test_request_generator_repr(mock_auto_tokenizer):
+    generator = TestRequestGenerator(mode="sync", async_queue_size=100)
+    repr_str = repr(generator)
+    assert repr_str.startswith("RequestGenerator(")
+    assert "mode=sync" in repr_str
+    assert "async_queue_size=100" in repr_str
+    assert "tokenizer=" in repr_str
+
+
+@pytest.mark.regression()
+def test_request_generator_with_mock_tokenizer():
+    def _fake_tokenize(text: str) -> List[int]:
+        tokens = re.findall(r"\w+|[^\w\s]", text)
+        return [0] * len(tokens)
+
+    mock_tokenizer = MagicMock()
+    mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize)
+
+    generator = TestRequestGenerator(tokenizer=mock_tokenizer)
+    assert generator.tokenizer == mock_tokenizer
+
+    with patch(
+        "guidellm.request.base.AutoTokenizer",
+    ) as MockAutoTokenizer:  # noqa: N806
+        MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer
+        generator = TestRequestGenerator(tokenizer="mock-tokenizer")
+        assert generator.tokenizer == mock_tokenizer
+        MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer")
+
+
+@pytest.mark.regression()
+def test_request_generator_populate_queue(mock_auto_tokenizer):
+    generator = TestRequestGenerator(mode="async", async_queue_size=2)
+    generator.create_item = Mock(  # type: ignore
+        return_value=TextGenerationRequest(prompt="Mock prompt")
+    )
+
+ time.sleep(0.2) # Allow some time for the queue to populate + generator.stop() + assert generator._queue.qsize() > 0 + + +@pytest.mark.regression() +def test_request_generator_async_stop_during_population(mock_auto_tokenizer): + generator = TestRequestGenerator(mode="async", async_queue_size=2) + generator.create_item = Mock( # type: ignore + return_value=TextGenerationRequest(prompt="Mock prompt") + ) + + time.sleep(0.1) # Allow some time for the queue to start populating + generator.stop() + + # Ensure the stop event is set and thread is no longer alive + assert generator._stop_event.is_set() + assert not generator._thread.is_alive() diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py new file mode 100644 index 0000000..699b1d6 --- /dev/null +++ b/tests/unit/request/test_emulated.py @@ -0,0 +1,365 @@ +import json +import tempfile +from pathlib import Path +from typing import Tuple, Union + +import numpy as np +import pytest +from transformers import PreTrainedTokenizer # type: ignore + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.emulated import ( + EmulatedConfig, + EmulatedRequestGenerator, + EndlessTokens, +) + + +@pytest.mark.smoke() +def test_emulated_config_construction(): + config = EmulatedConfig( + prompt_tokens=10, + prompt_tokens_variance=2, + prompt_tokens_min=5, + prompt_tokens_max=15, + generated_tokens=20, + generated_tokens_variance=4, + generated_tokens_min=10, + generated_tokens_max=30, + ) + assert config.prompt_tokens == 10 + assert config.prompt_tokens_variance == 2 + assert config.prompt_tokens_min == 5 + assert config.prompt_tokens_max == 15 + assert config.generated_tokens == 20 + assert config.generated_tokens_variance == 4 + assert config.generated_tokens_min == 10 + assert config.generated_tokens_max == 30 + + +@pytest.mark.smoke() +def test_emulated_config_create_dict(): + config_dict = { + "prompt_tokens": 10, + "prompt_tokens_variance": 2, + "prompt_tokens_min": 5, + "prompt_tokens_max": 15, + "generated_tokens": 20, + "generated_tokens_variance": 4, + "generated_tokens_min": 10, + "generated_tokens_max": 30, + } + config = EmulatedConfig.create_config(config_dict) + assert config.prompt_tokens == 10 + assert config.prompt_tokens_variance == 2 + assert config.prompt_tokens_min == 5 + assert config.prompt_tokens_max == 15 + assert config.generated_tokens == 20 + assert config.generated_tokens_variance == 4 + assert config.generated_tokens_min == 10 + assert config.generated_tokens_max == 30 + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("base", "variance", "min_tokens", "max_tokens", "expected_range"), + [ + (10, 2, None, None, (1, 10 + 5 * 2)), + (10, 2, 5, 15, (5, 15)), + (10, None, 5, 15, (5, 15)), + (10, 2, 1, None, (1, 10 + 5 * 2)), + ], +) +def test_emulated_config_token_range( + base: int, + variance: int, + min_tokens: int, + max_tokens: int, + expected_range: Tuple[int, int], +): + assert ( + EmulatedConfig._token_range(base, variance, min_tokens, max_tokens) + == expected_range + ) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("base", "variance", "min_tokens", "max_tokens", "expected_range"), + [ + (10, None, None, None, (10, 10)), + (10, 5, None, None, (1, 10 + 5 * 2)), + (10, 5, 5, 15, (5, 15)), + (10, None, 5, 15, (5, 15)), + (10, 5, 2, None, (2, 10 + 5 * 2)), + (10, 5, None, 20, (1, 20)), + ], +) +def test_emulated_config_sample_tokens( + base: int, + variance: int, + min_tokens: int, + max_tokens: int, + expected_range: Tuple[int, int], +): + rng = 
np.random.default_rng() + + for _ in range(100): + token_count = EmulatedConfig._sample_tokens( + base, variance, min_tokens, max_tokens, rng + ) + assert token_count >= expected_range[0] + assert token_count <= expected_range[1] + + +@pytest.mark.sanity() +def test_emulated_config_create(): + test_dict = { + "prompt_tokens": 10, + "prompt_tokens_variance": 2, + "prompt_tokens_min": 5, + "prompt_tokens_max": 15, + "generated_tokens": 20, + "generated_tokens_variance": 4, + "generated_tokens_min": 10, + "generated_tokens_max": 30, + } + compare_config = EmulatedConfig(**test_dict) + + # test dict + test_config = EmulatedConfig.create_config(test_dict) + assert ( + test_config == compare_config + ), f"Dictionary creation failed: {test_config} != {compare_config}" + + # test json str + test_config = EmulatedConfig.create_config(json.dumps(test_dict)) + assert ( + test_config == compare_config + ), f"JSON string creation failed: {test_config} != {compare_config}" + + # test json file str path + with tempfile.TemporaryDirectory() as temp_dir: + test_path = Path(temp_dir) / "test.json" + test_path.write_text(json.dumps(test_dict)) + test_config = EmulatedConfig.create_config(str(test_path)) + assert ( + test_config == compare_config + ), f"JSON file path creation failed: {test_config} != {compare_config}" + + # test json file Path object + with tempfile.TemporaryDirectory() as temp_dir: + test_path = Path(temp_dir) / "test.json" + test_path.write_text(json.dumps(test_dict)) + test_config = EmulatedConfig.create_config(test_path) + assert ( + test_config == compare_config + ), f"JSON file Path object creation failed: {test_config} != {compare_config}" + + # test key value string + test_str = ( + f"prompt_tokens={test_dict['prompt_tokens']}, " + f"prompt_tokens_variance={test_dict['prompt_tokens_variance']}, " + f"prompt_tokens_min={test_dict['prompt_tokens_min']}, " + f"prompt_tokens_max={test_dict['prompt_tokens_max']}, " + f"generated_tokens={test_dict['generated_tokens']}, " + f"generated_tokens_variance={test_dict['generated_tokens_variance']}, " + f"generated_tokens_min={test_dict['generated_tokens_min']}, " + f"generated_tokens_max={test_dict['generated_tokens_max']}" + ) + test_config = EmulatedConfig.create_config(test_str) + assert ( + test_config == compare_config + ), f"Key value string creation failed: {test_config} != {compare_config}" + + +# EndlessTokens + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "expected_words", "expected_indices"), + [ + ( + "word1 word2 word3\nword4 word5", + ["word1", "word2", "word3", "word4", "word5"], + [0, 3], + ), + ( + "word1 word2\n word3 word4\n word5", + ["word1", "word2", "word3", "word4", "word5"], + [0, 2, 4], + ), + ], +) +def test_endless_data_words_construction(data, expected_words, expected_indices): + tokens = EndlessTokens(data) + assert tokens == expected_words + assert tokens.line_indices == expected_indices + + +@pytest.mark.smoke() +def test_endless_data_words_create_from_basic_file(): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "test.txt" + file_path.write_text("word1 word2 word3\nword4 word5") + + tokens = EndlessTokens(file_path) + assert tokens == ["word1", "word2", "word3", "word4", "word5"] + assert tokens.line_indices == [0, 3] + + tokens = EndlessTokens(str(file_path)) + assert tokens == ["word1", "word2", "word3", "word4", "word5"] + assert tokens.line_indices == [0, 3] + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "start", "length", "expected_text"), + [ 
+ ("word1 word2 word3 word4", 0, 2, "word1 word2"), + ("word1 word2\nword3 word4", 1, 2, "word2\nword3"), + ( + "word1 word2\nword3 word4", + 1, + 6, + "word2\nword3 word4 word1 word2\nword3", + ), + ], +) +def test_endless_data_words_create_text(data, start, length, expected_text): + words = EndlessTokens(data) + text = words.create_text(start, length) + assert text == expected_text + + +# EmulatedRequestGenerator + + +@pytest.mark.smoke() +def test_emulated_request_generator_construction(mocker): + mocker.patch( + "guidellm.request.emulated.EmulatedConfig.create_config", + return_value=EmulatedConfig(prompt_tokens=10), + ) + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + generator = EmulatedRequestGenerator(config="mock_config", mode="sync") + assert isinstance(generator._config, EmulatedConfig) + assert isinstance(generator._tokens, EndlessTokens) + + +@pytest.mark.smoke() +def test_emulated_request_generator_create_item(mocker): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + mock_tokenizer = mocker.Mock(PreTrainedTokenizer) + mock_tokenizer.tokenize.return_value = ["word1", "word2"] + generator = EmulatedRequestGenerator( + config={ + "prompt_tokens": 10, + }, + tokenizer=mock_tokenizer, + mode="sync", + ) + item = generator.create_item() + assert isinstance(item, TextGenerationRequest) + + +@pytest.mark.smoke() +def test_emulated_request_generator_sample_prompt(mocker, mock_auto_tokenizer): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + generator = EmulatedRequestGenerator(config={"prompt_tokens": 3}, mode="sync") + prompt = generator.sample_prompt(3) + assert prompt == "word1 word2 word1" + + request = generator.create_item() + assert request.prompt_token_count == 3 + + +@pytest.mark.smoke() +def test_emulated_request_generator_random_seed(mocker): + mocker.patch( + "guidellm.request.emulated.EndlessTokens", + return_value=EndlessTokens("word1 word2"), + ) + + rand_gen = EmulatedRequestGenerator( + config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=42, + mode="sync", + ) + rand_gen_comp_pos = EmulatedRequestGenerator( + config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=42, + mode="sync", + ) + rand_gen_comp_neg = EmulatedRequestGenerator( + config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, + random_seed=43, + mode="sync", + ) + + assert rand_gen.create_item().prompt == rand_gen_comp_pos.create_item().prompt + assert rand_gen.create_item().prompt != rand_gen_comp_neg.create_item().prompt + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("config_type", "config"), + [ + ("dict", {"prompt_tokens": 10, "generated_tokens": 20}), + ("dict", {"prompt_tokens": 10, "prompt_tokens_variance": 2}), + ( + "dict", + { + "prompt_tokens": 10, + "prompt_tokens_min": 5, + "prompt_tokens_max": 15, + "generated_tokens": 20, + }, + ), + ("json_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ("key_value_str", "prompt_tokens=10, generated_tokens=20"), + ("file_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ("file_path", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), + ], +) +def test_emulated_request_generator_lifecycle( + mock_requests_pride_and_prejudice, + mock_auto_tokenizer, + config_type: str, + config: Union[str, dict, Path], +): + if config_type in ["dict", "json_str", "key_value_str"]: + 
generator = EmulatedRequestGenerator(config) + elif config_type in ["file_str", "file_path"]: + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "test.json" + file_path.write_text(config) # type: ignore + generator = EmulatedRequestGenerator( + str(file_path) if config_type == "file_str" else file_path + ) + + for _ in range(5): + request = generator.create_item() + prompt_range = generator._config.prompt_tokens_range + outputs_range = generator._config.output_tokens_range + + assert request.prompt_token_count >= prompt_range[0] # type: ignore + assert request.prompt_token_count <= prompt_range[1] # type: ignore + + prompt_tokens = len(generator.tokenizer.tokenize(request.prompt)) + assert request.prompt_token_count == prompt_tokens + + if generator._config.generated_tokens: + assert len(outputs_range) == 2 + assert request.output_token_count >= outputs_range[0] # type: ignore + assert request.output_token_count <= outputs_range[1] # type: ignore diff --git a/tests/unit/request/test_file.py b/tests/unit/request/test_file.py new file mode 100644 index 0000000..b214e41 --- /dev/null +++ b/tests/unit/request/test_file.py @@ -0,0 +1,98 @@ +import tempfile +from pathlib import Path + +import pytest + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.file import FileRequestGenerator + + +@pytest.mark.smoke() +def test_file_request_generator_constructor(mock_auto_tokenizer): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "example.txt" + file_path.write_text("This is a test.\nThis is another test.") + generator = FileRequestGenerator(file_path) + assert generator._path == file_path + assert generator._data == ["This is a test.", "This is another test."] + assert generator._iterator is not None + + +@pytest.mark.smoke() +def test_file_request_generator_create_item(mock_auto_tokenizer): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / "example.txt" + file_path.write_text("This is a test.\nThis is another test.") + generator = FileRequestGenerator(file_path, mode="sync") + request = generator.create_item() + assert isinstance(request, TextGenerationRequest) + assert request.prompt == "This is a test." 
+ + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("file_extension", "file_content"), + [ + ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), + ( + "csv", + "text,label,extra\n" + "Test content 1.,1,extra 1\n" + "Test content 2.,2,extra 2\n" + "Test content 3.,3,extra 3\n", + ), + ( + "jsonl", + '{"text": "Test content 1."}\n' + '{"text": "Test content 2."}\n' + '{"text": "Test content 3."}\n', + ), + ( + "csv", + "prompt,text,extra\n" + "Test content 1., text 1, extra 1\n" + "Test content 2., text 2, extra 2\n" + "Test content 3., text 3, extra 3\n", + ), + ( + "json", + '[{"text": "Test content 1."}, ' + '{"text": "Test content 2."}, ' + '{"text": "Test content 3."}]\n', + ), + ( + "json", + '{"object_1": {"text": "Test content 1."}, ' + '"object_2": {"text": "Test content 2."}, ' + '"object_3": {"text": "Test content 3."}}\n', + ), + ( + "yaml", + "items:\n" + " - text: Test content 1.\n" + " - text: Test content 2.\n" + " - text: Test content 3.\n", + ), + ( + "yaml", + "object_1:\n text: Test content 1.\n" + "object_2:\n text: Test content 2.\n" + "object_3:\n text: Test content 3.\n", + ), + ], +) +def test_file_request_generator_file_types_lifecycle( + mock_auto_tokenizer, file_extension, file_content +): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = Path(temp_dir) / f"example.{file_extension}" + file_path.write_text(file_content) + generator = FileRequestGenerator(file_path) + + for index, request in enumerate(generator): + assert isinstance(request, TextGenerationRequest) + assert request.prompt == f"Test content {index + 1}." + assert request.prompt_token_count == 3 + + if index == 2: + break diff --git a/tests/unit/request/test_transformers.py b/tests/unit/request/test_transformers.py new file mode 100644 index 0000000..fcf933b --- /dev/null +++ b/tests/unit/request/test_transformers.py @@ -0,0 +1,95 @@ +from unittest.mock import patch + +import pytest + +from guidellm.core.request import TextGenerationRequest +from guidellm.request.transformers import TransformersDatasetRequestGenerator +from tests.dummy.data.transformers import ( + create_sample_dataset, + create_sample_dataset_dict, + create_sample_iterable_dataset, + create_sample_iterable_dataset_dict, +) + + +@pytest.mark.smoke() +def test_transformers_dataset_request_generator_constructor( + mock_auto_tokenizer, +): + dataset = create_sample_dataset() + with patch( + "guidellm.request.transformers.load_transformers_dataset", + return_value=dataset, + ), patch( + "guidellm.request.transformers.resolve_transformers_dataset_column", + return_value="text", + ): + generator = TransformersDatasetRequestGenerator( + dataset="dummy_dataset", + split="train", + column="text", + ) + assert generator._dataset == "dummy_dataset" + assert generator._split == "train" + assert generator._column == "text" + assert generator._hf_dataset == dataset + assert generator._hf_column == "text" + assert generator._hf_dataset_iterator is not None + + +@pytest.mark.smoke() +def test_transformers_dataset_request_generator_create_item( + mock_auto_tokenizer, +): + generator = TransformersDatasetRequestGenerator( + dataset=create_sample_dataset_dict(), + split="train", + column="text", + mode="sync", + ) + request = generator.create_item() + assert isinstance(request, TextGenerationRequest) + assert request.prompt == "sample text 1" + assert request.prompt_token_count == 3 + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset"), + [ + ( + "mock/directory/file.csv", + 
create_sample_dataset_dict(splits=["train"]), + ), + ( + "mock/directory/file.json", + create_sample_dataset(column="prompt"), + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + ), + (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), + (create_sample_dataset(), None), + (create_sample_iterable_dataset_dict(splits=["validation"]), None), + (create_sample_iterable_dataset(), None), + ], +) +def test_transformers_dataset_request_generator_lifecycle( + mock_auto_tokenizer, dataset_arg, dataset +): + with patch( + "guidellm.utils.transformers.load_dataset", + return_value=dataset, + ): + generator = TransformersDatasetRequestGenerator( + dataset=dataset_arg, mode="sync" + ) + + for index, request in enumerate(generator): + assert isinstance(request, TextGenerationRequest) + assert request.prompt == f"sample text {index + 1}" + assert request.prompt_token_count == 3 + + if index == 2: + break diff --git a/tests/unit/scheduler/conftest.py b/tests/unit/scheduler/conftest.py deleted file mode 100644 index c79d27c..0000000 --- a/tests/unit/scheduler/conftest.py +++ /dev/null @@ -1,15 +0,0 @@ -import pytest -from guidellm.core import TextGenerationRequest, TextGenerationResult - - -@pytest.fixture(autouse=True) -def backend_submit_patch(mocker): - patch = mocker.patch( - "guidellm.backend.base.Backend.submit", - return_value=TextGenerationResult( - request=TextGenerationRequest(prompt="Test prompt"), - ), - ) - patch.__name__ = "Backend.submit fallbackBackend.submit fallback" - - return patch diff --git a/tests/unit/scheduler/test_base.py b/tests/unit/scheduler/test_base.py new file mode 100644 index 0000000..b485e59 --- /dev/null +++ b/tests/unit/scheduler/test_base.py @@ -0,0 +1,279 @@ +import asyncio +import time +from unittest.mock import AsyncMock, create_autospec + +import pytest + +from guidellm.backend import Backend +from guidellm.core import ( + TextGenerationBenchmark, + TextGenerationRequest, + TextGenerationResult, +) +from guidellm.request import RequestGenerator +from guidellm.scheduler import ( + LoadGenerator, + Scheduler, + SchedulerResult, +) + + +@pytest.mark.smoke() +def test_scheduler_result(): + benchmark = create_autospec(TextGenerationBenchmark, instance=True) + result = TextGenerationResult( + request=TextGenerationRequest(prompt="prompt"), output="Test output" + ) + scheduler_result = SchedulerResult( + completed=True, + count_total=10, + count_completed=5, + benchmark=benchmark, + current_result=result, + ) + + assert scheduler_result.completed is True + assert scheduler_result.count_total == 10 + assert scheduler_result.count_completed == 5 + assert scheduler_result.benchmark == benchmark + assert scheduler_result.current_result == result + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate", "max_number", "max_duration"), + [ + ("synchronous", None, 10, None), + ("throughput", 5.0, None, 60.0), + ("poisson", 10.0, 100, None), + ("constant", 1.0, None, 120.0), + ], +) +def test_scheduler_instantiation(mode, rate, max_number, max_duration): + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + max_duration=max_duration, + ) + + assert scheduler.generator == generator + assert scheduler.worker == worker + assert scheduler.mode == mode + assert scheduler.rate == rate + assert scheduler.max_number == max_number + assert 
scheduler.max_duration == max_duration + assert isinstance(scheduler.load_generator, LoadGenerator) + assert scheduler.benchmark_mode in {"synchronous", "asynchronous", "throughput"} + + +@pytest.mark.sanity() +@pytest.mark.parametrize( + ("mode", "rate", "max_number", "max_duration"), + [ + # invalid modes + ("invalid_mode", None, 10, None), + # invalid max settings + ("synchronous", None, None, None), + ("synchronous", None, -1, 10), + ("synchronous", None, 10, -1), + # invalid rate settings + ("constant", -1, None, 10), + ("constant", None, None, 10), + ("poisson", -1, None, 10), + ("poisson", None, None, 10), + ], +) +def test_scheduler_invalid_instantiation( + mode, + rate, + max_number, + max_duration, +): + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + with pytest.raises(ValueError): + Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + max_duration=max_duration, + ) + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "throughput", + "poisson", + "constant", + ], +) +async def test_scheduler_run_number(mode): + rate = 10.0 + max_number = 20 + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + # Mock the request generator and backend submit behavior + generator.__iter__.return_value = iter( + [TextGenerationRequest(prompt="Test")] * (max_number * 2) + ) + worker.submit = AsyncMock() + + def _submit(req): + res = TextGenerationResult(request=req, output="Output") + res.start(prompt=req.prompt) + res.output_token("token") + res.end() + return res + + worker.submit.side_effect = _submit + + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_number=max_number, + ) + + run_count = 0 + count_completed = 0 + received_init = False + received_final = False + async for result in scheduler.run(): + run_count += 1 + + assert run_count <= max_number + 2 + assert result.count_total == max_number + assert result.benchmark is not None + assert isinstance(result.benchmark, TextGenerationBenchmark) + + if result.current_result is not None: + count_completed += 1 + + if run_count == 1: + assert not received_init + assert not received_final + assert count_completed == 0 + assert result.count_completed == 0 + assert not result.completed + assert result.current_result is None + received_init = True + elif run_count - 2 == max_number: + assert received_init + assert not received_final + assert count_completed == max_number + assert result.count_completed == max_number + assert result.completed + assert result.current_result is None + received_final = True + else: + assert received_init + assert not received_final + assert count_completed == run_count - 1 + assert result.count_completed == run_count - 1 + assert not result.completed + assert result.current_result is not None + assert isinstance(result.current_result, TextGenerationResult) + + assert received_init + assert received_final + assert count_completed == max_number + + +@pytest.mark.sanity() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "mode", + [ + "synchronous", + "constant", + ], +) +@pytest.mark.flaky(reruns=5) +async def test_scheduler_run_duration(mode): + rate = 10 + max_duration = 2 + generator = create_autospec(RequestGenerator, instance=True) + worker = create_autospec(Backend, instance=True) + + # Mock the request generator and backend submit behavior + 
generator.__iter__.return_value = iter( + [TextGenerationRequest(prompt="Test")] * (rate * max_duration * 100) + ) + worker.submit = AsyncMock() + + async def _submit(req): + await asyncio.sleep(0.1) + res = TextGenerationResult(request=req, output="Output") + res.start(prompt=req.prompt) + res.output_token("token") + res.end() + return res + + worker.submit.side_effect = _submit + + scheduler = Scheduler( + generator, + worker, + mode=mode, + rate=rate, + max_duration=max_duration, + ) + + run_count = 0 + count_completed = 0 + received_init = False + received_final = False + start_time = time.time() + async for result in scheduler.run(): + run_count += 1 + + assert run_count <= max_duration * rate + 2 + assert result.count_total == max_duration + assert result.benchmark is not None + assert isinstance(result.benchmark, TextGenerationBenchmark) + + if result.current_result is not None: + count_completed += 1 + + if run_count == 1: + assert not received_init + assert not received_final + assert count_completed == 0 + assert result.count_completed == 0 + assert not result.completed + assert result.current_result is None + received_init = True + elif time.time() - start_time >= max_duration: + assert received_init + assert not received_final + assert result.count_completed == max_duration + assert result.completed + assert result.current_result is None + received_final = True + else: + assert received_init + assert not received_final + assert result.count_completed == round(time.time() - start_time) + assert not result.completed + assert result.current_result is not None + assert isinstance(result.current_result, TextGenerationResult) + + assert received_init + assert received_final + end_time = time.time() + assert pytest.approx(end_time - start_time, abs=0.1) == max_duration + assert pytest.approx(count_completed, abs=5) == max_duration * rate diff --git a/tests/unit/scheduler/test_basics.py b/tests/unit/scheduler/test_basics.py deleted file mode 100644 index f0ca04a..0000000 --- a/tests/unit/scheduler/test_basics.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.parametrize( - ("load_gen_mode", "max_requests", "max_duration", "load_gen_rate"), - [ - # Sync load generation mode payload - (LoadGenerationMode.SYNCHRONOUS, None, None, None), - (LoadGenerationMode.SYNCHRONOUS, 1, -1, 1.0), - (LoadGenerationMode.SYNCHRONOUS, -1, 1, 1.0), - (LoadGenerationMode.SYNCHRONOUS, None, -1, 1.0), - # Constant load generation mode payload - (LoadGenerationMode.CONSTANT, None, None, 1.0), - (LoadGenerationMode.CONSTANT, -1, 1, 1.0), - (LoadGenerationMode.CONSTANT, 1, 1, None), - (LoadGenerationMode.CONSTANT, 1, 0, None), - (LoadGenerationMode.CONSTANT, 0, 0, None), - # Poisson load generation mode payload - (LoadGenerationMode.POISSON, None, None, 1.0), - (LoadGenerationMode.POISSON, -1, 1, 1.0), - (LoadGenerationMode.POISSON, 1, 1, None), - (LoadGenerationMode.POISSON, 1, 0, None), - (LoadGenerationMode.POISSON, 0, 0, None), - ], -) -def test_scheduler_invalid_parameters( - openai_backend_factory: Callable[..., OpenAIBackend], - load_gen_mode, - max_requests, - max_duration, - load_gen_rate, -): - """ - Test scheduler initializer parameters validation. 
- """ - with pytest.raises(ValueError): - Scheduler( - request_generator=dummy.services.TestRequestGenerator(), - backend=openai_backend_factory(), - load_gen_mode=load_gen_mode, - load_gen_rate=load_gen_rate, - max_requests=max_requests, - max_duration=max_duration, - ) diff --git a/tests/unit/scheduler/test_constant_mode.py b/tests/unit/scheduler/test_constant_mode.py deleted file mode 100644 index 5910329..0000000 --- a/tests/unit/scheduler/test_constant_mode.py +++ /dev/null @@ -1,60 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 2, 3]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=1.0, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 2, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.CONSTANT, - load_gen_rate=1.0, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/scheduler/test_load_generator.py b/tests/unit/scheduler/test_load_generator.py new file mode 100644 index 0000000..6b84ee0 --- /dev/null +++ b/tests/unit/scheduler/test_load_generator.py @@ -0,0 +1,153 @@ +import time +from typing import get_args + +import pytest +from scipy.stats import kstest # type: ignore + +from guidellm.scheduler import LoadGenerationMode, LoadGenerator + + +@pytest.mark.smoke() +def test_load_generator_mode(): + assert set(get_args(LoadGenerationMode)) == { + "synchronous", + "constant", + "poisson", + "throughput", + } + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("constant", 10), + ("poisson", 5), + ("throughput", None), + ("synchronous", None), + ], +) +def test_load_generator_instantiation(mode, rate): + generator = LoadGenerator(mode=mode, rate=rate) + assert generator.mode == mode + assert generator.rate == rate + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("mode", "rate", "expected_error"), + [ + ("invalid_mode", None, ValueError), + ("constant", 0, ValueError), + ("poisson", -1, ValueError), + ], +) +def test_load_generator_invalid_instantiation(mode, rate, expected_error): + with pytest.raises(expected_error): + LoadGenerator(mode=mode, rate=rate) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("mode", "rate"), + [ + ("synchronous", None), + ("throughput", None), + ("constant", 1), + ("poisson", 5), + ], +) +def test_load_generator_times(mode, 
rate): + # first check that the proper method is called + generator = LoadGenerator(mode=mode, rate=rate) + func_name = f"{mode}_times" + assert hasattr(generator, func_name) + assert callable(getattr(generator, func_name)) + + call_count = 0 + + def _increment_call_count(): + nonlocal call_count + call_count += 1 + yield -1.0 + + setattr(generator, func_name, _increment_call_count) + for time_ in generator.times(): + assert time_ == -1.0 + break + assert call_count == 1 + + # now check that the method generates reasonable timestamps + generator = LoadGenerator(mode=mode, rate=rate) + start_time = time.time() + for index, time_ in enumerate(generator.times()): + if index > 10: + break + + if mode == "synchronous": + assert time_ == -1.0 + else: + assert time_ >= start_time + + +@pytest.mark.smoke() +def test_load_generator_invalid_times(): + generator = LoadGenerator(mode="synchronous") + + for index, time_ in enumerate(generator.synchronous_times()): + if index > 10: + break + + assert time_ == -1.0 + + +@pytest.mark.smoke() +def test_load_generator_throughput_times(): + generator = LoadGenerator(mode="throughput") + + for index, time_ in enumerate(generator.throughput_times()): + if index > 10: + break + + assert time_ <= time.time() + + +@pytest.mark.smoke() +@pytest.mark.parametrize("rate", [1, 10, 42]) +def test_load_generator_constant_times(rate): + generator = LoadGenerator(mode="constant", rate=rate) + start_time = time.time() + + for index, time_ in enumerate(generator.constant_times()): + if index > 10: + break + + assert time_ == pytest.approx(start_time + index / rate, rel=1e-5) + + +@pytest.mark.smoke() +@pytest.mark.flaky(reruns=5) +def test_load_generator_poisson_times(): + rate = 5 + generator = LoadGenerator(mode="poisson", rate=rate) + start_time = time.time() + + times = [] + prev_time = start_time + + for index, current_time in enumerate(generator.poisson_times()): + if index > 100: + break + + times.append(current_time - prev_time) + prev_time = current_time + + mean_inter_arrival_time = 1 / rate + + # Perform Kolmogorov-Smirnov test to compare the sample distribution + # to the expected exponential distribution + ks_statistic, p_value = kstest(times, "expon", args=(0, mean_inter_arrival_time)) + assert p_value > 0.025, ( + f"Poisson-generated inter-arrival times do not " + f"match the expected exponential distribution (p-value: {p_value})" + ) diff --git a/tests/unit/scheduler/test_poission_mode.py b/tests/unit/scheduler/test_poission_mode.py deleted file mode 100644 index 1f10b2a..0000000 --- a/tests/unit/scheduler/test_poission_mode.py +++ /dev/null @@ -1,63 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 5]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - backend_submit_patch, - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.POISSON, - load_gen_rate=1.0, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert backend_submit_patch.call_count == max_requests - assert 
len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.skip("Poission can't be limited with max duration. TBD") -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.POISSON, - load_gen_rate=1.0, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/scheduler/test_sync_mode.py b/tests/unit/scheduler/test_sync_mode.py deleted file mode 100644 index 7d08e6d..0000000 --- a/tests/unit/scheduler/test_sync_mode.py +++ /dev/null @@ -1,62 +0,0 @@ -import time -from typing import Callable - -import pytest -from guidellm.backend import OpenAIBackend -from guidellm.core import TextGenerationBenchmark -from guidellm.scheduler import LoadGenerationMode, Scheduler - -from tests import dummy - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_requests", [1, 5]) -def test_scheduler_max_requests_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - backend_submit_patch, - max_requests: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, - max_requests=max_requests, - max_duration=None, - ) - - benchmark: TextGenerationBenchmark = scheduler.run() - - assert backend_submit_patch.call_count == max_requests - assert len(benchmark.results) == max_requests - assert benchmark.errors == [] - - -@pytest.mark.sanity() -@pytest.mark.parametrize("max_duration", [1, 3]) -def test_scheduler_max_duration_limitation( - openai_backend_factory: Callable[..., OpenAIBackend], - max_duration: int, -): - request_genrator = dummy.services.TestRequestGenerator( - tokenizer="bert-base-uncased", - ) - - scheduler = Scheduler( - request_generator=request_genrator, - backend=openai_backend_factory(), - load_gen_mode=LoadGenerationMode.SYNCHRONOUS, - load_gen_rate=None, - max_requests=None, - max_duration=max_duration, - ) - - start_time = time.perf_counter() - scheduler.run() - end_time = time.perf_counter() - start_time - - assert round(end_time) == max_duration diff --git a/tests/unit/config/test_base.py b/tests/unit/test_config.py similarity index 98% rename from tests/unit/config/test_base.py rename to tests/unit/test_config.py index 0e9dadb..13e1699 100644 --- a/tests/unit/config/test_base.py +++ b/tests/unit/test_config.py @@ -1,5 +1,6 @@ import pytest -from guidellm.config.base import ( + +from guidellm.config import ( Environment, LoggingSettings, OpenAISettings, diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py index 91e6649..53e8b66 100644 --- a/tests/unit/test_logger.py +++ b/tests/unit/test_logger.py @@ -1,12 +1,13 @@ from pathlib import Path import pytest + from guidellm import configure_logger, logger -from guidellm.config.base import LoggingSettings +from guidellm.config import LoggingSettings @pytest.fixture(autouse=True) -def reset_logger(): +def reset_logger(): # noqa: PT004 # Ensure logger 
is reset before each test logger.remove() yield @@ -21,9 +22,12 @@ def test_default_logger_settings(capsys): # Default settings should log to console with INFO level and no file logging logger.info("Info message") logger.debug("Debug message") + logger.warning("Warning message") + logger.error("Error message") captured = capsys.readouterr() - assert captured.out.count("Info message") == 1 + assert captured.out.count("Warning message") == 1 + assert captured.out.count("Error message") == 1 assert "Debug message" not in captured.out @@ -105,5 +109,5 @@ def test_logging_disabled(capsys): logger.error("Error message") captured = capsys.readouterr() - assert captured.out == "" - assert captured.err == "" + assert not captured.out + assert not captured.err diff --git a/tests/unit/utils/test_injector.py b/tests/unit/utils/test_injector.py index 87484de..9a58575 100644 --- a/tests/unit/utils/test_injector.py +++ b/tests/unit/utils/test_injector.py @@ -1,16 +1,11 @@ from pathlib import Path -from unittest.mock import mock_open, patch import pytest -import requests -from guidellm.config import settings -from guidellm.utils.constants import ( - REPORT_HTML_MATCH, - REPORT_HTML_PLACEHOLDER, -) -from guidellm.utils.injector import create_report, inject_data, load_html_file from pydantic import BaseModel +from guidellm.config import settings +from guidellm.utils.injector import create_report, inject_data + class ExampleModel(BaseModel): name: str @@ -23,50 +18,15 @@ def test_inject_data(): html = "window.report_data = {};" expected_html = 'window.report_data = {"name":"Example App","version":"1.0.0"};' - result = inject_data(model, html, REPORT_HTML_MATCH, REPORT_HTML_PLACEHOLDER) + result = inject_data( + model, + html, + settings.report_generation.report_html_match, + settings.report_generation.report_html_placeholder, + ) assert result == expected_html -@pytest.mark.smoke() -def test_load_html_file_from_url(requests_mock): - url = "http://example.com/report.html" - mock_content = "Sample Report" - requests_mock.get(url, text=mock_content) - - result = load_html_file(url) - assert result == mock_content - - -@pytest.mark.sanity() -def test_load_html_file_from_invalid_url(requests_mock): - url = "http://example.com/404.html" - requests_mock.get(url, status_code=404) - - with pytest.raises(requests.exceptions.HTTPError): - load_html_file(url) - - -@pytest.mark.smoke() -def test_load_html_file_from_path(): - path = "sample_report.html" - mock_content = "Sample Report" - - with patch("pathlib.Path.open", mock_open(read_data=mock_content)), patch( - "pathlib.Path.exists", return_value=True - ): - result = load_html_file(path) - - assert result == mock_content - - -@pytest.mark.sanity() -def test_load_html_file_from_invalid_path(): - path = "invalid_report.html" - - with pytest.raises(FileNotFoundError): - load_html_file(path) - - @pytest.mark.smoke() def test_create_report_to_file(tmpdir): model = ExampleModel(name="Example App", version="1.0.0") diff --git a/tests/unit/utils/test_progress.py b/tests/unit/utils/test_progress.py new file mode 100644 index 0000000..637b2be --- /dev/null +++ b/tests/unit/utils/test_progress.py @@ -0,0 +1,116 @@ +import pytest + +from guidellm.utils import BenchmarkReportProgress + + +@pytest.fixture() +def benchmark_progress(): + return BenchmarkReportProgress() + + +@pytest.mark.smoke() +def test_initialization(benchmark_progress): + assert benchmark_progress.report_task is None + assert benchmark_progress.benchmark_tasks == [] + assert 
benchmark_progress.benchmark_tasks_started == [] + assert benchmark_progress.benchmark_tasks_completed == [] + assert benchmark_progress.benchmark_tasks_progress == [] + + +@pytest.mark.smoke() +def test_start_method(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + + assert len(benchmark_progress.benchmark_tasks) == 2 + assert benchmark_progress.report_task is not None + + benchmark_progress.finish() + + +@pytest.mark.sanity() +def test_update_benchmark(benchmark_progress): + descriptions = ["Benchmark 1"] + benchmark_progress.start(descriptions) + + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 + + benchmark_progress.finish() + + +@pytest.mark.sanity() +def test_finish_method(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + benchmark_progress.finish() + + assert benchmark_progress.report_progress.finished + + +@pytest.mark.regression() +def test_error_on_update_completed_benchmark(benchmark_progress): + descriptions = ["Benchmark 1"] + benchmark_progress.start(descriptions) + benchmark_progress.update_benchmark( + index=0, + description="Benchmark 1", + completed=True, + completed_count=100, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + + with pytest.raises(ValueError, match="already completed"): + benchmark_progress.update_benchmark( + index=0, + description="Benchmark 1", + completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=10.5, + ) + + benchmark_progress.finish() + + +@pytest.mark.regression() +def test_multiple_updates(benchmark_progress): + descriptions = ["Benchmark 1", "Benchmark 2"] + benchmark_progress.start(descriptions) + + # First update + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + completed=False, + completed_count=50, + completed_total=100, + start_time=0, + req_per_sec=5.0, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 + + # Second update, same task + benchmark_progress.update_benchmark( + index=0, + description="Updating Benchmark 1", + completed=True, + completed_count=100, + completed_total=100, + start_time=0, + req_per_sec=5.0, + ) + assert benchmark_progress.benchmark_tasks_progress[0] == 100.0 + + benchmark_progress.finish() diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py new file mode 100644 index 0000000..1d89ee3 --- /dev/null +++ b/tests/unit/utils/test_text.py @@ -0,0 +1,394 @@ +from pathlib import Path +from unittest.mock import patch + +import pytest +import requests + +from guidellm.utils.text import ( + clean_text, + filter_text, + is_path, + is_path_like, + is_url, + load_text, + load_text_lines, + parse_text_objects, + split_lines_by_punctuation, + split_text, +) + + +@pytest.fixture() +def sample_text(): + return "This is a sample text.\nThis is another line!" 
+ + +@pytest.fixture() +def sample_dict_data(): + return [{"text": "line 1"}, {"text": "line 2"}, {"text": "line 3"}] + + +@pytest.fixture() +def sample_csv_data(): + return "text\nline 1\nline 2\nline 3" + + +@pytest.fixture() +def sample_jsonl_data(): + return '{"text": "line 1"}\n{"text": "line 2"}\n{"text": "line 3"}' + + +@pytest.fixture() +def sample_yaml_data(): + return """ + text: + - line 1 + - line 2 + - line 3 + """ + + +@pytest.fixture() +def mock_response(): + response = requests.Response() + response.status_code = 200 + response._content = b"Mock content" + return response + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("text", "start", "end", "expected"), + [ + ("hello world", "hello", "world", "hello "), + ("hello world", "world", None, "world"), + ("hello world", None, "hello", ""), + ("hello world", None, None, "hello world"), + ], +) +def test_filter_text(text, start, end, expected): + assert filter_text(text, start, end) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ( + "text", + "fix_encoding", + "clean_whitespace", + "remove_empty_lines", + "force_new_line_punctuation", + "expected", + ), + [ + ( + "This is\ta test.\n New line.", + True, + True, + False, + False, + "This is a test.\nNew line.", + ), + ( + "This is\ta test.\n New line.", + True, + True, + True, + False, + "This is a test.\nNew line.", + ), + ( + "This is a test. New line.", + True, + False, + False, + True, + "This is a test.\nNew line.", + ), + ], +) +def test_clean_text( + text, + fix_encoding, + clean_whitespace, + remove_empty_lines, + force_new_line_punctuation, + expected, +): + assert ( + clean_text( + text, + fix_encoding, + clean_whitespace, + remove_empty_lines, + force_new_line_punctuation, + ) + == expected + ) + + +@pytest.mark.smoke() +def test_split_lines_by_punctuation(sample_text): + expected = ["This is a sample text.", "This is another line!"] + assert split_lines_by_punctuation(sample_text) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("url", "expected"), + [ + ("https://example.com", True), + ("ftp://example.com", True), + ("not a url", False), + ], +) +def test_is_url(url, expected): + assert is_url(url) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("path", "expected"), + [ + (str(Path(__file__)), True), + ("/non/existent/path", False), + ], +) +def test_is_path(path, expected): + assert is_path(path) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("path", "enforce_file", "expected"), + [ + (str(Path(__file__)), True, True), + ("/non/existent/path", False, True), + ("https://example.com", False, False), + ], +) +def test_is_path_like(path, enforce_file, expected): + assert is_path_like(path, enforce_file) == expected + + +@pytest.mark.smoke() +def test_split_text(sample_text): + words, separators, new_lines = split_text(sample_text) + assert words == [ + "This", + "is", + "a", + "sample", + "text.", + "This", + "is", + "another", + "line!", + ] + assert separators == [" ", " ", " ", " ", "\n", " ", " ", " ", " "] + assert new_lines == [0, 5] + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "format_", "expected"), + [ + ("text\nline 1\nline 2", "csv", [{"text": "line 1"}, {"text": "line 2"}]), + ( + '{"text": "line 1"}\n{"text": "line 2"}', + "jsonl", + [{"text": "line 1"}, {"text": "line 2"}], + ), + ], +) +def test_parse_text_objects(data, format_, expected): + assert parse_text_objects(data, format_) == expected + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + 
("data", "expected"), + [ + ("https://example.com", "Mock content"), + (str(Path(__file__)), Path(__file__).read_text()), + ], +) +def test_load_text(data, expected, mock_response): + with patch("requests.get", return_value=mock_response): + assert load_text(data) == expected + + +@pytest.mark.regression() +def test_load_text_file_not_found(): + with pytest.raises(FileNotFoundError): + load_text("/non/existent/file.txt") + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("data", "format_", "filters", "expected"), + [ + ("text\nline 1\nline 2", "csv", None, ["line 1", "line 2"]), + ('{"text": "line 1"}\n{"text": "line 2"}', "jsonl", None, ["line 1", "line 2"]), + ("text\nline 1\nline 2", "txt", None, ["text", "line 1", "line 2"]), + ], +) +def test_load_text_lines(data, format_, filters, expected): + assert load_text_lines(data, format_=format_, filters=filters) == expected + + +@pytest.mark.regression() +def test_load_text_lines_invalid_data(): + with pytest.raises(ValueError): + load_text_lines(123) # type: ignore + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_format(): + with pytest.raises(ValueError): + parse_text_objects("text", format_="unsupported") + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_data(): + with pytest.raises(ValueError): + parse_text_objects(123) # type: ignore + + +@pytest.mark.regression() +@pytest.mark.parametrize( + ("data", "format_", "filters", "expected"), + [ + ( + "text\nline 1\nline 2\n", + "csv", + ["text"], + ["line 1", "line 2"], + ), + ], +) +def test_load_text_lines_with_filters(data, format_, filters, expected): + assert load_text_lines(data, format_=format_, filters=filters) == expected + + +@pytest.mark.regression() +def test_is_path_with_symlink(tmp_path): + # Create a symlink to a temporary file + target_file = tmp_path / "target_file.txt" + target_file.write_text("Sample content") + symlink_path = tmp_path / "symlink" + symlink_path.symlink_to(target_file) + + assert is_path(str(symlink_path)) is True + + +@pytest.mark.regression() +def test_is_path_like_with_symlink(tmp_path): + # Create a symlink to a temporary file + target_file = tmp_path / "target_file.txt" + target_file.write_text("Sample content") + symlink_path = tmp_path / "symlink.file" + symlink_path.symlink_to(target_file) + + assert is_path_like(str(symlink_path), enforce_file=True) is True + + +@pytest.mark.regression() +def test_load_text_lines_empty(): + # Test loading text lines from an empty string + assert load_text_lines("") == [] + + +@pytest.mark.regression() +def test_split_text_with_empty_string(): + words, separators, new_lines = split_text("") + assert words == [] + assert separators == [] + assert new_lines == [] + + +@pytest.mark.regression() +def test_split_lines_by_punctuation_with_no_punctuation(): + text = "This is a test without punctuation" + assert split_lines_by_punctuation(text) == [text] + + +@pytest.mark.regression() +def test_is_path_invalid_type(): + assert not is_path(None) + assert not is_path(123) + assert not is_path(["not", "a", "path"]) + + +@pytest.mark.regression() +def test_is_path_like_invalid_type(): + assert not is_path_like(None, enforce_file=False) + assert not is_path_like(123, enforce_file=True) + assert not is_path_like(["not", "a", "path"], enforce_file=False) + + +@pytest.mark.regression() +def test_load_text_invalid_url(): + with pytest.raises(requests.ConnectionError): + load_text("http://invalid.url") + + +@pytest.mark.regression() +def test_parse_text_objects_empty_csv(): + assert 
parse_text_objects("text\n", "csv") == [] + + +@pytest.mark.regression() +def test_parse_text_objects_empty_jsonl(): + assert parse_text_objects("", "jsonl") == [] + + +@pytest.mark.regression() +def test_parse_text_objects_invalid_jsonl(): + with pytest.raises(ValueError): + parse_text_objects("{invalid_json}", "jsonl") + + +@pytest.mark.regression() +def test_parse_text_objects_empty_yaml(): + assert parse_text_objects("", "yaml") == [] + + +@pytest.mark.regression() +def test_clean_text_with_unicode(): + text = "This is a test with unicode: \u2013 \u2014" + cleaned_text = clean_text(text, fix_encoding=True, clean_whitespace=True) + assert cleaned_text == "This is a test with unicode: – —" + + +@pytest.mark.regression() +def test_split_lines_by_punctuation_with_multiple_punctuations(): + text = "First sentence. Second sentence? Third sentence!" + expected = ["First sentence.", "Second sentence?", "Third sentence!"] + assert split_lines_by_punctuation(text) == expected + + +@pytest.mark.regression() +def test_is_url_empty_string(): + assert not is_url("") + + +@pytest.mark.regression() +def test_load_text_invalid_data(): + with pytest.raises(TypeError): + load_text(123) # type: ignore + + +@pytest.mark.regression() +def test_load_text_lines_empty_format(): + data = "text\nline 1\nline 2" + assert load_text_lines(data, format_="") == ["text", "line 1", "line 2"] + + +@pytest.mark.regression() +def test_split_text_with_mixed_separators(): + text = "This\tis a test\nwith mixed separators." + words, separators, new_lines = split_text(text) + assert words == ["This", "is", "a", "test", "with", "mixed", "separators."] + assert separators == ["\t", " ", " ", "\n", " ", " ", " "] + assert new_lines == [0, 4] diff --git a/tests/unit/utils/test_transformers.py b/tests/unit/utils/test_transformers.py new file mode 100644 index 0000000..5153da3 --- /dev/null +++ b/tests/unit/utils/test_transformers.py @@ -0,0 +1,236 @@ +from unittest.mock import patch + +import pytest +from datasets import ( # type: ignore + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) + +from guidellm.utils.transformers import ( + load_transformers_dataset, + resolve_transformers_dataset, + resolve_transformers_dataset_column, + resolve_transformers_dataset_split, +) +from tests.dummy.data.transformers import ( + create_sample_dataset, + create_sample_dataset_dict, + create_sample_iterable_dataset, + create_sample_iterable_dataset_dict, +) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), + [ + ( + "mock/directory/file.csv", + create_sample_dataset_dict(splits=["train"]), + "train", + None, + Dataset, + ), + ( + "mock/directory/file.json", + create_sample_dataset_dict(splits=["test"]), + None, + ("train", "test"), + Dataset, + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + Dataset, + ), + ( + create_sample_dataset_dict(splits=["val", "train"], column="custom"), + None, + "val", + None, + Dataset, + ), + ( + create_sample_dataset(), + None, + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + None, + IterableDataset, + ), + ( + create_sample_iterable_dataset(), + None, + "validation", + None, + IterableDataset, + ), + ], +) +def test_load_transformers_dataset( + dataset_arg, dataset, split, preferred_splits, expected_type +): + with patch( + "guidellm.utils.transformers.load_dataset", + 
return_value=dataset, + ): + loaded_dataset = load_transformers_dataset( + dataset_arg, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), + [ + ( + "mock/directory/file.csv", + create_sample_dataset(), + "train", + None, + Dataset, + ), + ( + "mock/directory/file.json", + create_sample_dataset_dict(splits=["test"]), + None, + ("train", "test"), + DatasetDict, + ), + ( + "mock/directory/file.py", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + DatasetDict, + ), + ( + "mock/directory/file.unk", + create_sample_dataset_dict(splits=["test"], column="output"), + None, + None, + DatasetDict, + ), + ( + create_sample_dataset_dict(splits=["val", "train"], column="custom"), + None, + "val", + None, + DatasetDict, + ), + ( + create_sample_dataset(), + None, + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + None, + IterableDatasetDict, + ), + ( + create_sample_iterable_dataset(), + None, + "validation", + None, + IterableDataset, + ), + ], +) +def test_resolve_transformers_dataset( + dataset_arg, dataset, split, preferred_splits, expected_type +): + with patch( + "guidellm.utils.transformers.load_dataset", + return_value=dataset, + ): + loaded_dataset = resolve_transformers_dataset( + dataset_arg, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +@pytest.mark.sanity() +def test_resolve_transformers_dataset_invalid(): + with pytest.raises(ValueError): + resolve_transformers_dataset(123) + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset", "split", "preferred_splits", "expected_type"), + [ + ( + create_sample_dataset(), + None, + None, + Dataset, + ), + ( + create_sample_iterable_dataset_dict(splits=["validation"]), + None, + None, + IterableDataset, + ), + ( + create_sample_iterable_dataset(), + "validation", + None, + IterableDataset, + ), + ], +) +def test_resolve_transformers_dataset_split( + dataset, split, preferred_splits, expected_type +): + loaded_dataset = resolve_transformers_dataset_split( + dataset, split=split, preferred_splits=preferred_splits + ) + assert isinstance(loaded_dataset, expected_type) + + +def test_resolve_transformers_dataset_split_missing(): + dataset = create_sample_dataset_dict() + with pytest.raises(ValueError): + resolve_transformers_dataset_split(dataset, split="missing") + + +@pytest.mark.smoke() +@pytest.mark.parametrize( + ("dataset", "column", "preferred_columns", "expected_column"), + [ + (create_sample_dataset(), None, None, "text"), + (create_sample_dataset(), "text", None, "text"), + (create_sample_dataset(), None, ["text"], "text"), + (create_sample_dataset(), None, ["data"], "text"), + (create_sample_iterable_dataset(), None, None, "text"), + ], +) +def test_resolve_transformers_dataset_column( + dataset, column, preferred_columns, expected_column +): + resolved_column = resolve_transformers_dataset_column( + dataset, column=column, preferred_columns=preferred_columns + ) + assert resolved_column == expected_column + + +def test_resolve_transformers_dataset_column_missing(): + dataset = create_sample_dataset() + with pytest.raises(ValueError): + resolve_transformers_dataset_column(dataset, column="missing") diff --git a/tox.ini b/tox.ini index f4ed8d9..b6b5daf 100644 --- a/tox.ini +++ b/tox.ini @@ -57,8 +57,8 @@ 
description = Run style checks and fixes deps = .[dev] commands = - ruff check --fix ruff format + ruff check --fix [testenv:types] @@ -78,7 +78,6 @@ deps = loguru toml commands = - python utils/inject_build_props.py python -m build diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/utils/inject_build_props.py b/utils/inject_build_props.py deleted file mode 100644 index 74000dd..0000000 --- a/utils/inject_build_props.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import re -from datetime import datetime -from pathlib import Path - -import toml -from loguru import logger - - -def get_build_type(): - return os.getenv("GUIDELLM_BUILD_TYPE", "dev") - - -def get_build_number(): - return os.getenv("GUIDELLM_BUILD_NUMBER", "0") - - -def construct_project_name_and_version(build_type, build_number, current_version): - if not re.match(r"^\d+\.\d+\.\d+$", current_version): - raise ValueError( - f"Version '{current_version}' does not match the " - f"semantic versioning pattern '#.#.#'", - ) - - if build_type == "dev": - project_name = "guidellm_dev" - version = f"{current_version}.dev{build_number}" - elif build_type == "nightly": - project_name = "guidellm_nightly" - date_str = datetime.now().strftime("%Y%m%d") - version = f"{current_version}.{date_str}" - elif build_type == "release": - project_name = "guidellm" - version = current_version - else: - raise ValueError(f"Unknown build type: {build_type}") - - return project_name, version - - -def update_pyproject_toml(project_name, version): - try: - with Path("pyproject.toml").open() as file: - data = toml.load(file) - - data["project"]["name"] = project_name - data["project"]["version"] = version - - with Path("pyproject.toml").open("w") as file: - toml.dump(data, file) - - logger.info( - f"Updated project name to: {project_name} and version to: {version}", - ) - except (FileNotFoundError, toml.TomlDecodeError) as e: - logger.error(f"Error reading or writing pyproject.toml: {e}") - raise - - -def main(): - build_type = get_build_type() - build_number = get_build_number() - - with Path("pyproject.toml").open() as file: - pyproject_data = toml.load(file) - - current_version = pyproject_data["project"]["version"] - project_name, version = construct_project_name_and_version( - build_type, - build_number, - current_version, - ) - - if build_type != "release": - update_pyproject_toml(project_name, version) - - -if __name__ == "__main__": - main()