diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..61aaac4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+FROM --platform=linux/amd64 python:3.8-slim
+
+# Environment variables
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update \
+  # dependencies for building Python packages && cleaning up unused files
+  && apt-get install -y build-essential \
+    libcurl4-openssl-dev libssl-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+
+# Python dependencies
+RUN pip install --upgrade pip setuptools
+
+WORKDIR /app/
+
+COPY ./ ./
+
+RUN pip install -e '.[dev,deepsparse,vllm]'
diff --git a/tests/unit/backend/test_vllm.py b/tests/unit/backend/test_vllm.py
index de432f6..148c37c 100644
--- a/tests/unit/backend/test_vllm.py
+++ b/tests/unit/backend/test_vllm.py
@@ -7,16 +7,18 @@
 
 import importlib
 import sys
-from typing import Dict
+from typing import Dict, List
 
 import pytest
 
 from guidellm.backend import Backend
+from guidellm.config import reload_settings
+from guidellm.core import TextGenerationRequest
 
-pytestmark = pytest.mark.skipif(
-    sys.platform != "linux",
-    reason="Unsupported Platform. Try using Linux or WSL instead.",
-)
+# pytestmark = pytest.mark.skipif(
+#     sys.platform != "linux",
+#     reason="Unsupported Platform. Try using Linux or WSL instead.",
+# )
 
 
 @pytest.fixture(scope="module")
@@ -29,7 +31,7 @@ def backend_class():
 @pytest.fixture(autouse=True)
 def mock_vllm_llm(mocker):
     module = importlib.import_module("vllm")
-    llm = module.LLM(
+    llm = getattr(module, "LLM")(
         model="facebook/opt-125m",
         max_num_batched_tokens=4096,
         tensor_parallel_size=1,
@@ -65,3 +67,67 @@ def test_backend_creation(create_payload: Dict, backend_class):
         if (custom_model := create_payload.get("model"))
         else backend.default_model
     )
+
+
+@pytest.mark.smoke()
+def test_backend_model_from_env(mocker, backend_class):
+    mocker.patch.dict(
+        "os.environ",
+        {"GUIDELLM__LLM_MODEL": "test_backend_model_from_env"},
+    )
+
+    reload_settings()
+
+    backends = [Backend.create("vllm"), backend_class()]
+
+    for backend in backends:
+        assert backend.model == "test_backend_model_from_env"
+
+
+@pytest.mark.smoke()
+@pytest.mark.parametrize(
+    "text_generation_request_create_payload",
+    [
+        {"prompt": "Test prompt"},
+        {"prompt": "Test prompt", "output_token_count": 20},
+    ],
+)
+@pytest.mark.asyncio()
+async def test_make_request(
+    text_generation_request_create_payload: Dict, backend_class
+):
+    backend = backend_class()
+
+    output_tokens: List[str] = []
+    async for response in backend.make_request(
+        request=TextGenerationRequest(**text_generation_request_create_payload)
+    ):
+        if response.add_token:
+            output_tokens.append(response.add_token)
+    assert "".join(output_tokens) == "".join(
+        generation.text for generation in backend.pipeline._generations
+    )
+
+    if max_tokens := text_generation_request_create_payload.get("output_token_count"):
+        assert len(backend.pipeline._generations) == max_tokens
+
+
+@pytest.mark.smoke()
+@pytest.mark.parametrize(
+    ("text_generation_request_create_payload", "error"),
+    [
+        ({"prompt": "Test prompt"}, ValueError),
+    ],
+)
+@pytest.mark.asyncio()
+async def test_make_request_invalid_request_payload(
+    text_generation_request_create_payload: Dict, error, backend_class
+):
+    backend = backend_class()
+    with pytest.raises(error):
+        [
+            response
+            async for response in backend.make_request(
+                request=TextGenerationRequest(**text_generation_request_create_payload)
+            )
+        ]