Skip to content

Commit

Permalink
🚧 WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmytro Parfeniuk committed Jul 16, 2024
1 parent 440df7c commit 1d35f30
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
2 changes: 0 additions & 2 deletions src/guidellm/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,6 @@ async def _wait_for_running_tasks(
if not asyncio_task.done():
asyncio_task.cancel()

# TODO: Add error results

finally:
return benchmark

Expand Down
40 changes: 36 additions & 4 deletions tests/unit/executor/test_report_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from guidellm.backend import OpenAIBackend
from guidellm.core import TextGenerationBenchmarkReport
from guidellm.executor import Executor, ProfileGenerationMode
from guidellm.scheduler import LoadGenerationMode, Scheduler
from guidellm.scheduler import LoadGenerationMode
from tests import dummy


Expand Down Expand Up @@ -111,9 +111,6 @@ def test_executor_openai_single_report_generation_constant_mode_cancelled(
):
"""
Test max duration immediate cancellation.
By default the openai completion is generated every 0.5 seconds in async mode
Which means that
"""

request_genrator = dummy.services.TestRequestGenerator(
Expand Down Expand Up @@ -141,6 +138,41 @@ def test_executor_openai_single_report_generation_constant_mode_cancelled(
assert len(report.benchmarks[0].results) == 0


def test_executor_openai_single_report_generation_constant_mode_cancelled_reports(
    openai_backend_factory,
):
    """
    Test that a max-duration cancellation still reports completed results.

    By default the mocked OpenAI service returns a completion every 0.5
    seconds, so the tasks finished before the ``max_duration`` cutoff must
    still appear in the benchmark report after the run is cancelled.

    NOTE(review): the expected count of 6 implies ~3 seconds of completions
    at 0.5s each, but ``max_duration`` below is 5 — confirm which value the
    expectation is actually derived from.
    """

    # Fixed typo: was `request_genrator`.
    request_generator = dummy.services.TestRequestGenerator(
        tokenizer="bert-base-uncased"
    )
    profile_generation_mode = ProfileGenerationMode.SINGLE
    profile_generator_kwargs = {
        "rate_type": LoadGenerationMode.CONSTANT,
        "rate": 1.0,
    }

    executor = Executor(
        backend=openai_backend_factory(),
        request_generator=request_generator,
        profile_mode=profile_generation_mode,
        profile_args=profile_generator_kwargs,
        max_requests=10,
        max_duration=5,  # expected 6 tasks to be started for that condition
    )

    report: TextGenerationBenchmarkReport = executor.run()

    assert isinstance(executor.backend, OpenAIBackend)
    assert len(report.benchmarks) == 1
    assert len(report.benchmarks[0].results) == 6


@pytest.mark.sanity
def test_executor_openai_single_report_generation_constant_mode_cancelled_failed(
openai_backend_factory,
Expand Down

0 comments on commit 1d35f30

Please sign in to comment.