This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

format
robertgshaw2-neuralmagic committed May 13, 2024
1 parent 2f6849f · commit e257749
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions tests/distributed/test_pynccl_library.py
@@ -1,7 +1,8 @@
-import pytest
 import multiprocessing
 import tempfile
 
+import pytest
+
 
 def target_fn(env, filepath):
     from vllm.utils import update_environment_variables
@@ -12,7 +13,7 @@ def target_fn(env, filepath):
 
 @pytest.mark.skip(
     reason="This test fails in automation b/c it deliberately raises "
-           "a RuntimeError. Skipping as a result.")
+    "a RuntimeError. Skipping as a result.")
 def test_library_file():
     # note: don't import vllm.distributed.device_communicators.pynccl
     # before running this test, otherwise the library file will be loaded
5 changes: 3 additions & 2 deletions tests/models/test_gptq_marlin.py
@@ -85,8 +85,9 @@ def test_models(
                              quantization="gptq",
                              max_model_len=MAX_MODEL_LEN,
                              tensor_parallel_size=1)
-    gptq_outputs = gptq_model.generate_greedy_logprobs(
-        example_prompts[:-1], max_tokens, num_logprobs)
+    gptq_outputs = gptq_model.generate_greedy_logprobs(example_prompts[:-1],
+                                                       max_tokens,
+                                                       num_logprobs)
     del gptq_model
 
     check_logprobs_close(

1 comment on commit e257749

@github-actions

bigger_is_better

Benchmark suite        Current: e257749                 Previous: df1f1a0    Ratio
request_throughput     3.8400351114771656 prompts/s
token_throughput       1474.5734828072316 tokens/s

Common metadata for both results:
  description: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf
  max_model_len: 4096
  benchmark_throughput settings: use-all-available-gpus, input-len: 256, output-len: 128, num-prompts: 1000
  gpu_description: NVIDIA A10G x 1
  vllm_version: 0.2.0
  python_version: 3.10.12 (main, May 10 2024, 13:42:25) [GCC 9.4.0]
  torch_version: 2.3.0+cu121

This comment was automatically generated by a workflow using github-action-benchmark.
