This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

format
robertgshaw2-neuralmagic committed May 13, 2024
1 parent 2f6849f · commit e257749
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions tests/distributed/test_pynccl_library.py
@@ -1,7 +1,8 @@
-import pytest
 import multiprocessing
 import tempfile
 
+import pytest
+
 
 def target_fn(env, filepath):
     from vllm.utils import update_environment_variables
@@ -12,7 +13,7 @@ def target_fn(env, filepath):
 
 @pytest.mark.skip(
     reason="This test fails in automation b/c it deliberately raises "
-           "a RuntimeError. Skipping as a result.")
+    "a RuntimeError. Skipping as a result.")
 def test_library_file():
     # note: don't import vllm.distributed.device_communicators.pynccl
     # before running this test, otherwise the library file will be loaded
5 changes: 3 additions & 2 deletions tests/models/test_gptq_marlin.py
@@ -85,8 +85,9 @@ def test_models(
                              quantization="gptq",
                              max_model_len=MAX_MODEL_LEN,
                              tensor_parallel_size=1)
-    gptq_outputs = gptq_model.generate_greedy_logprobs(
-        example_prompts[:-1], max_tokens, num_logprobs)
+    gptq_outputs = gptq_model.generate_greedy_logprobs(example_prompts[:-1],
+                                                       max_tokens,
+                                                       num_logprobs)
     del gptq_model
 
     check_logprobs_close(

1 comment on commit e257749

@github-actions

bigger_is_better

Benchmark suite        Current: e257749                 Previous: df1f1a0    Ratio
request_throughput     3.8400351114771656 prompts/s
token_throughput       1474.5734828072316 tokens/s

Common metadata for both results:
  description: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf
  max_model_len: 4096
  benchmark_throughput settings: use-all-available-gpus, input-len: 256, output-len: 128, num-prompts: 1000
  gpu_description: NVIDIA A10G x 1
  vllm_version: 0.2.0
  python_version: 3.10.12 (main, May 10 2024, 13:42:25) [GCC 9.4.0]
  torch_version: 2.3.0+cu121

This comment was automatically generated by a workflow using github-action-benchmark.
