Benchmarking: Prepare for GHA benchmark UI (#122)
SUMMARY:
- Miscellaneous updates to the benchmarking infrastructure to support the GitHub benchmarking UI
  - Clean up configs: remove default arguments.
- Add a `description` field to the benchmarking scripts so we can communicate intent to the UI.
- Move benchmark_result.py to the logging folder.
- Add a gha_benchmark_logging script that consumes BenchmarkResult JSON and outputs JSON that the GitHub Benchmark UI can understand (see the sketch below).
- Add a minimal_test.json config that can be used for infra testing.
- Make the nightly config list explicit and add the remote-push job to nightly for a fair comparison.
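
For context, a minimal sketch of the kind of conversion gha_benchmark_logging performs, assuming the GHA benchmark UI expects a flat list of name/unit/value records (the github-action-benchmark "custom" format); the BenchmarkResult keys and file name below are illustrative assumptions, not the script's exact schema:

```python
# Illustrative-only sketch; the real script is gha_benchmark_logging in this PR,
# and the BenchmarkResult keys ("description", "metrics", "unit", "value") are
# assumptions, not the exact schema.
import json
from pathlib import Path


def to_gha_benchmark_records(benchmark_result_path: Path) -> list:
    result = json.loads(benchmark_result_path.read_text())
    records = []
    for metric_name, metric in result.get("metrics", {}).items():
        records.append({
            # The new `description` field lets the UI show the benchmark's intent.
            "name": f"{result['description']}: {metric_name}",
            "unit": metric.get("unit", ""),
            "value": metric["value"],
        })
    return records


if __name__ == "__main__":
    print(json.dumps(to_gha_benchmark_records(Path("benchmark_result.json")),
                     indent=2))
```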


TEST PLAN:
Manual testing.
nm-benchmark manual trigger:
https://github.com/neuralmagic/nm-vllm/actions/runs/8284500798
nightly manual trigger:
https://github.com/neuralmagic/nm-vllm/actions/runs/8285535882

---------

Co-authored-by: Varun Sundar Rabindranath <[email protected]>
varun-sundar-rabindranath and Varun Sundar Rabindranath authored Mar 14, 2024
1 parent ac9c9c8 commit feb86cd
Showing 16 changed files with 318 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/actions/nm-benchmark/action.yml
@@ -24,7 +24,7 @@ runs:
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
SUCCESS=0
.github/workflows/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
.github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
1 change: 1 addition & 0 deletions .github/data/nm_benchmark_configs_minimal_test_list.txt
@@ -0,0 +1 @@
neuralmagic/benchmarks/configs/minimal_test.json
@@ -1,2 +1,3 @@
neuralmagic/benchmarks/configs/benchmark_serving.json
neuralmagic/benchmarks/configs/benchmark_throughput.json
neuralmagic/benchmarks/configs/benchmark_remote_push.json
4 changes: 2 additions & 2 deletions .github/workflows/nightly.yml
@@ -36,7 +36,7 @@ jobs:
uses: ./.github/workflows/nm-benchmark.yml
with:
label: aws-avx2-192G-4-a10g-96G
benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
timeout: 240
gitref: '${{ github.ref }}'
Gi_per_thread: 4
@@ -48,7 +48,7 @@
uses: ./.github/workflows/nm-benchmark.yml
with:
label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
timeout: 240
gitref: '${{ github.ref }}'
Gi_per_thread: 12
10 changes: 5 additions & 5 deletions neuralmagic/benchmarks/common.py
@@ -26,7 +26,7 @@ def max_model_length_from_model_id(model: str,
return _get_and_verify_max_len(config, max_model_len=None)


def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]:
def script_args_to_cla(config: NamedTuple) -> Iterable[dict]:
#config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs

kv = vars(config.script_args)
@@ -41,17 +41,17 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]:
if len(v) == 0:
key_args.append(k)

key_args_cla = list(map(lambda k: f"--{k}", key_args))
key_args_cla = {f"{k}": "" for k in key_args}

# Remove empty lists from arg_lists and remove key args from keys
arg_lists = list(filter(lambda arg_list: len(arg_list) != 0, arg_lists))
keys = list(filter(lambda k: k not in key_args, keys))
assert len(keys) == len(arg_lists)

for args in itertools.product(*arg_lists):
cla = key_args_cla
for name, value in zip(keys, args):
cla.extend([f"--{name}", f"{value}"])
args_dict = dict(zip(keys, args))
cla = key_args_cla.copy()
cla.update(args_dict)
yield cla


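To summarize the common.py change above: script_args_to_cla no longer yields flat `--flag value` lists; it now yields one dict per combination of script_args values, with flag-only arguments (empty lists in the config) mapped to empty strings so callers can rebuild the command line. A minimal standalone sketch of that behavior, with a made-up config shape:

```python
# Illustrative-only sketch of the new dict-yielding behavior; the real
# implementation lives in neuralmagic/benchmarks/common.py.
import itertools
from types import SimpleNamespace


def script_args_to_cla_sketch(config):
    """Yield one dict of CLI args per combination of script_args values."""
    kv = vars(config.script_args)
    # Flag-only arguments (empty lists in the config) map to empty strings.
    key_args = {k: "" for k, v in kv.items() if len(v) == 0}
    keys = [k for k, v in kv.items() if len(v) > 0]
    for combo in itertools.product(*(kv[k] for k in keys)):
        cla = key_args.copy()
        cla.update(dict(zip(keys, combo)))
        yield cla


# Hypothetical config shaped like the JSON files in neuralmagic/benchmarks/configs.
config = SimpleNamespace(script_args=SimpleNamespace(**{
    "input-len": [1, 16],
    "output-len": [1],
    "use-all-available-gpus_": [],
}))
for args in script_args_to_cla_sketch(config):
    print(args)
# {'use-all-available-gpus_': '', 'input-len': 1, 'output-len': 1}
# {'use-all-available-gpus_': '', 'input-len': 16, 'output-len': 1}
```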
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/configs/benchmark_serving.json
@@ -1,7 +1,7 @@
{
"configs": [
{
"description": "Benchmark vllm serving",
"description": "VLLM Serving",
"models": [
"facebook/opt-125m",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
32 changes: 4 additions & 28 deletions neuralmagic/benchmarks/configs/benchmark_throughput.json
@@ -1,7 +1,7 @@
{
"configs": [
{
"description": "Benchmark vllm engine throughput - with dataset",
"description": "VLLM Engine throughput (with dataset)",
"models": [
"facebook/opt-125m",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -36,7 +36,7 @@
}
},
{
"description": "Benchmark vllm engine prefill throughput - synthetic",
"description": "VLLM Engine prefill throughput (synthetic)",
"models": [
"facebook/opt-125m",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -46,9 +46,6 @@
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"backend": [
"vllm"
],
"input-len": [
1,
16,
@@ -62,23 +59,14 @@
"output-len": [
1
],
"n": [
1
],
"num-prompts": [
1
],
"seed": [
0
],
"dtype": [
"auto"
],
"use-all-available-gpus_" : []
}
},
{
"description": "Benchmark vllm engine decode throughput - synthetic",
"description": "VLLM Engine decode throughput (synthetic)",
"models": [
"facebook/opt-125m",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -88,18 +76,12 @@
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"backend": [
"vllm"
],
"input-len": [
2
],
"output-len": [
128
],
"n": [
1
],
"num-prompts": [
1,
4,
@@ -108,14 +90,8 @@
32,
64
],
"seed": [
0
],
"dtype": [
"auto"
],
"use-all-available-gpus_" : []
}
}
]
}
}
43 changes: 43 additions & 0 deletions neuralmagic/benchmarks/configs/minimal_test.json
@@ -0,0 +1,43 @@
{
"configs": [
{
"description": "Benchmark vllm serving",
"models": [
"mistralai/Mistral-7B-Instruct-v0.2"
],
"use_all_available_gpus" : "",
"max_model_lens": [
4096
],
"sparsity": [],
"script_name": "benchmark_serving",
"script_args": {
"nr-qps-pair_" : ["5,inf"],
"dataset": [
"sharegpt"
]
}
},
{
"description": "Benchmark vllm engine throughput - with dataset",
"models": [
"mistralai/Mistral-7B-Instruct-v0.2"
],
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"output-len": [
128
],
"num-prompts": [
100
],
"dataset" : [
"sharegpt"
],
"max-model-len" : [4096],
"use-all-available-gpus_" : []
}
}
]
}
17 changes: 16 additions & 1 deletion neuralmagic/benchmarks/run_benchmark_serving.py
@@ -2,6 +2,7 @@
import subprocess
import requests
import time
import json
import itertools

from typing import NamedTuple, Optional
@@ -122,12 +123,26 @@ def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None:
" ".join([f"--{k} {v}" for k, v in server_args.items()])

for script_args in script_args_to_cla(config):

description = (f"{config.description}\n" +
f"model - {model}\n" +
f"max-model-len - {max_model_len}\n" +
f"sparsity - {sparsity}\n" +
f"{config.script_name} " +
f"{json.dumps(script_args, indent=2)}")

bench_cmd = (["python3", "-m"
f"{script_path}"] + script_args +
f"{script_path}"] +
["--description", f"{description}"] +
["--model", f"{model}"] +
["--tokenizer", f"{model}"] +
["--port", f"{BENCH_SERVER_PORT}"] +
["--host", f"{BENCH_SERVER_HOST}"])
# Add script args
for k, v in script_args.items():
bench_cmd.append(f"--{k}")
if v != "":
bench_cmd.append(f"{v}")

if output_directory:
bench_cmd += (["--save-directory", f"{output_directory}"] +
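As a usage note on the loop added above, each script_args dict is flattened back into CLI tokens, emitting a bare flag when the value is the empty string; a tiny standalone sketch with made-up values and a hypothetical module name:

```python
# Standalone sketch of the dict -> CLI-flag flattening shown in the diff above
# (values and the module name are made up for illustration).
script_args = {"use-all-available-gpus_": "", "input-len": 16}

bench_cmd = ["python3", "-m", "some_benchmark_module"]
for k, v in script_args.items():
    bench_cmd.append(f"--{k}")
    if v != "":
        bench_cmd.append(f"{v}")

print(bench_cmd)
# ['python3', '-m', 'some_benchmark_module', '--use-all-available-gpus_', '--input-len', '16']
```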
16 changes: 15 additions & 1 deletion neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -1,4 +1,5 @@
import argparse
import json
from pathlib import Path
from typing import NamedTuple, Optional

@@ -29,10 +30,23 @@ def run_benchmark_throughput_script(config: NamedTuple,

for max_model_len in max_model_lens:
for script_args in script_args_to_cla(config):

description = (f"{config.description}\n"
f"model - {model}\n" +
f"max_model_len - {max_model_len}\n" +
f"{config.script_name} " +
f"{json.dumps(script_args, indent=2)}")

bench_cmd = (["python3", "-m", f"{script_path}"] +
script_args + ["--model", f"{model}"] +
["--description", f"{description}"] +
["--model", f"{model}"] +
["--tokenizer", f"{model}"] +
["--max-model-len", f"{max_model_len}"])
# Add script args
for k, v in script_args.items():
bench_cmd.append(f"--{k}")
if v != "":
bench_cmd.append(f"{v}")

if output_directory:
bench_cmd = bench_cmd + [
18 changes: 13 additions & 5 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
@@ -33,11 +33,11 @@
from vllm.transformers_utils.tokenizer import get_tokenizer
from .common import generate_synthetic_requests, print_serving_request_io
from .datasets_registry import get_dataset, DatasetArgs
from .benchmark_result import (BenchmarkResult,
BenchmarkServingResultMetadataKeys as
ResultMetadataKeys,
BenchmarkServingResultMetricTemplates as
ResultMetricTemplates)
from .logging.benchmark_result import (BenchmarkResult,
BenchmarkServingResultMetadataKeys as
ResultMetadataKeys,
BenchmarkServingResultMetricTemplates as
ResultMetricTemplates)

from neuralmagic.benchmarks.scripts.backend_request_func import (
ASYNC_REQUEST_FUNCS,
@@ -337,6 +337,7 @@ def script_args_as_json_dict(script_args: argparse.Namespace):

current_dt = datetime.now()
result = BenchmarkResult(
description=args.description,
date=current_dt,
script_name=Path(__file__).name,
script_args=script_args_as_json_dict(args),
@@ -382,6 +383,13 @@ def from_str(arg: str):

parser = argparse.ArgumentParser(
description='''Benchmark the online serving throughput.''')
parser.add_argument(
"--description",
type=str,
default="benchmark-serving",
help=
"Benchmark description. This is primarily useful when we log the benchmark results and process them for plotting charts"
)
parser.add_argument(
"--backend",
type=str,
14 changes: 11 additions & 3 deletions neuralmagic/benchmarks/scripts/benchmark_throughput.py
@@ -14,9 +14,9 @@
from transformers import AutoTokenizer
from .common import generate_synthetic_requests, warmup_vllm_engine, num_available_gpus, print_request_outputs
from .datasets_registry import get_dataset, DatasetArgs
from .benchmark_result import (BenchmarkResult,
BenchmarkThroughputResultMetricTemplates as
ResultMetricTemplates)
from .logging.benchmark_result import (BenchmarkResult,
BenchmarkThroughputResultMetricTemplates
as ResultMetricTemplates)


def get_tensor_parallel_size(args: argparse.Namespace) -> int:
@@ -145,6 +145,7 @@ def main(args: argparse.Namespace):
current_dt = datetime.now()

result = BenchmarkResult(
description=args.description,
date=current_dt,
script_name=Path(__file__).name,
script_args=vars(args),
@@ -168,6 +169,13 @@

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Benchmark the throughput.")
parser.add_argument(
"--description",
type=str,
default="benchmark-throughput",
help=
"Benchmark description. This is primarily useful when we log the benchmark results and process them for plotting charts"
)
parser.add_argument("--backend",
type=str,
choices=["vllm"],
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/scripts/common.py
@@ -41,7 +41,7 @@ def get_benchmarking_context() -> dict:
"torch_version": f"{torch.__version__}",
"torch_cuda_version": f"{torch.version.cuda}",
"cuda_devices": f"{cuda_devices}",
"cuda_device_names": f"{cuda_device_names}"
"cuda_device_names": cuda_device_names
}


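The common.py hunk drops the f-string around cuda_device_names, presumably so the benchmarking context keeps the actual Python value rather than its string repr; a minimal sketch of the difference once the context is serialized (device names are made up):

```python
# Illustrative sketch: keeping the raw list serializes as a JSON array
# instead of an embedded repr string (example values are made up).
import json

cuda_device_names = ["NVIDIA A10G", "NVIDIA A10G"]

print(json.dumps({"cuda_device_names": f"{cuda_device_names}"}))
# {"cuda_device_names": "['NVIDIA A10G', 'NVIDIA A10G']"}

print(json.dumps({"cuda_device_names": cuda_device_names}))
# {"cuda_device_names": ["NVIDIA A10G", "NVIDIA A10G"]}
```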