From feb86cd443e9e6cde3fe963f963f975abacd8427 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Thu, 14 Mar 2024 17:14:56 -0400
Subject: [PATCH] Benchmarking: Prepare for GHA benchmark UI (#122)

SUMMARY:
- Miscellaneous updates to the benchmarking infrastructure to support the GitHub benchmarking UI
- Clean up configs - remove default arguments.
- Add a `description` field to the benchmarking scripts so we can communicate intent to the UI
- Move benchmark_result.py to the logging folder
- Add a gha_benchmark_logging script that consumes BenchmarkResult JSON and outputs a JSON that the GitHub Benchmark UI can understand.
- Add a minimal_test.json config that can be used for infra testing
- Mark the config list explicitly as nightly and add the remote-push job to nightly for a fair comparison

TEST PLAN:
Manual testing.
nm-benchmark manual trigger: https://github.com/neuralmagic/nm-vllm/actions/runs/8284500798
nightly manual trigger: https://github.com/neuralmagic/nm-vllm/actions/runs/8285535882

---------

Co-authored-by: Varun Sundar Rabindranath
---
 .github/actions/nm-benchmark/action.yml | 2 +-
 ...nm_benchmark_configs_minimal_test_list.txt | 1 +
 ... => nm_benchmark_nightly_configs_list.txt} | 1 +
 .../scripts/nm-run-benchmarks.sh | 0
 .github/workflows/nightly.yml | 4 +-
 neuralmagic/benchmarks/common.py | 10 +-
 .../benchmarks/configs/benchmark_serving.json | 2 +-
 .../configs/benchmark_throughput.json | 32 +---
 .../benchmarks/configs/minimal_test.json | 43 +++++
 .../benchmarks/run_benchmark_serving.py | 17 +-
 .../benchmarks/run_benchmark_throughput.py | 16 +-
 .../benchmarks/scripts/benchmark_serving.py | 18 +-
 .../scripts/benchmark_throughput.py | 14 +-
 neuralmagic/benchmarks/scripts/common.py | 2 +-
 .../scripts/{ => logging}/benchmark_result.py | 66 ++++++--
 .../scripts/logging/gha_benchmark_logging.py | 154 ++++++++++++++++++
 16 files changed, 318 insertions(+), 64 deletions(-)
 create mode 100644 .github/data/nm_benchmark_configs_minimal_test_list.txt
 rename .github/data/{nm_benchmark_configs_list.txt => nm_benchmark_nightly_configs_list.txt} (65%)
 rename .github/{workflows => }/scripts/nm-run-benchmarks.sh (100%)
 create mode 100644 neuralmagic/benchmarks/configs/minimal_test.json
 rename neuralmagic/benchmarks/scripts/{ => logging}/benchmark_result.py (74%)
 create mode 100644 neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py
diff --git a/.github/actions/nm-benchmark/action.yml b/.github/actions/nm-benchmark/action.yml index 32ccf215fa563..2c91778a31b29 100644 --- a/.github/actions/nm-benchmark/action.yml +++ b/.github/actions/nm-benchmark/action.yml @@ -24,7 +24,7 @@ runs: source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt SUCCESS=0 - .github/workflows/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$? + .github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$? 
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT" exit ${SUCCESS} shell: bash diff --git a/.github/data/nm_benchmark_configs_minimal_test_list.txt b/.github/data/nm_benchmark_configs_minimal_test_list.txt new file mode 100644 index 0000000000000..44a0159554c79 --- /dev/null +++ b/.github/data/nm_benchmark_configs_minimal_test_list.txt @@ -0,0 +1 @@ +neuralmagic/benchmarks/configs/minimal_test.json diff --git a/.github/data/nm_benchmark_configs_list.txt b/.github/data/nm_benchmark_nightly_configs_list.txt similarity index 65% rename from .github/data/nm_benchmark_configs_list.txt rename to .github/data/nm_benchmark_nightly_configs_list.txt index 97f1a5057cf69..992aa34481ec1 100644 --- a/.github/data/nm_benchmark_configs_list.txt +++ b/.github/data/nm_benchmark_nightly_configs_list.txt @@ -1,2 +1,3 @@ neuralmagic/benchmarks/configs/benchmark_serving.json neuralmagic/benchmarks/configs/benchmark_throughput.json +neuralmagic/benchmarks/configs/benchmark_remote_push.json diff --git a/.github/workflows/scripts/nm-run-benchmarks.sh b/.github/scripts/nm-run-benchmarks.sh similarity index 100% rename from .github/workflows/scripts/nm-run-benchmarks.sh rename to .github/scripts/nm-run-benchmarks.sh diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a1a4317fbc8c3..41c2eaf07c489 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -36,7 +36,7 @@ jobs: uses: ./.github/workflows/nm-benchmark.yml with: label: aws-avx2-192G-4-a10g-96G - benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt + benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt timeout: 240 gitref: '${{ github.ref }}' Gi_per_thread: 4 @@ -48,7 +48,7 @@ jobs: uses: ./.github/workflows/nm-benchmark.yml with: label: aws-avx2-32G-a10g-24G - benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt + benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt timeout: 240 gitref: '${{ github.ref }}' Gi_per_thread: 12 diff --git a/neuralmagic/benchmarks/common.py b/neuralmagic/benchmarks/common.py index 2bd6f74aa6430..398f8973cc8d2 100644 --- a/neuralmagic/benchmarks/common.py +++ b/neuralmagic/benchmarks/common.py @@ -26,7 +26,7 @@ def max_model_length_from_model_id(model: str, return _get_and_verify_max_len(config, max_model_len=None) -def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: +def script_args_to_cla(config: NamedTuple) -> Iterable[dict]: #config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs kv = vars(config.script_args) @@ -41,7 +41,7 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: if len(v) == 0: key_args.append(k) - key_args_cla = list(map(lambda k: f"--{k}", key_args)) + key_args_cla = {f"{k}": "" for k in key_args} # Remove empty lists from arg_lists and remove key args from keys arg_lists = list(filter(lambda arg_list: len(arg_list) != 0, arg_lists)) @@ -49,9 +49,9 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: assert len(keys) == len(arg_lists) for args in itertools.product(*arg_lists): - cla = key_args_cla - for name, value in zip(keys, args): - cla.extend([f"--{name}", f"{value}"]) + args_dict = dict(zip(keys, args)) + cla = key_args_cla.copy() + cla.update(args_dict) yield cla diff --git a/neuralmagic/benchmarks/configs/benchmark_serving.json b/neuralmagic/benchmarks/configs/benchmark_serving.json index 558767c3dbb7a..a128307cc83a0 100644 --- a/neuralmagic/benchmarks/configs/benchmark_serving.json +++ 
b/neuralmagic/benchmarks/configs/benchmark_serving.json @@ -1,7 +1,7 @@ { "configs": [ { - "description": "Benchmark vllm serving", + "description": "VLLM Serving", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", diff --git a/neuralmagic/benchmarks/configs/benchmark_throughput.json b/neuralmagic/benchmarks/configs/benchmark_throughput.json index 4166e92dd158b..5ec8231fc6e03 100644 --- a/neuralmagic/benchmarks/configs/benchmark_throughput.json +++ b/neuralmagic/benchmarks/configs/benchmark_throughput.json @@ -1,7 +1,7 @@ { "configs": [ { - "description": "Benchmark vllm engine throughput - with dataset", + "description": "VLLM Engine throughput (with dataset)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -36,7 +36,7 @@ } }, { - "description": "Benchmark vllm engine prefill throughput - synthetic", + "description": "VLLM Engine prefill throughput (synthetic)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -46,9 +46,6 @@ "max_model_lens" : [4096], "script_name": "benchmark_throughput", "script_args": { - "backend": [ - "vllm" - ], "input-len": [ 1, 16, @@ -62,23 +59,14 @@ "output-len": [ 1 ], - "n": [ - 1 - ], "num-prompts": [ 1 ], - "seed": [ - 0 - ], - "dtype": [ - "auto" - ], "use-all-available-gpus_" : [] } }, { - "description": "Benchmark vllm engine decode throughput - synthetic", + "description": "VLLM Engine decode throughput (synthetic)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -88,18 +76,12 @@ "max_model_lens" : [4096], "script_name": "benchmark_throughput", "script_args": { - "backend": [ - "vllm" - ], "input-len": [ 2 ], "output-len": [ 128 ], - "n": [ - 1 - ], "num-prompts": [ 1, 4, @@ -108,14 +90,8 @@ 32, 64 ], - "seed": [ - 0 - ], - "dtype": [ - "auto" - ], "use-all-available-gpus_" : [] } } ] -} \ No newline at end of file +} diff --git a/neuralmagic/benchmarks/configs/minimal_test.json b/neuralmagic/benchmarks/configs/minimal_test.json new file mode 100644 index 0000000000000..b04ea29e8b620 --- /dev/null +++ b/neuralmagic/benchmarks/configs/minimal_test.json @@ -0,0 +1,43 @@ +{ + "configs": [ + { + "description": "Benchmark vllm serving", + "models": [ + "mistralai/Mistral-7B-Instruct-v0.2" + ], + "use_all_available_gpus" : "", + "max_model_lens": [ + 4096 + ], + "sparsity": [], + "script_name": "benchmark_serving", + "script_args": { + "nr-qps-pair_" : ["5,inf"], + "dataset": [ + "sharegpt" + ] + } + }, + { + "description": "Benchmark vllm engine throughput - with dataset", + "models": [ + "mistralai/Mistral-7B-Instruct-v0.2" + ], + "max_model_lens" : [4096], + "script_name": "benchmark_throughput", + "script_args": { + "output-len": [ + 128 + ], + "num-prompts": [ + 100 + ], + "dataset" : [ + "sharegpt" + ], + "max-model-len" : [4096], + "use-all-available-gpus_" : [] + } + } + ] +} diff --git a/neuralmagic/benchmarks/run_benchmark_serving.py b/neuralmagic/benchmarks/run_benchmark_serving.py index 649451d235bc3..110d47e354e24 100644 --- a/neuralmagic/benchmarks/run_benchmark_serving.py +++ b/neuralmagic/benchmarks/run_benchmark_serving.py @@ -2,6 +2,7 @@ import subprocess import requests import time +import json import itertools from typing import NamedTuple, Optional @@ -122,12 +123,26 @@ def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None: " ".join([f"--{k} {v}" for k, v in server_args.items()]) for script_args in script_args_to_cla(config): + + description = (f"{config.description}\n" + + f"model - {model}\n" + + f"max-model-len - 
{max_model_len}\n" + + f"sparsity - {sparsity}\n" + + f"{config.script_name} " + + f"{json.dumps(script_args, indent=2)}") + bench_cmd = (["python3", "-m" - f"{script_path}"] + script_args + + f"{script_path}"] + + ["--description", f"{description}"] + ["--model", f"{model}"] + ["--tokenizer", f"{model}"] + ["--port", f"{BENCH_SERVER_PORT}"] + ["--host", f"{BENCH_SERVER_HOST}"]) + # Add script args + for k, v in script_args.items(): + bench_cmd.append(f"--{k}") + if v != "": + bench_cmd.append(f"{v}") if output_directory: bench_cmd += (["--save-directory", f"{output_directory}"] + diff --git a/neuralmagic/benchmarks/run_benchmark_throughput.py b/neuralmagic/benchmarks/run_benchmark_throughput.py index 622ea32cdfbc5..d6a505df71559 100644 --- a/neuralmagic/benchmarks/run_benchmark_throughput.py +++ b/neuralmagic/benchmarks/run_benchmark_throughput.py @@ -1,4 +1,5 @@ import argparse +import json from pathlib import Path from typing import NamedTuple, Optional @@ -29,10 +30,23 @@ def run_benchmark_throughput_script(config: NamedTuple, for max_model_len in max_model_lens: for script_args in script_args_to_cla(config): + + description = (f"{config.description}\n" + f"model - {model}\n" + + f"max_model_len - {max_model_len}\n" + + f"{config.script_name} " + + f"{json.dumps(script_args, indent=2)}") + bench_cmd = (["python3", "-m", f"{script_path}"] + - script_args + ["--model", f"{model}"] + + ["--description", f"{description}"] + + ["--model", f"{model}"] + ["--tokenizer", f"{model}"] + ["--max-model-len", f"{max_model_len}"]) + # Add script args + for k, v in script_args.items(): + bench_cmd.append(f"--{k}") + if v != "": + bench_cmd.append(f"{v}") if output_directory: bench_cmd = bench_cmd + [ diff --git a/neuralmagic/benchmarks/scripts/benchmark_serving.py b/neuralmagic/benchmarks/scripts/benchmark_serving.py index 4b8061f6278af..f0c1d8d9951fc 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_serving.py +++ b/neuralmagic/benchmarks/scripts/benchmark_serving.py @@ -33,11 +33,11 @@ from vllm.transformers_utils.tokenizer import get_tokenizer from .common import generate_synthetic_requests, print_serving_request_io from .datasets_registry import get_dataset, DatasetArgs -from .benchmark_result import (BenchmarkResult, - BenchmarkServingResultMetadataKeys as - ResultMetadataKeys, - BenchmarkServingResultMetricTemplates as - ResultMetricTemplates) +from .logging.benchmark_result import (BenchmarkResult, + BenchmarkServingResultMetadataKeys as + ResultMetadataKeys, + BenchmarkServingResultMetricTemplates as + ResultMetricTemplates) from neuralmagic.benchmarks.scripts.backend_request_func import ( ASYNC_REQUEST_FUNCS, @@ -337,6 +337,7 @@ def script_args_as_json_dict(script_args: argparse.Namespace): current_dt = datetime.now() result = BenchmarkResult( + description=args.description, date=current_dt, script_name=Path(__file__).name, script_args=script_args_as_json_dict(args), @@ -382,6 +383,13 @@ def from_str(arg: str): parser = argparse.ArgumentParser( description='''Benchmark the online serving throughput.''') + parser.add_argument( + "--description", + type=str, + default="benchmark-serving", + help= + "Benchmark description. 
This is primarily useful when we log the benchmark results and process them for plotting charts" + ) parser.add_argument( "--backend", type=str, diff --git a/neuralmagic/benchmarks/scripts/benchmark_throughput.py b/neuralmagic/benchmarks/scripts/benchmark_throughput.py index 182f15c5661f5..9138ea0f8ad47 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_throughput.py +++ b/neuralmagic/benchmarks/scripts/benchmark_throughput.py @@ -14,9 +14,9 @@ from transformers import AutoTokenizer from .common import generate_synthetic_requests, warmup_vllm_engine, num_available_gpus, print_request_outputs from .datasets_registry import get_dataset, DatasetArgs -from .benchmark_result import (BenchmarkResult, - BenchmarkThroughputResultMetricTemplates as - ResultMetricTemplates) +from .logging.benchmark_result import (BenchmarkResult, + BenchmarkThroughputResultMetricTemplates + as ResultMetricTemplates) def get_tensor_parallel_size(args: argparse.Namespace) -> int: @@ -145,6 +145,7 @@ def main(args: argparse.Namespace): current_dt = datetime.now() result = BenchmarkResult( + description=args.description, date=current_dt, script_name=Path(__file__).name, script_args=vars(args), @@ -168,6 +169,13 @@ def main(args: argparse.Namespace): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Benchmark the throughput.") + parser.add_argument( + "--description", + type=str, + default="benchmark-throughput", + help= + "Benchmark description. This is primarily useful when we log the benchmark results and process them for plotting charts" + ) parser.add_argument("--backend", type=str, choices=["vllm"], diff --git a/neuralmagic/benchmarks/scripts/common.py b/neuralmagic/benchmarks/scripts/common.py index ee3a1611eac12..d4addb99a2878 100644 --- a/neuralmagic/benchmarks/scripts/common.py +++ b/neuralmagic/benchmarks/scripts/common.py @@ -41,7 +41,7 @@ def get_benchmarking_context() -> dict: "torch_version": f"{torch.__version__}", "torch_cuda_version": f"{torch.version.cuda}", "cuda_devices": f"{cuda_devices}", - "cuda_device_names": f"{cuda_device_names}" + "cuda_device_names": cuda_device_names } diff --git a/neuralmagic/benchmarks/scripts/benchmark_result.py b/neuralmagic/benchmarks/scripts/logging/benchmark_result.py similarity index 74% rename from neuralmagic/benchmarks/scripts/benchmark_result.py rename to neuralmagic/benchmarks/scripts/logging/benchmark_result.py index de00aa0519138..a997cbb855698 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_result.py +++ b/neuralmagic/benchmarks/scripts/logging/benchmark_result.py @@ -8,7 +8,7 @@ from typing import Optional from types import SimpleNamespace from pathlib import Path -from .common import get_benchmarking_context +from ..common import get_benchmarking_context from datetime import datetime from dataclasses import dataclass, field from enum import Enum @@ -93,6 +93,8 @@ class BenchmarkResult: VLLM_VERSION_KEY_ = "vllm_version" METADATA_KEY_ = "metadata" METRICS_KEY_ = "metrics" + DESCRIPTION_KEY_ = "description" + GPU_DESCRIPTION_KEY_ = "gpu_description" DATE_KEY_ = "date" DATE_EPOCH_KEY_ = "epoch_time" SCRIPT_NAME_KEY_ = "script_name" @@ -107,23 +109,55 @@ class BenchmarkResult: def datetime_as_string(date: datetime): return date.astimezone().strftime("%Y-%m-%d %H:%M:%S %Z") - def __init__(self, date: datetime, script_name: str, script_args: dict, - tensor_parallel_size: int, model: str, + @staticmethod + def describe_gpu(bench_ctx: dict, num_gpus_used: int) -> str: + """ + Return a string that describes the gpus used in 
benchmarking + """ + cuda_device_names_key = "cuda_device_names" + gpu_names = bench_ctx.get(cuda_device_names_key) + assert gpu_names is not None + gpu_name = gpu_names[0] + + # Make sure all gpus are the same before we report. + assert all(map(lambda x: x == gpu_name, gpu_names[:num_gpus_used])) + + return f"{gpu_name} x {num_gpus_used}" + + def __init__(self, description: str, date: datetime, script_name: str, + script_args: dict, tensor_parallel_size: int, model: str, tokenizer: Optional[str], dataset: Optional[str]): - # TODO (varun) Add vllm version & githash + + bench_ctx = get_benchmarking_context() + + # TODO (varun) Add githash self.result_dict = { self.BENCHMARK_RESULT_SCHEMA_VERSION_KEY_: BENCHMARK_RESULTS_SCHEMA_VERSION, - self.VLLM_VERSION_KEY_: __vllm_version__, - self.BENCHMARKING_CONTEXT_KEY_: get_benchmarking_context(), - self.DATE_KEY_: BenchmarkResult.datetime_as_string(date), - self.DATE_EPOCH_KEY_: date.timestamp(), - self.SCRIPT_NAME_KEY_: script_name, - self.TENSOR_PARALLEL_SIZE_KEY_: tensor_parallel_size, - self.MODEL_KEY_: model, - self.TOKENIZER_KEY_: tokenizer if tokenizer is not None else model, - self.DATASET_KEY_: dataset if dataset is not None else "synthetic", - self.SCRIPT_ARGS_KEY_: script_args, + self.VLLM_VERSION_KEY_: + __vllm_version__, + self.BENCHMARKING_CONTEXT_KEY_: + bench_ctx, + self.DESCRIPTION_KEY_: + description, + self.GPU_DESCRIPTION_KEY_: + BenchmarkResult.describe_gpu(bench_ctx, tensor_parallel_size), + self.DATE_KEY_: + BenchmarkResult.datetime_as_string(date), + self.DATE_EPOCH_KEY_: + date.timestamp(), + self.SCRIPT_NAME_KEY_: + script_name, + self.TENSOR_PARALLEL_SIZE_KEY_: + tensor_parallel_size, + self.MODEL_KEY_: + model, + self.TOKENIZER_KEY_: + tokenizer if tokenizer is not None else model, + self.DATASET_KEY_: + dataset if dataset is not None else "synthetic", + self.SCRIPT_ARGS_KEY_: + script_args, # Any metadata that the caller script wants to store should be stored here. self.METADATA_KEY_: {}, # Any benchmarking metrics should be stored here. @@ -133,8 +167,8 @@ def __init__(self, date: datetime, script_name: str, script_args: dict, def __setitem__(self, key: str, item: any): self.result_dict[key] = item - def __getitem__(self, key: str) -> any: - return self.result_dict[key] + def __getitem__(self, key: str, default: any = None) -> any: + return self.result_dict.get(key, default) def add_metric(self, metric_template: MetricTemplate, value: float) -> None: diff --git a/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py b/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py new file mode 100644 index 0000000000000..a7564417ba702 --- /dev/null +++ b/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py @@ -0,0 +1,154 @@ +""" +Scripts to process GHA benchmarking JSONs produced by BenchmarkResult +that could be consumed by `github-action-benchmark`. +Reference : https://github.com/benchmark-action/github-action-benchmark +""" +import argparse +import json +from pathlib import Path +from functools import reduce +from dataclasses import dataclass +from typing import List, Iterable, NamedTuple + +from .benchmark_result import GHABenchmarkToolName, BenchmarkResult, MetricTemplate + + +@dataclass +class GHARecord: + """ + GHARecord is what actually goes into the output JSON. + - name : Chart title. Unique names map to a unique chart. + - unit : Y-axis label. + - value : Value to plot. + - extra : Any extra information that is passed as a JSON string. 
+ """ + name: str + unit: str + value: float + extra: str + + @staticmethod + def extra_from_benchmark_result(br: BenchmarkResult) -> str: + extra_as_dict = { + BenchmarkResult.DESCRIPTION_KEY_: + br.get(BenchmarkResult.DESCRIPTION_KEY_), + BenchmarkResult.BENCHMARKING_CONTEXT_KEY_: + br.get(BenchmarkResult.BENCHMARKING_CONTEXT_KEY_), + BenchmarkResult.SCRIPT_NAME_KEY_: + br.get(BenchmarkResult.SCRIPT_NAME_KEY_), + BenchmarkResult.SCRIPT_ARGS_KEY_: + br.get(BenchmarkResult.SCRIPT_ARGS_KEY_), + BenchmarkResult.GPU_DESCRIPTION_KEY_: + br.get(BenchmarkResult.GPU_DESCRIPTION_KEY_) + } + + return f"{json.dumps(extra_as_dict, indent=2)}" + + @staticmethod + def from_metric_template(metric_template: MetricTemplate, extra: str = ""): + return GHARecord( + name=f"{metric_template.key} ({metric_template.unit})", + unit=metric_template.unit, + value=metric_template.value, + extra=extra) + + +class Tool_Record_T(NamedTuple): + tool: GHABenchmarkToolName + record: GHARecord + + +def process(json_file_path: Path) -> Iterable[Tool_Record_T]: + + assert json_file_path.exists() + + json_data: dict = None + with open(json_file_path, "r") as f: + json_data = json.load(f) + assert json_data is not None + + print(f"processing file : {json_file_path}") + + hover_data = GHARecord.extra_from_benchmark_result(json_data) + metrics: Iterable[dict] = json_data.get(BenchmarkResult.METRICS_KEY_) + metrics: Iterable[MetricTemplate] = map( + lambda md: MetricTemplate.from_dict(md), metrics.values()) + + return map( + lambda metric: Tool_Record_T( + metric.tool, + GHARecord.from_metric_template(metric, extra=hover_data)), metrics) + + +def main(input_directory: Path, bigger_is_better_output_json_file_name: Path, + smaller_is_better_output_json_file_name: Path) -> None: + + def dump_to_json(gha_records: List[GHARecord], output_path: Path): + # Make output directory if it doesn't exist + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Make data JSON serializable + gha_record_dicts = list(map(lambda x: x.__dict__, gha_records)) + with open(output_path, 'w+') as f: + json.dump(gha_record_dicts, f, indent=4) + + json_file_paths = input_directory.glob('*.json') + tool_records: List[Tool_Record_T] = list( + reduce(lambda whole, part: whole + part, + (map(lambda json_file_path: list(process(json_file_path)), + json_file_paths)))) + + bigger_is_better: List[GHARecord] = list( + map( + lambda tool_record: tool_record.record, + filter( + lambda tool_record: tool_record.tool == GHABenchmarkToolName. + BiggerIsBetter, tool_records))) + + smaller_is_better: List[GHARecord] = list( + map( + lambda tool_record: tool_record.record, + filter( + lambda tool_record: tool_record.tool == GHABenchmarkToolName. + SmallerIsBetter, tool_records))) + + dump_to_json(bigger_is_better, bigger_is_better_output_json_file_name) + dump_to_json(smaller_is_better, smaller_is_better_output_json_file_name) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=""" + Process the benchmark JSONs produced by BenchmarkResult and output JSONs + that could be consumed by `github-action-benchmark` + Reference : https://github.com/benchmark-action/github-action-benchmark + """) + + parser.add_argument("-i", + "--input-json-directory", + required=True, + type=str, + help=""" + Path to the directory containing BenchmarkResult jsons. + This is typically the output directory passed to the benchmark + runner scripts like neuralmagic/benchmarks/run_benchmarks.py. 
+ """) + + parser.add_argument("--bigger-is-better-output-file-path", + type=str, + required=True, + help=""" + An output file path, where the GHABenchmarkToolName BiggerIsBetter metrics are to be stored. + """) + + parser.add_argument("--smaller-is-better-output-file-path", + type=str, + required=True, + help=""" + An output file path, where the GHABenchmarkToolName SmallerIsBetter metrics are to be stored + """) + + args = parser.parse_args() + + main(Path(args.input_json_directory), + Path(args.bigger_is_better_output_file_path), + Path(args.smaller_is_better_output_file_path))