diff --git a/.github/actions/nm-benchmark/action.yml b/.github/actions/nm-benchmark/action.yml index 32ccf215fa563..2c91778a31b29 100644 --- a/.github/actions/nm-benchmark/action.yml +++ b/.github/actions/nm-benchmark/action.yml @@ -24,7 +24,7 @@ runs: source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt SUCCESS=0 - .github/workflows/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$? + .github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$? echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT" exit ${SUCCESS} shell: bash diff --git a/.github/data/nm_benchmark_configs_minimal_test_list.txt b/.github/data/nm_benchmark_configs_minimal_test_list.txt new file mode 100644 index 0000000000000..44a0159554c79 --- /dev/null +++ b/.github/data/nm_benchmark_configs_minimal_test_list.txt @@ -0,0 +1 @@ +neuralmagic/benchmarks/configs/minimal_test.json diff --git a/.github/data/nm_benchmark_configs_list.txt b/.github/data/nm_benchmark_nightly_configs_list.txt similarity index 65% rename from .github/data/nm_benchmark_configs_list.txt rename to .github/data/nm_benchmark_nightly_configs_list.txt index 97f1a5057cf69..992aa34481ec1 100644 --- a/.github/data/nm_benchmark_configs_list.txt +++ b/.github/data/nm_benchmark_nightly_configs_list.txt @@ -1,2 +1,3 @@ neuralmagic/benchmarks/configs/benchmark_serving.json neuralmagic/benchmarks/configs/benchmark_throughput.json +neuralmagic/benchmarks/configs/benchmark_remote_push.json diff --git a/.github/workflows/scripts/nm-run-benchmarks.sh b/.github/scripts/nm-run-benchmarks.sh similarity index 100% rename from .github/workflows/scripts/nm-run-benchmarks.sh rename to .github/scripts/nm-run-benchmarks.sh diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a1a4317fbc8c3..41c2eaf07c489 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -36,7 +36,7 @@ jobs: uses: ./.github/workflows/nm-benchmark.yml with: label: aws-avx2-192G-4-a10g-96G - benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt + benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt timeout: 240 gitref: '${{ github.ref }}' Gi_per_thread: 4 @@ -48,7 +48,7 @@ jobs: uses: ./.github/workflows/nm-benchmark.yml with: label: aws-avx2-32G-a10g-24G - benchmark_config_list_file: ./.github/data/nm_benchmark_configs_list.txt + benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt timeout: 240 gitref: '${{ github.ref }}' Gi_per_thread: 12 diff --git a/neuralmagic/benchmarks/common.py b/neuralmagic/benchmarks/common.py index 2bd6f74aa6430..398f8973cc8d2 100644 --- a/neuralmagic/benchmarks/common.py +++ b/neuralmagic/benchmarks/common.py @@ -26,7 +26,7 @@ def max_model_length_from_model_id(model: str, return _get_and_verify_max_len(config, max_model_len=None) -def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: +def script_args_to_cla(config: NamedTuple) -> Iterable[dict]: #config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs kv = vars(config.script_args) @@ -41,7 +41,7 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: if len(v) == 0: key_args.append(k) - key_args_cla = list(map(lambda k: f"--{k}", key_args)) + key_args_cla = {f"{k}": "" for k in key_args} # Remove empty lists from arg_lists and remove key args from 
keys arg_lists = list(filter(lambda arg_list: len(arg_list) != 0, arg_lists)) @@ -49,9 +49,9 @@ def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]: assert len(keys) == len(arg_lists) for args in itertools.product(*arg_lists): - cla = key_args_cla - for name, value in zip(keys, args): - cla.extend([f"--{name}", f"{value}"]) + args_dict = dict(zip(keys, args)) + cla = key_args_cla.copy() + cla.update(args_dict) yield cla diff --git a/neuralmagic/benchmarks/configs/benchmark_serving.json b/neuralmagic/benchmarks/configs/benchmark_serving.json index 558767c3dbb7a..a128307cc83a0 100644 --- a/neuralmagic/benchmarks/configs/benchmark_serving.json +++ b/neuralmagic/benchmarks/configs/benchmark_serving.json @@ -1,7 +1,7 @@ { "configs": [ { - "description": "Benchmark vllm serving", + "description": "VLLM Serving", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", diff --git a/neuralmagic/benchmarks/configs/benchmark_throughput.json b/neuralmagic/benchmarks/configs/benchmark_throughput.json index 4166e92dd158b..5ec8231fc6e03 100644 --- a/neuralmagic/benchmarks/configs/benchmark_throughput.json +++ b/neuralmagic/benchmarks/configs/benchmark_throughput.json @@ -1,7 +1,7 @@ { "configs": [ { - "description": "Benchmark vllm engine throughput - with dataset", + "description": "VLLM Engine throughput (with dataset)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -36,7 +36,7 @@ } }, { - "description": "Benchmark vllm engine prefill throughput - synthetic", + "description": "VLLM Engine prefill throughput (synthetic)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -46,9 +46,6 @@ "max_model_lens" : [4096], "script_name": "benchmark_throughput", "script_args": { - "backend": [ - "vllm" - ], "input-len": [ 1, 16, @@ -62,23 +59,14 @@ "output-len": [ 1 ], - "n": [ - 1 - ], "num-prompts": [ 1 ], - "seed": [ - 0 - ], - "dtype": [ - "auto" - ], "use-all-available-gpus_" : [] } }, { - "description": "Benchmark vllm engine decode throughput - synthetic", + "description": "VLLM Engine decode throughput (synthetic)", "models": [ "facebook/opt-125m", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", @@ -88,18 +76,12 @@ "max_model_lens" : [4096], "script_name": "benchmark_throughput", "script_args": { - "backend": [ - "vllm" - ], "input-len": [ 2 ], "output-len": [ 128 ], - "n": [ - 1 - ], "num-prompts": [ 1, 4, @@ -108,14 +90,8 @@ 32, 64 ], - "seed": [ - 0 - ], - "dtype": [ - "auto" - ], "use-all-available-gpus_" : [] } } ] -} \ No newline at end of file +} diff --git a/neuralmagic/benchmarks/configs/minimal_test.json b/neuralmagic/benchmarks/configs/minimal_test.json new file mode 100644 index 0000000000000..b04ea29e8b620 --- /dev/null +++ b/neuralmagic/benchmarks/configs/minimal_test.json @@ -0,0 +1,43 @@ +{ + "configs": [ + { + "description": "Benchmark vllm serving", + "models": [ + "mistralai/Mistral-7B-Instruct-v0.2" + ], + "use_all_available_gpus" : "", + "max_model_lens": [ + 4096 + ], + "sparsity": [], + "script_name": "benchmark_serving", + "script_args": { + "nr-qps-pair_" : ["5,inf"], + "dataset": [ + "sharegpt" + ] + } + }, + { + "description": "Benchmark vllm engine throughput - with dataset", + "models": [ + "mistralai/Mistral-7B-Instruct-v0.2" + ], + "max_model_lens" : [4096], + "script_name": "benchmark_throughput", + "script_args": { + "output-len": [ + 128 + ], + "num-prompts": [ + 100 + ], + "dataset" : [ + "sharegpt" + ], + "max-model-len" : [4096], + "use-all-available-gpus_" : [] + } + } + ] +} diff --git 
a/neuralmagic/benchmarks/run_benchmark_serving.py b/neuralmagic/benchmarks/run_benchmark_serving.py index 649451d235bc3..110d47e354e24 100644 --- a/neuralmagic/benchmarks/run_benchmark_serving.py +++ b/neuralmagic/benchmarks/run_benchmark_serving.py @@ -2,6 +2,7 @@ import subprocess import requests import time +import json import itertools from typing import NamedTuple, Optional @@ -122,12 +123,26 @@ def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None: " ".join([f"--{k} {v}" for k, v in server_args.items()]) for script_args in script_args_to_cla(config): + + description = (f"{config.description}\n" + + f"model - {model}\n" + + f"max-model-len - {max_model_len}\n" + + f"sparsity - {sparsity}\n" + + f"{config.script_name} " + + f"{json.dumps(script_args, indent=2)}") + bench_cmd = (["python3", "-m" - f"{script_path}"] + script_args + + f"{script_path}"] + + ["--description", f"{description}"] + ["--model", f"{model}"] + ["--tokenizer", f"{model}"] + ["--port", f"{BENCH_SERVER_PORT}"] + ["--host", f"{BENCH_SERVER_HOST}"]) + # Add script args + for k, v in script_args.items(): + bench_cmd.append(f"--{k}") + if v != "": + bench_cmd.append(f"{v}") if output_directory: bench_cmd += (["--save-directory", f"{output_directory}"] + diff --git a/neuralmagic/benchmarks/run_benchmark_throughput.py b/neuralmagic/benchmarks/run_benchmark_throughput.py index 622ea32cdfbc5..d6a505df71559 100644 --- a/neuralmagic/benchmarks/run_benchmark_throughput.py +++ b/neuralmagic/benchmarks/run_benchmark_throughput.py @@ -1,4 +1,5 @@ import argparse +import json from pathlib import Path from typing import NamedTuple, Optional @@ -29,10 +30,23 @@ def run_benchmark_throughput_script(config: NamedTuple, for max_model_len in max_model_lens: for script_args in script_args_to_cla(config): + + description = (f"{config.description}\n" + f"model - {model}\n" + + f"max_model_len - {max_model_len}\n" + + f"{config.script_name} " + + f"{json.dumps(script_args, indent=2)}") + bench_cmd = (["python3", "-m", f"{script_path}"] + - script_args + ["--model", f"{model}"] + + ["--description", f"{description}"] + + ["--model", f"{model}"] + ["--tokenizer", f"{model}"] + ["--max-model-len", f"{max_model_len}"]) + # Add script args + for k, v in script_args.items(): + bench_cmd.append(f"--{k}") + if v != "": + bench_cmd.append(f"{v}") if output_directory: bench_cmd = bench_cmd + [ diff --git a/neuralmagic/benchmarks/scripts/benchmark_serving.py b/neuralmagic/benchmarks/scripts/benchmark_serving.py index 4b8061f6278af..f0c1d8d9951fc 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_serving.py +++ b/neuralmagic/benchmarks/scripts/benchmark_serving.py @@ -33,11 +33,11 @@ from vllm.transformers_utils.tokenizer import get_tokenizer from .common import generate_synthetic_requests, print_serving_request_io from .datasets_registry import get_dataset, DatasetArgs -from .benchmark_result import (BenchmarkResult, - BenchmarkServingResultMetadataKeys as - ResultMetadataKeys, - BenchmarkServingResultMetricTemplates as - ResultMetricTemplates) +from .logging.benchmark_result import (BenchmarkResult, + BenchmarkServingResultMetadataKeys as + ResultMetadataKeys, + BenchmarkServingResultMetricTemplates as + ResultMetricTemplates) from neuralmagic.benchmarks.scripts.backend_request_func import ( ASYNC_REQUEST_FUNCS, @@ -337,6 +337,7 @@ def script_args_as_json_dict(script_args: argparse.Namespace): current_dt = datetime.now() result = BenchmarkResult( + description=args.description, date=current_dt, script_name=Path(__file__).name, 
script_args=script_args_as_json_dict(args), @@ -382,6 +383,13 @@ def from_str(arg: str): parser = argparse.ArgumentParser( description='''Benchmark the online serving throughput.''') + parser.add_argument( + "--description", + type=str, + default="benchmark-serving", + help= + "Benchmark description. This is primarily useful when we log the benchmark results and process them for plotting charts" + ) parser.add_argument( "--backend", type=str, diff --git a/neuralmagic/benchmarks/scripts/benchmark_throughput.py b/neuralmagic/benchmarks/scripts/benchmark_throughput.py index 182f15c5661f5..9138ea0f8ad47 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_throughput.py +++ b/neuralmagic/benchmarks/scripts/benchmark_throughput.py @@ -14,9 +14,9 @@ from transformers import AutoTokenizer from .common import generate_synthetic_requests, warmup_vllm_engine, num_available_gpus, print_request_outputs from .datasets_registry import get_dataset, DatasetArgs -from .benchmark_result import (BenchmarkResult, - BenchmarkThroughputResultMetricTemplates as - ResultMetricTemplates) +from .logging.benchmark_result import (BenchmarkResult, + BenchmarkThroughputResultMetricTemplates + as ResultMetricTemplates) def get_tensor_parallel_size(args: argparse.Namespace) -> int: @@ -145,6 +145,7 @@ def main(args: argparse.Namespace): current_dt = datetime.now() result = BenchmarkResult( + description=args.description, date=current_dt, script_name=Path(__file__).name, script_args=vars(args), @@ -168,6 +169,13 @@ def main(args: argparse.Namespace): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Benchmark the throughput.") + parser.add_argument( + "--description", + type=str, + default="benchmark-throughput", + help= + "Benchmark description. This is primarily useful when we log the benchmark results and process them for plotting charts" + ) parser.add_argument("--backend", type=str, choices=["vllm"], diff --git a/neuralmagic/benchmarks/scripts/common.py b/neuralmagic/benchmarks/scripts/common.py index ee3a1611eac12..d4addb99a2878 100644 --- a/neuralmagic/benchmarks/scripts/common.py +++ b/neuralmagic/benchmarks/scripts/common.py @@ -41,7 +41,7 @@ def get_benchmarking_context() -> dict: "torch_version": f"{torch.__version__}", "torch_cuda_version": f"{torch.version.cuda}", "cuda_devices": f"{cuda_devices}", - "cuda_device_names": f"{cuda_device_names}" + "cuda_device_names": cuda_device_names } diff --git a/neuralmagic/benchmarks/scripts/benchmark_result.py b/neuralmagic/benchmarks/scripts/logging/benchmark_result.py similarity index 74% rename from neuralmagic/benchmarks/scripts/benchmark_result.py rename to neuralmagic/benchmarks/scripts/logging/benchmark_result.py index de00aa0519138..a997cbb855698 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_result.py +++ b/neuralmagic/benchmarks/scripts/logging/benchmark_result.py @@ -8,7 +8,7 @@ from typing import Optional from types import SimpleNamespace from pathlib import Path -from .common import get_benchmarking_context +from ..common import get_benchmarking_context from datetime import datetime from dataclasses import dataclass, field from enum import Enum @@ -93,6 +93,8 @@ class BenchmarkResult: VLLM_VERSION_KEY_ = "vllm_version" METADATA_KEY_ = "metadata" METRICS_KEY_ = "metrics" + DESCRIPTION_KEY_ = "description" + GPU_DESCRIPTION_KEY_ = "gpu_description" DATE_KEY_ = "date" DATE_EPOCH_KEY_ = "epoch_time" SCRIPT_NAME_KEY_ = "script_name" @@ -107,23 +109,55 @@ class BenchmarkResult: def datetime_as_string(date: datetime): return 
date.astimezone().strftime("%Y-%m-%d %H:%M:%S %Z") - def __init__(self, date: datetime, script_name: str, script_args: dict, - tensor_parallel_size: int, model: str, + @staticmethod + def describe_gpu(bench_ctx: dict, num_gpus_used: int) -> str: + """ + Return a string that describes the gpus used in benchmarking + """ + cuda_device_names_key = "cuda_device_names" + gpu_names = bench_ctx.get(cuda_device_names_key) + assert gpu_names is not None + gpu_name = gpu_names[0] + + # Make sure all gpus are the same before we report. + assert all(map(lambda x: x == gpu_name, gpu_names[:num_gpus_used])) + + return f"{gpu_name} x {num_gpus_used}" + + def __init__(self, description: str, date: datetime, script_name: str, + script_args: dict, tensor_parallel_size: int, model: str, tokenizer: Optional[str], dataset: Optional[str]): - # TODO (varun) Add vllm version & githash + + bench_ctx = get_benchmarking_context() + + # TODO (varun) Add githash self.result_dict = { self.BENCHMARK_RESULT_SCHEMA_VERSION_KEY_: BENCHMARK_RESULTS_SCHEMA_VERSION, - self.VLLM_VERSION_KEY_: __vllm_version__, - self.BENCHMARKING_CONTEXT_KEY_: get_benchmarking_context(), - self.DATE_KEY_: BenchmarkResult.datetime_as_string(date), - self.DATE_EPOCH_KEY_: date.timestamp(), - self.SCRIPT_NAME_KEY_: script_name, - self.TENSOR_PARALLEL_SIZE_KEY_: tensor_parallel_size, - self.MODEL_KEY_: model, - self.TOKENIZER_KEY_: tokenizer if tokenizer is not None else model, - self.DATASET_KEY_: dataset if dataset is not None else "synthetic", - self.SCRIPT_ARGS_KEY_: script_args, + self.VLLM_VERSION_KEY_: + __vllm_version__, + self.BENCHMARKING_CONTEXT_KEY_: + bench_ctx, + self.DESCRIPTION_KEY_: + description, + self.GPU_DESCRIPTION_KEY_: + BenchmarkResult.describe_gpu(bench_ctx, tensor_parallel_size), + self.DATE_KEY_: + BenchmarkResult.datetime_as_string(date), + self.DATE_EPOCH_KEY_: + date.timestamp(), + self.SCRIPT_NAME_KEY_: + script_name, + self.TENSOR_PARALLEL_SIZE_KEY_: + tensor_parallel_size, + self.MODEL_KEY_: + model, + self.TOKENIZER_KEY_: + tokenizer if tokenizer is not None else model, + self.DATASET_KEY_: + dataset if dataset is not None else "synthetic", + self.SCRIPT_ARGS_KEY_: + script_args, # Any metadata that the caller script wants to store should be stored here. self.METADATA_KEY_: {}, # Any benchmarking metrics should be stored here. @@ -133,8 +167,8 @@ def __init__(self, date: datetime, script_name: str, script_args: dict, def __setitem__(self, key: str, item: any): self.result_dict[key] = item - def __getitem__(self, key: str) -> any: - return self.result_dict[key] + def __getitem__(self, key: str, default: any = None) -> any: + return self.result_dict.get(key, default) def add_metric(self, metric_template: MetricTemplate, value: float) -> None: diff --git a/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py b/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py new file mode 100644 index 0000000000000..a7564417ba702 --- /dev/null +++ b/neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py @@ -0,0 +1,154 @@ +""" +Scripts to process GHA benchmarking JSONs produced by BenchmarkResult +that could be consumed by `github-action-benchmark`. 
+Reference : https://github.com/benchmark-action/github-action-benchmark +""" +import argparse +import json +from pathlib import Path +from functools import reduce +from dataclasses import dataclass +from typing import List, Iterable, NamedTuple + +from .benchmark_result import GHABenchmarkToolName, BenchmarkResult, MetricTemplate + + +@dataclass +class GHARecord: + """ + GHARecord is what actually goes into the output JSON. + - name : Chart title. Unique names map to a unique chart. + - unit : Y-axis label. + - value : Value to plot. + - extra : Any extra information that is passed as a JSON string. + """ + name: str + unit: str + value: float + extra: str + + @staticmethod + def extra_from_benchmark_result(br: BenchmarkResult) -> str: + extra_as_dict = { + BenchmarkResult.DESCRIPTION_KEY_: + br.get(BenchmarkResult.DESCRIPTION_KEY_), + BenchmarkResult.BENCHMARKING_CONTEXT_KEY_: + br.get(BenchmarkResult.BENCHMARKING_CONTEXT_KEY_), + BenchmarkResult.SCRIPT_NAME_KEY_: + br.get(BenchmarkResult.SCRIPT_NAME_KEY_), + BenchmarkResult.SCRIPT_ARGS_KEY_: + br.get(BenchmarkResult.SCRIPT_ARGS_KEY_), + BenchmarkResult.GPU_DESCRIPTION_KEY_: + br.get(BenchmarkResult.GPU_DESCRIPTION_KEY_) + } + + return f"{json.dumps(extra_as_dict, indent=2)}" + + @staticmethod + def from_metric_template(metric_template: MetricTemplate, extra: str = ""): + return GHARecord( + name=f"{metric_template.key} ({metric_template.unit})", + unit=metric_template.unit, + value=metric_template.value, + extra=extra) + + +class Tool_Record_T(NamedTuple): + tool: GHABenchmarkToolName + record: GHARecord + + +def process(json_file_path: Path) -> Iterable[Tool_Record_T]: + + assert json_file_path.exists() + + json_data: dict = None + with open(json_file_path, "r") as f: + json_data = json.load(f) + assert json_data is not None + + print(f"processing file : {json_file_path}") + + hover_data = GHARecord.extra_from_benchmark_result(json_data) + metrics: Iterable[dict] = json_data.get(BenchmarkResult.METRICS_KEY_) + metrics: Iterable[MetricTemplate] = map( + lambda md: MetricTemplate.from_dict(md), metrics.values()) + + return map( + lambda metric: Tool_Record_T( + metric.tool, + GHARecord.from_metric_template(metric, extra=hover_data)), metrics) + + +def main(input_directory: Path, bigger_is_better_output_json_file_name: Path, + smaller_is_better_output_json_file_name: Path) -> None: + + def dump_to_json(gha_records: List[GHARecord], output_path: Path): + # Make output directory if it doesn't exist + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Make data JSON serializable + gha_record_dicts = list(map(lambda x: x.__dict__, gha_records)) + with open(output_path, 'w+') as f: + json.dump(gha_record_dicts, f, indent=4) + + json_file_paths = input_directory.glob('*.json') + tool_records: List[Tool_Record_T] = list( + reduce(lambda whole, part: whole + part, + (map(lambda json_file_path: list(process(json_file_path)), + json_file_paths)))) + + bigger_is_better: List[GHARecord] = list( + map( + lambda tool_record: tool_record.record, + filter( + lambda tool_record: tool_record.tool == GHABenchmarkToolName. + BiggerIsBetter, tool_records))) + + smaller_is_better: List[GHARecord] = list( + map( + lambda tool_record: tool_record.record, + filter( + lambda tool_record: tool_record.tool == GHABenchmarkToolName. 
+ SmallerIsBetter, tool_records))) + + dump_to_json(bigger_is_better, bigger_is_better_output_json_file_name) + dump_to_json(smaller_is_better, smaller_is_better_output_json_file_name) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=""" + Process the benchmark JSONs produced by BenchmarkResult and output JSONs + that could be consumed by `github-action-benchmark` + Reference : https://github.com/benchmark-action/github-action-benchmark + """) + + parser.add_argument("-i", + "--input-json-directory", + required=True, + type=str, + help=""" + Path to the directory containing BenchmarkResult jsons. + This is typically the output directory passed to the benchmark + runner scripts like neuralmagic/benchmarks/run_benchmarks.py. + """) + + parser.add_argument("--bigger-is-better-output-file-path", + type=str, + required=True, + help=""" + An output file path, where the GHABenchmarkToolName BiggerIsBetter metrics are to be stored. + """) + + parser.add_argument("--smaller-is-better-output-file-path", + type=str, + required=True, + help=""" + An output file path, where the GHABenchmarkToolName SmallerIsBetter metrics are to be stored + """) + + args = parser.parse_args() + + main(Path(args.input_json_directory), + Path(args.bigger_is_better_output_file_path), + Path(args.smaller_is_better_output_file_path))
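
To make the `script_args_to_cla` change above concrete: the function now yields plain dicts (flag-only arguments map to an empty string) and each runner rebuilds the CLI flags itself. Below is a minimal sketch of that round trip; the argument values and the module path are hypothetical, real values come from the JSON configs under `neuralmagic/benchmarks/configs/`.

```python
# Sketch only: mirrors the flag-rebuilding loop added to the runner scripts.
# Values and module path are hypothetical examples, not taken from a real run.
script_args = {
    "use-all-available-gpus_": "",  # flag-only key (empty list in the config)
    "input-len": 2,
    "output-len": 128,
    "num-prompts": 4,
}

bench_cmd = ["python3", "-m", "neuralmagic.benchmarks.scripts.benchmark_throughput"]
for k, v in script_args.items():
    bench_cmd.append(f"--{k}")
    if v != "":  # flag-only args contribute the flag but no value
        bench_cmd.append(f"{v}")

print(" ".join(bench_cmd))
# -> python3 -m neuralmagic.benchmarks.scripts.benchmark_throughput \
#        --use-all-available-gpus_ --input-len 2 --output-len 128 --num-prompts 4
# (output wrapped here for readability)
```

Keeping the arguments as a dict also lets the runners embed the exact sweep point into the new `--description` flag via `json.dumps`, instead of re-parsing an argv list.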
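
For reference, a sketch of the description string a throughput run would pass along, mirroring the f-string in `run_benchmark_throughput.py`; the model and argument values here are hypothetical.

```python
import json

# Hypothetical inputs; the real ones come from the config JSON and the sweep.
config_description = "VLLM Engine decode throughput (synthetic)"
model = "facebook/opt-125m"
max_model_len = 4096
script_name = "benchmark_throughput"
script_args = {"input-len": 2, "output-len": 128, "num-prompts": 4}

description = (f"{config_description}\n"
               f"model - {model}\n"
               f"max_model_len - {max_model_len}\n"
               f"{script_name} "
               f"{json.dumps(script_args, indent=2)}")
print(description)
# VLLM Engine decode throughput (synthetic)
# model - facebook/opt-125m
# max_model_len - 4096
# benchmark_throughput {
#   "input-len": 2,
#   "output-len": 128,
#   "num-prompts": 4
# }
```

This string lands in `BenchmarkResult` under the new `description` key and, from there, in the `extra` payload of the GHA records described below.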
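
`get_benchmarking_context()` now keeps `cuda_device_names` as a real list (rather than a stringified one) so that the new `BenchmarkResult.describe_gpu` can index into it. A runnable sketch of that logic with a made-up device list:

```python
# Mirrors the describe_gpu logic added to BenchmarkResult; the device list is
# made up, the real one comes from get_benchmarking_context().
def describe_gpu(bench_ctx: dict, num_gpus_used: int) -> str:
    gpu_names = bench_ctx["cuda_device_names"]
    gpu_name = gpu_names[0]
    # All GPUs taking part in the run are expected to be identical.
    assert all(name == gpu_name for name in gpu_names[:num_gpus_used])
    return f"{gpu_name} x {num_gpus_used}"

ctx = {"cuda_device_names": ["NVIDIA A10G"] * 4}
print(describe_gpu(ctx, 4))  # -> NVIDIA A10G x 4
```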
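
The new `gha_benchmark_logging` module splits every metric found in the result JSONs into two output files according to its `GHABenchmarkToolName`. A hedged sketch of the invocation and of the record shape it emits; the module path assumes the package layout shown above is importable from the repo root, and the metric name, unit, and value are placeholders.

```python
# Invocation sketch (run from the repo root so the relative imports resolve):
#
#   python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging \
#       -i <benchmark-output-dir> \
#       --bigger-is-better-output-file-path bigger_is_better.json \
#       --smaller-is-better-output-file-path smaller_is_better.json
#
# Each output file is a JSON list of GHARecord dicts, e.g. (placeholder values):
import json

sample_record = {
    "name": "some_throughput_metric (prompts/s)",  # "<metric key> (<unit>)"
    "unit": "prompts/s",
    "value": 123.4,
    # `extra` is a JSON string; it carries the description, benchmarking
    # context, script name/args and gpu description for chart hover text.
    "extra": json.dumps({
        "description": "VLLM Engine decode throughput (synthetic)",
        "gpu_description": "NVIDIA A10G x 4",
    }, indent=2),
}
print(json.dumps([sample_record], indent=4))
```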