From d2158e68586283efc90ed592f7a9ab3c70fbfc8f Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Sun, 7 Jan 2024 20:31:12 +0100 Subject: [PATCH] feat: Improves latency script by adding ONNX (#288) * feat: Improves latency script * ci: Fixes CI for latency script * ci: Fixes CI for latency script --- .github/workflows/scripts.yml | 1 + scripts/eval_latency.py | 90 +++++++++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 21 deletions(-) diff --git a/.github/workflows/scripts.yml b/.github/workflows/scripts.yml index 5d5c19db8..e530de478 100644 --- a/.github/workflows/scripts.yml +++ b/.github/workflows/scripts.yml @@ -30,6 +30,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -e . --upgrade + pip install onnx onnxruntime - name: Run analysis script run: python scripts/eval_latency.py rexnet1_0x diff --git a/scripts/eval_latency.py b/scripts/eval_latency.py index 9b3a27454..8ba3e046a 100644 --- a/scripts/eval_latency.py +++ b/scripts/eval_latency.py @@ -11,47 +11,94 @@ import time import numpy as np +import onnxruntime import torch from holocron import models @torch.inference_mode() -def main(args): - if args.device is None: - args.device = "cuda:0" if torch.cuda.is_available() else "cpu" - - device = torch.device(args.device) +def run_evaluation( + model: torch.nn.Module, img_tensor: torch.Tensor, num_it: int = 100, warmup_it: int = 10 +) -> np.array: + # Warmup + for _ in range(warmup_it): + _ = model(img_tensor) - # Pretrained imagenet model - model = models.__dict__[args.arch](pretrained=args.pretrained).eval().to(device=device) + timings = [] - # RepVGG - if args.arch.startswith("repvgg") or args.arch.startswith("mobileone"): - model.reparametrize() + # Evaluation runs + for _ in range(num_it): + start_ts = time.perf_counter() + _ = model(img_tensor) + timings.append(time.perf_counter() - start_ts) - # Compile (using tensor cores) - torch.set_float32_matmul_precision("high") - model = torch.compile(model) + return np.array(timings) - # Input - img_tensor = torch.rand((1, 3, args.size, args.size)).to(device=device) +def run_onnx_evaluation( + model: onnxruntime.InferenceSession, img_tensor: np.array, num_it: int = 100, warmup_it: int = 10 +) -> np.array: + # Set input + ort_input = {model.get_inputs()[0].name: img_tensor} # Warmup - for _ in range(10): - _ = model(img_tensor) + for _ in range(warmup_it): + _ = model.run(None, ort_input) timings = [] # Evaluation runs - for _ in range(args.it): + for _ in range(num_it): start_ts = time.perf_counter() - _ = model(img_tensor) + _ = model.run(None, ort_input) timings.append(time.perf_counter() - start_ts) - _timings = np.array(timings) + return np.array(timings) + + +@torch.inference_mode() +def main(args): + # Pretrained imagenet model + model = models.__dict__[args.arch](pretrained=args.pretrained).eval() + # Reparametrizable models + if args.arch.startswith("repvgg") or args.arch.startswith("mobileone"): + model.reparametrize() + + # Input + img_tensor = torch.rand((1, 3, args.size, args.size)) + + _timings = run_evaluation(model, img_tensor, args.it) + cpu_str = f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms" + + # ONNX + torch.onnx.export( + model, + img_tensor, + "tmp.onnx", + export_params=True, + opset_version=14, + ) + onnx_session = onnxruntime.InferenceSession("tmp.onnx") + npy_tensor = img_tensor.numpy() + _timings = run_onnx_evaluation(onnx_session, npy_tensor, args.it) + onnx_str = f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms" + + # GPU + if args.device is None: + args.device = "cuda:0" if torch.cuda.is_available() else "cpu" + if args.device == "cpu": + gpu_str = "N/A" + else: + device = torch.device(args.device) + model = model.to(device=device) + + # Input + img_tensor = img_tensor.to(device=device) + _timings = run_evaluation(model, img_tensor, args.it) + gpu_str = f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms" + print(f"{args.arch} ({args.it} runs on ({args.size}, {args.size}) inputs)") - print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms") + print(f"CPU - {cpu_str}\nONNX - {onnx_str}\nGPU - {gpu_str}") if __name__ == "__main__": @@ -62,6 +109,7 @@ def main(args): parser.add_argument("--size", type=int, default=224, help="The image input size") parser.add_argument("--device", type=str, default=None, help="Default device to perform computation on") parser.add_argument("--it", type=int, default=100, help="Number of iterations to run") + parser.add_argument("--warmup", type=int, default=10, help="Number of iterations for warmup") parser.add_argument( "--pretrained", dest="pretrained", help="Use pre-trained models from the modelzoo", action="store_true" )