tests: parametrize benchmark tests #4974

Open
wants to merge 9 commits into base: main
30 changes: 28 additions & 2 deletions tests/framework/ab_test.py
@@ -22,9 +22,9 @@
does not block PRs). If not, it fails, preventing PRs from introducing new vulnerable dependencies.
"""
import statistics
from pathlib import Path
from pathlib import Path, PosixPath
from tempfile import TemporaryDirectory
from typing import Callable, List, Optional, TypeVar
from typing import Callable, Generator, List, Optional, Tuple, TypeVar

import scipy

@@ -98,6 +98,32 @@ def git_ab_test(
return result_a, result_b, comparison


def git_clone_ab_dirs(
a_revision: str = DEFAULT_A_REVISION,
b_revision: Optional[str] = None,
) -> Generator[Tuple[PosixPath, PosixPath], None, None]:
"""
Prepare cloned Git repositories to run A/B tests.

:param a_revision: The revision to check out for the "A" part of the test. Defaults to the pull request target branch
if run in CI, and "main" otherwise.
:param b_revision: The git revision to check out for the "B" part of the test. Defaults to whatever is currently checked
out (in which case the existing checkout is used directly instead of a fresh clone).
"""

with TemporaryDirectory() as tmp_dir:
dir_a = git_clone(Path(tmp_dir) / a_revision, a_revision)

if b_revision:
dir_b = git_clone(Path(tmp_dir) / b_revision, b_revision)
else:
# By default, pytest execution happens inside the `tests` subdirectory. Pass the repository root, as
# documented.
dir_b = Path.cwd().parent

yield (dir_a, dir_b)
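
As a usage sketch (not part of this diff), this generator is meant to be wrapped into a pytest fixture so the clones happen once and both directories are handed to a test. The fixture registration below mirrors what `test_benchmarks.py` does later in this PR; the test class and assertions are purely illustrative.

```python
import pytest

from framework.ab_test import git_clone_ab_dirs

# Register the generator as a class-scoped fixture (as test_benchmarks.py does).
git_clone_ab_dirs_one_time = pytest.fixture(git_clone_ab_dirs, scope="class")


class TestExample:
    """Hypothetical consumer of the fixture."""

    def test_uses_both_checkouts(self, git_clone_ab_dirs_one_time):
        dir_a, dir_b = git_clone_ab_dirs_one_time  # clone of "A", current "B" checkout
        assert dir_a.is_dir() and dir_b.is_dir()
```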


DEFAULT_A_DIRECTORY = FC_WORKSPACE_DIR / "build" / "main"
DEFAULT_B_DIRECTORY = FC_WORKSPACE_DIR / "build" / "cargo_target" / DEFAULT_TARGET_DIR

152 changes: 97 additions & 55 deletions tests/integration_tests/performance/test_benchmarks.py
@@ -2,93 +2,135 @@
# SPDX-License-Identifier: Apache-2.0
"""Optional benchmarks-do-not-regress test"""
import contextlib
import json
import logging
import platform
import re
import shutil
from pathlib import Path
from typing import Callable, List

import pytest

from framework import utils
from framework.ab_test import git_ab_test
from framework.ab_test import binary_ab_test, git_clone_ab_dirs
from host_tools.cargo_build import cargo

LOGGER = logging.getLogger(__name__)
git_clone_ab_dirs_one_time = pytest.fixture(git_clone_ab_dirs, scope="class")


@pytest.mark.no_block_pr
@pytest.mark.timeout(900)
def test_no_regression_relative_to_target_branch():
def get_benchmark_names() -> List[str]:
"""
Run the microbenchmarks in this repository, comparing results from pull
request target branch against what's achieved on HEAD
Get a list of benchmark test names
"""
git_ab_test(run_criterion, compare_results)

_, stdout, _ = cargo(
"bench",
f"--workspace --quiet --target {platform.machine()}-unknown-linux-musl",
"--list",
)

# Turn a line like `page_fault #2: benchmark` into a benchmark name like `page_fault`.
benchmark_names = [
re.sub(r"\s#([0-9]*)", "", i.split(":")[0])
for i in stdout.split("\n")
if i.endswith(": benchmark")
]

def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
return list(set(benchmark_names))
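
To illustrate the extraction above (not part of the diff), here is the same comprehension applied to an invented excerpt of `cargo bench -- --list` output; only lines ending in `: benchmark` are kept and the ` #N` suffix is stripped:

```python
import re

# Invented listing; the real one comes from the cargo invocation above.
stdout = (
    "page_fault: benchmark\n"
    "page_fault #2: benchmark\n"
    "queue_pop_u8: benchmark\n"
    "some_unit_test: test\n"
)

benchmark_names = [
    re.sub(r"\s#([0-9]*)", "", line.split(":")[0])
    for line in stdout.split("\n")
    if line.endswith(": benchmark")
]
print(sorted(set(benchmark_names)))  # ['page_fault', 'queue_pop_u8']
```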


class TestBenchMarks:
"""
Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
This class is used to prevent fixtures from being executed for each parameter of
a parametrized test.
"""
baseline_name = "a_baseline" if is_a else "b_baseline"

with contextlib.chdir(firecracker_checkout):
# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
@pytest.mark.no_block_pr
@pytest.mark.parametrize("benchname", get_benchmark_names())
def test_no_regression_relative_to_target_branch(
self, benchname, git_clone_ab_dirs_one_time
):
"""
Run the microbenchmarks in this repository, comparing results from pull
request target branch against what's achieved on HEAD
"""

dir_a = git_clone_ab_dirs_one_time[0]
dir_b = git_clone_ab_dirs_one_time[1]
run_criterion = get_run_criterion(benchname)
compare_results = get_compare_results(benchname)

binary_ab_test(
test_runner=run_criterion,
comparator=compare_results,
a_directory=dir_a,
b_directory=dir_b,
)

executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)
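
For context on the code path being removed here (illustrative, not part of the diff): with `--message-format json`, cargo prints one JSON object per line, and artifact messages carry an `executable` path, which is what the loop above collects. A minimal sketch with an invented message:

```python
import json

# Invented single line of `cargo bench --message-format json --no-run` output.
line = (
    '{"reason": "compiler-artifact", "target": {"name": "benchmarks"},'
    ' "executable": "/tmp/cargo_target/release/deps/benchmarks-0123abcd"}'
)

msg = json.loads(line)
executable = msg.get("executable")  # absent/None for messages without a binary
if executable:
    print(executable)
```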

for executable in executables:
def get_run_criterion(benchmark_name) -> Callable[[Path, bool], Path]:
"""
Get a function that executes the specified benchmark, pinned to some CPU
"""

def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
baseline_name = "a_baseline" if is_a else "b_baseline"

with contextlib.chdir(firecracker_checkout):
utils.check_output(
f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
f"taskset -c 1 cargo bench --workspace --quiet -- {benchmark_name} --exact --save-baseline {baseline_name}"
)

return firecracker_checkout / "build" / "cargo_target" / "criterion"

return _run_criterion
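
A small worked example (not part of the diff) of what the returned closure ends up running; `page_fault` is an illustrative benchmark name, and the command is executed inside whichever checkout the closure is given:

```python
benchmark_name = "page_fault"  # illustrative
for is_a in (True, False):
    baseline_name = "a_baseline" if is_a else "b_baseline"
    print(
        "taskset -c 1 cargo bench --workspace --quiet -- "
        f"{benchmark_name} --exact --save-baseline {baseline_name}"
    )
# taskset -c 1 cargo bench --workspace --quiet -- page_fault --exact --save-baseline a_baseline
# taskset -c 1 cargo bench --workspace --quiet -- page_fault --exact --save-baseline b_baseline
```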

def compare_results(location_a_baselines: Path, location_b_baselines: Path):
"""Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
for benchmark in location_b_baselines.glob("*"):
data = json.loads(
(benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
)

average_ns = data["mean"]["point_estimate"]
def get_compare_results(benchmark_name) -> Callable[[Path, Path], None]:
"""
Get a function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
"""

def _compare_results(location_a_baselines: Path, location_b_baselines: Path):

LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
_, stdout, _ = cargo(
"bench",
f"--workspace --target {platform.machine()}-unknown-linux-musl --quiet",
f"--exact {benchmark_name} --list",
)

# Turn a line like `page_fault #2: benchmark` into `page_fault_2`, because that is the name of the
# directory Criterion creates under `cargo_target/criterion/`.
bench_mark_targets = [
re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", i.split(":")[0])
for i in stdout.split("\n")
if i.endswith(": benchmark")
]
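
To make the mapping above concrete (not part of the diff): the named-group substitution turns each listed target into the directory name Criterion uses under `cargo_target/criterion/`. The sample lines are invented:

```python
import re

lines = ["page_fault: benchmark", "page_fault #2: benchmark"]  # invented listing
targets = [
    re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", line.split(":")[0])
    for line in lines
]
print(targets)  # ['page_fault', 'page_fault_2']
```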

# If a benchmark has multiple targets, the results of a single benchmark run are written to multiple
# directories, for example `page_fault` and `page_fault_2`.
# We need to copy the A baseline into each of these directories.
for bench_mark_target in bench_mark_targets:
shutil.copytree(
location_a_baselines / bench_mark_target / "a_baseline",
location_b_baselines / bench_mark_target / "a_baseline",
)

# Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
# to do the comparison
for benchmark in location_a_baselines.glob("*"):
shutil.copytree(
benchmark / "a_baseline",
location_b_baselines / benchmark.name / "a_baseline",
_, stdout, _ = cargo(
"bench",
f"--workspace --target {platform.machine()}-unknown-linux-musl",
f"{benchmark_name} --baseline a_baseline --load-baseline b_baseline",
)

_, stdout, _ = cargo(
"bench",
f"--all --target {platform.machine()}-unknown-linux-musl",
"--baseline a_baseline --load-baseline b_baseline",
)
regressions_only = "\n\n".join(
result
for result in stdout.split("\n\n")
if "Performance has regressed." in result
)

# If this string is anywhere in stdout, then at least one of our benchmarks
# is now performing worse with the PR changes.
assert not regressions_only, "\n" + regressions_only
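
A self-contained illustration (not part of the diff) of the regression filter above: Criterion separates per-benchmark reports with blank lines, and only the blocks containing the fixed string "Performance has regressed." are kept for the assertion message. The sample output is invented:

```python
# Invented excerpt of criterion output when loading baseline "b_baseline" against "a_baseline".
stdout = (
    "page_fault            time:   [1.02 µs 1.03 µs 1.04 µs]\n"
    "                      No change in performance detected.\n"
    "\n"
    "queue_pop_u8          time:   [85.1 ns 85.9 ns 86.8 ns]\n"
    "                      Performance has regressed.\n"
)

regressions_only = "\n\n".join(
    block for block in stdout.split("\n\n") if "Performance has regressed." in block
)
assert regressions_only  # the second block is flagged
print(regressions_only)
```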

return _compare_results