tests: parametrize benchmark tests #4974

Open
wants to merge 9 commits into base: main
30 changes: 28 additions & 2 deletions tests/framework/ab_test.py
@@ -22,9 +22,9 @@
does not block PRs). If not, it fails, preventing PRs from introducing new vulnerable dependencies.
"""
import statistics
from pathlib import Path
from pathlib import Path, PosixPath
from tempfile import TemporaryDirectory
from typing import Callable, List, Optional, TypeVar
from typing import Callable, Generator, List, Optional, Tuple, TypeVar

import scipy

@@ -98,6 +98,32 @@ def git_ab_test(
return result_a, result_b, comparison


def git_clone_ab_dirs(
a_revision: str = DEFAULT_A_REVISION,
b_revision: Optional[str] = None,
) -> Generator[Tuple[PosixPath, PosixPath], None, None]:
"""
Prepare cloned Git repositories to run A/B tests.

:param a_revision: The revision to check out for the "A" part of the test. Defaults to the pull request target branch
if run in CI, and "main" otherwise.
:param b_revision: The git revision to check out for the "B" part of the test. Defaults to whatever is currently checked
out (in which case the existing checkout is used directly instead of a fresh clone).
"""

with TemporaryDirectory() as tmp_dir:
dir_a = git_clone(Path(tmp_dir) / a_revision, a_revision)

if b_revision:
dir_b = git_clone(Path(tmp_dir) / b_revision, b_revision)
else:
# By default, pytest execution happens inside the `tests` subdirectory. Pass the repository root, as
# documented.
dir_b = Path.cwd().parent

yield (dir_a, dir_b)
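
As a usage sketch (not part of this diff), this generator is meant to be wrapped into a pytest fixture so the clones happen once and both directories are handed to a test. The fixture registration below mirrors what `test_benchmarks.py` does later in this PR; the test class and assertions are purely illustrative.

```python
import pytest

from framework.ab_test import git_clone_ab_dirs

# Register the generator as a class-scoped fixture (as test_benchmarks.py does).
git_clone_ab_dirs_one_time = pytest.fixture(git_clone_ab_dirs, scope="class")


class TestExample:
    """Hypothetical consumer of the fixture."""

    def test_uses_both_checkouts(self, git_clone_ab_dirs_one_time):
        dir_a, dir_b = git_clone_ab_dirs_one_time  # clone of "A", current "B" checkout
        assert dir_a.is_dir() and dir_b.is_dir()
```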


DEFAULT_A_DIRECTORY = FC_WORKSPACE_DIR / "build" / "main"
DEFAULT_B_DIRECTORY = FC_WORKSPACE_DIR / "build" / "cargo_target" / DEFAULT_TARGET_DIR

152 changes: 97 additions & 55 deletions tests/integration_tests/performance/test_benchmarks.py
@@ -2,93 +2,135 @@
# SPDX-License-Identifier: Apache-2.0
"""Optional benchmarks-do-not-regress test"""
import contextlib
import json
import logging
import platform
import re
import shutil
from pathlib import Path
from typing import Callable, List

import pytest

from framework import utils
from framework.ab_test import git_ab_test
from framework.ab_test import binary_ab_test, git_clone_ab_dirs
from host_tools.cargo_build import cargo

LOGGER = logging.getLogger(__name__)
git_clone_ab_dirs_one_time = pytest.fixture(git_clone_ab_dirs, scope="class")


@pytest.mark.no_block_pr
@pytest.mark.timeout(900)
def test_no_regression_relative_to_target_branch():
def get_benchmark_names() -> List[str]:
"""
Run the microbenchmarks in this repository, comparing results from pull
request target branch against what's achieved on HEAD
Get a list of benchmark test names
"""
git_ab_test(run_criterion, compare_results)

_, stdout, _ = cargo(
"bench",
f"--workspace --quiet --target {platform.machine()}-unknown-linux-musl",
"--list",
)

# Turn a line like `page_fault #2: benchmark` into a benchmark name like `page_fault`.
benchmark_names = [
re.sub(r"\s#([0-9]*)", "", i.split(":")[0])
for i in stdout.split("\n")
if i.endswith(": benchmark")
]

def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
return list(set(benchmark_names))
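
To illustrate the extraction above (not part of the diff), here is the same comprehension applied to an invented excerpt of `cargo bench -- --list` output; only lines ending in `: benchmark` are kept and the ` #N` suffix is stripped:

```python
import re

# Invented listing; the real one comes from the cargo invocation above.
stdout = (
    "page_fault: benchmark\n"
    "page_fault #2: benchmark\n"
    "queue_pop_u8: benchmark\n"
    "some_unit_test: test\n"
)

benchmark_names = [
    re.sub(r"\s#([0-9]*)", "", line.split(":")[0])
    for line in stdout.split("\n")
    if line.endswith(": benchmark")
]
print(sorted(set(benchmark_names)))  # ['page_fault', 'queue_pop_u8']
```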


class TestBenchMarks:
"""
Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
This class is used to prevent fixtures from being executed for each parameter of
a parametrized test.
"""
baseline_name = "a_baseline" if is_a else "b_baseline"

with contextlib.chdir(firecracker_checkout):
# Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
# usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
# extract the path to the compiled benchmark binary.
_, stdout, _ = cargo(
"bench",
f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
@pytest.mark.no_block_pr
@pytest.mark.parametrize("benchname", get_benchmark_names())
def test_no_regression_relative_to_target_branch(
self, benchname, git_clone_ab_dirs_one_time
):
"""
Run the microbenchmarks in this repository, comparing results from pull
request target branch against what's achieved on HEAD
"""

dir_a = git_clone_ab_dirs_one_time[0]
dir_b = git_clone_ab_dirs_one_time[1]
run_criterion = get_run_criterion(benchname)
compare_results = get_compare_results(benchname)

binary_ab_test(
test_runner=run_criterion,
comparator=compare_results,
a_directory=dir_a,
b_directory=dir_b,
)

executables = []
for line in stdout.split("\n"):
if line:
msg = json.loads(line)
executable = msg.get("executable")
if executable:
executables.append(executable)
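
For context on the code path being removed here (illustrative, not part of the diff): with `--message-format json`, cargo prints one JSON object per line, and artifact messages carry an `executable` path, which is what the loop above collects. A minimal sketch with an invented message:

```python
import json

# Invented single line of `cargo bench --message-format json --no-run` output.
line = (
    '{"reason": "compiler-artifact", "target": {"name": "benchmarks"},'
    ' "executable": "/tmp/cargo_target/release/deps/benchmarks-0123abcd"}'
)

msg = json.loads(line)
executable = msg.get("executable")  # absent/None for messages without a binary
if executable:
    print(executable)
```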

for executable in executables:
def get_run_criterion(benchmark_name) -> Callable[[Path, bool], Path]:
"""
Get a function that executes the specified benchmark, pinned to some CPU
"""

def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
baseline_name = "a_baseline" if is_a else "b_baseline"

with contextlib.chdir(firecracker_checkout):
utils.check_output(
f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
f"taskset -c 1 cargo bench --workspace --quiet -- {benchmark_name} --exact --save-baseline {baseline_name}"
)

return firecracker_checkout / "build" / "cargo_target" / "criterion"

return _run_criterion
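
A small worked example (not part of the diff) of what the returned closure ends up running; `page_fault` is an illustrative benchmark name, and the command is executed inside whichever checkout the closure is given:

```python
benchmark_name = "page_fault"  # illustrative
for is_a in (True, False):
    baseline_name = "a_baseline" if is_a else "b_baseline"
    print(
        "taskset -c 1 cargo bench --workspace --quiet -- "
        f"{benchmark_name} --exact --save-baseline {baseline_name}"
    )
# taskset -c 1 cargo bench --workspace --quiet -- page_fault --exact --save-baseline a_baseline
# taskset -c 1 cargo bench --workspace --quiet -- page_fault --exact --save-baseline b_baseline
```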

def compare_results(location_a_baselines: Path, location_b_baselines: Path):
"""Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
for benchmark in location_b_baselines.glob("*"):
data = json.loads(
(benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
)

average_ns = data["mean"]["point_estimate"]
def get_compare_results(benchmark_name) -> Callable[[Path, Path], None]:
"""
Get a function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
"""

def _compare_results(location_a_baselines: Path, location_b_baselines: Path):

LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
_, stdout, _ = cargo(
"bench",
f"--workspace --target {platform.machine()}-unknown-linux-musl --quiet",
f"--exact {benchmark_name} --list",
)

# Turn a line like `page_fault #2: benchmark` into `page_fault_2`, because that is the name of the
# directory Criterion creates under `cargo_target/criterion/`.
bench_mark_targets = [
re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", i.split(":")[0])
for i in stdout.split("\n")
if i.endswith(": benchmark")
]
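
To make the mapping above concrete (not part of the diff): the named-group substitution turns each listed target into the directory name Criterion uses under `cargo_target/criterion/`. The sample lines are invented:

```python
import re

lines = ["page_fault: benchmark", "page_fault #2: benchmark"]  # invented listing
targets = [
    re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", line.split(":")[0])
    for line in lines
]
print(targets)  # ['page_fault', 'page_fault_2']
```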

# If a benchmark has multiple targets, the results of a single benchmark run are written to multiple
# directories, for example `page_fault` and `page_fault_2`.
# We need to copy the A baseline into each of these directories.
for bench_mark_target in bench_mark_targets:
shutil.copytree(
location_a_baselines / bench_mark_target / "a_baseline",
location_b_baselines / bench_mark_target / "a_baseline",
)

# Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
# to do the comparison
for benchmark in location_a_baselines.glob("*"):
shutil.copytree(
benchmark / "a_baseline",
location_b_baselines / benchmark.name / "a_baseline",
_, stdout, _ = cargo(
"bench",
f"--workspace --target {platform.machine()}-unknown-linux-musl",
f"{benchmark_name} --baseline a_baseline --load-baseline b_baseline",
)

_, stdout, _ = cargo(
"bench",
f"--all --target {platform.machine()}-unknown-linux-musl",
"--baseline a_baseline --load-baseline b_baseline",
)
regressions_only = "\n\n".join(
result
for result in stdout.split("\n\n")
if "Performance has regressed." in result
)

# If this string is anywhere in stdout, then at least one of our benchmarks
# is now performing worse with the PR changes.
assert not regressions_only, "\n" + regressions_only
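
A self-contained illustration (not part of the diff) of the regression filter above: Criterion separates per-benchmark reports with blank lines, and only the blocks containing the fixed string "Performance has regressed." are kept for the assertion message. The sample output is invented:

```python
# Invented excerpt of criterion output when loading baseline "b_baseline" against "a_baseline".
stdout = (
    "page_fault            time:   [1.02 µs 1.03 µs 1.04 µs]\n"
    "                      No change in performance detected.\n"
    "\n"
    "queue_pop_u8          time:   [85.1 ns 85.9 ns 86.8 ns]\n"
    "                      Performance has regressed.\n"
)

regressions_only = "\n\n".join(
    block for block in stdout.split("\n\n") if "Performance has regressed." in block
)
assert regressions_only  # the second block is flagged
print(regressions_only)
```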

return _compare_results