Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmark Runner Scripts #168

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions util/cpu2017/buildspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Runs the runcpu "scrub" action on all benchmarks, then the "build" action on
# the benchmarks given as arguments.
#
# Environment:
#   CFGFILE   runcpu config file to use (required).
#   SPECTUNE  value passed to --tune (default: base).

# Quoted so the expansion is not word-split or glob-expanded (ShellCheck SC2223).
: "${SPECTUNE:=base}"
# Fail early with a clear message instead of handing runcpu an empty -config=.
: "${CFGFILE:?CFGFILE must be set to the runcpu config file}"
set -e
runcpu "-config=$CFGFILE" "--tune=$SPECTUNE" -a scrub all
runcpu "-config=$CFGFILE" "--tune=$SPECTUNE" -a build "$@"
4 changes: 4 additions & 0 deletions util/cpu2017/cpexecresults.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Copies every result file (.csv, .rsf, .txt) named on a "format: ... -> FILE"
# line of the given log files into the current directory.
#
# grep -h: never prefix matches with the log's file name; with several logs the
#          prefix would stop the ^-anchored sed pattern from matching.
# sed -n ... /p: only emit lines that really were "format:" lines, so unrelated
#          lines mentioning .txt/.csv are not handed to cp verbatim.
grep -hE '\.(csv|rsf|txt)' "$@" | # Find every .csv, .rsf, or .txt mentioned in the logs
    sed -nE 's/^\s*format:.*-> (.*)$/\1/p' | # Extract the filenames for that
    xargs -I {} cp {} . # Take each file and copy it to the current directory
9 changes: 9 additions & 0 deletions util/cpu2017/cplogs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

# Given a single file "$1.*.log" containing the stdout from several runs,
# extracts the generated CPU2017.*.log files and copies them to a subdirectory "$1.logs/"
# Each copied log is then renamed after the benchmark named on its " Building" line.
set -e
# Without a prefix we would create and populate a stray ".logs/" directory.
: "${1:?usage: cplogs.sh <log-prefix>}"
mkdir -p "$1.logs/"
grep 'The log for this run is in' "$1".*.log | sed -E 's/^.*(CPU.*\.log).*$/\1/g' | xargs -I {} cp /home/cpu2017/result/{} "$1.logs/"
# -H forces the "file:" prefix that the sed pattern relies on, even when only a
# single log was copied (grep omits the prefix for a single file by default).
(cd "$1.logs" && grep -H ' Building' * | sed -E 's/^(\S*\.log): Building ([^ ]*).*$/\1 \2.log/' | xargs -n2 mv)
# -f: if every CPU* log was renamed above there is nothing left to remove,
# which must not make the script fail under `set -e`.
rm -f "$1".logs/CPU*
10 changes: 10 additions & 0 deletions util/cpu2017/extract-benchs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Given a file "$1.*.log", splits it into one log file per benchmark, placed in "$1.logs/"

set -e
: "${1:?usage: extract-benchs.sh <log-prefix>}"
mkdir -p "$1.logs/"
# Resolve the prefix to an absolute path BEFORE changing directory: a plain
# ../"$1" only works when the prefix has no directory component.
prefix="$(cd "$(dirname -- "$1")" && pwd)/$(basename -- "$1")"
cd "$1.logs/"
# Start a new piece (xx00, xx01, ...) at every line beginning with " Building".
csplit "$prefix".*.log '/^ Building/' '{*}'
# -H forces the "file:" prefix that the sed pattern relies on, even when grep
# is handed a single file.
grep -H ' Building' * | sed -E 's/^(\S*): Building ([^ ]*).*$/\1 \2.log/' | xargs -n2 mv
# Remove the pieces that were not renamed (e.g. the text before the first build).
rm xx*
6 changes: 6 additions & 0 deletions util/cpu2017/runspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Runs the runcpu "scrub" action on all benchmarks, then the "run" action on
# the benchmarks given as arguments.
#
# Environment:
#   CFGFILE      runcpu config file to use (required).
#   SPECTUNE     value passed to --tune (default: base).
#   SPECITERNUM  value passed to -n (default: 3).

# Quoted so the expansions are not word-split or glob-expanded (ShellCheck SC2223).
: "${SPECTUNE:=base}"
: "${SPECITERNUM:=3}"
# Fail early with a clear message instead of handing runcpu an empty -config=.
: "${CFGFILE:?CFGFILE must be set to the runcpu config file}"
set -e
runcpu "-config=$CFGFILE" "--tune=$SPECTUNE" -n "$SPECITERNUM" -a scrub all
runcpu "-config=$CFGFILE" "--tune=$SPECTUNE" -n "$SPECITERNUM" -a run "$@"
2 changes: 2 additions & 0 deletions util/runners/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import multirun
from . import runwith
2 changes: 2 additions & 0 deletions util/runners/datename
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Prints the current date and time as YYYY-MM-DD.HH.MM.
date '+%Y-%m-%d.%H.%M'
149 changes: 149 additions & 0 deletions util/runners/multirun.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
import argparse
import os
import shlex
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import json
import itertools
import functools

from runners import runwith

# %% Setup


def expand_matrix(matrix: List[Dict[str, Dict[str, str]]]) -> Tuple[List[Dict[str, str]], List[str]]:
    """Expand a configuration matrix into the product of its axes.

    Each element of ``matrix`` is one axis: a mapping from an option name to
    the settings that option stands for. One combination is produced for every
    way of picking exactly one option from each axis.

    Args:
        matrix: The axes of the matrix, in label order.

    Returns:
        A ``(withs, labels)`` pair of equal length. ``withs[i]`` is the merged
        settings of combination ``i`` (always a fresh dict, never one owned by
        ``matrix``) and ``labels[i]`` is its option names joined with ``-``.

    Raises:
        ValueError: If ``matrix`` has no axes, or if two options of one
            combination set the same key.
    """
    if not matrix:
        raise ValueError('The matrix must contain at least one axis')

    def merge_dict(a: dict, b: dict) -> dict:
        overlap = a.keys() & b.keys()
        if overlap:
            raise ValueError(f'Matrix axes set the same keys: {sorted(overlap)}')
        return {**a, **b}

    withs = []
    labels = []

    for setting in itertools.product(*matrix):
        chosen = (axis[opt] for opt, axis in zip(setting, matrix))
        # Folding from a fresh {} guarantees a new dict even for a single axis,
        # where reduce() would otherwise hand back the matrix's own object.
        withs.append(functools.reduce(merge_dict, chosen, {}))
        labels.append('-'.join(setting))

    return withs, labels


def _shell_command(command: str, logfiles: List[Path]) -> str:
    """Build the shell command line for a validate/analyze command.

    The command string is split with shell-like rules (``#`` starts a comment)
    and, unless the raw string ends with ``#``, every log file path is appended
    as one extra argument.
    """
    argv = shlex.split(command, comments=True)
    if not command.endswith('#'):
        argv += map(str, logfiles)
    # NOTE(review): list2cmdline applies Windows-style quoting. It is kept on
    # purpose: switching to shlex.quote would also quote shell operators that
    # existing commands may contain — confirm before changing.
    return subprocess.list2cmdline(argv)


def main(outdir: Path, optsched_cfg: Path, labels: List[str], withs: Optional[List[List[str]]], cmd: List[str], append_logs: bool = False, git_state: Optional[str] = None, validate_cmd: Optional[str] = None, analyze_cmds: Optional[List[str]] = None, analyze_files: Optional[List[str]] = None, matrix: Optional[List[Path]] = None):
    """Run ``cmd`` once per configuration, then validate and analyze the logs.

    Args:
        outdir: Directory that receives the output of every run.
        optsched_cfg: Directory of the optsched config, forwarded to each run.
        labels: One label per entry of ``withs``; must be empty when the
            configurations come from ``matrix``.
        withs: One list of ``KEY=VALUE`` strings per run, or ``None`` to take
            the configurations from ``matrix`` instead.
        cmd: Command to run; ``{{label}}`` in any argument is replaced by the
            label of the run.
        append_logs: Forwarded to each run; allows appending to existing logs.
        git_state: Forwarded to each run.
        validate_cmd: Shell command run once after all runs with the log files
            appended as arguments; a failure raises ``CalledProcessError``.
        analyze_cmds: Shell commands run after all runs with the log files
            appended as arguments; failures are only reported on stderr.
        analyze_files: If given, one file name (relative to ``outdir``) per
            analyze command, receiving that command's stdout.
        matrix: JSON files whose expanded product supplies the configurations
            and labels when ``withs`` is ``None``.

    Raises:
        ValueError: If the arguments are inconsistent (see the messages).
    """
    analyze_cmds = list(analyze_cmds or [])
    analyze_files = list(analyze_files or [])

    if withs is not None:
        settings: List[Dict[str, str]] = [runwith.parse_withs(with_) for with_ in withs]
    else:
        # Explicit raises instead of asserts: asserts vanish under `python -O`.
        if not matrix:
            raise ValueError('Either per-run settings (--with) or at least one matrix file is required')
        if labels:
            raise ValueError('Labels cannot be given together with a matrix; the matrix generates them')

        settings = []
        labels = []

        for matrix_path in matrix:
            matrix_json = json.loads(matrix_path.read_text())
            new_withs, new_labels = expand_matrix(matrix_json)
            settings += new_withs
            labels += new_labels

    if len(labels) != len(settings):
        raise ValueError(f'Got {len(labels)} labels for {len(settings)} runs; the counts must match')
    if analyze_files and len(analyze_files) != len(analyze_cmds):
        raise ValueError(
            f'Got {len(analyze_files)} analyze files for {len(analyze_cmds)} analyze commands; the counts must match')

    outdir = outdir.resolve()
    logfiles = []

    for label, with_ in zip(labels, settings):
        print(f'Running {label} with settings:', ' '.join(f'{k}={v}' for k, v in with_.items()))
        logfile = runwith.main(
            outdir=outdir,
            optsched_cfg=optsched_cfg,
            label=label,
            with_=with_,
            cmd=[arg.replace('{{label}}', label) for arg in cmd],
            append_logs=append_logs,
            git_state=git_state,
        )
        logfiles.append(logfile)

    if validate_cmd:
        subprocess.run(_shell_command(validate_cmd, logfiles), cwd=outdir, check=True, shell=True)

    if not analyze_files:
        # No output files requested: pair every command with "no file".
        analyze_files = [None] * len(analyze_cmds)

    for analyze_cmd, outfile in zip(analyze_cmds, analyze_files):
        cmdline = _shell_command(analyze_cmd, logfiles)
        result = subprocess.run(cmdline, cwd=outdir,
                                capture_output=True, encoding='utf-8', shell=True)
        if result.returncode != 0:
            print(
                f'Analysis command {cmdline} failed with error code: {result.returncode}', file=sys.stderr)

        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        if outfile:
            with open(outdir / outfile, 'w') as f:
                f.write(result.stdout)


# %% Main
if __name__ == '__main__':
    # Command-line entry point: environment variables supply the defaults,
    # flags override them.
    OPTSCHEDCFG = os.getenv('OPTSCHEDCFG')
    RUN_CMD = os.getenv('RUN_CMD')
    RUN_CMD = shlex.split(RUN_CMD) if RUN_CMD else RUN_CMD
    VALIDATE_CMD = os.getenv('VALIDATE_CMD')
    ANALYZE_CMD = os.getenv('ANALYZE_CMD')
    RUNNER_GIT_REPO = os.getenv('RUNNER_GIT_REPO')

    parser = argparse.ArgumentParser(description='Run the commands with the sched.ini settings')
    parser.add_argument('-c', '--optsched-cfg',
                        required=OPTSCHEDCFG is None,
                        default=OPTSCHEDCFG,
                        help='The path to the optsched config to use. Defaults to the env variable OPTSCHEDCFG if it exists, else is required. The sched.ini is expected to be there')
    parser.add_argument('-o', '--outdir', required=True, help='The path to place the output files at')
    parser.add_argument('-L', '--labels', default='',
                        help='Comma separated labels to use for these runs. Must be equal to the number of --with flags. Any parts of the run command <cmd> will have the string {{label}} replaced with the label for the run.')
    parser.add_argument('--with', nargs='*', action='append', metavar='KEY=VALUE',
                        help="The sched.ini settings to set for each run. Each run's settings should have a new --with flag.")
    parser.add_argument('--matrix', type=Path, action='append', metavar='MATRIX.json',
                        help='A json file containing a matrix of configuration values to act as-if their product was specified via --with and --labels')
    parser.add_argument(
        'cmd', nargs='+', help='The command (with args) to run. Use - to default to the environment variable RUN_CMD.')
    parser.add_argument('--append', action='store_true',
                        help='Allow a <label>.log file to exist, appending to it if so')
    parser.add_argument('--git-state', default=RUNNER_GIT_REPO,
                        help='The path to a git repository to snapshot its state in our <outdir>. Defaults to the environment variable RUNNER_GIT_REPO if set. If not present, no git status will be generated.')

    parser.add_argument('--validate', default=VALIDATE_CMD,
                        help='The command (single string) to run after all runs to validate that the runs were correct. Defaults to the env variable VALIDATE_CMD. The output log files will be passed to the command, one additional arg for each run. To skip this, end the command with a bash comment #')
    parser.add_argument('--analyze', nargs='*', default=[ANALYZE_CMD] if ANALYZE_CMD else [],
                        help='The commands (each a single string) to run after all runs to analyze the runs and produce output. Defaults to the single command from the env variable ANALYZE_CMD. The output log files will be passed to each command, one additional arg for each run. To skip this, end the command with a bash comment #')
    parser.add_argument('--analyze-files',
                        help='The filenames to place the stdout of each analyze command, comma separated.')

    args = parser.parse_args()

    # nargs='+' always yields a list, so the "-" placeholder has to be compared
    # as a one-element list; comparing against the string '-' never matches.
    cmd = args.cmd
    if cmd == ['-']:
        if not RUN_CMD:
            parser.error('cmd was given as - but the environment variable RUN_CMD is not set')
        cmd = RUN_CMD

    main(
        outdir=Path(args.outdir),
        optsched_cfg=Path(args.optsched_cfg),
        labels=list(filter(bool, args.labels.split(','))),
        withs=getattr(args, 'with'),
        matrix=args.matrix,
        cmd=cmd,
        append_logs=args.append,
        git_state=args.git_state,
        validate_cmd=args.validate,
        analyze_cmds=args.analyze,
        analyze_files=args.analyze_files.split(',') if args.analyze_files else [],
    )
160 changes: 160 additions & 0 deletions util/runners/runwith.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
import argparse
import os
import re
import shlex
import shutil
import subprocess
from pathlib import Path
from typing import Callable, Dict, Iterable, List, Optional

# %% Setup


class InvalidSchedIniSettingError(Exception):
    """Error about a set of sched.ini keys; the keys are kept on ``keys``."""

    def __init__(self, message: str, keys: List[str]):
        # The rendered text carries both the message and the offending keys.
        description = f'{message}: keys={keys}'
        super().__init__(description)
        self.keys = keys


class LogFileExistsError(FileExistsError):
    """A ``FileExistsError`` specialised for log files that already exist."""


class GitStateChangedError(Exception):
    """Error carrying the paths of an old and a new git state file.

    The two paths are available as ``old`` and ``new`` and are also part of
    the rendered message.
    """

    def __init__(self, message: str, old: Path, new: Path):
        description = f'{message}: old={old}, new={new}'
        super().__init__(description)
        self.old, self.new = old, new


def setup_outdir(outdir: Path, optsched_cfg: Path, keys: Iterable[str], git_state: str):
    """Prepare ``outdir`` for a run.

    Creates ``outdir`` if needed, copies ``optsched_cfg`` to
    ``outdir/optsched-cfg`` unless that copy already exists, records
    ``git_state`` in ``outdir/git.status`` (when non-empty), and touches one
    file per entry of ``keys`` inside ``outdir``.

    Raises:
        GitStateChangedError: If ``git.status`` already exists with different
            contents. The new state is left behind in ``git.status.2``.
    """
    outdir.mkdir(parents=True, exist_ok=True)

    cfg_copy = outdir / 'optsched-cfg'
    if not cfg_copy.exists():
        shutil.copytree(optsched_cfg, cfg_copy)

    if git_state:
        recorded: Path = outdir / 'git.status'
        candidate: Path = outdir / 'git.status.2'
        # Write the new state first so that it is on disk for inspection if
        # the comparison below fails.
        candidate.write_text(git_state)

        if recorded.exists():
            if recorded.read_text() != git_state:
                raise GitStateChangedError(
                    'Git state changed between runs! If this was intended, delete the old git.status file. See state files', recorded, candidate)
            os.remove(recorded)

        candidate.rename(recorded)

    for key in keys:
        marker: Path = outdir / key
        marker.touch()


def edit_sched_ini(sched_ini: str, with_: Dict[str, str]) -> str:
    """Return ``sched_ini`` with the given settings replaced.

    A setting is a line of the form ``KEY VALUE``: the key at the start of the
    line, one space, then the value up to the end of the line. For every
    ``key``/``value`` pair in ``with_``, the value of each such line is
    replaced by ``value``.

    Args:
        sched_ini: The full text of the sched.ini file.
        with_: Mapping from setting key to its new value.

    Returns:
        The edited text.

    Raises:
        InvalidSchedIniSettingError: If one or more keys have no line in
            ``sched_ini``; all missing keys are reported together.
    """
    missing_keys = []

    for key, value in with_.items():
        # Escape the key so regex metacharacters in it (e.g. ".") only match
        # themselves instead of acting as wildcards on other settings.
        kv_re = re.compile(rf'(?<=^{re.escape(key)} ).*$', flags=re.MULTILINE)
        if not kv_re.search(sched_ini):
            missing_keys.append(key)
            continue
        # A callable replacement inserts the value verbatim; a plain string
        # would have backslashes and \1-style references interpreted.
        sched_ini = kv_re.sub(lambda _match, value=value: value, sched_ini)

    if missing_keys:
        raise InvalidSchedIniSettingError('Unable to find these keys in the sched.ini file', missing_keys)

    return sched_ini


def save_sched_ini(outdir: Path, sched_ini: Path, label: str):
    """Copy ``sched_ini`` to ``outdir`` as ``<label>.sched.ini``."""
    destination = outdir / f'{label}.sched.ini'
    shutil.copy(sched_ini, destination)


def edit_file(path: Path, edit: Callable[[str], str]):
    """Rewrite the file at ``path`` with ``edit`` applied to its contents.

    The file must already exist. ``edit`` runs before anything is written, so
    the file is left untouched if ``edit`` raises.
    """
    with open(path, 'r+') as handle:
        original = handle.read()
        updated = edit(original)
        # Rewind and clear the file so a shorter result leaves no stale tail.
        handle.seek(0)
        handle.truncate()
        handle.write(updated)


def run_cmd(cmd: List[str], outdir: Path, label: str, logmode='w'):
    """Run ``cmd`` in ``outdir``, sending stdout and stderr to ``<label>.log``.

    Args:
        cmd: The command and its arguments.
        outdir: Working directory of the command and location of the log.
        label: Base name of the log file.
        logmode: File mode for the log; ``'w'`` refuses to overwrite an
            existing log.

    Returns:
        The path of the log file.

    Raises:
        LogFileExistsError: If the log exists and ``logmode`` is ``'w'``.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    logfile = outdir / f'{label}.log'
    would_overwrite = logmode == 'w'
    if would_overwrite and logfile.exists():
        raise LogFileExistsError(
            f'File already exists. Either use a fresh output directory, or specify that we should append to the file: {logfile}')

    with open(logfile, logmode) as log:
        subprocess.run(cmd, stdout=log, stderr=subprocess.STDOUT, cwd=outdir, check=True)

    return logfile


def get_git_state(git_state: Optional[str]) -> str:
    """Describe the state of the git repository at path ``git_state``.

    Returns an empty string when no path is given. Otherwise returns the
    resolved repository path followed by the output of ``git log -n1``,
    ``git status`` and ``git diff`` for that repository.

    Raises:
        subprocess.CalledProcessError: If any of the git commands fails.
    """
    if not git_state:
        return ''

    git_repo = str(Path(git_state).resolve())

    def git_output(*subcommand: str) -> str:
        # Run one git subcommand against the repository and return its stdout.
        finished = subprocess.run(['git', '-C', git_repo, *subcommand],
                                  encoding='utf-8', capture_output=True, check=True)
        return finished.stdout

    commit = git_output('log', '-n1')
    status = git_output('status')
    diff = git_output('diff')

    return f'{git_repo}\n{commit}\n\n{status}\n\n{diff}'


def main(outdir: Path, optsched_cfg: Path, label: str, with_: Dict[str, str], cmd: List[str], append_logs: bool = False, git_state: Optional[str] = None):
    """Apply sched.ini settings, then run ``cmd`` and log its output.

    Sets up ``outdir``, edits ``sched.ini`` inside ``optsched_cfg`` in place
    according to ``with_`` (skipped when ``with_`` is empty), saves a copy of
    that sched.ini in ``outdir`` under ``label``, and runs ``cmd``.

    Returns:
        The path of the log file written by the run.
    """
    outdir = outdir.resolve()
    optsched_cfg = optsched_cfg.resolve()

    state_text = get_git_state(git_state)
    setup_outdir(outdir, optsched_cfg, with_.keys(), state_text)

    sched_ini = optsched_cfg / 'sched.ini'

    def apply_settings(contents: str) -> str:
        return edit_sched_ini(contents, with_)

    if with_:
        edit_file(sched_ini, apply_settings)
    save_sched_ini(outdir, sched_ini, label)

    logmode = 'a' if append_logs else 'w'
    return run_cmd(cmd, outdir, label, logmode=logmode)


def parse_withs(withs: List[str]) -> Dict[str, str]:
    """Parse ``KEY=VALUE`` strings into a dict.

    Only the first ``=`` separates key from value, so values may themselves
    contain ``=``. When a key occurs more than once, the last value wins.

    Raises:
        ValueError: If an item contains no ``=``; the message names the item.
    """
    parsed: Dict[str, str] = {}
    for with_ in withs:
        key, separator, value = with_.partition('=')
        if not separator:
            raise ValueError(f'Expected a KEY=VALUE setting, but got: {with_!r}')
        parsed[key] = value
    return parsed


# %% Main
if __name__ == '__main__':
    # Command-line entry point: environment variables supply the defaults,
    # flags override them.
    OPTSCHEDCFG = os.getenv('OPTSCHEDCFG')
    RUN_CMD = os.getenv('RUN_CMD')
    RUN_CMD = shlex.split(RUN_CMD) if RUN_CMD else RUN_CMD
    RUNNER_GIT_REPO = os.getenv('RUNNER_GIT_REPO')

    parser = argparse.ArgumentParser(description='Run the commands with the sched.ini settings')
    parser.add_argument('-c', '--optsched-cfg',
                        required=OPTSCHEDCFG is None,
                        default=OPTSCHEDCFG,
                        help='The path to the optsched config to use. Defaults to the env variable OPTSCHEDCFG if it exists, else is required. The sched.ini is expected to be there')
    parser.add_argument('-o', '--outdir', required=True, help='The path to place the output files at')
    parser.add_argument('-L', '--label', required=True,
                        help='A label for this run, used in the output directory and for namespacing.')
    parser.add_argument('--with', nargs='*', metavar='KEY=VALUE', help='The sched.ini settings to set.')
    parser.add_argument(
        'cmd', nargs='+', help='The command (with args) to run. Use - to default to the environment variable RUN_CMD.')
    parser.add_argument('--append', action='store_true',
                        help='Allow a <label>.log file to exist, appending to it if so')
    parser.add_argument('--git-state', default=RUNNER_GIT_REPO,
                        help='The path to a git repository to snapshot its state in our <outdir>. Defaults to the environment variable RUNNER_GIT_REPO if set. If not present, no git status will be generated.')

    args = parser.parse_args()

    # nargs='+' always yields a list, so the "-" placeholder has to be compared
    # as a one-element list; comparing against the string '-' never matches.
    cmd = args.cmd
    if cmd == ['-']:
        if not RUN_CMD:
            parser.error('cmd was given as - but the environment variable RUN_CMD is not set')
        cmd = RUN_CMD

    main(
        outdir=Path(args.outdir),
        optsched_cfg=Path(args.optsched_cfg),
        label=args.label,
        with_=parse_withs(getattr(args, 'with') if getattr(args, 'with') is not None else []),
        cmd=cmd,
        append_logs=args.append,
        git_state=args.git_state,
    )
43 changes: 43 additions & 0 deletions util/runners/shocrunner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python3

import argparse
import os
import subprocess
from pathlib import Path
from typing import Dict


def main(shocdriver: Path, outdir: Path, shoc_args: Dict[str, str]):
    """Run a fixed set of SHOC benchmarks with the OpenCL flag.

    Each benchmark is one invocation of ``shocdriver`` with ``-opencl``, the
    extra ``shoc_args`` and ``-benchmark <name>``, executed with ``outdir`` as
    the working directory.

    Args:
        shocdriver: Path of the shocdriver executable.
        outdir: Working directory for the runs; created (including parents)
            if it does not exist.
        shoc_args: Extra ``flag -> value`` arguments passed to every run.

    Raises:
        subprocess.CalledProcessError: As soon as one benchmark run exits with
            a non-zero status; later benchmarks are not run.
    """
    outdir = outdir.resolve()
    # parents/exist_ok: nested output paths work and there is no window
    # between an existence check and the creation.
    outdir.mkdir(parents=True, exist_ok=True)

    cmd = [str(shocdriver), '-opencl']
    for k, v in shoc_args.items():
        cmd += [k, v]

    for benchmark in ('FFT', 'GEMM', 'MD', 'Sort', 'Spmv', 'Stencil2D'):
        subprocess.run(cmd + ['-benchmark', benchmark], check=True, cwd=outdir)


if __name__ == '__main__':
    # Command-line entry point; SHOCDRIVER from the environment is the default
    # for --shocdriver, which is only required when that variable is unset.
    SHOCDRIVER = os.getenv('SHOCDRIVER')
    parser = argparse.ArgumentParser(description='Run the SHOC benchmarks')
    parser.add_argument('--shocdriver', default=SHOCDRIVER, required=SHOCDRIVER is None,
                        help='The path to the shocdriver executable')
    parser.add_argument('-o', '--outdir', required=True, help='The path to place the output files at')
    parser.add_argument('-s', '--shoc-problem-size', default='4',
                        help='The SHOC problem size, passed on to the shocdriver')

    args = parser.parse_args()

    main(
        shocdriver=Path(args.shocdriver),
        outdir=Path(args.outdir),
        shoc_args={'-s': args.shoc_problem_size},
    )