GT analysis scripts #167

Open · wants to merge 45 commits into base: master

Changes from all commits (45 commits):
6731d0a  Add script to find "negative blocks" (Quincunx271, Apr 9, 2021)
ef9713e  Add script to compute block stats (Quincunx271, Apr 9, 2021)
9c26a8d  Improve script to find negative blocks (Quincunx271, Apr 9, 2021)
5939232  Transfer gt-analysis script (Quincunx271, Apr 9, 2021)
425714c  Some more utilities (Quincunx271, Apr 16, 2021)
059ec66  Fix some stats to be consistent with what they were before (Quincunx271, Apr 29, 2021)
97e82da  Improve load_logs script (Quincunx271, Apr 30, 2021)
f1c816a  Improve phrasing (Quincunx271, Apr 30, 2021)
ebdacf7  Add metric for block cost (Quincunx271, Jun 1, 2021)
848ef73  Remove irrelevant metrics (Quincunx271, Jun 1, 2021)
6b45b92  [scripts] Allow multiple LBs & HeuristicResults (Quincunx271, Jun 16, 2021)
84a07e6  Also calculate the number of nodes examined (Quincunx271, Jun 16, 2021)
331b4a9  Change metrics again (Quincunx271, Jun 18, 2021)
6c17e81  Fix zipping for duplicate blocks (Quincunx271, Jun 18, 2021)
cebc547  Update zipped_keep_blocks_if to support empty case (Quincunx271, Jun 22, 2021)
d8d99d1  Small improvement to compile time calculation (Quincunx271, Jun 22, 2021)
81fe20a  Support pass filtering when recording compile time (Quincunx271, Jun 22, 2021)
6dd293c  Adjust relative cost calculation (Quincunx271, Jun 22, 2021)
8d305a8  Add plaidml total compile time analysis (Quincunx271, Jun 23, 2021)
5199911  Properly zip cases where zip(*logs) is empty (Quincunx271, Jun 24, 2021)
b3851d5  Add shoc total compile time analysis (Quincunx271, Jun 24, 2021)
b5392cb  Fix validation test (Quincunx271, Jul 5, 2021)
f3350b6  Provide a human-readable output format (Quincunx271, Jul 14, 2021)
f862b89  Calculate time spent in LB + heuristic (Quincunx271, Jul 14, 2021)
fea4e58  Transpose CSV output (Quincunx271, Jul 16, 2021)
4aa6d7f  Add tool to combine sharded csv metrics (Quincunx271, Jul 17, 2021)
64d6798  Add tool to combine exec-time results (Quincunx271, Jul 17, 2021)
b36ad94  Add GT edge metrics, repeat metrics for opt blocks (Quincunx271, Jul 19, 2021)
e087e9e  Allow multiple heuristic results to exist (Quincunx271, Jul 19, 2021)
a2e7202  Fix validation-test for multiple lower bounds (Quincunx271, Jul 19, 2021)
01bdd21  Fix validation-test args (Quincunx271, Jul 19, 2021)
7a2702c  Include benchmark name in validation-test (Quincunx271, Jul 19, 2021)
49ecbac  Support multi-run modes in combine-exec.py (Quincunx271, Jul 23, 2021)
35de44c  Update validation-test.py for Adjusted PERP SCF (Quincunx271, Jul 26, 2021)
4e41742  Have validation-test.py output which block failed (Quincunx271, Jul 26, 2021)
d62e77b  Update validation-test.py to get spill cost (Quincunx271, Jul 26, 2021)
c889dae  Revert "Update validation-test.py for Adjusted PERP SCF" (Quincunx271, Jul 30, 2021)
38b4cff  Add functions to gather occupancy info (Quincunx271, Aug 4, 2021)
0976ac6  Fix import utils (Quincunx271, Aug 4, 2021)
da627e1  Add main function to func_stats (Quincunx271, Aug 4, 2021)
208fa9f  Add spill func-level info functions (Quincunx271, Aug 7, 2021)
cd0cb39  Add script to extract spill stats via regex (Quincunx271, Aug 11, 2021)
088ded7  Fix weighted spill sum regex (Quincunx271, Aug 11, 2021)
68e1d2e  Augment raw-spill-counts.py with hot-only filter (Quincunx271, Aug 13, 2021)
cd53602  Encourage running load_logs with python -i (Quincunx271, Aug 24, 2021)
99 changes: 99 additions & 0 deletions util/CPU2006/combine-exec.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

from io import StringIO
import csv
import re
import sys
import argparse
from contextlib import ExitStack
from typing import Dict, Iterable, List, Tuple
from collections import Counter
from openpyxl import Workbook
from openpyxl.utils import get_column_letter


class DuplicateDataError(Exception):
    def __init__(self, old, new, message):
        self.old = old
        self.new = new
        self.message = message

        super().__init__(f'{message} old: {old} -> new: {new}')


def is_blank_row(row: List[str]) -> bool:
    return not row or all(cell in ('', 'NR') for cell in row[1:])


def merge_tables(str_tables: Iterable[str]) -> str:
    data: Dict[str, List[List[str]]] = dict()
    tables = [list(csv.reader(table.splitlines())) for table in str_tables]

    # Seed the merged data with every row of the first table, grouped by the
    # row's key (its first cell). Repeated keys keep their relative order.
    for row in tables[0]:
        if row:
            data.setdefault(row[0], []).append(row)

    # Fill blank slots (cells that are '' or 'NR') with rows from the other
    # tables, tracking the nth occurrence of each key so repeated rows line up.
    for table in tables:
        nth: Dict[str, int] = Counter()
        for row in table:
            if not is_blank_row(row):
                index = nth[row[0]]
                if row[0] in data:
                    if not is_blank_row(data[row[0]][index]) and data[row[0]][index] != row:
                        raise DuplicateDataError(data[row[0]][index], row, f'Duplicate data for {row[0]}.')
                    data[row[0]][index] = row
                nth[row[0]] += 1

    # Emit the merged rows in the first table's original order.
    out = StringIO()
    writer = csv.writer(out)
    nth: Dict[str, int] = Counter()
    for row in tables[0]:
        if not row:
            continue
        index = nth[row[0]]
        writer.writerow(data[row[0]][index])
        nth[row[0]] += 1

    return out.getvalue()


_RE_FOO_RESULTS_TABLE = re.compile(r'"(?P<tbl_name>\S+ Results) Table"')


def extract_tables(contents: str) -> Iterable[Tuple[str, str]]:
    # Each results table begins after the blank line that follows its
    # '"<name> Results Table"' header and runs until the next blank line.
    for m in _RE_FOO_RESULTS_TABLE.finditer(contents):
        tbl_start = contents.find('\n\n', m.end()) + 1
        tbl_end = contents.find('\n\n', tbl_start)
        yield (m['tbl_name'], contents[tbl_start:tbl_end])


def main(files, out: str):
    wb = Workbook()
    files = [f.read() for f in files]
    tbls = map(extract_tables, files)
    # Walk the tables of all input files in lockstep; each group holds the
    # same-named table from every file, merged into one worksheet.
    for tbl_group in zip(*tbls):
        assert len(set(name for name, _ in tbl_group)) == 1
        ws = wb.create_sheet(tbl_group[0][0])

        str_tables = (tbl for _, tbl in tbl_group)
        merged = merge_tables(str_tables)
        for row in csv.reader(merged.splitlines()):
            ws.append(row)
            for i, _ in enumerate(row):
                ws.column_dimensions[get_column_letter(i + 1)].bestFit = True

    wb.remove(wb.active)  # drop the default empty sheet created by Workbook()
    wb.save(out)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Merges multiple CPU2017 exec time csv results together')
    parser.add_argument('-o', '--output', required=True, help='Where to write the output file')
    parser.add_argument('csvs', nargs='+', help='The files to merge')

    args = parser.parse_args()

    with ExitStack() as stack:
        files = [stack.enter_context(open(f, 'r')) for f in args.csvs]

        main(files, args.output)
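To make the merge behavior concrete, here is a minimal sketch (not part of the PR; the shard contents are invented) of how merge_tables fills blank or 'NR' cells from later shards while preserving the first shard's row order:

# Sketch only: run with merge_tables from combine-exec.py in scope.
shard1 = 'Benchmark,Run 1\n400.perlbench,NR\n401.bzip2,12.3\n'
shard2 = 'Benchmark,Run 1\n400.perlbench,10.1\n401.bzip2,NR\n'
print(merge_tables([shard1, shard2]))
# Benchmark,Run 1
# 400.perlbench,10.1   <- filled in from shard2
# 401.bzip2,12.3       <- kept from shard1

If two shards disagree on a non-blank cell, merge_tables raises DuplicateDataError rather than silently picking one.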
3 changes: 2 additions & 1 deletion util/analyze/__init__.py
@@ -1,4 +1,5 @@
from ._types import Logs, Benchmark, Block
from ._main import parse_args
from .imports import import_cpu2006, import_plaidml, import_shoc, import_utils
from ._utils import *
from . import utils, ioutils
from .utils import foreach_bench
21 changes: 16 additions & 5 deletions util/analyze/_main.py
@@ -75,16 +75,27 @@ def parse_args(parser: argparse.ArgumentParser, *names, args=None):
        'plaidml': import_plaidml.parse,
        'shoc': import_shoc.parse,
    }
    parser = FILE_PARSERS[args.benchsuite]
    fileparser = FILE_PARSERS[args.benchsuite]
    blk_filter = block_filter(args.keep_blocks_if) if args.keep_blocks_if is not True else True

    args_dict = vars(args)

    def parse_input(x):
        if isinstance(x, str):
            result = fileparser(x)
            if blk_filter is not True:
                result = result.keep_blocks_if(blk_filter)
            return result
        else:
            assert isinstance(x, list)
            return [parse_input(l) for l in x]

    # Go through the logs inputs and parse them.
    for name in names:
        result = parser(args_dict[name])
        if blk_filter is not True:
            result = result.keep_blocks_if(blk_filter)
        args_dict[name] = result
        args_dict[name] = parse_input(args_dict[name])

    if hasattr(parser, '__analyze_post_process_parse_args__'):
        for argname, postprocess in getattr(parser, '__analyze_post_process_parse_args__').items():
            args_dict[argname] = postprocess(args_dict[argname])

    return args
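The net effect of the new parse_input helper is that each named log argument may now be either a single file path or an arbitrarily nested list of paths (e.g. from nargs='+'), and the parsed result preserves that shape. A standalone sketch of the recursion (not part of the PR; str.upper stands in for the real log parser):

# Sketch only: demonstrates the shape-preserving recursion of parse_input.
def parse_input(x, fileparser=str.upper):
    if isinstance(x, str):
        return fileparser(x)          # a single path parses to a single result
    assert isinstance(x, list)
    return [parse_input(l, fileparser) for l in x]  # lists parse element-wise

print(parse_input('run1.log'))                # 'RUN1.LOG'
print(parse_input(['run1.log', 'run2.log']))  # ['RUN1.LOG', 'RUN2.LOG']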
33 changes: 33 additions & 0 deletions util/analyze/_types.py
@@ -41,13 +41,26 @@ def __iter__(self):
        for bench in self.benchmarks:
            yield from bench.blocks

    def __len__(self):
        return sum(len(bench) for bench in self.benchmarks)

    def __repr__(self):
        benchmarks = ','.join(b.name for b in self.benchmarks)
        return f'<Logs({benchmarks})>'

    def keep_blocks_if(self, p):
        return Logs([bench.keep_blocks_if(p) for bench in self.benchmarks])

    def find_equiv(self, blk):
        uid = blk.uniqueid()
        return [b for b in self.benchmark(blk.info['benchmark']) if b.uniqueid() == uid]

    def find_block(self, name, benchmark=None):
        search = self
        if benchmark is not None:
            search = self.benchmark(benchmark)
        return [b for b in search if b.name == name]


class Benchmark:
    '''
@@ -67,6 +80,9 @@ def __init__(self, info, blocks):
    def __iter__(self):
        return iter(self.blocks)

    def __len__(self):
        return len(self.blocks)

    @property
    def benchmarks(self):
        return (self,)
@@ -77,6 +93,16 @@ def __repr__(self):
    def keep_blocks_if(self, p):
        return Benchmark(self.info, [blk for blk in self.blocks if p(blk)])

    def find_equiv(self, blk):
        uid = blk.uniqueid()
        return [b for b in self if b.uniqueid() == uid]

    def find_block(self, name, benchmark=None):
        if benchmark is not None:
            if benchmark != self.name:
                return []
        return [b for b in self if b.name == name]


class Block:
    '''
@@ -93,10 +119,14 @@ class Block:

    def __init__(self, info, raw_log, events):
        self.name = info['name']
        self.benchmark = info['benchmark']
        self.info = info
        self.raw_log = raw_log
        self.events = events

        if 'PassFinished' in self:
            self.info['pass'] = self.single('PassFinished')['num']

    def single(self, event_name):
        '''
        Gets an event with the specified name, requiring exactly one match
@@ -132,3 +162,6 @@ def __repr__(self):

    def uniqueid(self):
        return frozenset(self.info.items())

    def dump(self):
        print(self.raw_log)
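Together with the commit "Encourage running load_logs with python -i", these helpers target interactive exploration of parsed logs. A hypothetical session (the log path, benchmark, and block names are invented):

# Hypothetical usage sketch, not part of the PR.
from util.analyze.imports import import_cpu2006

logs = import_cpu2006.parse('cpu2006-build.log')        # invented path
print(len(logs))                                        # total blocks, via the new __len__
blk = logs.find_block('%bb.0', benchmark='403.gcc')[0]  # invented names
print(logs.find_equiv(blk))                             # blocks sharing blk.uniqueid()
blk.dump()                                              # print the block's raw log text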
39 changes: 0 additions & 39 deletions util/analyze/_utils.py

This file was deleted.

5 changes: 2 additions & 3 deletions util/analyze/imports/import_cpu2006.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python3

import os
import re

from . import import_utils

@@ -13,8 +12,8 @@ def parse(file):
    with open(file, 'r') as f:
        return import_utils.parse_multi_bench_file(
            f.read(),
            benchstart=re.compile(r'Building (?P<name>\S*)'),
            filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n'))
            benchstart=r'Building (?P<name>\S*)',
            filename=r'/[fc]lang\b.*\s(\S+\.\S+)\n')


if __name__ == '__main__':