GT analysis scripts #167

Open · wants to merge 45 commits into base: master

Changes from all commits (45 commits):
6731d0a  Add script to find "negative blocks" (Quincunx271, Apr 9, 2021)
ef9713e  Add script to compute block stats (Quincunx271, Apr 9, 2021)
9c26a8d  Improve script to find negative blocks (Quincunx271, Apr 9, 2021)
5939232  Transfer gt-analysis script (Quincunx271, Apr 9, 2021)
425714c  Some more utilities (Quincunx271, Apr 16, 2021)
059ec66  Fix some stats to be consistent with what they were before (Quincunx271, Apr 29, 2021)
97e82da  Improve load_logs script (Quincunx271, Apr 30, 2021)
f1c816a  Improve phrasing (Quincunx271, Apr 30, 2021)
ebdacf7  Add metric for block cost (Quincunx271, Jun 1, 2021)
848ef73  Remove irrelevant metrics (Quincunx271, Jun 1, 2021)
6b45b92  [scripts] Allow multiple LBs & HeuristicResults (Quincunx271, Jun 16, 2021)
84a07e6  Also calculate the number of nodes examined (Quincunx271, Jun 16, 2021)
331b4a9  Change metrics again (Quincunx271, Jun 18, 2021)
6c17e81  Fix zipping for duplicate blocks (Quincunx271, Jun 18, 2021)
cebc547  Update zipped_keep_blocks_if to support empty case (Quincunx271, Jun 22, 2021)
d8d99d1  Small improvement to compile time calculation (Quincunx271, Jun 22, 2021)
81fe20a  Support pass filtering when recording compile time (Quincunx271, Jun 22, 2021)
6dd293c  Adjust relative cost calculation (Quincunx271, Jun 22, 2021)
8d305a8  Add plaidml total compile time analysis (Quincunx271, Jun 23, 2021)
5199911  Properly zip cases where zip(*logs) is empty (Quincunx271, Jun 24, 2021)
b3851d5  Add shoc total compile time analysis (Quincunx271, Jun 24, 2021)
b5392cb  Fix validation test (Quincunx271, Jul 5, 2021)
f3350b6  Provide a human-readable output format (Quincunx271, Jul 14, 2021)
f862b89  Calculate time spent in LB + heuristic (Quincunx271, Jul 14, 2021)
fea4e58  Transpose CSV output (Quincunx271, Jul 16, 2021)
4aa6d7f  Add tool to combine sharded csv metrics (Quincunx271, Jul 17, 2021)
64d6798  Add tool to combine exec-time results (Quincunx271, Jul 17, 2021)
b36ad94  Add GT edge metrics, repeat metrics for opt blocks (Quincunx271, Jul 19, 2021)
e087e9e  Allow multiple heuristic results to exist (Quincunx271, Jul 19, 2021)
a2e7202  Fix validation-test for multiple lower bounds (Quincunx271, Jul 19, 2021)
01bdd21  Fix validation-test args (Quincunx271, Jul 19, 2021)
7a2702c  Include benchmark name in validation-test (Quincunx271, Jul 19, 2021)
49ecbac  Support multi-run modes in combine-exec.py (Quincunx271, Jul 23, 2021)
35de44c  Update validation-test.py for Adjusted PERP SCF (Quincunx271, Jul 26, 2021)
4e41742  Have validation-test.py output which block failed (Quincunx271, Jul 26, 2021)
d62e77b  Update validation-test.py to get spill cost (Quincunx271, Jul 26, 2021)
c889dae  Revert "Update validation-test.py for Adjusted PERP SCF" (Quincunx271, Jul 30, 2021)
38b4cff  Add functions to gather occupancy info (Quincunx271, Aug 4, 2021)
0976ac6  Fix import utils (Quincunx271, Aug 4, 2021)
da627e1  Add main function to func_stats (Quincunx271, Aug 4, 2021)
208fa9f  Add spill func-level info functions (Quincunx271, Aug 7, 2021)
cd0cb39  Add script to extract spill stats via regex (Quincunx271, Aug 11, 2021)
088ded7  Fix weighted spill sum regex (Quincunx271, Aug 11, 2021)
68e1d2e  Augment raw-spill-counts.py with hot-only filter (Quincunx271, Aug 13, 2021)
cd53602  Encourage running load_logs with python -i (Quincunx271, Aug 24, 2021)
99 changes: 99 additions & 0 deletions util/CPU2006/combine-exec.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

from io import StringIO
import csv
import re
import sys
import argparse
from contextlib import ExitStack
from typing import Dict, Iterable, List, Tuple
from collections import Counter
from openpyxl import Workbook
from openpyxl.utils import get_column_letter


class DuplicateDataError(Exception):
    def __init__(self, old, new, message):
        self.old = old
        self.new = new
        self.message = message

        super().__init__(f'{message} old: {old} -> new: {new}')


def is_blank_row(row: List[str]) -> bool:
    return not row or all(cell in ('', 'NR') for cell in row[1:])


def merge_tables(str_tables: Iterable[str]) -> str:
    data: Dict[str, List[List[str]]] = dict()
    tables = [list(csv.reader(table.splitlines())) for table in str_tables]

    # Seed the merged data with every row of the first table, grouped by the
    # row's key (its first cell). Repeated keys keep their relative order.
    for row in tables[0]:
        if row:
            data.setdefault(row[0], []).append(row)

    # Fill blank slots (cells that are '' or 'NR') with rows from the other
    # tables, tracking the nth occurrence of each key so repeated rows line up.
    for table in tables:
        nth: Dict[str, int] = Counter()
        for row in table:
            if not is_blank_row(row):
                index = nth[row[0]]
                if row[0] in data:
                    if not is_blank_row(data[row[0]][index]) and data[row[0]][index] != row:
                        raise DuplicateDataError(data[row[0]][index], row, f'Duplicate data for {row[0]}.')
                    data[row[0]][index] = row
                nth[row[0]] += 1

    # Emit the merged rows in the first table's original order.
    out = StringIO()
    writer = csv.writer(out)
    nth: Dict[str, int] = Counter()
    for row in tables[0]:
        if not row:
            continue
        index = nth[row[0]]
        writer.writerow(data[row[0]][index])
        nth[row[0]] += 1

    return out.getvalue()


_RE_FOO_RESULTS_TABLE = re.compile(r'"(?P<tbl_name>\S+ Results) Table"')


def extract_tables(contents: str) -> Iterable[Tuple[str, str]]:
    # Each results table begins after the blank line that follows its
    # '"<name> Results Table"' header and runs until the next blank line.
    for m in _RE_FOO_RESULTS_TABLE.finditer(contents):
        tbl_start = contents.find('\n\n', m.end()) + 1
        tbl_end = contents.find('\n\n', tbl_start)
        yield (m['tbl_name'], contents[tbl_start:tbl_end])


def main(files, out: str):
    wb = Workbook()
    files = [f.read() for f in files]
    tbls = map(extract_tables, files)
    # Walk the tables of all input files in lockstep; each group holds the
    # same-named table from every file, merged into one worksheet.
    for tbl_group in zip(*tbls):
        assert len(set(name for name, _ in tbl_group)) == 1
        ws = wb.create_sheet(tbl_group[0][0])

        str_tables = (tbl for _, tbl in tbl_group)
        merged = merge_tables(str_tables)
        for row in csv.reader(merged.splitlines()):
            ws.append(row)
            for i, _ in enumerate(row):
                ws.column_dimensions[get_column_letter(i + 1)].bestFit = True

    wb.remove(wb.active)  # drop the default empty sheet created by Workbook()
    wb.save(out)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Merges multiple CPU2017 exec time csv results together')
    parser.add_argument('-o', '--output', required=True, help='Where to write the output file')
    parser.add_argument('csvs', nargs='+', help='The files to merge')

    args = parser.parse_args()

    with ExitStack() as stack:
        files = [stack.enter_context(open(f, 'r')) for f in args.csvs]

        main(files, args.output)
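To make the merge behavior concrete, here is a minimal sketch (not part of the PR; the shard contents are invented) of how merge_tables fills blank or 'NR' cells from later shards while preserving the first shard's row order:

# Sketch only: run with merge_tables from combine-exec.py in scope.
shard1 = 'Benchmark,Run 1\n400.perlbench,NR\n401.bzip2,12.3\n'
shard2 = 'Benchmark,Run 1\n400.perlbench,10.1\n401.bzip2,NR\n'
print(merge_tables([shard1, shard2]))
# Benchmark,Run 1
# 400.perlbench,10.1   <- filled in from shard2
# 401.bzip2,12.3       <- kept from shard1

If two shards disagree on a non-blank cell, merge_tables raises DuplicateDataError rather than silently picking one.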
3 changes: 2 additions & 1 deletion util/analyze/__init__.py
@@ -1,4 +1,5 @@
from ._types import Logs, Benchmark, Block
from ._main import parse_args
from .imports import import_cpu2006, import_plaidml, import_shoc, import_utils
from ._utils import *
from . import utils, ioutils
from .utils import foreach_bench
21 changes: 16 additions & 5 deletions util/analyze/_main.py
@@ -75,16 +75,27 @@ def parse_args(parser: argparse.ArgumentParser, *names, args=None):
        'plaidml': import_plaidml.parse,
        'shoc': import_shoc.parse,
    }
    parser = FILE_PARSERS[args.benchsuite]
    fileparser = FILE_PARSERS[args.benchsuite]
    blk_filter = block_filter(args.keep_blocks_if) if args.keep_blocks_if is not True else True

    args_dict = vars(args)

    def parse_input(x):
        if isinstance(x, str):
            result = fileparser(x)
            if blk_filter is not True:
                result = result.keep_blocks_if(blk_filter)
            return result
        else:
            assert isinstance(x, list)
            return [parse_input(l) for l in x]

    # Go through the logs inputs and parse them.
    for name in names:
        result = parser(args_dict[name])
        if blk_filter is not True:
            result = result.keep_blocks_if(blk_filter)
        args_dict[name] = result
        args_dict[name] = parse_input(args_dict[name])

    if hasattr(parser, '__analyze_post_process_parse_args__'):
        for argname, postprocess in getattr(parser, '__analyze_post_process_parse_args__').items():
            args_dict[argname] = postprocess(args_dict[argname])

    return args
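The net effect of the new parse_input helper is that each named log argument may now be either a single file path or an arbitrarily nested list of paths (e.g. from nargs='+'), and the parsed result preserves that shape. A standalone sketch of the recursion (not part of the PR; str.upper stands in for the real log parser):

# Sketch only: demonstrates the shape-preserving recursion of parse_input.
def parse_input(x, fileparser=str.upper):
    if isinstance(x, str):
        return fileparser(x)          # a single path parses to a single result
    assert isinstance(x, list)
    return [parse_input(l, fileparser) for l in x]  # lists parse element-wise

print(parse_input('run1.log'))                # 'RUN1.LOG'
print(parse_input(['run1.log', 'run2.log']))  # ['RUN1.LOG', 'RUN2.LOG']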
33 changes: 33 additions & 0 deletions util/analyze/_types.py
@@ -41,13 +41,26 @@ def __iter__(self):
        for bench in self.benchmarks:
            yield from bench.blocks

    def __len__(self):
        return sum(len(bench) for bench in self.benchmarks)

    def __repr__(self):
        benchmarks = ','.join(b.name for b in self.benchmarks)
        return f'<Logs({benchmarks})>'

    def keep_blocks_if(self, p):
        return Logs([bench.keep_blocks_if(p) for bench in self.benchmarks])

    def find_equiv(self, blk):
        uid = blk.uniqueid()
        return [b for b in self.benchmark(blk.info['benchmark']) if b.uniqueid() == uid]

    def find_block(self, name, benchmark=None):
        search = self
        if benchmark is not None:
            search = self.benchmark(benchmark)
        return [b for b in search if b.name == name]


class Benchmark:
    '''
@@ -67,6 +80,9 @@ def __init__(self, info, blocks):
    def __iter__(self):
        return iter(self.blocks)

    def __len__(self):
        return len(self.blocks)

    @property
    def benchmarks(self):
        return (self,)
@@ -77,6 +93,16 @@ def __repr__(self):
    def keep_blocks_if(self, p):
        return Benchmark(self.info, [blk for blk in self.blocks if p(blk)])

    def find_equiv(self, blk):
        uid = blk.uniqueid()
        return [b for b in self if b.uniqueid() == uid]

    def find_block(self, name, benchmark=None):
        if benchmark is not None:
            if benchmark != self.name:
                return []
        return [b for b in self if b.name == name]


class Block:
    '''
@@ -93,10 +119,14 @@ class Block:

    def __init__(self, info, raw_log, events):
        self.name = info['name']
        self.benchmark = info['benchmark']
        self.info = info
        self.raw_log = raw_log
        self.events = events

        if 'PassFinished' in self:
            self.info['pass'] = self.single('PassFinished')['num']

    def single(self, event_name):
        '''
        Gets an event with the specified name, requiring exactly one match
@@ -132,3 +162,6 @@ def __repr__(self):

    def uniqueid(self):
        return frozenset(self.info.items())

    def dump(self):
        print(self.raw_log)
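Together with the commit "Encourage running load_logs with python -i", these helpers target interactive exploration of parsed logs. A hypothetical session (the log path, benchmark, and block names are invented):

# Hypothetical usage sketch, not part of the PR.
from util.analyze.imports import import_cpu2006

logs = import_cpu2006.parse('cpu2006-build.log')        # invented path
print(len(logs))                                        # total blocks, via the new __len__
blk = logs.find_block('%bb.0', benchmark='403.gcc')[0]  # invented names
print(logs.find_equiv(blk))                             # blocks sharing blk.uniqueid()
blk.dump()                                              # print the block's raw log text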
39 changes: 0 additions & 39 deletions util/analyze/_utils.py

This file was deleted.

5 changes: 2 additions & 3 deletions util/analyze/imports/import_cpu2006.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python3

import os
import re

from . import import_utils

@@ -13,8 +12,8 @@ def parse(file):
    with open(file, 'r') as f:
        return import_utils.parse_multi_bench_file(
            f.read(),
            benchstart=re.compile(r'Building (?P<name>\S*)'),
            filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n'))
            benchstart=r'Building (?P<name>\S*)',
            filename=r'/[fc]lang\b.*\s(\S+\.\S+)\n')


if __name__ == '__main__':