From 973eac53233ac85dfe63965718fcb134f8a60d5c Mon Sep 17 00:00:00 2001 From: Pete Peterson Date: Wed, 27 Dec 2023 09:10:18 -0500 Subject: [PATCH 1/2] Add pre-commit/ruff configuration and fix issues --- .pre-commit-config.yaml | 15 +++++ algorithm_tree.py | 33 ++++++++--- mantid-profiler.py | 121 +++++++++++++++++++++++----------------- psrecord.py | 63 +++++++++++---------- pyproject.toml | 14 +++++ 5 files changed, 155 insertions(+), 91 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..136e0e3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + args: [--maxkb=8192] + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.8 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format diff --git a/algorithm_tree.py b/algorithm_tree.py index 79d4c47..8b18506 100644 --- a/algorithm_tree.py +++ b/algorithm_tree.py @@ -19,7 +19,7 @@ import re -class Node(): +class Node: def __init__(self, info=[]): self.parent = None self.level = 0 @@ -33,6 +33,7 @@ def to_list_int(node, lst): lst.append(node) for nd in node.children: to_list_int(nd, lst) + to_list_int(self, res) return res @@ -47,6 +48,7 @@ def find_all_int(node, cond, res): res.append(node) for nd in node.children: find_all_int(nd, cond, res) + result = [] find_all_int(self, cond, result) return result @@ -57,6 +59,7 @@ def find_in_depth_int(node, cond, res): res[0] = node for nd in node.children: find_in_depth_int(nd, cond, res) + result = [None] find_in_depth_int(self, cond, result) return result[0] @@ -71,6 +74,7 @@ def find_first_int(node, cond, res): return for nd in node.children: find_first_int(nd, cond, res) + result = [] find_first_int(self, cond, result) return result[0] @@ -91,6 +95,7 @@ def apply_int(nd, func): nd.info = func(nd.info) for ch in nd.children: apply_int(ch, func) + root = self.clone() apply_int(root, func) return root @@ -128,9 +133,8 @@ def apply_multiple_trees(trees, check, func): def parseLine(line): - res = re.search('ThreadID=([0-9]*), AlgorithmName=(.*), StartTime=([0-9]*), EndTime=([0-9]*)', line) - return {"thread_id" : res.group(1), "name" : res.group(2), - "start" : int(res.group(3)), "finish" : int(res.group(4))} + res = re.search("ThreadID=([0-9]*), AlgorithmName=(.*), StartTime=([0-9]*), EndTime=([0-9]*)", line) + return {"thread_id": res.group(1), "name": res.group(2), "start": int(res.group(3)), "finish": int(res.group(4))} def fromFile(fileName): @@ -146,28 +150,41 @@ def fromFile(fileName): def cmp_to_key(mycmp): - 'Convert a cmp= function into a key= function' + "Convert a cmp= function into a key= function" + class K: - def __init__(self, obj, *args): + def __init__(self, obj, *args): # noqa: ARG002 self.obj = obj + def __lt__(self, other): return mycmp(self.obj, other.obj) < 0 + def __gt__(self, other): return mycmp(self.obj, other.obj) > 0 + def __eq__(self, other): return mycmp(self.obj, other.obj) == 0 + def __le__(self, other): return mycmp(self.obj, other.obj) <= 0 + def __ge__(self, other): return mycmp(self.obj, other.obj) >= 0 + def __ne__(self, other): return mycmp(self.obj, other.obj) != 0 + return K + def toTrees(records): - recs = sorted(records, key = cmp_to_key(lambda x, y: x["start"] - y["start"] if x["start"] != y["start"] else y["finish"] - x["finish"])) + recs = sorted( + records, + key=cmp_to_key(lambda x, y: x["start"] - y["start"] if x["start"] != y["start"] else y["finish"] - x["finish"]), + ) + def rec_to_node(r, counter): - return Node([r["name"] + " " + str(counter), r["start"], r["finish"], counter ]) + return Node([r["name"] + " " + str(counter), r["start"], r["finish"], counter]) heads = [] counter = dict() diff --git a/mantid-profiler.py b/mantid-profiler.py index a05cb42..5319fb3 100644 --- a/mantid-profiler.py +++ b/mantid-profiler.py @@ -15,13 +15,15 @@ # along with this program. If not, see . -import algorithm_tree as at -import numpy as np -import psrecord import argparse import copy import sys +import numpy as np + +import algorithm_tree as at +import psrecord + # Parse the logfile outputted by psrecord def parse_cpu_log(filename): @@ -36,7 +38,7 @@ def parse_cpu_log(filename): if "START_TIME:" in line: start_time = float(line.split()[1]) continue - line = line.replace("[","") + line = line.replace("[", "") line = line.replace("]", "") line = line.replace("(", "") line = line.replace(")", "") @@ -78,34 +80,32 @@ def parse_cpu_log(filename): # This method is simple but does not guarantee uniqueness of the color. # It is however random enough for our purposes def stringToColor(string): - red = 0 grn = 0 blu = 0 - for i in range(0,len(string),3): + for i in range(0, len(string), 3): red += ord(string[i]) - for i in range(1,len(string),3): + for i in range(1, len(string), 3): grn += ord(string[i]) - for i in range(2,len(string),3): + for i in range(2, len(string), 3): blu += ord(string[i]) red %= 255 grn %= 255 blu %= 255 - return [red,grn,blu,(red+grn+blu)/3.0] + return [red, grn, blu, (red + grn + blu) / 3.0] # Generate HTML output for a tree node def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time): - x0 = ((node.info[1] + header) / 1.0e9) - sync_time x1 = ((node.info[2] + header) / 1.0e9) - sync_time x2 = 0.5 * (x0 + x1) y0 = 0.0 - y1 = -(lmax-node.level+1) + y1 = -(lmax - node.level + 1) dt = x1 - x0 # Get unique color from algorithm name - color = stringToColor(node.info[0].split(' ')[0]) + color = stringToColor(node.info[0].split(" ")[0]) # Compute raw time and percentages rawTime = dt if len(node.children) > 0: @@ -120,7 +120,7 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time): boxText += "%.1E" % dt else: boxText += "%.1f" % dt - boxText += "s (%.1f%%) | %.1fs (%.1f%%)
" % (percTot,rawTime,percRaw) + boxText += "s (%.1f%%) | %.1fs (%.1f%%)
" % (percTot, rawTime, percRaw) if node.parent is not None: boxText += "Parent: " + node.parent.info[0] + "
" @@ -135,7 +135,7 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time): outputString += "x: [%f, %f, %f, %f, %f],\n" % (x0, x0, x2, x1, x1) outputString += "y: [%f, %f, %f, %f, %f],\n" % (y0, y1, y1, y1, y0) outputString += "fill: 'tozeroy',\n" - outputString += "fillcolor: 'rgb(%i,%i,%i)',\n" % (color[0],color[1],color[2]) + outputString += "fillcolor: 'rgb(%i,%i,%i)',\n" % (color[0], color[1], color[2]) outputString += "line: {\n" outputString += "color: '#000000',\n" outputString += "dash: 'solid',\n" @@ -146,10 +146,13 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time): # If the background color is too bright, make the font color black. # Default font color is white if color[3] > 180: - textcolor = '#000000' + textcolor = "#000000" else: - textcolor = '#ffffff' - outputString += "text: ['', '', '%s', '', ''],\n" % (textcolor, base_url, node.info[0].split()[0], node.info[0]) + textcolor = "#ffffff" + outputString += ( + "text: ['', '', '%s', '', ''],\n" + % (textcolor, base_url, node.info[0].split()[0], node.info[0]) + ) outputString += "textposition: 'top',\n" outputString += "hovertext: '" + boxText + "',\n" outputString += "hoverinfo: 'text',\n" @@ -163,15 +166,15 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time): # Generate HTML interactive plot with Plotly library -def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0, - nthreads=0, lmax=0, sync_time=0, header=None): - - htmlFile = open(filename,'w') +def htmlProfile( + filename=None, x=None, data=None, records=None, fill_factor=0, nthreads=0, lmax=0, sync_time=0, header=None +): + htmlFile = open(filename, "w") htmlFile.write("\n") - htmlFile.write(" \n") + htmlFile.write(' \n') htmlFile.write("\n") htmlFile.write("\n") - htmlFile.write("
\n") + htmlFile.write('
\n') htmlFile.write(" \n\n\n") htmlFile.close() + # Main function to launch process monitor and create interactive HTML plot def main(): + parser = argparse.ArgumentParser(description="Profile a Mantid workflow") - parser = argparse.ArgumentParser( - description="Profile a Mantid workflow") - - parser.add_argument("pid", type=str, - help="the process id") + parser.add_argument("pid", type=str, help="the process id") - parser.add_argument("--outfile", type=str, default="profile.html", - help="name of output html file") + parser.add_argument("--outfile", type=str, default="profile.html", help="name of output html file") - parser.add_argument("--infile", type=str, default="algotimeregister.out", - help="name of input file containing algorithm timings") + parser.add_argument( + "--infile", type=str, default="algotimeregister.out", help="name of input file containing algorithm timings" + ) - parser.add_argument("--logfile", type=str, default="mantidprofile.txt", - help="name of output file containing process monitor data") + parser.add_argument( + "--logfile", type=str, default="mantidprofile.txt", help="name of output file containing process monitor data" + ) - parser.add_argument("--interval", type=float, - help="how long to wait between each sample (in " - "seconds). By default the process is sampled " - "as often as possible.") + parser.add_argument( + "--interval", + type=float, + help="how long to wait between each sample (in " + "seconds). By default the process is sampled " + "as often as possible.", + ) - parser.add_argument("--mintime", type=float, default=0.1, - help="minimum duration for an algorithm to appear in" - "the profiling graph (in seconds).") + parser.add_argument( + "--mintime", + type=float, + default=0.1, + help="minimum duration for an algorithm to appear in" "the profiling graph (in seconds).", + ) args = parser.parse_args() @@ -329,7 +337,7 @@ def main(): # Read in algorithm timing log and build tree try: header, records = at.fromFile(args.infile) - records = [x for x in records if x["finish"] - x["start"] > (args.mintime*1.0e9)] + records = [x for x in records if x["finish"] - x["start"] > (args.mintime * 1.0e9)] # Number of threads allocated to this run nthreads = int(header.split()[3]) # Run start time @@ -338,12 +346,13 @@ def main(): lmax = 0 for tree in at.toTrees(records): for node in tree.to_list(): - lmax = max(node.level,lmax) + lmax = max(node.level, lmax) except FileNotFoundError as e: print("failed to load file:", e.filename) print("creating plot without algorithm annotations") import psutil + nthreads = psutil.cpu_count() lmax = 1 header = "" @@ -356,19 +365,27 @@ def main(): raise # Time series - x = data[:,0]-sync_time + x = data[:, 0] - sync_time # Integrate under the curve and compute CPU usage fill factor area_under_curve = np.trapz(data[:, 1], x=x) fill_factor = area_under_curve / ((x[-1] - x[0]) * nthreads) # Create HTML output with Plotly - htmlProfile(filename=args.outfile, x=x, data=data, records=records, - fill_factor=fill_factor, nthreads=nthreads, lmax=lmax, - sync_time=sync_time, header=header) + htmlProfile( + filename=args.outfile, + x=x, + data=data, + records=records, + fill_factor=fill_factor, + nthreads=nthreads, + lmax=lmax, + sync_time=sync_time, + header=header, + ) return -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/psrecord.py b/psrecord.py index ecefcc7..1f1bc56 100644 --- a/psrecord.py +++ b/psrecord.py @@ -26,11 +26,11 @@ ############################################################################### # # 2018: Modified for Mantid profiler by Neil Vaytet & Igor Gudich +# https://github.com/astrofrog/psrecord # ############################################################################### -from __future__ import (unicode_literals, division, print_function, - absolute_import) +from __future__ import absolute_import, division, print_function, unicode_literals import time @@ -62,14 +62,14 @@ def all_children(pr): return pr.children(recursive=True) except AttributeError: return pr.get_children(recursive=True) - except Exception: # pragma: no cover + except Exception: # noqa: BLE001 return [] -def update_children(old_children, new_children): # old children - dict, new_children - list +def update_children(old_children, new_children): # old children - dict, new_children - list new_dct = {} for ch in new_children: - new_dct.update({ch.pid : ch}) + new_dct.update({ch.pid: ch}) todel = [] for pid in old_children.keys(): @@ -87,7 +87,6 @@ def update_children(old_children, new_children): # old children - dict, new_chil def monitor(pid, logfile=None, interval=None): - # We import psutil here so that the module can be imported even if psutil # is not present (for example if accessing the version) import psutil @@ -101,25 +100,25 @@ def monitor(pid, logfile=None, interval=None): except AttributeError: start_time = time.time() - f = open(logfile, 'w') - f.write("# {0:12s} {1:12s} {2:12s} {3:12s} {4}\n".format( - 'Elapsed time'.center(12), - 'CPU (%)'.center(12), - 'Real (MB)'.center(12), - 'Virtual (MB)'.center(12), - 'Threads info'.center(12)) + f = open(logfile, "w") + f.write( + "# {0:12s} {1:12s} {2:12s} {3:12s} {4}\n".format( + "Elapsed time".center(12), + "CPU (%)".center(12), + "Real (MB)".center(12), + "Virtual (MB)".center(12), + "Threads info".center(12), + ) ) - f.write('START_TIME: {}\n'.format(starting_point)) + f.write("START_TIME: {}\n".format(starting_point)) children = {} for ch in all_children(pr): children.update({ch.pid: ch}) try: - # Start main event loop while True: - # Find current time try: current_time = time.perf_counter() @@ -135,8 +134,7 @@ def monitor(pid, logfile=None, interval=None): # Check if process status indicates we should exit if pr_status in [psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD]: - print("Process finished ({0:.2f} seconds)" - .format(current_time - start_time)) + print("Process finished ({0:.2f} seconds)".format(current_time - start_time)) break # Get current CPU and memory @@ -144,10 +142,10 @@ def monitor(pid, logfile=None, interval=None): current_cpu = get_percent(pr) current_mem = get_memory(pr) current_threads = get_threads(pr) - except Exception: + except Exception: # noqa: BLE001 break - current_mem_real = current_mem.rss / 1024. ** 2 - current_mem_virtual = current_mem.vms / 1024. ** 2 + current_mem_real = current_mem.rss / 1024.0**2 + current_mem_virtual = current_mem.vms / 1024.0**2 # Get information for children update_children(children, all_children(pr)) @@ -156,17 +154,20 @@ def monitor(pid, logfile=None, interval=None): current_cpu += get_percent(child) current_mem = get_memory(child) current_threads.extend(get_threads(child)) - except Exception: + except Exception: # noqa: BLE001 continue - current_mem_real += current_mem.rss / 1024. ** 2 - current_mem_virtual += current_mem.vms / 1024. ** 2 - - f.write("{0:12.6f} {1:12.3f} {2:12.3f} {3:12.3f} {4}\n".format( - current_time - start_time + starting_point, - current_cpu, - current_mem_real, - current_mem_virtual, - current_threads)) + current_mem_real += current_mem.rss / 1024.0**2 + current_mem_virtual += current_mem.vms / 1024.0**2 + + f.write( + "{0:12.6f} {1:12.3f} {2:12.3f} {3:12.3f} {4}\n".format( + current_time - start_time + starting_point, + current_cpu, + current_mem_real, + current_mem_virtual, + current_threads, + ) + ) f.flush() if interval is not None: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..15102fa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "mantidprofiler" +description = "Uses psrecord and plotly.js to profile a mantid workflow" +requires-python = ">=3.10" +readme = "README.md" +license = { text = "GPL3.0" } + +[build-system] +requires = ["setuptools", "wheel", "toml"] + +[tool.ruff] +line-length = 120 +# https://beta.ruff.rs/docs/rules/ +select = ["A", "ARG", "BLE", "E", "F", "I", "PT"] From dcc718d6fb617b61b05e1f6c48a29db4b6288b3f Mon Sep 17 00:00:00 2001 From: Pete Peterson Date: Wed, 27 Dec 2023 09:11:39 -0500 Subject: [PATCH 2/2] Remove python 2 compatibility --- psrecord.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/psrecord.py b/psrecord.py index 1f1bc56..01dc6f0 100644 --- a/psrecord.py +++ b/psrecord.py @@ -30,8 +30,6 @@ # ############################################################################### -from __future__ import absolute_import, division, print_function, unicode_literals - import time