Merge pull request #1 from mantidproject/precommit

Add pre-commit/ruff configuration and fix issues
mantidproject · Dec 27, 2023 · 5c4112f
2 parents 5ed1627 + dcc718d
commit 5c4112f
Show file tree

Hide file tree

Showing 5 changed files with 154 additions and 92 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.5.0
+  hooks:
+    - id: check-added-large-files
+      args: [--maxkb=8192]
+    - id: check-yaml
+    - id: end-of-file-fixer
+    - id: trailing-whitespace
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.1.8
+  hooks:
+    - id: ruff
+      args: [--fix, --exit-non-zero-on-fix]
+    - id: ruff-format
diff --git a/algorithm_tree.py b/algorithm_tree.py
@@ -19,7 +19,7 @@
 import re
 
 
-class Node():
+class Node:
     def __init__(self, info=[]):
         self.parent = None
         self.level = 0
@@ -33,6 +33,7 @@ def to_list_int(node, lst):
             lst.append(node)
             for nd in node.children:
                 to_list_int(nd, lst)
+
         to_list_int(self, res)
         return res
 
@@ -47,6 +48,7 @@ def find_all_int(node, cond, res):
                 res.append(node)
             for nd in node.children:
                 find_all_int(nd, cond, res)
+
         result = []
         find_all_int(self, cond, result)
         return result
@@ -57,6 +59,7 @@ def find_in_depth_int(node, cond, res):
                 res[0] = node
                 for nd in node.children:
                     find_in_depth_int(nd, cond, res)
+
         result = [None]
         find_in_depth_int(self, cond, result)
         return result[0]
@@ -71,6 +74,7 @@ def find_first_int(node, cond, res):
                     return
                 for nd in node.children:
                     find_first_int(nd, cond, res)
+
         result = []
         find_first_int(self, cond, result)
         return result[0]
@@ -91,6 +95,7 @@ def apply_int(nd, func):
             nd.info = func(nd.info)
             for ch in nd.children:
                 apply_int(ch, func)
+
         root = self.clone()
         apply_int(root, func)
         return root
@@ -128,9 +133,8 @@ def apply_multiple_trees(trees, check, func):
 
 
 def parseLine(line):
-    res = re.search('ThreadID=([0-9]*), AlgorithmName=(.*), StartTime=([0-9]*), EndTime=([0-9]*)', line)
-    return {"thread_id" : res.group(1), "name" : res.group(2),
-            "start" : int(res.group(3)), "finish" : int(res.group(4))}
+    res = re.search("ThreadID=([0-9]*), AlgorithmName=(.*), StartTime=([0-9]*), EndTime=([0-9]*)", line)
+    return {"thread_id": res.group(1), "name": res.group(2), "start": int(res.group(3)), "finish": int(res.group(4))}
 
 
 def fromFile(fileName):
@@ -146,28 +150,41 @@ def fromFile(fileName):
 
 
 def cmp_to_key(mycmp):
-    'Convert a cmp= function into a key= function'
+    "Convert a cmp= function into a key= function"
+
     class K:
-        def __init__(self, obj, *args):
+        def __init__(self, obj, *args):  # noqa: ARG002
             self.obj = obj
+
         def __lt__(self, other):
             return mycmp(self.obj, other.obj) < 0
+
         def __gt__(self, other):
             return mycmp(self.obj, other.obj) > 0
+
         def __eq__(self, other):
             return mycmp(self.obj, other.obj) == 0
+
         def __le__(self, other):
             return mycmp(self.obj, other.obj) <= 0
+
         def __ge__(self, other):
             return mycmp(self.obj, other.obj) >= 0
+
         def __ne__(self, other):
             return mycmp(self.obj, other.obj) != 0
+
     return K
 
+
 def toTrees(records):
-    recs = sorted(records, key = cmp_to_key(lambda x, y: x["start"] - y["start"] if x["start"] != y["start"] else y["finish"] - x["finish"]))
+    recs = sorted(
+        records,
+        key=cmp_to_key(lambda x, y: x["start"] - y["start"] if x["start"] != y["start"] else y["finish"] - x["finish"]),
+    )
+
     def rec_to_node(r, counter):
-        return Node([r["name"] + " " + str(counter), r["start"], r["finish"], counter ])
+        return Node([r["name"] + " " + str(counter), r["start"], r["finish"], counter])
 
     heads = []
     counter = dict()

diff --git a/mantid-profiler.py b/mantid-profiler.py
@@ -15,13 +15,15 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 
-import algorithm_tree as at
-import numpy as np
-import psrecord
 import argparse
 import copy
 import sys
 
+import numpy as np
+
+import algorithm_tree as at
+import psrecord
+
 
 # Parse the logfile outputted by psrecord
 def parse_cpu_log(filename):
@@ -36,7 +38,7 @@ def parse_cpu_log(filename):
             if "START_TIME:" in line:
                 start_time = float(line.split()[1])
                 continue
-            line = line.replace("[","")
+            line = line.replace("[", "")
             line = line.replace("]", "")
             line = line.replace("(", "")
             line = line.replace(")", "")
@@ -78,34 +80,32 @@ def parse_cpu_log(filename):
 # This method is simple but does not guarantee uniqueness of the color.
 # It is however random enough for our purposes
 def stringToColor(string):
-
     red = 0
     grn = 0
     blu = 0
-    for i in range(0,len(string),3):
+    for i in range(0, len(string), 3):
         red += ord(string[i])
-    for i in range(1,len(string),3):
+    for i in range(1, len(string), 3):
         grn += ord(string[i])
-    for i in range(2,len(string),3):
+    for i in range(2, len(string), 3):
         blu += ord(string[i])
     red %= 255
     grn %= 255
     blu %= 255
-    return [red,grn,blu,(red+grn+blu)/3.0]
+    return [red, grn, blu, (red + grn + blu) / 3.0]
 
 
 # Generate HTML output for a tree node
 def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time):
-
     x0 = ((node.info[1] + header) / 1.0e9) - sync_time
     x1 = ((node.info[2] + header) / 1.0e9) - sync_time
     x2 = 0.5 * (x0 + x1)
     y0 = 0.0
-    y1 = -(lmax-node.level+1)
+    y1 = -(lmax - node.level + 1)
     dt = x1 - x0
 
     # Get unique color from algorithm name
-    color = stringToColor(node.info[0].split(' ')[0])
+    color = stringToColor(node.info[0].split(" ")[0])
     # Compute raw time and percentages
     rawTime = dt
     if len(node.children) > 0:
@@ -120,7 +120,7 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time):
         boxText += "%.1E" % dt
     else:
         boxText += "%.1f" % dt
-    boxText += "s (%.1f%%) | %.1fs (%.1f%%)<br>" % (percTot,rawTime,percRaw)
+    boxText += "s (%.1f%%) | %.1fs (%.1f%%)<br>" % (percTot, rawTime, percRaw)
 
     if node.parent is not None:
         boxText += "Parent: " + node.parent.info[0] + "<br>"
@@ -135,7 +135,7 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time):
     outputString += "x: [%f, %f, %f, %f, %f],\n" % (x0, x0, x2, x1, x1)
     outputString += "y: [%f, %f, %f, %f, %f],\n" % (y0, y1, y1, y1, y0)
     outputString += "fill: 'tozeroy',\n"
-    outputString += "fillcolor: 'rgb(%i,%i,%i)',\n" % (color[0],color[1],color[2])
+    outputString += "fillcolor: 'rgb(%i,%i,%i)',\n" % (color[0], color[1], color[2])
     outputString += "line: {\n"
     outputString += "color: '#000000',\n"
     outputString += "dash: 'solid',\n"
@@ -146,10 +146,13 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time):
     # If the background color is too bright, make the font color black.
     # Default font color is white
     if color[3] > 180:
-        textcolor = '#000000'
+        textcolor = "#000000"
     else:
-        textcolor = '#ffffff'
-    outputString += "text: ['', '', '<a style=\"text-decoration: none; color: %s;\" href=\"%s%s-v1.html\">%s</a>', '', ''],\n" % (textcolor, base_url, node.info[0].split()[0], node.info[0])
+        textcolor = "#ffffff"
+    outputString += (
+        "text: ['', '', '<a style=\"text-decoration: none; color: %s;\" href=\"%s%s-v1.html\">%s</a>', '', ''],\n"
+        % (textcolor, base_url, node.info[0].split()[0], node.info[0])
+    )
     outputString += "textposition: 'top',\n"
     outputString += "hovertext: '" + boxText + "',\n"
     outputString += "hoverinfo: 'text',\n"
@@ -163,15 +166,15 @@ def treeNodeToHtml(node, lmax, sync_time, header, count, tot_time):
 
 
 # Generate HTML interactive plot with Plotly library
-def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
-                nthreads=0, lmax=0, sync_time=0, header=None):
-
-    htmlFile = open(filename,'w')
+def htmlProfile(
+    filename=None, x=None, data=None, records=None, fill_factor=0, nthreads=0, lmax=0, sync_time=0, header=None
+):
+    htmlFile = open(filename, "w")
     htmlFile.write("<head>\n")
-    htmlFile.write("  <script src=\"https://cdn.plot.ly/plotly-latest.min.js\"></script>\n")
+    htmlFile.write('  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>\n')
     htmlFile.write("</head>\n")
     htmlFile.write("<body>\n")
-    htmlFile.write("  <div id=\"myDiv\"></div>\n")
+    htmlFile.write('  <div id="myDiv"></div>\n')
     htmlFile.write("  <script>\n")
     # CPU
     htmlFile.write("  var trace1 = {\n")
@@ -181,7 +184,7 @@ def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
     htmlFile.write("],\n")
     htmlFile.write("    'y': [\n")
     for i in range(len(x)):
-        htmlFile.write("%f,\n" % data[i,1])
+        htmlFile.write("%f,\n" % data[i, 1])
     htmlFile.write("],\n")
     htmlFile.write("  'xaxis': 'x',\n")
     htmlFile.write("  'yaxis': 'y1',\n")
@@ -196,7 +199,7 @@ def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
     htmlFile.write("],\n")
     htmlFile.write("    y: [\n")
     for i in range(len(x)):
-        htmlFile.write("%f,\n" % (data[i,2]/1000.0))
+        htmlFile.write("%f,\n" % (data[i, 2] / 1000.0))
     htmlFile.write("],\n")
     htmlFile.write("  xaxis: 'x',\n")
     htmlFile.write("  yaxis: 'y2',\n")
@@ -211,7 +214,7 @@ def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
     htmlFile.write("],\n")
     htmlFile.write("    y: [\n")
     for i in range(len(x)):
-        htmlFile.write("%f,\n" % (data[i,4]*100.0))
+        htmlFile.write("%f,\n" % (data[i, 4] * 100.0))
     htmlFile.write("],\n")
     htmlFile.write("  xaxis: 'x',\n")
     htmlFile.write("  yaxis: 'y1',\n")
@@ -284,7 +287,7 @@ def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
     htmlFile.write("      x0: 0.0,\n")
     htmlFile.write("      x1: %f,\n" % x[-1])
     htmlFile.write("      y0: 0,\n")
-    htmlFile.write("      y1: %i,\n" % (nthreads*100))
+    htmlFile.write("      y1: %i,\n" % (nthreads * 100))
     htmlFile.write("      xref: 'x',\n")
     htmlFile.write("      yref: 'y1',\n")
     htmlFile.write("    }],\n")
@@ -293,32 +296,37 @@ def htmlProfile(filename=None, x=None, data=None, records=None, fill_factor=0,
     htmlFile.write("</script>\n</body>\n</html>\n")
     htmlFile.close()
 
+
 # Main function to launch process monitor and create interactive HTML plot
 def main():
+    parser = argparse.ArgumentParser(description="Profile a Mantid workflow")
 
-    parser = argparse.ArgumentParser(
-        description="Profile a Mantid workflow")
-
-    parser.add_argument("pid", type=str,
-                        help="the process id")
+    parser.add_argument("pid", type=str, help="the process id")
 
-    parser.add_argument("--outfile", type=str, default="profile.html",
-                        help="name of output html file")
+    parser.add_argument("--outfile", type=str, default="profile.html", help="name of output html file")
 
-    parser.add_argument("--infile", type=str, default="algotimeregister.out",
-                        help="name of input file containing algorithm timings")
+    parser.add_argument(
+        "--infile", type=str, default="algotimeregister.out", help="name of input file containing algorithm timings"
+    )
 
-    parser.add_argument("--logfile", type=str, default="mantidprofile.txt",
-                        help="name of output file containing process monitor data")
+    parser.add_argument(
+        "--logfile", type=str, default="mantidprofile.txt", help="name of output file containing process monitor data"
+    )
 
-    parser.add_argument("--interval", type=float,
-                        help="how long to wait between each sample (in "
-                             "seconds). By default the process is sampled "
-                             "as often as possible.")
+    parser.add_argument(
+        "--interval",
+        type=float,
+        help="how long to wait between each sample (in "
+        "seconds). By default the process is sampled "
+        "as often as possible.",
+    )
 
-    parser.add_argument("--mintime", type=float, default=0.1,
-                        help="minimum duration for an algorithm to appear in"
-                             "the profiling graph (in seconds).")
+    parser.add_argument(
+        "--mintime",
+        type=float,
+        default=0.1,
+        help="minimum duration for an algorithm to appear in" "the profiling graph (in seconds).",
+    )
 
     args = parser.parse_args()
 
@@ -329,7 +337,7 @@ def main():
     # Read in algorithm timing log and build tree
     try:
         header, records = at.fromFile(args.infile)
-        records = [x for x in records if x["finish"] - x["start"] > (args.mintime*1.0e9)]
+        records = [x for x in records if x["finish"] - x["start"] > (args.mintime * 1.0e9)]
         # Number of threads allocated to this run
         nthreads = int(header.split()[3])
         # Run start time
@@ -338,12 +346,13 @@ def main():
         lmax = 0
         for tree in at.toTrees(records):
             for node in tree.to_list():
-                lmax = max(node.level,lmax)
+                lmax = max(node.level, lmax)
     except FileNotFoundError as e:
         print("failed to load file:", e.filename)
         print("creating plot without algorithm annotations")
 
         import psutil
+
         nthreads = psutil.cpu_count()
         lmax = 1
         header = ""
@@ -356,19 +365,27 @@ def main():
         raise
 
     # Time series
-    x = data[:,0]-sync_time
+    x = data[:, 0] - sync_time
 
     # Integrate under the curve and compute CPU usage fill factor
     area_under_curve = np.trapz(data[:, 1], x=x)
     fill_factor = area_under_curve / ((x[-1] - x[0]) * nthreads)
 
     # Create HTML output with Plotly
-    htmlProfile(filename=args.outfile, x=x, data=data, records=records,
-                fill_factor=fill_factor, nthreads=nthreads, lmax=lmax,
-                sync_time=sync_time, header=header)
+    htmlProfile(
+        filename=args.outfile,
+        x=x,
+        data=data,
+        records=records,
+        fill_factor=fill_factor,
+        nthreads=nthreads,
+        lmax=lmax,
+        sync_time=sync_time,
+        header=header,
+    )
 
     return
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     sys.exit(main())