Add hooks for calling arbitrary code around each benchmark
mdboom committed Jun 24, 2024
1 parent 7b9a23a commit 00a2dec
Showing 7 changed files with 94 additions and 28 deletions.
4 changes: 3 additions & 1 deletion doc/run_benchmark.rst
@@ -195,9 +195,11 @@ Specializer statistics (``pystats``)
==================================

``pyperf`` has built-in support for `specializer statistics (``pystats``) <https://docs.python.org/dev/using/configure.html#cmdoption-enable-pystats>`_.
-If running benchmarks on a CPython built with the ``--enable-pystats`` flag, pyperf will automatically collect ``pystats`` on the benchmark code by calling ``sys._stats_on`` immediately before the benchmark and calling ``sys._stats_off`` immediately after.
+If running benchmarks on a CPython built with the ``--enable-pystats`` flag and the ``--hook pystats`` option is passed, pyperf will collect ``pystats`` on the benchmark code by calling ``sys._stats_on`` immediately before the benchmark and ``sys._stats_off`` immediately after.
Stats are not collected when running ``pyperf``'s own code or when warming up or calibrating the benchmarks.

+**New in X.YY:** The ``--hook pystats`` flag must be given to collect pystats.
+
Due to the overhead of collecting the statistics, the timing results will be meaningless.

The `Tools/scripts/summarize_stats.py <https://github.com/python/cpython/blob/main/Tools/scripts/summarize_stats.py>`_ script can be used to summarize the statistics in a human-readable form.
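As an aside (not part of this diff; the script name, timed expression, and output path are illustrative assumptions), a minimal benchmark script exercising the new option could look like:

    # bench_sorted.py -- hypothetical example script, not part of this commit.
    # On a CPython built with --enable-pystats, it could be run as:
    #   python bench_sorted.py --hook pystats -o results.json
    import pyperf

    runner = pyperf.Runner()
    runner.timeit("sorted",
                  stmt="sorted(data)",
                  setup="data = list(range(1000))")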
4 changes: 0 additions & 4 deletions pyperf/_collect_metadata.py
@@ -125,10 +125,6 @@ def collect_python_metadata(metadata):
    if not gc.isenabled():
        metadata['python_gc'] = 'disabled'

-    # pystats enabled?
-    if hasattr(sys, "_stats_clear"):
-        metadata['pystats'] = 'enabled'
-

def read_proc(path):
    path = proc_path(path)
55 changes: 55 additions & 0 deletions pyperf/_hooks.py
@@ -0,0 +1,55 @@
+# Hooks are installable context managers defined as entry points so that
+# arbitrary code can be run right before and after the actual internal
+# benchmarking code is run.
+
+
+import sys
+
+
+import pkg_resources
+
+
+def get_hooks():
+    return (x.load() for x in pkg_resources.iter_entry_points(group="pyperf.hook", name=None))
+
+
+def get_hook_names():
+    return (x.__name__ for x in get_hooks())
+
+
+def get_selected_hooks(hook_names):
+    if hook_names is None:
+        return
+
+    for hook in get_hooks():
+        if hook.__name__ in hook_names:
+            yield hook
+
+
+def collect_hook_metadata(metadata, hook_names):
+    for hook in get_selected_hooks(hook_names):
+        hook.collect_metadata(metadata)
+
+
+class HookError(Exception):
+    pass
+
+
+class pystats:
+    def __init__(self):
+        if not hasattr(sys, "_stats_on"):
+            raise HookError(
+                "Can not collect pystats because python was not built with --enable-pystats"
+            )
+        sys._stats_off()
+        sys._stats_clear()
+
+    @staticmethod
+    def collect_metadata(metadata):
+        metadata["pystats"] = "enabled"
+
+    def __enter__(self):
+        sys._stats_on()
+
+    def __exit__(self, _exc_type, _exc_value, _traceback):
+        sys._stats_off()
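As an aside (the package, module, and hook names below are invented, not part of pyperf): the mechanism described in the module comment above means a third-party package could ship its own hook as a small context-manager class and register it under the ``pyperf.hook`` entry-point group, e.g. with ``gc_freeze = "mypkg.hooks:gc_freeze"`` in that package's pyproject.toml. A rough sketch:

    # Hypothetical third-party hook (illustrative only).  pyperf would enter it
    # around each benchmark value once selected with ``--hook gc_freeze``.
    import gc


    class gc_freeze:
        @staticmethod
        def collect_metadata(metadata):
            # Called via collect_hook_metadata() to annotate benchmark results.
            metadata["gc_freeze"] = "enabled"

        def __enter__(self):
            # Move objects that survived collection into the permanent
            # generation so the GC leaves them alone during the benchmark.
            gc.freeze()

        def __exit__(self, _exc_type, _exc_value, _traceback):
            gc.unfreeze()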
4 changes: 4 additions & 0 deletions pyperf/_manager.py
@@ -69,6 +69,10 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe):
        if args.profile:
            cmd.extend(['--profile', args.profile])

+        if args.hook:
+            for hook in args.hook:
+                cmd.extend(['--hook', hook])
+
        if self.runner._add_cmdline_args:
            self.runner._add_cmdline_args(cmd, args)

15 changes: 8 additions & 7 deletions pyperf/_runner.py
@@ -11,6 +11,7 @@
                               get_isolated_cpus, set_cpu_affinity,
                               set_highest_priority)
from pyperf._formatter import format_timedelta
+from pyperf._hooks import get_hook_names
from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable,
                           WritePipe, get_python_names,
                           merge_profile_stats)
@@ -77,13 +78,6 @@ def __init__(self, values=None, processes=None,
                 program_args=None, add_cmdline_args=None,
                 _argparser=None):

-        # Reset the stats collection if running a --enable-pystats build
-        try:
-            sys._stats_off()
-            sys._stats_clear()
-        except AttributeError:
-            pass
-
        # Watchdog: ensure that only one instance of Runner (or a Runner
        # subclass) is created per process to prevent bad surprises
        cls = self.__class__
@@ -248,6 +242,9 @@ def __init__(self, values=None, processes=None,
                            help='Collect profile data using cProfile '
                                 'and output to the given file.')

+        parser.add_argument('--hook', nargs="*", choices=list(get_hook_names()),
+                            help='Use the given pyperf hooks')
+
        memory = parser.add_mutually_exclusive_group()
        memory.add_argument('--tracemalloc', action="store_true",
                            help='Trace memory allocations using tracemalloc')
@@ -732,6 +729,10 @@ def bench_command(self, name, command):
        if self.args.profile:
            command.extend(["--profile", self.args.profile])

+        if self.args.hook:
+            for hook in self.args.hook:
+                command.extend(["--hook", hook])
+
        # Use lazy import to limit imports on 'import pyperf'
        from pyperf._command import BenchCommandTask
        task = BenchCommandTask(self, name, command)
37 changes: 21 additions & 16 deletions pyperf/_worker.py
@@ -1,10 +1,12 @@
+import contextlib
import statistics
import sys
import time

import pyperf
from pyperf._formatter import (format_number, format_value, format_values,
                               format_timedelta)
+from pyperf._hooks import collect_hook_metadata, get_selected_hooks, HookError
from pyperf._utils import MS_WINDOWS, MAC_OS, percentile, median_abs_dev


@@ -58,19 +60,14 @@ def _compute_values(self, values, nvalue,

        task_func = self.task_func

-        # If we are on a pystats build, turn on stats collection around the
-        # actual work, except when calibrating.
-        if hasattr(sys, "_stats_on") and not calibrate_loops:
-            core_task_func = task_func
-
-            def stats_func(*args):
-                sys._stats_on()
-                try:
-                    return core_task_func(*args)
-                finally:
-                    sys._stats_off()
-
-            task_func = stats_func
+        hook_managers = []
+        for hook in get_selected_hooks(args.hook):
+            try:
+                hook_managers.append(hook())
+            except HookError as e:
+                print(f"ERROR setting up hook '{hook.__name__}':", file=sys.stderr)
+                print(str(e), file=sys.stderr)
+                sys.exit(1)

        index = 1
        inner_loops = self.inner_loops
@@ -80,7 +77,11 @@ def stats_func(*args):
            if index > nvalue:
                break

-            raw_value = task_func(self, self.loops)
+            with contextlib.ExitStack() as stack:
+                for hook in hook_managers:
+                    stack.enter_context(hook)
+                raw_value = task_func(self, self.loops)
+
            raw_value = float(raw_value)
            value = raw_value / (self.loops * inner_loops)

@@ -118,7 +119,9 @@ def stats_func(*args):

    def collect_metadata(self):
        from pyperf._collect_metadata import collect_metadata
-        return collect_metadata(process=False)
+        metadata = collect_metadata(process=False)
+        collect_hook_metadata(metadata, self.args.hook)
+        return metadata

    def test_calibrate_warmups(self, nwarmup, unit):
        half = nwarmup + (len(self.warmups) - nwarmup) // 2
@@ -381,4 +384,6 @@ def compute(self):

    def collect_metadata(self):
        from pyperf._collect_metadata import collect_metadata
-        return collect_metadata()
+        metadata = collect_metadata()
+        collect_hook_metadata(metadata, self.args.hook)
+        return metadata
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -56,6 +56,9 @@ dev = [
[project.scripts]
pyperf = "pyperf.__main__:main"

+[project.entry-points."pyperf.hook"]
+pystats = "pyperf._hooks:pystats"
+
[tool.setuptools]
packages = ["pyperf", "pyperf.tests"]

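As an aside (illustrative, not part of this change; the printed output is assumed), once pyperf is installed the entry point declared above can be enumerated the same way ``get_hooks()`` does:

    # Rough sketch of how the "pyperf.hook" entry-point group is resolved at
    # run time; this mirrors get_hooks() in pyperf/_hooks.py.
    import pkg_resources

    for entry_point in pkg_resources.iter_entry_points(group="pyperf.hook"):
        hook_class = entry_point.load()
        print(entry_point.name, hook_class)   # e.g. pystats <class 'pyperf._hooks.pystats'>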
