From caa226bd4ee39945cd458a331923a95a687b3b8d Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 18 Nov 2022 17:07:02 +0100
Subject: [PATCH 01/12] Speed-up cut calculation by computing quantiles in one
 go

---
 pyirf/cut_optimization.py         | 48 +++++++++++++++++--------------
 pyirf/cuts.py                     |  8 +++++-
 pyirf/spectral.py                 |  3 +-
 pyirf/tests/test_optimize_cuts.py | 32 +++++++++++++++++++++
 4 files changed, 67 insertions(+), 24 deletions(-)
 create mode 100644 pyirf/tests/test_optimize_cuts.py

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index df4cab015..1cc4e485b 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -80,27 +80,31 @@ def optimize_gh_cut(
     )
 
     sensitivities = []
-    gh_cuts = []
-    for efficiency in tqdm(gh_cut_efficiencies, disable=not progress):
-
-        # calculate necessary percentile needed for
-        # ``calculate_percentile_cut`` with the correct efficiency.
-        # Depends on the operator, since we need to invert the
-        # efficiency if we compare using >=, since percentile is
-        # defines as <=.
-        if op(-1, 1): # if operator behaves like "<=", "<" etc:
-            percentile = 100 * efficiency
-            fill_value = signal['gh_score'].min()
-        else: # operator behaves like ">=", ">"
-            percentile = 100 * (1 - efficiency)
-            fill_value = signal['gh_score'].max()
-
-        gh_cut = calculate_percentile_cut(
-            signal['gh_score'], signal['reco_energy'],
-            bins=reco_energy_bins,
-            fill_value=fill_value, percentile=percentile,
-        )
-        gh_cuts.append(gh_cut)
+    # calculate necessary percentile needed for
+    # ``calculate_percentile_cut`` with the correct efficiency.
+    # Depends on the operator, since we need to invert the
+    # efficiency if we compare using >=, since percentile is
+    # defines as <=.
+    gh_cut_efficiencies = np.asanyarray(gh_cut_efficiencies)
+    if op(-1, 1):
+        # if operator behaves like "<=", "<" etc:
+        percentiles = 100 * gh_cut_efficiencies
+        fill_value = signal['gh_score'].min()
+    else:
+        # operator behaves like ">=", ">"
+        percentiles = 100 * (1 - gh_cut_efficiencies)
+        fill_value = signal['gh_score'].max()
+
+    gh_cuts = calculate_percentile_cut(
+        signal['gh_score'], signal['reco_energy'],
+        bins=reco_energy_bins,
+        fill_value=fill_value,
+        percentile=percentiles,
+    )
+
+    for col in tqdm(range(len(gh_cut_efficiencies)), disable=not progress):
+        gh_cut = gh_cuts.copy()
+        gh_cut["cut"] = gh_cuts["cut"][:, col]
 
         # apply the current cut
         signal_selected = evaluate_binned_cut(
@@ -149,6 +153,6 @@ def optimize_gh_cut(
             best = 0
 
         best_sensitivity[bin_id] = sensitivities[best][bin_id]
-        best_cut_table["cut"][bin_id] = gh_cuts[best]["cut"][bin_id]
+        best_cut_table["cut"][bin_id] = gh_cuts["cut"][bin_id][best]
 
     return best_sensitivity, best_cut_table
diff --git a/pyirf/cuts.py b/pyirf/cuts.py
index 39f467991..677b96890 100644
--- a/pyirf/cuts.py
+++ b/pyirf/cuts.py
@@ -62,12 +62,18 @@ def calculate_percentile_cut(
     bin_index, valid = calculate_bin_indices(bin_values, bins)
     by_bin = table[valid].group_by(bin_index[valid])
 
+    n_bins = len(bins) - 1
     cut_table = QTable()
     cut_table["low"] = bins[:-1]
     cut_table["high"] = bins[1:]
     cut_table["center"] = bin_center(bins)
     cut_table["n_events"] = 0
-    cut_table["cut"] = np.asanyarray(fill_value, values.dtype)
+
+    percentile = np.asanyarray(percentile)
+    if percentile.shape == ():
+        cut_table["cut"] = np.asanyarray(fill_value, values.dtype)
+    else:
+        cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) 
 
     for bin_idx, group in zip(by_bin.groups.keys, by_bin.groups):
         # replace bins with too few events with fill_value
diff --git a/pyirf/spectral.py b/pyirf/spectral.py
index 164ab0990..81ed01f13 100644
--- a/pyirf/spectral.py
+++ b/pyirf/spectral.py
@@ -264,7 +264,8 @@ def __init__(self, normalization, index, e_ref, f, mu, sigma):
     @u.quantity_input(energy=u.TeV)
     def __call__(self, energy):
         power = super().__call__(energy)
-        log10_e = np.log10(energy / self.e_ref)
+        e = (energy / self.e_ref).to_value(u.one)
+        log10_e = np.log10(e)
         # ROOT's TMath::Gauss does not add the normalization
         # this is missing from the IRFDocs
         # the code used for the plot can be found here:
diff --git a/pyirf/tests/test_optimize_cuts.py b/pyirf/tests/test_optimize_cuts.py
new file mode 100644
index 000000000..770dd9cb5
--- /dev/null
+++ b/pyirf/tests/test_optimize_cuts.py
@@ -0,0 +1,32 @@
+import numpy as np
+from astropy.table import QTable
+import astropy.units as u
+
+
+def test_optimize_cuts():
+    from pyirf.cuts import calculate_percentile_cut
+    from pyirf.cut_optimization import optimize_gh_cut
+
+    rng = np.random.default_rng(0)
+
+    n_signal = 1000
+    n_background = 10000
+
+    signal = QTable({
+        "reco_energy": rng.uniform(1.0, 10.0, n_signal) * u.TeV,
+        "theta": rng.uniform(0.0, 0.5, n_signal) * u.deg,
+        "gh_score": np.clip(rng.normal(0.7, 0.4, n_signal), 0, 1),
+    })
+
+    background = QTable({
+        "reco_energy": rng.uniform(1.0, 10.0, n_background) * u.TeV,
+        "theta": rng.uniform(0.0, 0.5, n_background) * u.deg,
+        "gh_score": np.clip(rng.normal(0.2, 0.3, n_background), 0, 1),
+        "reco_source_fov_offset": rng.uniform(0, 1, n_background) * u.deg,
+    })
+
+
+    e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV
+    theta_cuts = calculate_percentile_cut(signal["theta"], signal["reco_energy"], e_reco_bins, fill_value=1 * u.deg)
+
+    sensitivity, cuts = optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts)

From 67188b13e1831fc8032e39dce216060cc7016a39 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 18 Nov 2022 17:13:31 +0100
Subject: [PATCH 02/12] Use tqdm.auto

---
 pyirf/cut_optimization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index 1cc4e485b..0fcd8c8e8 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -1,7 +1,7 @@
 import numpy as np
 from astropy.table import QTable
 import astropy.units as u
-from tqdm import tqdm
+from tqdm.auto import tqdm
 import operator
 
 from .cuts import evaluate_binned_cut, calculate_percentile_cut

From db2701166858f8cba63e06f1d11d0d8c6b671ef1 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 18 Nov 2022 17:23:50 +0100
Subject: [PATCH 03/12] Store best efficiency

---
 pyirf/cut_optimization.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index 0fcd8c8e8..7d369166a 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -140,6 +140,7 @@ def optimize_gh_cut(
     best_cut_table["center"] = bin_center(reco_energy_bins)
     best_cut_table["high"] = reco_energy_bins[1:]
     best_cut_table["cut"] = np.nan
+    best_cut_table["efficiency"] = np.nan
 
     best_sensitivity = sensitivities[0].copy()
     for bin_id in range(len(reco_energy_bins) - 1):
@@ -154,5 +155,6 @@ def optimize_gh_cut(
 
         best_sensitivity[bin_id] = sensitivities[best][bin_id]
         best_cut_table["cut"][bin_id] = gh_cuts["cut"][bin_id][best]
+        best_cut_table["efficiency"][bin_id] = gh_cut_efficiencies[best]
 
     return best_sensitivity, best_cut_table

From 3eb02217659dcddddce9263529a43a6db8a8503b Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 18 Nov 2022 19:44:48 +0100
Subject: [PATCH 04/12] Add full cut optimization

---
 pyirf/cut_optimization.py         | 184 +++++++++++++++++++++++++++++-
 pyirf/cuts.py                     |   8 ++
 pyirf/tests/test_optimize_cuts.py |  43 ++++++-
 3 files changed, 227 insertions(+), 8 deletions(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index 7d369166a..f47dbc524 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -1,3 +1,4 @@
+from itertools import product
 import numpy as np
 from astropy.table import QTable
 import astropy.units as u
@@ -14,6 +15,183 @@
 ]
 
 
+def optimize_cuts(
+    signal,
+    background,
+    reco_energy_bins,
+    multiplicity_cuts,
+    gh_cut_efficiencies,
+    theta_cut_efficiencies,
+    fov_offset_min=0 * u.deg,
+    fov_offset_max=1 * u.deg,
+    alpha=1.0,
+    progress=True,
+    **kwargs
+):
+    """
+    Optimize the gamma/hadronnes, theta and multiplicity cut in every energy bin of reconstructed energy
+    for best sensitivity.
+
+    Parameters
+    ----------
+    signal: astropy.table.QTable
+        event list of simulated signal events.
+        Required columns are `theta`, `reco_energy`, 'weight', `gh_score`
+        No directional (theta) or gamma/hadron cut should already be applied.
+    background: astropy.table.QTable
+        event list of simulated background events.
+        Required columns are `reco_source_fov_offset`, `reco_energy`,
+        'weight', `gh_score`.
+        No directional (theta) or gamma/hadron cut should already be applied.
+    reco_energy_bins: astropy.units.Quantity[energy]
+        Bins in reconstructed energy to use for sensitivity computation
+    gh_cut_efficiencies: np.ndarray[float, ndim=1]
+        The cut efficiencies to scan for best sensitivity.
+    theta_cuts: astropy.table.QTable
+        cut definition of the energy dependent theta cut,
+        e.g. as created by ``calculate_percentile_cut``
+    op: comparison function with signature f(a, b) -> bool
+        The comparison function to use for the gamma hadron score.
+        Returning true means an event passes the cut, so is not discarded.
+        E.g. for gammaness-like score, use `operator.ge` (>=) and for a
+        hadroness-like score use `operator.le` (<=).
+    fov_offset_min: astropy.units.Quantity[angle]
+        Minimum distance from the fov center for background events to be taken into account
+    fov_offset_max: astropy.units.Quantity[angle]
+        Maximum distance from the fov center for background events to be taken into account
+    alpha: float
+        Size ratio of off region / on region. Will be used to
+        scale the background rate.
+    progress: bool
+        If True, show a progress bar during cut optimization
+    **kwargs are passed to ``calculate_sensitivity``
+    """
+    gh_cut_efficiencies = np.asanyarray(gh_cut_efficiencies)
+    gh_cut_percentiles = 100 * (1 - gh_cut_efficiencies)
+    fill_value = signal['gh_score'].max()
+
+
+    sensitivities = []
+    cut_indicies = []
+    n_theta_cuts = len(theta_cut_efficiencies)
+    n_gh_cuts = len(gh_cut_efficiencies)
+    n_cuts = len(multiplicity_cuts) * n_theta_cuts * n_gh_cuts
+
+    with tqdm(total=n_cuts, disable=not progress) as bar:
+        for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts):
+
+            signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut
+            background_mask_multiplicity = background["multiplicity"] >= multiplicity_cut
+
+            gh_cuts = calculate_percentile_cut(
+                signal['gh_score'][signal_mask_multiplicity],
+                signal['reco_energy'][signal_mask_multiplicity],
+                bins=reco_energy_bins,
+                fill_value=fill_value,
+                percentile=gh_cut_percentiles,
+            )
+
+            theta_cuts = calculate_percentile_cut(
+                signal['theta'][signal_mask_multiplicity],
+                signal['reco_energy'][signal_mask_multiplicity],
+                bins=reco_energy_bins,
+                fill_value=0.3 * u.deg,
+                max_value=0.3 * u.deg,
+                min_value=0.02 * u.deg,
+                percentile=100 * theta_cut_efficiencies,
+            )
+
+
+            for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)):
+
+                # apply the current cuts
+                theta_cut = theta_cuts.copy()
+                theta_cut["cut"] = theta_cuts["cut"][:, theta_index]
+                signal_mask_theta = evaluate_binned_cut(
+                    signal["theta"], signal["reco_energy"], theta_cut, operator.le,
+                )
+
+                gh_cut = gh_cuts.copy()
+                gh_cut["cut"] = gh_cuts["cut"][:, gh_index]
+                signal_mask_gh = evaluate_binned_cut(
+                    signal["gh_score"], signal["reco_energy"], gh_cut, operator.ge,
+                )
+
+                signal_selected = signal_mask_gh & signal_mask_theta & signal_mask_multiplicity
+
+                background_mask_gh = evaluate_binned_cut(
+                    background["gh_score"], background["reco_energy"], gh_cut, operator.ge,
+                )
+                background_selected = background_mask_gh & background_mask_multiplicity
+
+                # create the histograms
+                signal_hist = create_histogram_table(
+                    signal[signal_selected], reco_energy_bins, "reco_energy"
+                )
+
+                background_hist = estimate_background(
+                    events=background[background_selected],
+                    reco_energy_bins=reco_energy_bins,
+                    theta_cuts=theta_cut,
+                    alpha=alpha,
+                    fov_offset_min=fov_offset_min,
+                    fov_offset_max=fov_offset_max,
+                )
+
+                sensitivity = calculate_sensitivity(
+                    signal_hist, background_hist, alpha=alpha,
+                    **kwargs,
+                )
+                cut_indicies.append((multiplicity_index, theta_index, gh_index))
+                sensitivities.append(sensitivity)
+                bar.update(1)
+
+    best_gh_cut = QTable()
+    best_gh_cut["low"] = reco_energy_bins[0:-1]
+    best_gh_cut["center"] = bin_center(reco_energy_bins)
+    best_gh_cut["high"] = reco_energy_bins[1:]
+    best_gh_cut["cut"] = np.nan
+    best_gh_cut["efficiency"] = np.nan
+
+    best_multiplicity_cut = QTable()
+    best_multiplicity_cut["low"] = reco_energy_bins[0:-1]
+    best_multiplicity_cut["center"] = bin_center(reco_energy_bins)
+    best_multiplicity_cut["high"] = reco_energy_bins[1:]
+    best_multiplicity_cut["cut"] = np.nan
+
+    best_theta_cut = QTable()
+    best_theta_cut["low"] = reco_energy_bins[0:-1]
+    best_theta_cut["center"] = bin_center(reco_energy_bins)
+    best_theta_cut["high"] = reco_energy_bins[1:]
+    best_theta_cut["cut"] = np.nan
+    best_theta_cut["cut"].unit = theta_cuts["cut"].unit
+    best_theta_cut["efficiency"] = np.nan
+
+    best_sensitivity = sensitivities[0].copy()
+    for bin_id in range(len(reco_energy_bins) - 1):
+        sensitivities_bin = [s["relative_sensitivity"][bin_id] for s in sensitivities]
+
+        if not np.all(np.isnan(sensitivities_bin)):
+            # nanargmin won't return the index of nan entries
+            best = np.nanargmin(sensitivities_bin)
+        else:
+            # if all are invalid, just use the first one
+            best = 0
+        
+        multiplicity_index, theta_index, gh_index = cut_indicies[best]
+
+        best_sensitivity[bin_id] = sensitivities[best][bin_id]
+
+        best_gh_cut["cut"][bin_id] = gh_cuts["cut"][bin_id][gh_index]
+        best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index]
+        best_theta_cut["cut"][bin_id] = theta_cuts["cut"][bin_id][theta_index]
+
+        best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index]
+        best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index]
+
+    return best_sensitivity, best_multiplicity_cut, best_theta_cut, best_gh_cut
+
+
 def optimize_gh_cut(
     signal,
     background,
@@ -28,9 +206,6 @@ def optimize_gh_cut(
     **kwargs
 ):
     """
-    Optimize the gh-score cut in every energy bin of reconstructed energy
-    for best sensitivity.
-
     This procedure is EventDisplay-like, since it only applies a
     pre-computed theta cut and then optimizes only the gamma/hadron separation
     cut.
@@ -50,9 +225,6 @@ def optimize_gh_cut(
         Bins in reconstructed energy to use for sensitivity computation
     gh_cut_efficiencies: np.ndarray[float, ndim=1]
         The cut efficiencies to scan for best sensitivity.
-    theta_cuts: astropy.table.QTable
-        cut definition of the energy dependent theta cut,
-        e.g. as created by ``calculate_percentile_cut``
     op: comparison function with signature f(a, b) -> bool
         The comparison function to use for the gamma hadron score.
         Returning true means an event passes the cut, so is not discarded.
diff --git a/pyirf/cuts.py b/pyirf/cuts.py
index 677b96890..308741c31 100644
--- a/pyirf/cuts.py
+++ b/pyirf/cuts.py
@@ -69,12 +69,20 @@ def calculate_percentile_cut(
     cut_table["center"] = bin_center(bins)
     cut_table["n_events"] = 0
 
+    unit = None
+    if hasattr(fill_value, 'unit'):
+        unit = fill_value.unit
+        fill_value = fill_value.value
+
     percentile = np.asanyarray(percentile)
     if percentile.shape == ():
         cut_table["cut"] = np.asanyarray(fill_value, values.dtype)
     else:
         cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) 
 
+    if unit is not None:
+        cut_table["cut"].unit = unit
+
     for bin_idx, group in zip(by_bin.groups.keys, by_bin.groups):
         # replace bins with too few events with fill_value
         n_events = len(group)
diff --git a/pyirf/tests/test_optimize_cuts.py b/pyirf/tests/test_optimize_cuts.py
index 770dd9cb5..6784209c6 100644
--- a/pyirf/tests/test_optimize_cuts.py
+++ b/pyirf/tests/test_optimize_cuts.py
@@ -3,7 +3,7 @@
 import astropy.units as u
 
 
-def test_optimize_cuts():
+def test_optimize_gh_cuts():
     from pyirf.cuts import calculate_percentile_cut
     from pyirf.cut_optimization import optimize_gh_cut
 
@@ -29,4 +29,43 @@ def test_optimize_cuts():
     e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV
     theta_cuts = calculate_percentile_cut(signal["theta"], signal["reco_energy"], e_reco_bins, fill_value=1 * u.deg)
 
-    sensitivity, cuts = optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts)
+    optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts)
+
+
+def test_optimize_cuts():
+    from pyirf.cut_optimization import optimize_cuts
+
+    rng = np.random.default_rng(0)
+
+    n_signal = 3000
+    n_background = 10000
+
+    signal = QTable({
+        "reco_energy": rng.uniform(1.0, 10.0, n_signal) * u.TeV,
+        "theta": rng.uniform(0.0, 0.5, n_signal) * u.deg,
+        "gh_score": np.clip(rng.normal(0.7, 0.4, n_signal), 0, 1),
+        "multiplicity": rng.integers(2, 15, n_signal),
+        "weight": rng.uniform(0.5, 2, n_signal),
+    })
+
+    background = QTable({
+        "reco_energy": rng.uniform(1.0, 10.0, n_background) * u.TeV,
+        "theta": rng.uniform(0.0, 0.5, n_background) * u.deg,
+        "gh_score": np.clip(rng.normal(0.2, 0.3, n_background), 0, 1),
+        "reco_source_fov_offset": rng.uniform(0, 1, n_background) * u.deg,
+        "multiplicity": rng.integers(2, 15, n_background),
+        "weight": rng.uniform(0.5, 2, n_background),
+    })
+
+
+    e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV
+
+    optimize_cuts(
+        signal,
+        background,
+        e_reco_bins,
+        multiplicity_cuts=[2, 3, 4, 5],
+        gh_cut_efficiencies=[0.5, 0.8, 0.9],
+        theta_cut_efficiencies=[0.68, 0.75],
+        alpha=0.2,
+    )

From c4640e99a197bc3453bff499c5c18c4b1b876770 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Sun, 20 Nov 2022 15:21:39 +0100
Subject: [PATCH 05/12] Fix fill_value unit error

---
 pyirf/cuts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyirf/cuts.py b/pyirf/cuts.py
index 308741c31..22041eda4 100644
--- a/pyirf/cuts.py
+++ b/pyirf/cuts.py
@@ -89,7 +89,7 @@ def calculate_percentile_cut(
         cut_table["n_events"][bin_idx] = n_events
 
         if n_events < min_events:
-            cut_table["cut"][bin_idx] = fill_value
+            cut_table["cut"].value[bin_idx] = fill_value
         else:
             value = np.nanpercentile(group["values"], percentile)
             if min_value is not None or max_value is not None:

From 5bc7488a7eb2864671004e2df2de1c1459c2767c Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Sun, 20 Nov 2022 18:11:32 +0100
Subject: [PATCH 06/12] Fix setuptools to < 65.6 to fix distutils import error

---
 pyproject.toml | 2 +-
 setup.py       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 51f3b9859..ee44d66c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,4 +32,4 @@ build-backend = "setuptools.build_meta"
 
     [[tool.towncrier.section]]
         name = ""
-        path = ""
\ No newline at end of file
+        path = ""
diff --git a/setup.py b/setup.py
index f0c38b1c2..add71cfd1 100644
--- a/setup.py
+++ b/setup.py
@@ -47,6 +47,7 @@
         "matplotlib",
         "numpy>=1.18",
         "scipy",
+        "setuptools<65.6",
         "tqdm",
         "importlib_resources ; python_version < '3.9'"
     ],

From 46ec05fb4aa1cbb4ea9984592ed8718e3b639dbe Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 14 Jul 2023 11:13:13 +0200
Subject: [PATCH 07/12] Avoid recomputing bin indices

---
 pyirf/cut_optimization.py | 23 +++++++++++++++--------
 pyirf/cuts.py             | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index f47dbc524..e8cab71db 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -5,9 +5,9 @@
 from tqdm.auto import tqdm
 import operator
 
-from .cuts import evaluate_binned_cut, calculate_percentile_cut
+from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut
 from .sensitivity import calculate_sensitivity, estimate_background
-from .binning import create_histogram_table, bin_center
+from .binning import create_histogram_table, bin_center, calculate_bin_indices
 
 
 __all__ = [
@@ -77,6 +77,13 @@ def optimize_cuts(
     n_gh_cuts = len(gh_cut_efficiencies)
     n_cuts = len(multiplicity_cuts) * n_theta_cuts * n_gh_cuts
 
+    signal_bin_index, signal_valid = calculate_bin_indices(
+        signal['reco_energy'], reco_energy_bins,
+    )
+    background_bin_index, background_valid = calculate_bin_indices(
+        background['reco_energy'], reco_energy_bins,
+    )
+
     with tqdm(total=n_cuts, disable=not progress) as bar:
         for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts):
 
@@ -107,20 +114,20 @@ def optimize_cuts(
                 # apply the current cuts
                 theta_cut = theta_cuts.copy()
                 theta_cut["cut"] = theta_cuts["cut"][:, theta_index]
-                signal_mask_theta = evaluate_binned_cut(
-                    signal["theta"], signal["reco_energy"], theta_cut, operator.le,
+                signal_mask_theta = evaluate_binned_cut_by_index(
+                    signal["theta"], signal_bin_index, signal_valid, theta_cut, operator.le,
                 )
 
                 gh_cut = gh_cuts.copy()
                 gh_cut["cut"] = gh_cuts["cut"][:, gh_index]
-                signal_mask_gh = evaluate_binned_cut(
-                    signal["gh_score"], signal["reco_energy"], gh_cut, operator.ge,
+                signal_mask_gh = evaluate_binned_cut_by_index(
+                    signal["gh_score"], signal_bin_index, signal_valid, gh_cut, operator.ge,
                 )
 
                 signal_selected = signal_mask_gh & signal_mask_theta & signal_mask_multiplicity
 
-                background_mask_gh = evaluate_binned_cut(
-                    background["gh_score"], background["reco_energy"], gh_cut, operator.ge,
+                background_mask_gh = evaluate_binned_cut_by_index(
+                    background["gh_score"], background_bin_index, background_valid, gh_cut, operator.ge,
                 )
                 background_selected = background_mask_gh & background_mask_multiplicity
 
diff --git a/pyirf/cuts.py b/pyirf/cuts.py
index 22041eda4..c81b61a4e 100644
--- a/pyirf/cuts.py
+++ b/pyirf/cuts.py
@@ -107,6 +107,37 @@ def calculate_percentile_cut(
     return cut_table
 
 
+def evaluate_binned_cut_by_index(values, bin_index, valid, cut_table, op):
+    """
+    Evaluate a binned cut as defined in cut_table on given events.
+
+    Events with bin_values outside the bin edges defined in cut table
+    will be set to False.
+
+    Parameters
+    ----------
+    values: ``~numpy.ndarray`` or ``~astropy.units.Quantity``
+        The values on which the cut should be evaluated
+    cut_table: ``~astropy.table.Table``
+        A table describing the binned cuts, e.g. as created by
+        ``~pyirf.cuts.calculate_percentile_cut``.
+        Required columns:
+        - `low`: lower edges of the bins
+        - `high`: upper edges of the bins,
+        - `cut`: cut value
+    op: callable(a, b) -> bool
+        A function taking two arguments, comparing element-wise and
+        returning an array of booleans.
+        Must support vectorized application.
+    """
+    if not isinstance(cut_table, QTable):
+        raise ValueError('cut_table needs to be an astropy.table.QTable')
+
+    result = np.zeros(len(bin_index), dtype=bool)
+    result[valid] = op(values[valid], cut_table["cut"][bin_index[valid]])
+    return result
+
+
 def evaluate_binned_cut(values, bin_values, cut_table, op):
     """
     Evaluate a binned cut as defined in cut_table on given events.
@@ -137,10 +168,7 @@ def evaluate_binned_cut(values, bin_values, cut_table, op):
 
     bins = np.append(cut_table["low"], cut_table["high"][-1])
     bin_index, valid = calculate_bin_indices(bin_values, bins)
-
-    result = np.zeros(len(values), dtype=bool)
-    result[valid] = op(values[valid], cut_table["cut"][bin_index[valid]])
-    return result
+    return evaluate_binned_cut_by_index(values, bin_index, valid, cut_table, op)
 
 
 def compare_irf_cuts(cuts):

From 9bef4fed0c2fdc253da0397919b6fc2f4c071201 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Fri, 14 Jul 2023 11:42:18 +0200
Subject: [PATCH 08/12] Fix import

---
 pyirf/cut_optimization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index e8cab71db..2d99ac0da 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -5,7 +5,8 @@
 from tqdm.auto import tqdm
 import operator
 
-from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut
+
+from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut, evaluate_binned_cut
 from .sensitivity import calculate_sensitivity, estimate_background
 from .binning import create_histogram_table, bin_center, calculate_bin_indices
 

From 3358d03ea8f890aa47744c6939e0dbbf81d1ddc2 Mon Sep 17 00:00:00 2001
From: JBernete <jbernete@ucm.es>
Date: Thu, 12 Dec 2024 15:22:21 +0100
Subject: [PATCH 09/12] Fix a bug in optimize_cuts where looping over a new
 multiplicity cut was overwriting the gh and theta cuts, instead of saving
 them separately.

---
 pyirf/cut_optimization.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index 2d99ac0da..e6e2aeeb9 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -86,6 +86,8 @@ def optimize_cuts(
     )
 
     with tqdm(total=n_cuts, disable=not progress) as bar:
+        all_the_gh_cuts = []
+        all_the_theta_cuts = []
         for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts):
 
             signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut
@@ -98,6 +100,7 @@ def optimize_cuts(
                 fill_value=fill_value,
                 percentile=gh_cut_percentiles,
             )
+            all_the_gh_cuts.append(gh_cuts)
 
             theta_cuts = calculate_percentile_cut(
                 signal['theta'][signal_mask_multiplicity],
@@ -108,6 +111,7 @@ def optimize_cuts(
                 min_value=0.02 * u.deg,
                 percentile=100 * theta_cut_efficiencies,
             )
+            all_the_theta_cuts.append(theta_cuts)
 
 
             for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)):
@@ -190,9 +194,9 @@ def optimize_cuts(
 
         best_sensitivity[bin_id] = sensitivities[best][bin_id]
 
-        best_gh_cut["cut"][bin_id] = gh_cuts["cut"][bin_id][gh_index]
+        best_gh_cut["cut"][bin_id] = all_the_gh_cuts[multiplicity_index]["cut"][bin_id][gh_index]
         best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index]
-        best_theta_cut["cut"][bin_id] = theta_cuts["cut"][bin_id][theta_index]
+        best_theta_cut["cut"][bin_id] = all_the_theta_cuts[multiplicity_index]["cut"][bin_id][theta_index]
 
         best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index]
         best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index]

From 16b6e2fecad345a50970150aa5dc414f4e33b89e Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Wed, 18 Dec 2024 12:32:11 +0100
Subject: [PATCH 10/12] Rename variable

---
 pyirf/cut_optimization.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index e6e2aeeb9..9df954ff3 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -85,9 +85,10 @@ def optimize_cuts(
         background['reco_energy'], reco_energy_bins,
     )
 
+    gh_cut_grid = []
+    theta_cut_grid = []
+
     with tqdm(total=n_cuts, disable=not progress) as bar:
-        all_the_gh_cuts = []
-        all_the_theta_cuts = []
         for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts):
 
             signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut
@@ -100,7 +101,7 @@ def optimize_cuts(
                 fill_value=fill_value,
                 percentile=gh_cut_percentiles,
             )
-            all_the_gh_cuts.append(gh_cuts)
+            gh_cut_grid.append(gh_cuts)
 
             theta_cuts = calculate_percentile_cut(
                 signal['theta'][signal_mask_multiplicity],
@@ -111,7 +112,7 @@ def optimize_cuts(
                 min_value=0.02 * u.deg,
                 percentile=100 * theta_cut_efficiencies,
             )
-            all_the_theta_cuts.append(theta_cuts)
+            theta_cut_grid.append(theta_cuts)
 
 
             for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)):
@@ -194,9 +195,9 @@ def optimize_cuts(
 
         best_sensitivity[bin_id] = sensitivities[best][bin_id]
 
-        best_gh_cut["cut"][bin_id] = all_the_gh_cuts[multiplicity_index]["cut"][bin_id][gh_index]
+        best_gh_cut["cut"][bin_id] = gh_cut_grid[multiplicity_index]["cut"][bin_id][gh_index]
         best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index]
-        best_theta_cut["cut"][bin_id] = all_the_theta_cuts[multiplicity_index]["cut"][bin_id][theta_index]
+        best_theta_cut["cut"][bin_id] = theta_cut_grid[multiplicity_index]["cut"][bin_id][theta_index]
 
         best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index]
         best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index]

From f3902222bda0977e9832129df928075183991fe6 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Wed, 18 Dec 2024 12:38:09 +0100
Subject: [PATCH 11/12] Add changelog

---
 docs/changes/204.feature.rst | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 docs/changes/204.feature.rst

diff --git a/docs/changes/204.feature.rst b/docs/changes/204.feature.rst
new file mode 100644
index 000000000..62dfb1ef5
--- /dev/null
+++ b/docs/changes/204.feature.rst
@@ -0,0 +1,3 @@
+Add function ``pyirf.cut_optimization.optimize_cuts`` doing
+a full grid search on gammaness, theta and multiplicity cut
+to find best point source sensitivity.

From d4ac7c57c4e9ee9af01c2e533fc1c299e1020130 Mon Sep 17 00:00:00 2001
From: Maximilian Linhoff <maximilian.linhoff@tu-dortmund.de>
Date: Wed, 18 Dec 2024 13:04:38 +0100
Subject: [PATCH 12/12] Fix codacy complaints

---
 pyirf/cut_optimization.py | 2 +-
 pyirf/cuts.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py
index 9df954ff3..7b42ee3da 100644
--- a/pyirf/cut_optimization.py
+++ b/pyirf/cut_optimization.py
@@ -190,7 +190,7 @@ def optimize_cuts(
         else:
             # if all are invalid, just use the first one
             best = 0
-        
+
         multiplicity_index, theta_index, gh_index = cut_indicies[best]
 
         best_sensitivity[bin_id] = sensitivities[best][bin_id]
diff --git a/pyirf/cuts.py b/pyirf/cuts.py
index 8422059a7..c3694127f 100644
--- a/pyirf/cuts.py
+++ b/pyirf/cuts.py
@@ -86,7 +86,7 @@ def calculate_percentile_cut(
     if percentile.shape == ():
         cut_table["cut"] = np.asanyarray(fill_value, values.dtype)
     else:
-        cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) 
+        cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype)
 
     if unit is not None:
         cut_table["cut"].unit = unit