From caa226bd4ee39945cd458a331923a95a687b3b8d Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 18 Nov 2022 17:07:02 +0100 Subject: [PATCH 01/12] Speed-up cut calculation by computing quantiles in one go --- pyirf/cut_optimization.py | 48 +++++++++++++++++-------------- pyirf/cuts.py | 8 +++++- pyirf/spectral.py | 3 +- pyirf/tests/test_optimize_cuts.py | 32 +++++++++++++++++++++ 4 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 pyirf/tests/test_optimize_cuts.py diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index df4cab015..1cc4e485b 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -80,27 +80,31 @@ def optimize_gh_cut( ) sensitivities = [] - gh_cuts = [] - for efficiency in tqdm(gh_cut_efficiencies, disable=not progress): - - # calculate necessary percentile needed for - # ``calculate_percentile_cut`` with the correct efficiency. - # Depends on the operator, since we need to invert the - # efficiency if we compare using >=, since percentile is - # defines as <=. - if op(-1, 1): # if operator behaves like "<=", "<" etc: - percentile = 100 * efficiency - fill_value = signal['gh_score'].min() - else: # operator behaves like ">=", ">" - percentile = 100 * (1 - efficiency) - fill_value = signal['gh_score'].max() - - gh_cut = calculate_percentile_cut( - signal['gh_score'], signal['reco_energy'], - bins=reco_energy_bins, - fill_value=fill_value, percentile=percentile, - ) - gh_cuts.append(gh_cut) + # calculate necessary percentile needed for + # ``calculate_percentile_cut`` with the correct efficiency. + # Depends on the operator, since we need to invert the + # efficiency if we compare using >=, since percentile is + # defines as <=. + gh_cut_efficiencies = np.asanyarray(gh_cut_efficiencies) + if op(-1, 1): + # if operator behaves like "<=", "<" etc: + percentiles = 100 * gh_cut_efficiencies + fill_value = signal['gh_score'].min() + else: + # operator behaves like ">=", ">" + percentiles = 100 * (1 - gh_cut_efficiencies) + fill_value = signal['gh_score'].max() + + gh_cuts = calculate_percentile_cut( + signal['gh_score'], signal['reco_energy'], + bins=reco_energy_bins, + fill_value=fill_value, + percentile=percentiles, + ) + + for col in tqdm(range(len(gh_cut_efficiencies)), disable=not progress): + gh_cut = gh_cuts.copy() + gh_cut["cut"] = gh_cuts["cut"][:, col] # apply the current cut signal_selected = evaluate_binned_cut( @@ -149,6 +153,6 @@ def optimize_gh_cut( best = 0 best_sensitivity[bin_id] = sensitivities[best][bin_id] - best_cut_table["cut"][bin_id] = gh_cuts[best]["cut"][bin_id] + best_cut_table["cut"][bin_id] = gh_cuts["cut"][bin_id][best] return best_sensitivity, best_cut_table diff --git a/pyirf/cuts.py b/pyirf/cuts.py index 39f467991..677b96890 100644 --- a/pyirf/cuts.py +++ b/pyirf/cuts.py @@ -62,12 +62,18 @@ def calculate_percentile_cut( bin_index, valid = calculate_bin_indices(bin_values, bins) by_bin = table[valid].group_by(bin_index[valid]) + n_bins = len(bins) - 1 cut_table = QTable() cut_table["low"] = bins[:-1] cut_table["high"] = bins[1:] cut_table["center"] = bin_center(bins) cut_table["n_events"] = 0 - cut_table["cut"] = np.asanyarray(fill_value, values.dtype) + + percentile = np.asanyarray(percentile) + if percentile.shape == (): + cut_table["cut"] = np.asanyarray(fill_value, values.dtype) + else: + cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) for bin_idx, group in zip(by_bin.groups.keys, by_bin.groups): # replace bins with too few events with fill_value diff --git a/pyirf/spectral.py b/pyirf/spectral.py index 164ab0990..81ed01f13 100644 --- a/pyirf/spectral.py +++ b/pyirf/spectral.py @@ -264,7 +264,8 @@ def __init__(self, normalization, index, e_ref, f, mu, sigma): @u.quantity_input(energy=u.TeV) def __call__(self, energy): power = super().__call__(energy) - log10_e = np.log10(energy / self.e_ref) + e = (energy / self.e_ref).to_value(u.one) + log10_e = np.log10(e) # ROOT's TMath::Gauss does not add the normalization # this is missing from the IRFDocs # the code used for the plot can be found here: diff --git a/pyirf/tests/test_optimize_cuts.py b/pyirf/tests/test_optimize_cuts.py new file mode 100644 index 000000000..770dd9cb5 --- /dev/null +++ b/pyirf/tests/test_optimize_cuts.py @@ -0,0 +1,32 @@ +import numpy as np +from astropy.table import QTable +import astropy.units as u + + +def test_optimize_cuts(): + from pyirf.cuts import calculate_percentile_cut + from pyirf.cut_optimization import optimize_gh_cut + + rng = np.random.default_rng(0) + + n_signal = 1000 + n_background = 10000 + + signal = QTable({ + "reco_energy": rng.uniform(1.0, 10.0, n_signal) * u.TeV, + "theta": rng.uniform(0.0, 0.5, n_signal) * u.deg, + "gh_score": np.clip(rng.normal(0.7, 0.4, n_signal), 0, 1), + }) + + background = QTable({ + "reco_energy": rng.uniform(1.0, 10.0, n_background) * u.TeV, + "theta": rng.uniform(0.0, 0.5, n_background) * u.deg, + "gh_score": np.clip(rng.normal(0.2, 0.3, n_background), 0, 1), + "reco_source_fov_offset": rng.uniform(0, 1, n_background) * u.deg, + }) + + + e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV + theta_cuts = calculate_percentile_cut(signal["theta"], signal["reco_energy"], e_reco_bins, fill_value=1 * u.deg) + + sensitivity, cuts = optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts) From 67188b13e1831fc8032e39dce216060cc7016a39 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 18 Nov 2022 17:13:31 +0100 Subject: [PATCH 02/12] Use tqdm.auto --- pyirf/cut_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index 1cc4e485b..0fcd8c8e8 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -1,7 +1,7 @@ import numpy as np from astropy.table import QTable import astropy.units as u -from tqdm import tqdm +from tqdm.auto import tqdm import operator from .cuts import evaluate_binned_cut, calculate_percentile_cut From db2701166858f8cba63e06f1d11d0d8c6b671ef1 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 18 Nov 2022 17:23:50 +0100 Subject: [PATCH 03/12] Store best efficiency --- pyirf/cut_optimization.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index 0fcd8c8e8..7d369166a 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -140,6 +140,7 @@ def optimize_gh_cut( best_cut_table["center"] = bin_center(reco_energy_bins) best_cut_table["high"] = reco_energy_bins[1:] best_cut_table["cut"] = np.nan + best_cut_table["efficiency"] = np.nan best_sensitivity = sensitivities[0].copy() for bin_id in range(len(reco_energy_bins) - 1): @@ -154,5 +155,6 @@ def optimize_gh_cut( best_sensitivity[bin_id] = sensitivities[best][bin_id] best_cut_table["cut"][bin_id] = gh_cuts["cut"][bin_id][best] + best_cut_table["efficiency"][bin_id] = gh_cut_efficiencies[best] return best_sensitivity, best_cut_table From 3eb02217659dcddddce9263529a43a6db8a8503b Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 18 Nov 2022 19:44:48 +0100 Subject: [PATCH 04/12] Add full cut optimization --- pyirf/cut_optimization.py | 184 +++++++++++++++++++++++++++++- pyirf/cuts.py | 8 ++ pyirf/tests/test_optimize_cuts.py | 43 ++++++- 3 files changed, 227 insertions(+), 8 deletions(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index 7d369166a..f47dbc524 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -1,3 +1,4 @@ +from itertools import product import numpy as np from astropy.table import QTable import astropy.units as u @@ -14,6 +15,183 @@ ] +def optimize_cuts( + signal, + background, + reco_energy_bins, + multiplicity_cuts, + gh_cut_efficiencies, + theta_cut_efficiencies, + fov_offset_min=0 * u.deg, + fov_offset_max=1 * u.deg, + alpha=1.0, + progress=True, + **kwargs +): + """ + Optimize the gamma/hadronnes, theta and multiplicity cut in every energy bin of reconstructed energy + for best sensitivity. + + Parameters + ---------- + signal: astropy.table.QTable + event list of simulated signal events. + Required columns are `theta`, `reco_energy`, 'weight', `gh_score` + No directional (theta) or gamma/hadron cut should already be applied. + background: astropy.table.QTable + event list of simulated background events. + Required columns are `reco_source_fov_offset`, `reco_energy`, + 'weight', `gh_score`. + No directional (theta) or gamma/hadron cut should already be applied. + reco_energy_bins: astropy.units.Quantity[energy] + Bins in reconstructed energy to use for sensitivity computation + gh_cut_efficiencies: np.ndarray[float, ndim=1] + The cut efficiencies to scan for best sensitivity. + theta_cuts: astropy.table.QTable + cut definition of the energy dependent theta cut, + e.g. as created by ``calculate_percentile_cut`` + op: comparison function with signature f(a, b) -> bool + The comparison function to use for the gamma hadron score. + Returning true means an event passes the cut, so is not discarded. + E.g. for gammaness-like score, use `operator.ge` (>=) and for a + hadroness-like score use `operator.le` (<=). + fov_offset_min: astropy.units.Quantity[angle] + Minimum distance from the fov center for background events to be taken into account + fov_offset_max: astropy.units.Quantity[angle] + Maximum distance from the fov center for background events to be taken into account + alpha: float + Size ratio of off region / on region. Will be used to + scale the background rate. + progress: bool + If True, show a progress bar during cut optimization + **kwargs are passed to ``calculate_sensitivity`` + """ + gh_cut_efficiencies = np.asanyarray(gh_cut_efficiencies) + gh_cut_percentiles = 100 * (1 - gh_cut_efficiencies) + fill_value = signal['gh_score'].max() + + + sensitivities = [] + cut_indicies = [] + n_theta_cuts = len(theta_cut_efficiencies) + n_gh_cuts = len(gh_cut_efficiencies) + n_cuts = len(multiplicity_cuts) * n_theta_cuts * n_gh_cuts + + with tqdm(total=n_cuts, disable=not progress) as bar: + for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts): + + signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut + background_mask_multiplicity = background["multiplicity"] >= multiplicity_cut + + gh_cuts = calculate_percentile_cut( + signal['gh_score'][signal_mask_multiplicity], + signal['reco_energy'][signal_mask_multiplicity], + bins=reco_energy_bins, + fill_value=fill_value, + percentile=gh_cut_percentiles, + ) + + theta_cuts = calculate_percentile_cut( + signal['theta'][signal_mask_multiplicity], + signal['reco_energy'][signal_mask_multiplicity], + bins=reco_energy_bins, + fill_value=0.3 * u.deg, + max_value=0.3 * u.deg, + min_value=0.02 * u.deg, + percentile=100 * theta_cut_efficiencies, + ) + + + for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)): + + # apply the current cuts + theta_cut = theta_cuts.copy() + theta_cut["cut"] = theta_cuts["cut"][:, theta_index] + signal_mask_theta = evaluate_binned_cut( + signal["theta"], signal["reco_energy"], theta_cut, operator.le, + ) + + gh_cut = gh_cuts.copy() + gh_cut["cut"] = gh_cuts["cut"][:, gh_index] + signal_mask_gh = evaluate_binned_cut( + signal["gh_score"], signal["reco_energy"], gh_cut, operator.ge, + ) + + signal_selected = signal_mask_gh & signal_mask_theta & signal_mask_multiplicity + + background_mask_gh = evaluate_binned_cut( + background["gh_score"], background["reco_energy"], gh_cut, operator.ge, + ) + background_selected = background_mask_gh & background_mask_multiplicity + + # create the histograms + signal_hist = create_histogram_table( + signal[signal_selected], reco_energy_bins, "reco_energy" + ) + + background_hist = estimate_background( + events=background[background_selected], + reco_energy_bins=reco_energy_bins, + theta_cuts=theta_cut, + alpha=alpha, + fov_offset_min=fov_offset_min, + fov_offset_max=fov_offset_max, + ) + + sensitivity = calculate_sensitivity( + signal_hist, background_hist, alpha=alpha, + **kwargs, + ) + cut_indicies.append((multiplicity_index, theta_index, gh_index)) + sensitivities.append(sensitivity) + bar.update(1) + + best_gh_cut = QTable() + best_gh_cut["low"] = reco_energy_bins[0:-1] + best_gh_cut["center"] = bin_center(reco_energy_bins) + best_gh_cut["high"] = reco_energy_bins[1:] + best_gh_cut["cut"] = np.nan + best_gh_cut["efficiency"] = np.nan + + best_multiplicity_cut = QTable() + best_multiplicity_cut["low"] = reco_energy_bins[0:-1] + best_multiplicity_cut["center"] = bin_center(reco_energy_bins) + best_multiplicity_cut["high"] = reco_energy_bins[1:] + best_multiplicity_cut["cut"] = np.nan + + best_theta_cut = QTable() + best_theta_cut["low"] = reco_energy_bins[0:-1] + best_theta_cut["center"] = bin_center(reco_energy_bins) + best_theta_cut["high"] = reco_energy_bins[1:] + best_theta_cut["cut"] = np.nan + best_theta_cut["cut"].unit = theta_cuts["cut"].unit + best_theta_cut["efficiency"] = np.nan + + best_sensitivity = sensitivities[0].copy() + for bin_id in range(len(reco_energy_bins) - 1): + sensitivities_bin = [s["relative_sensitivity"][bin_id] for s in sensitivities] + + if not np.all(np.isnan(sensitivities_bin)): + # nanargmin won't return the index of nan entries + best = np.nanargmin(sensitivities_bin) + else: + # if all are invalid, just use the first one + best = 0 + + multiplicity_index, theta_index, gh_index = cut_indicies[best] + + best_sensitivity[bin_id] = sensitivities[best][bin_id] + + best_gh_cut["cut"][bin_id] = gh_cuts["cut"][bin_id][gh_index] + best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index] + best_theta_cut["cut"][bin_id] = theta_cuts["cut"][bin_id][theta_index] + + best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index] + best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index] + + return best_sensitivity, best_multiplicity_cut, best_theta_cut, best_gh_cut + + def optimize_gh_cut( signal, background, @@ -28,9 +206,6 @@ def optimize_gh_cut( **kwargs ): """ - Optimize the gh-score cut in every energy bin of reconstructed energy - for best sensitivity. - This procedure is EventDisplay-like, since it only applies a pre-computed theta cut and then optimizes only the gamma/hadron separation cut. @@ -50,9 +225,6 @@ def optimize_gh_cut( Bins in reconstructed energy to use for sensitivity computation gh_cut_efficiencies: np.ndarray[float, ndim=1] The cut efficiencies to scan for best sensitivity. - theta_cuts: astropy.table.QTable - cut definition of the energy dependent theta cut, - e.g. as created by ``calculate_percentile_cut`` op: comparison function with signature f(a, b) -> bool The comparison function to use for the gamma hadron score. Returning true means an event passes the cut, so is not discarded. diff --git a/pyirf/cuts.py b/pyirf/cuts.py index 677b96890..308741c31 100644 --- a/pyirf/cuts.py +++ b/pyirf/cuts.py @@ -69,12 +69,20 @@ def calculate_percentile_cut( cut_table["center"] = bin_center(bins) cut_table["n_events"] = 0 + unit = None + if hasattr(fill_value, 'unit'): + unit = fill_value.unit + fill_value = fill_value.value + percentile = np.asanyarray(percentile) if percentile.shape == (): cut_table["cut"] = np.asanyarray(fill_value, values.dtype) else: cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) + if unit is not None: + cut_table["cut"].unit = unit + for bin_idx, group in zip(by_bin.groups.keys, by_bin.groups): # replace bins with too few events with fill_value n_events = len(group) diff --git a/pyirf/tests/test_optimize_cuts.py b/pyirf/tests/test_optimize_cuts.py index 770dd9cb5..6784209c6 100644 --- a/pyirf/tests/test_optimize_cuts.py +++ b/pyirf/tests/test_optimize_cuts.py @@ -3,7 +3,7 @@ import astropy.units as u -def test_optimize_cuts(): +def test_optimize_gh_cuts(): from pyirf.cuts import calculate_percentile_cut from pyirf.cut_optimization import optimize_gh_cut @@ -29,4 +29,43 @@ def test_optimize_cuts(): e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV theta_cuts = calculate_percentile_cut(signal["theta"], signal["reco_energy"], e_reco_bins, fill_value=1 * u.deg) - sensitivity, cuts = optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts) + optimize_gh_cut(signal, background, e_reco_bins, [0.5, 0.8, 0.9], theta_cuts) + + +def test_optimize_cuts(): + from pyirf.cut_optimization import optimize_cuts + + rng = np.random.default_rng(0) + + n_signal = 3000 + n_background = 10000 + + signal = QTable({ + "reco_energy": rng.uniform(1.0, 10.0, n_signal) * u.TeV, + "theta": rng.uniform(0.0, 0.5, n_signal) * u.deg, + "gh_score": np.clip(rng.normal(0.7, 0.4, n_signal), 0, 1), + "multiplicity": rng.integers(2, 15, n_signal), + "weight": rng.uniform(0.5, 2, n_signal), + }) + + background = QTable({ + "reco_energy": rng.uniform(1.0, 10.0, n_background) * u.TeV, + "theta": rng.uniform(0.0, 0.5, n_background) * u.deg, + "gh_score": np.clip(rng.normal(0.2, 0.3, n_background), 0, 1), + "reco_source_fov_offset": rng.uniform(0, 1, n_background) * u.deg, + "multiplicity": rng.integers(2, 15, n_background), + "weight": rng.uniform(0.5, 2, n_background), + }) + + + e_reco_bins = np.linspace(1.0, 10.0, 5) * u.TeV + + optimize_cuts( + signal, + background, + e_reco_bins, + multiplicity_cuts=[2, 3, 4, 5], + gh_cut_efficiencies=[0.5, 0.8, 0.9], + theta_cut_efficiencies=[0.68, 0.75], + alpha=0.2, + ) From c4640e99a197bc3453bff499c5c18c4b1b876770 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Sun, 20 Nov 2022 15:21:39 +0100 Subject: [PATCH 05/12] Fix fill_value unit error --- pyirf/cuts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyirf/cuts.py b/pyirf/cuts.py index 308741c31..22041eda4 100644 --- a/pyirf/cuts.py +++ b/pyirf/cuts.py @@ -89,7 +89,7 @@ def calculate_percentile_cut( cut_table["n_events"][bin_idx] = n_events if n_events < min_events: - cut_table["cut"][bin_idx] = fill_value + cut_table["cut"].value[bin_idx] = fill_value else: value = np.nanpercentile(group["values"], percentile) if min_value is not None or max_value is not None: From 5bc7488a7eb2864671004e2df2de1c1459c2767c Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Sun, 20 Nov 2022 18:11:32 +0100 Subject: [PATCH 06/12] Fix setuptools to < 65.6 to fix distutils import error --- pyproject.toml | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 51f3b9859..ee44d66c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,4 +32,4 @@ build-backend = "setuptools.build_meta" [[tool.towncrier.section]] name = "" - path = "" \ No newline at end of file + path = "" diff --git a/setup.py b/setup.py index f0c38b1c2..add71cfd1 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "matplotlib", "numpy>=1.18", "scipy", + "setuptools<65.6", "tqdm", "importlib_resources ; python_version < '3.9'" ], From 46ec05fb4aa1cbb4ea9984592ed8718e3b639dbe Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 14 Jul 2023 11:13:13 +0200 Subject: [PATCH 07/12] Avoid recomputing bin indices --- pyirf/cut_optimization.py | 23 +++++++++++++++-------- pyirf/cuts.py | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index f47dbc524..e8cab71db 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -5,9 +5,9 @@ from tqdm.auto import tqdm import operator -from .cuts import evaluate_binned_cut, calculate_percentile_cut +from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut from .sensitivity import calculate_sensitivity, estimate_background -from .binning import create_histogram_table, bin_center +from .binning import create_histogram_table, bin_center, calculate_bin_indices __all__ = [ @@ -77,6 +77,13 @@ def optimize_cuts( n_gh_cuts = len(gh_cut_efficiencies) n_cuts = len(multiplicity_cuts) * n_theta_cuts * n_gh_cuts + signal_bin_index, signal_valid = calculate_bin_indices( + signal['reco_energy'], reco_energy_bins, + ) + background_bin_index, background_valid = calculate_bin_indices( + background['reco_energy'], reco_energy_bins, + ) + with tqdm(total=n_cuts, disable=not progress) as bar: for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts): @@ -107,20 +114,20 @@ def optimize_cuts( # apply the current cuts theta_cut = theta_cuts.copy() theta_cut["cut"] = theta_cuts["cut"][:, theta_index] - signal_mask_theta = evaluate_binned_cut( - signal["theta"], signal["reco_energy"], theta_cut, operator.le, + signal_mask_theta = evaluate_binned_cut_by_index( + signal["theta"], signal_bin_index, signal_valid, theta_cut, operator.le, ) gh_cut = gh_cuts.copy() gh_cut["cut"] = gh_cuts["cut"][:, gh_index] - signal_mask_gh = evaluate_binned_cut( - signal["gh_score"], signal["reco_energy"], gh_cut, operator.ge, + signal_mask_gh = evaluate_binned_cut_by_index( + signal["gh_score"], signal_bin_index, signal_valid, gh_cut, operator.ge, ) signal_selected = signal_mask_gh & signal_mask_theta & signal_mask_multiplicity - background_mask_gh = evaluate_binned_cut( - background["gh_score"], background["reco_energy"], gh_cut, operator.ge, + background_mask_gh = evaluate_binned_cut_by_index( + background["gh_score"], background_bin_index, background_valid, gh_cut, operator.ge, ) background_selected = background_mask_gh & background_mask_multiplicity diff --git a/pyirf/cuts.py b/pyirf/cuts.py index 22041eda4..c81b61a4e 100644 --- a/pyirf/cuts.py +++ b/pyirf/cuts.py @@ -107,6 +107,37 @@ def calculate_percentile_cut( return cut_table +def evaluate_binned_cut_by_index(values, bin_index, valid, cut_table, op): + """ + Evaluate a binned cut as defined in cut_table on given events. + + Events with bin_values outside the bin edges defined in cut table + will be set to False. + + Parameters + ---------- + values: ``~numpy.ndarray`` or ``~astropy.units.Quantity`` + The values on which the cut should be evaluated + cut_table: ``~astropy.table.Table`` + A table describing the binned cuts, e.g. as created by + ``~pyirf.cuts.calculate_percentile_cut``. + Required columns: + - `low`: lower edges of the bins + - `high`: upper edges of the bins, + - `cut`: cut value + op: callable(a, b) -> bool + A function taking two arguments, comparing element-wise and + returning an array of booleans. + Must support vectorized application. + """ + if not isinstance(cut_table, QTable): + raise ValueError('cut_table needs to be an astropy.table.QTable') + + result = np.zeros(len(bin_index), dtype=bool) + result[valid] = op(values[valid], cut_table["cut"][bin_index[valid]]) + return result + + def evaluate_binned_cut(values, bin_values, cut_table, op): """ Evaluate a binned cut as defined in cut_table on given events. @@ -137,10 +168,7 @@ def evaluate_binned_cut(values, bin_values, cut_table, op): bins = np.append(cut_table["low"], cut_table["high"][-1]) bin_index, valid = calculate_bin_indices(bin_values, bins) - - result = np.zeros(len(values), dtype=bool) - result[valid] = op(values[valid], cut_table["cut"][bin_index[valid]]) - return result + return evaluate_binned_cut_by_index(values, bin_index, valid, cut_table, op) def compare_irf_cuts(cuts): From 9bef4fed0c2fdc253da0397919b6fc2f4c071201 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 14 Jul 2023 11:42:18 +0200 Subject: [PATCH 08/12] Fix import --- pyirf/cut_optimization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index e8cab71db..2d99ac0da 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -5,7 +5,8 @@ from tqdm.auto import tqdm import operator -from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut + +from .cuts import evaluate_binned_cut_by_index, calculate_percentile_cut, evaluate_binned_cut from .sensitivity import calculate_sensitivity, estimate_background from .binning import create_histogram_table, bin_center, calculate_bin_indices From 3358d03ea8f890aa47744c6939e0dbbf81d1ddc2 Mon Sep 17 00:00:00 2001 From: JBernete Date: Thu, 12 Dec 2024 15:22:21 +0100 Subject: [PATCH 09/12] Fix a bug in optimize_cuts where looping over a new multiplicity cut was overwriting the gh and theta cuts, instead of saving them separately. --- pyirf/cut_optimization.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index 2d99ac0da..e6e2aeeb9 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -86,6 +86,8 @@ def optimize_cuts( ) with tqdm(total=n_cuts, disable=not progress) as bar: + all_the_gh_cuts = [] + all_the_theta_cuts = [] for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts): signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut @@ -98,6 +100,7 @@ def optimize_cuts( fill_value=fill_value, percentile=gh_cut_percentiles, ) + all_the_gh_cuts.append(gh_cuts) theta_cuts = calculate_percentile_cut( signal['theta'][signal_mask_multiplicity], @@ -108,6 +111,7 @@ def optimize_cuts( min_value=0.02 * u.deg, percentile=100 * theta_cut_efficiencies, ) + all_the_theta_cuts.append(theta_cuts) for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)): @@ -190,9 +194,9 @@ def optimize_cuts( best_sensitivity[bin_id] = sensitivities[best][bin_id] - best_gh_cut["cut"][bin_id] = gh_cuts["cut"][bin_id][gh_index] + best_gh_cut["cut"][bin_id] = all_the_gh_cuts[multiplicity_index]["cut"][bin_id][gh_index] best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index] - best_theta_cut["cut"][bin_id] = theta_cuts["cut"][bin_id][theta_index] + best_theta_cut["cut"][bin_id] = all_the_theta_cuts[multiplicity_index]["cut"][bin_id][theta_index] best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index] best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index] From 16b6e2fecad345a50970150aa5dc414f4e33b89e Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 18 Dec 2024 12:32:11 +0100 Subject: [PATCH 10/12] Rename variable --- pyirf/cut_optimization.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index e6e2aeeb9..9df954ff3 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -85,9 +85,10 @@ def optimize_cuts( background['reco_energy'], reco_energy_bins, ) + gh_cut_grid = [] + theta_cut_grid = [] + with tqdm(total=n_cuts, disable=not progress) as bar: - all_the_gh_cuts = [] - all_the_theta_cuts = [] for multiplicity_index, multiplicity_cut in enumerate(multiplicity_cuts): signal_mask_multiplicity = signal['multiplicity'] >= multiplicity_cut @@ -100,7 +101,7 @@ def optimize_cuts( fill_value=fill_value, percentile=gh_cut_percentiles, ) - all_the_gh_cuts.append(gh_cuts) + gh_cut_grid.append(gh_cuts) theta_cuts = calculate_percentile_cut( signal['theta'][signal_mask_multiplicity], @@ -111,7 +112,7 @@ def optimize_cuts( min_value=0.02 * u.deg, percentile=100 * theta_cut_efficiencies, ) - all_the_theta_cuts.append(theta_cuts) + theta_cut_grid.append(theta_cuts) for gh_index, theta_index in product(range(n_gh_cuts), range(n_theta_cuts)): @@ -194,9 +195,9 @@ def optimize_cuts( best_sensitivity[bin_id] = sensitivities[best][bin_id] - best_gh_cut["cut"][bin_id] = all_the_gh_cuts[multiplicity_index]["cut"][bin_id][gh_index] + best_gh_cut["cut"][bin_id] = gh_cut_grid[multiplicity_index]["cut"][bin_id][gh_index] best_multiplicity_cut["cut"][bin_id] = multiplicity_cuts[multiplicity_index] - best_theta_cut["cut"][bin_id] = all_the_theta_cuts[multiplicity_index]["cut"][bin_id][theta_index] + best_theta_cut["cut"][bin_id] = theta_cut_grid[multiplicity_index]["cut"][bin_id][theta_index] best_gh_cut["efficiency"][bin_id] = gh_cut_efficiencies[gh_index] best_theta_cut["efficiency"][bin_id] = theta_cut_efficiencies[theta_index] From f3902222bda0977e9832129df928075183991fe6 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 18 Dec 2024 12:38:09 +0100 Subject: [PATCH 11/12] Add changelog --- docs/changes/204.feature.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/changes/204.feature.rst diff --git a/docs/changes/204.feature.rst b/docs/changes/204.feature.rst new file mode 100644 index 000000000..62dfb1ef5 --- /dev/null +++ b/docs/changes/204.feature.rst @@ -0,0 +1,3 @@ +Add function ``pyirf.cut_optimization.optimize_cuts`` doing +a full grid search on gammaness, theta and multiplicity cut +to find best point source sensitivity. From d4ac7c57c4e9ee9af01c2e533fc1c299e1020130 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 18 Dec 2024 13:04:38 +0100 Subject: [PATCH 12/12] Fix codacy complaints --- pyirf/cut_optimization.py | 2 +- pyirf/cuts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyirf/cut_optimization.py b/pyirf/cut_optimization.py index 9df954ff3..7b42ee3da 100644 --- a/pyirf/cut_optimization.py +++ b/pyirf/cut_optimization.py @@ -190,7 +190,7 @@ def optimize_cuts( else: # if all are invalid, just use the first one best = 0 - + multiplicity_index, theta_index, gh_index = cut_indicies[best] best_sensitivity[bin_id] = sensitivities[best][bin_id] diff --git a/pyirf/cuts.py b/pyirf/cuts.py index 8422059a7..c3694127f 100644 --- a/pyirf/cuts.py +++ b/pyirf/cuts.py @@ -86,7 +86,7 @@ def calculate_percentile_cut( if percentile.shape == (): cut_table["cut"] = np.asanyarray(fill_value, values.dtype) else: - cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) + cut_table["cut"] = np.full((n_bins, len(percentile)), fill_value, dtype=values.dtype) if unit is not None: cut_table["cut"].unit = unit