From e8aed35eb7c6a8559aa04785a3c67f7403ed7c70 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Thu, 5 Sep 2024 13:39:27 +0200 Subject: [PATCH] Fix feeddown folding --- .../analysis/analyzer_jets.py | 15 ++++++------ machine_learning_hep/utils/hist.py | 24 +++++++++++++++---- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index a61906c4a8..3773e8a1f8 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -226,7 +226,7 @@ def calculate_efficiencies(self): # gen-level efficiency for feeddown estimation h_eff_gen = h_genmatch[cat].Clone() h_eff_gen.Divide(h_gen[cat]) - self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png') + self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png', 'text') self.hcandeff_gen[cat] = h_eff_gen # matching loss @@ -947,7 +947,6 @@ def estimate_feeddown(self): f';p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}', bins_ptjet, self.bins_candpt, bins_obs[var]) fill_hist_fast(h3_fd_gen_orig, df[['pt_jet', 'pt_cand', f'{colname}']]) - self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png') # new method h3_fd_gen = h3_fd_gen_orig.Clone() @@ -955,10 +954,11 @@ def estimate_feeddown(self): self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen.png') # apply np efficiency for ipt in range(get_nbins(h3_fd_gen, 1)): - eff_np = self.hcandeff_gen['np'].GetBinContent(ipt+1) - for iptjet, ishape in itertools.product( - range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))): - scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1) + for iptjet in range(get_nbins(h3_fd_gen, 0)): + eff_np = self.hcandeff_gen['np'].GetBinContent(iptjet + 1, ipt + 1) + print(f'scaling with {eff_np=}', flush=True) + for ishape in range(get_nbins(h3_fd_gen, 2)): + scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1) self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_geneff.png') # 3d folding incl. kinematic efficiencies @@ -987,14 +987,15 @@ def estimate_feeddown(self): for iptjet, ishape in itertools.product( range(get_nbins(h3_fd_det, 0)), range(get_nbins(h3_fd_det, 2))): scale_bin(h3_fd_det, 1./eff_pr, iptjet+1, ipt+1, ishape+1) - self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det_deteff.png') # project to 2d (ptjet-shape) h_fd_det = project_hist(h3_fd_det, [0, 2], {}) + self._save_hist(h_fd_det, f'fd/h_ptjet-{var}_fdnew_det_deteff.png') # old method h3_fd_gen = h3_fd_gen_orig.Clone() ensure_sumw2(h3_fd_gen) + self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png') for ipt in range(get_nbins(h3_fd_gen, 1)): eff_pr = self.hcandeff['pr'].GetBinContent(ipt+1) eff_np = self.hcandeff['np'].GetBinContent(ipt+1) diff --git a/machine_learning_hep/utils/hist.py b/machine_learning_hep/utils/hist.py index 1bbe330320..db7f63ba09 100644 --- a/machine_learning_hep/utils/hist.py +++ b/machine_learning_hep/utils/hist.py @@ -293,13 +293,19 @@ def norm_response(response, dim_out): for bin_in in itertools.product(*(range(1, get_nbins(response_norm, iaxis) + 1) for iaxis in range(dim_out, get_dim(response_norm)))): for iaxis, val in enumerate(bin_in, dim_out): - get_axis(response_norm, iaxis).SetRange(val, val) - norm = response_norm.Projection(0).Integral() + get_axis(response, iaxis).SetRange(val, val) + # norm = response.Projection(0).Integral() + norm = 0. + for bin_out in itertools.product(*(range(0, get_nbins(response, i)+2) for i in range(dim_out))): + norm += get_bin_val(response, bin_out + bin_in) if np.isclose(norm, 0.): continue - for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i)+1) for i in range(dim_out))): - set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response_norm, bin_out + bin_in) / norm) - set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response_norm, bin_out + bin_in) / norm) + total = 0. + for bin_out in itertools.product(*(range(0, get_nbins(response_norm, i)+2) for i in range(dim_out))): + set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm) + set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm) + total += get_bin_val(response_norm, bin_out + bin_in) + print(f'distributed {bin_in=} to {total=} counts') return response_norm @@ -309,10 +315,18 @@ def fold_hist(hist, response): dim_out = get_dim(response) - get_dim(hist) axes_spec = list(np.array(get_axis(response, i).GetXbins(), 'd') for i in range(dim_out)) hfold = create_hist('test', 'test', *axes_spec) + # TODO: setup axes + for bin_in in itertools.product(*(range(1, get_nbins(hist, i)+1) for i in range(get_dim(hist)))): + total = 0. + for bin_out in itertools.product(*(range(1, get_nbins(hfold, i)+1) for i in range(get_dim(hfold)))): + total += get_bin_val(response, bin_out + bin_in) + print(f'redistributed {bin_in=} to {total=} counts') + for bin_out in itertools.product(*(range(1, get_nbins(hfold, i)+1) for i in range(get_dim(hfold)))): val = 0. err = 0. for bin_in in itertools.product(*(range(1, get_nbins(hist, i)+1) for i in range(get_dim(hist)))): + print(f'{bin_out=} collecting {bin_in=}') val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in) err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2 set_bin_val(hfold, bin_out, val)