
Commit

fix
Luigi Dello Stritto committed Nov 5, 2024
1 parent fe563e2 commit cb7f08c
Showing 2 changed files with 24 additions and 177 deletions.
144 changes: 6 additions & 138 deletions machine_learning_hep/analysis/analyzerdhadrons_mult.py
@@ -56,7 +56,6 @@ def __init__(self, datap, case, typean, period):
self.lvar2_binmax = datap["analysis"][self.typean]["sel_binmax2"]
self.v_var2_binning = datap["analysis"][self.typean]["var_binning2"]
self.v_var2_binning_gen = datap["analysis"][self.typean]["var_binning2_gen"]
self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
self.p_nbin2 = len(self.lvar2_binmin)

dp = datap["analysis"][typean]
@@ -126,12 +125,6 @@ def __init__(self, datap, case, typean, period):
self.p_cctype = datap["analysis"]["cctype"]
self.p_sigmamb = datap["analysis"]["sigmamb"]
self.p_inputfonllpred = datap["analysis"]["inputfonllpred"]
self.p_triggereff = datap["analysis"][self.typean].get("triggereff", [1] * 10)
self.p_triggereffunc = datap["analysis"][self.typean].get("triggereffunc", [0] * 10)

self.apply_weights = \
datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None) \
is not None
self.root_objects = []

self.get_crossmb_from_path = datap["analysis"][self.typean].get("get_crossmb_from_path", \
@@ -319,9 +312,8 @@ def fit(self):
if self.cfg('mass_fit'):
fit_res, _, func_bkg = self._fit_mass(
h_invmass,
f'fit/'
f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_\
{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
f'fit/h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'
f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
if fit_res and fit_res.Get() and fit_res.IsValid():
self.fit_mean[level][ipt] = fit_res.Parameter(1)
self.fit_sigma[level][ipt] = fit_res.Parameter(2)
@@ -357,8 +349,8 @@ def fit(self):
roo_res, roo_ws = self._roofit_mass(
level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
f'roofit/mult_{multrange[0]}-{multrange[1]}/'
f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_\
{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'\
f'_{self.v_var2_binning}-{multrange[0]}-{multrange[1]}_{level}.png')
# if level == 'mc':
# roo_ws.Print()
self.roo_ws[level][ipt] = roo_ws
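
For reference, the reason the old path strings were broken: a backslash continuation inside an f-string literal keeps the following source line, including its indentation, as part of the string, so the saved file names contained a run of spaces. The new code relies on implicit concatenation of adjacent f-string literals instead. A minimal standalone sketch of the difference (the pt range and binning name are illustrative, not taken from the analysis config):

ptrange = (2, 4)  # illustrative values

# Old style: backslash continuation inside the literal pulls in the
# next line's leading whitespace.
broken = f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}_\
    n_tracklets-1-9.png'
print(repr(broken))  # 'h_mass_fitted_pthf-2-4_    n_tracklets-1-9.png'

# New style: adjacent f-string literals are concatenated with no stray
# whitespace in the result.
fixed = (f'h_mass_fitted_pthf-{ptrange[0]}-{ptrange[1]}'
         f'_n_tracklets-1-9.png')
print(repr(fixed))  # 'h_mass_fitted_pthf-2-4_n_tracklets-1-9.png'
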
@@ -634,9 +626,6 @@ def makenormyields(self): # pylint: disable=import-outside-toplevel, too-many-br
nameyield = "hyields%d" % imult
fileoutcrossmult = "%s/finalcross%s%smult%d.root" % \
(self.d_resultsallpdata, self.case, self.typean, imult)
labeltrigger = "hbit%svs%s" % (self.triggerbit, self.v_var2_binning_gen)
if self.apply_weights is True:
labeltrigger = labeltrigger + "_weight"

#Bin1 is all events. Bin2 is all sel events. Mult bins start from Bin3.
norm = histonorm.GetBinContent(imult + 3)
@@ -747,126 +736,5 @@ def plotternormyields(self):
cCrossvsvar1.SaveAs("%s/CorrectedYieldsNorm%s%sVs%s.eps" % (self.d_resultsallpdata,
self.case, self.typean,
self.v_var_binning))
def plottervalidation(self):
if self.p_performval is False:
self.logger.fatal(
"The validation step was set to false, so the histograms \
needed for the validation stage have not been produced. \
Please rerun the histomass step"
)
self.logger.info("I AM RUNNING THE PLOTTER VALIDATION STEP")
# All the input files can be found in self.n_filemass. At the
# moment we don't run tests on the MC file, which would in any
# case be self.n_filemass_mc. This function runs either on the
# single merged LHC16, LHC17, LHC18 file or on the separate years,
# depending on how the doperperiod option is set in the
# default_complete.yml database.

def do_validation_plots(input_file_name,
output_path,
ismc=False,
pileup_fraction=True,
tpc_tof_me=True):
gROOT.SetBatch(True)

input_file = TFile(input_file_name, "READ")
if not input_file or not input_file.IsOpen():
self.logger.fatal("Did not find file %s", input_file.GetName())

def get_histo(namex, namey=None, tag="", strictly_require=True):
"""
Gets a histogram from a file
"""
h_name = f"hVal_{namex}"
if namey:
h_name += f"_vs_{namey}"
h_name += tag
h = input_file.Get(h_name)
if not h:
if strictly_require:
input_file.ls()
self.logger.fatal(
"Did not find %s in file %s", h_name, input_file.GetName()
)
else:
self.logger.warning(
"Did not find %s in file %s", h_name, input_file.GetName()
)
return None
return h

def do_plot(histo):
"""
Plots the histogram in a new canvas, if it is a TH2, it also plots the profile.
The canvas has the same name as the histogram and it is saved to the output_path
"""
canvas = TCanvas(histo.GetName(), histo.GetName())
profile = None
histo.Draw("COLZ")
if "TH2" in histo.ClassName():
if "nsig" in histo.GetYaxis().GetTitle():
histo.GetYaxis().SetRangeUser(-100, 100)
profile = histo.ProfileX(histo.GetName() + "_profile")
profile.SetLineWidth(2)
profile.SetLineColor(2)
profile.Draw("same")
gPad.SetLogz()
gPad.Update()
save_root_object(canvas, path=output_path)

# Fraction of pileup events
if pileup_fraction:
hnum = get_histo("n_tracklets_corr", tag="pileup", strictly_require=False)
if hnum is not None:
hnum.SetName(hnum.GetName() + "_eventfraction")
hden = get_histo("n_tracklets_corr", tag="_EvtSel")
hnum.Divide(hnum, hden)
hnum.GetYaxis().SetTitle("Fraction of events")
do_plot(hnum)

def plot_tpc_tof_me(tag):
# Compute TPC-TOF matching efficiency
if tpc_tof_me:
to_plot = [["Pi", "K", "Pr"],
["0", "1"],
["p_prong0", "pt_prong0", "pt_cand"]
]
for spec, prong, observable in itertools.product(*to_plot):
hname = [f"{observable}",
f"nsigTOF_{spec}_{prong}", tag]
hnum = get_histo(*hname,
strictly_require=False)
if hnum is None:
continue
hnum = hnum.ProjectionX(
hnum.GetName() + "_num", 2, -1)
hden = get_histo(*hname)
hden = hden.ProjectionX(
hden.GetName() + "_den")
hnum.Divide(hnum, hden, 1, 1, "B")
hnum.SetName(
hnum.GetName().replace(
"_num", "_TPC-TOF_MatchingEfficiency"
)
)
hnum.GetYaxis().SetTitle("TPC-TOF_MatchingEfficiency")
do_plot(hnum)

plot_tpc_tof_me(tag="")
# Part dedicated to MC Checks
if ismc:
plot_tpc_tof_me(tag="MC")

# Plot all other validation histograms
for i in range(0, input_file.GetListOfKeys().GetEntries()):
key_name = input_file.GetListOfKeys().At(i).GetName()
if not key_name.startswith("hVal_"):
continue
do_plot(input_file.Get(key_name))

input_file.Close()

do_validation_plots(self.n_filemass, self.d_resultsallpdata)
do_validation_plots(self.n_filemass_mc,
self.d_resultsallpmc, ismc=True)

fileoutcrosstot.Close()
57 changes: 18 additions & 39 deletions machine_learning_hep/processerdhadrons_mult.py
Expand Up @@ -22,7 +22,7 @@
import os
import numpy as np
import pandas as pd
from ROOT import TFile, TH1F
from ROOT import TFile, TH1F, TH2F
from machine_learning_hep.utilities_files import create_folder_struc
from machine_learning_hep.utilities import seldf_singlevar, seldf_singlevar_inclusive
from machine_learning_hep.utilities import mergerootfiles, read_df
@@ -66,25 +66,12 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
self.mc_cut_on_binning2 = datap["analysis"][self.typean].get("mc_cut_on_binning2", True)

self.bin_matching = datap["analysis"][self.typean]["binning_matching"]
#self.sel_final_fineptbins = datap["analysis"][self.typean]["sel_final_fineptbins"]
self.s_evtsel = datap["analysis"][self.typean]["evtsel"]
self.event_cand_validation = datap["analysis"][self.typean].get("event_cand_validation", "")
if "event_cand_validation" not in datap["analysis"][self.typean]:
self.event_cand_validation = False
self.usetriggcorrfunc = \
datap["analysis"][self.typean]["triggersel"].get("usetriggcorrfunc", None)
self.weightfunc = None
self.weighthist = None
if self.usetriggcorrfunc is not None and self.mcordata == "data":
filename = os.path.join(self.d_mcreweights, "trigger%s.root" % self.typean)
if os.path.exists(filename):
weight_file = TFile.Open(filename, "read")
self.weightfunc = weight_file.Get("func%s_norm" % self.typean)
self.weighthist = weight_file.Get("hist%s_norm" % self.typean)
self.weighthist.SetDirectory(0)
weight_file.Close()
else:
print("trigger correction file", filename, "doesnt exist")
self.nbinshisto = datap["analysis"][self.typean]["nbinshisto"]
self.minvaluehisto = datap["analysis"][self.typean]["minvaluehisto"]
self.maxvaluehisto = datap["analysis"][self.typean]["maxvaluehisto"]
@@ -93,6 +80,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
# Event re-weighting MC
self.event_weighting_mc = datap["analysis"][self.typean].get("event_weighting_mc", {})
self.event_weighting_mc = self.event_weighting_mc.get(self.period, {})
self.v_var2_binning_weights = datap["analysis"][self.typean].get("var_binning2_weights")

@staticmethod
def make_weights(col, func, hist, use_func):
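
The new attribute read in __init__ above comes from an optional var_binning2_weights key in the analysis section of the YAML database, next to the existing var_binning2 / var_binning2_gen and histogram-range settings. A minimal sketch of that block as the Python dict datap["analysis"][typean] would hold after parsing (only keys that appear in this diff are shown; the values are illustrative, not taken from the repository):

analysis_block = {
    "var_binning2": "n_tracklets_corr",         # second analysis variable (multiplicity estimator)
    "var_binning2_gen": "n_tracklets_corr",     # estimator filled in the per-event histograms
    "var_binning2_weights": "n_tracklets_corr", # optional: estimator for the new weight histograms
    "nbinshisto": 200,
    "minvaluehisto": -0.5,
    "maxvaluehisto": 199.5,
}

# Mirrors the .get() above: if the key is absent the attribute is None
# and the new weight histograms are simply not booked.
v_var2_binning_weights = analysis_block.get("var_binning2_weights")
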
@@ -189,20 +177,11 @@ def process_histomass_single(self, index):
self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
#h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
# self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
fill_hist(h_invmass, df_bin[self.v_invmass])
#if self.usetriggcorrfunc is not None and self.mcordata == "data":
# weights = self.make_weights(df_bin[self.v_var2_binning_gen], self.weightfunc,
# self.weighthist, self.usetriggcorrfunc)

# weightsinv = [1./weight for weight in weights]
# fill_hist(h_invmass_weight, df_bin[self.v_invmass], weights=weightsinv)
myfile.cd()
h_invmass.Write()
#h_invmass_weight.Write()

if self.mcordata == "mc":
df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
@@ -219,6 +198,22 @@ def process_histomass_single(self, index):
fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
histomult.Write()

if self.v_var2_binning_weights is not None:
label = "h%s" % self.v_var2_binning_weights
histomult_weights = TH1F(label, label, self.nbinshisto,
self.minvaluehisto, self.maxvaluehisto)
fill_hist(histomult_weights, dfevtevtsel[self.v_var2_binning_weights])

label = "h%s_%s" % (self.v_var2_binning_weights, self.v_var2_binning_gen)
histomult_weights_2d = TH2F(label, label,
self.nbinshisto, self.minvaluehisto, self.maxvaluehisto,
self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)
fill_hist(histomult_weights_2d, dfevtevtsel[[self.v_var2_binning_weights, self.v_var2_binning_gen]])

histomult_weights.Write()
histomult_weights_2d.Write()


def get_reweighted_count(self, dfsel, ibin=None):
"""Apply event weights
@@ -298,16 +293,8 @@ def make_histo(name, title,
"Prompt Generated in acceptance |y|<0.5")
h_presel_pr = make_histo("h_presel_pr",
"Prompt Reco in acc |#eta|<0.8 and sel")
h_presel_pr_wotof = make_histo("h_presel_pr_wotof",
"Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel")
h_presel_pr_wtof = make_histo("h_presel_pr_wtof",
"Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel")
h_sel_pr = make_histo("h_sel_pr",
"Prompt Reco and sel in acc |#eta|<0.8 and sel")
h_sel_pr_wotof = make_histo("h_sel_pr_wotof",
"Prompt Reco and sel woTOF in acc |#eta|<0.8")
h_sel_pr_wtof = make_histo("h_sel_pr_wtof",
"Prompt Reco and sel wTOF in acc |#eta|<0.8")
h_gen_fd = make_histo("h_gen_fd",
"FD Generated in acceptance |y|<0.5")
h_presel_fd = make_histo("h_presel_fd",
@@ -365,16 +352,8 @@ def set_content(df_to_use, histogram,
histogram.SetBinError(b_c + 1, err)

set_content(df_gen_sel_pr, h_gen_pr)
if "nsigTOF_Pr_0" in df_reco_presel_pr:
set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 < -998],
h_presel_pr_wotof)
set_content(df_reco_presel_pr[df_reco_presel_pr.nsigTOF_Pr_0 > -998],
h_presel_pr_wtof)
set_content(df_reco_presel_pr, h_presel_pr)
set_content(df_reco_sel_pr, h_sel_pr)
if "nsigTOF_Pr_0" in df_reco_sel_pr:
set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 < -998], h_sel_pr_wotof)
set_content(df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 > -998], h_sel_pr_wtof)
set_content(df_gen_sel_fd, h_gen_fd)
set_content(df_reco_presel_fd, h_presel_fd)
set_content(df_reco_sel_fd, h_sel_fd)
