Skip to content

Commit

Permalink
Fixing fitter and cleaning prefix directory addition (#833)
Browse files Browse the repository at this point in the history
* Fixing fitter and cleaning prefix directory addition

* fix

* copyright fix

* fix pylint

* fix

---------

Co-authored-by: Luigi Dello Stritto <[email protected]>
  • Loading branch information
DelloStritto and Luigi Dello Stritto authored Nov 21, 2023
1 parent acad7d9 commit f29b4a8
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 85 deletions.
9 changes: 5 additions & 4 deletions machine_learning_hep/analysis/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand All @@ -25,10 +25,11 @@ def __init__(self, datap, case, typean, period):

# The only thing here is to dump the database in the data analysis directory
for mcordata in ("mc", "data"):
prefix_dir_res = datap["mlapplication"][mcordata].get("prefix_dir_res", "")
results_dir = prefix_dir_res + datap["analysis"][typean][mcordata]["results"][period] \
dp = datap["analysis"][typean][mcordata]
prefix_dir_res = dp.get("prefix_dir_res", "")
results_dir = prefix_dir_res + dp["results"][period] \
if period is not None \
else prefix_dir_res + datap["analysis"][typean][mcordata]["resultsallp"]
else prefix_dir_res + dp["resultsallp"]
if not exists(results_dir):
# create output directories in case they do not exist
makedirs(results_dir)
Expand Down
17 changes: 11 additions & 6 deletions machine_learning_hep/analysis/analyzerdhadrons.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand All @@ -20,7 +20,7 @@
# pylint: disable=unused-wildcard-import, wildcard-import
#from array import array
#import itertools
# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
# from root_numpy import hist2array, array2hist
from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
Expand Down Expand Up @@ -54,10 +54,15 @@ def __init__(self, datap, case, typean, period):
self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]
self.triggerbit = datap["analysis"][self.typean].get("triggerbit", "")

self.d_resultsallpmc = datap["analysis"][typean]["mc"]["results"][period] \
if period is not None else datap["analysis"][typean]["mc"]["resultsallp"]
self.d_resultsallpdata = datap["analysis"][typean]["data"]["results"][period] \
if period is not None else datap["analysis"][typean]["data"]["resultsallp"]
dp = datap["analysis"][self.typean]
self.d_prefix_mc = dp["mc"].get("prefix_dir_res")
self.d_prefix_data = dp["data"].get("prefix_dir_res")
self.d_resultsallpmc = self.d_prefix_mc + dp["mc"]["results"][period] \
if period is not None \
else self.d_prefix_mc + dp["mc"]["resultsallp"]
self.d_resultsallpdata = self.d_prefix_data + dp["data"]["results"][period] \
if period is not None \
else self.d_prefix_data + dp["data"]["resultsallp"]

n_filemass_name = datap["files_names"]["histofilename"]
self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,11 @@ LcpKpi:

mlapplication:
data:
prefix_dir_res: /data2/MLhep/
prefix_dir_app: /data2/MLhep/
pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods
pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods
mc:
prefix_dir_res: /data2/MLhep/
prefix_dir_app: /data2/MLhep/
pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmc] #list of periods
pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmcmerged] #list of periods
modelname: xgboost
Expand All @@ -272,9 +272,9 @@ LcpKpi:
xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav,
xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav]
probcutpresel:
data: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] #list of nbins
mc: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] #list of nbins
probcutoptimal: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
data: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
mc: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
probcutoptimal: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] #list of nbins

analysis:
indexhptspectrum: -1 #kD0Kpi=0, kDplusKpipi=1, kDstarD0pi=2, kDsKKpi=3, kLctopKpi=4, kLcK0Sp=5
Expand Down Expand Up @@ -319,22 +319,24 @@ LcpKpi:

data:
runselection: [null, null] #FIXME
prefix_dir_res: /data2/MLhep/
results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods
resultsallp: LHC22pp/Results/resultsdatatot
mc:
runselection: [null] #FIXME
prefix_dir_res: /data2/MLhep/
results: [LHC22pp_mc/Results/prod_LHC22b1b/resultsmc] #list of periods
resultsallp: LHC22pp_mc/Results/prod_LHC22b1b/resultsmctot

mass_fit_lim: [2.14, 2.436] # region for the fit of the invariant mass distribution [GeV/c^2]
bin_width: 0.001 # bin width of the invariant mass histogram
init_fits_from: [mc,mc,mc,mc,mc,mc] # data or mc
sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus]
bkgfunc: [Pol2.Pol2,Pol2,Pol2,Pol2,Pol2]
bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2]
masspeak: 2.286
massmin: [2.14,2.14,2.14,2.14,2.14,2.14]
massmax: [2.436,2.436,2.436,2.436,2.436,2.436]
rebin: [4,4,4,4,4,4]
rebin: [6,6,6,6,6,6]
fix_mean: [false,false,false,false,false,false]
fix_sigma: [false,false,false,false,false,false]
masssecpeak: 0.
Expand All @@ -360,7 +362,7 @@ LcpKpi:
latexbin2var: "n_{trkl}"
nevents: null
dodoublecross: false
dobkgfromsideband: true
dobkgfromsideband: false

systematics:
probvariation:
Expand Down
10 changes: 7 additions & 3 deletions machine_learning_hep/fitting/fitters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand All @@ -24,7 +24,7 @@
from math import sqrt
from ctypes import c_double

# pylint: disable=import-error, no-name-in-module, unused-import
# pylint: disable=import-error, no-name-in-module, unused-import, f-string-without-interpolation
from ROOT import AliHFInvMassFitter, AliVertexingHFUtils, AliHFInvMassMultiTrialFit
from ROOT import TFile, TH1F, TH1D, TF1, TPaveText, TLine, TLegend, TLatex
from ROOT import kBlue, kRed, kGreen, kMagenta, kOrange, kPink, kCyan, kYellow, kBlack
Expand Down Expand Up @@ -497,6 +497,10 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
signif = c_double()
signif_err = c_double()
self.kernel.Significance(n_sigma_signal, signif, signif_err)
bkg = bkg.value
bkg_err = bkg_err.value
signif = signif.value
signif_err = signif_err.value
sig_o_bkg = sig / bkg if bkg > 0. else -1.

root_objects.append(self.add_pave_helper_(0.15, 0.7, 0.48, 0.89, "NDC"))
Expand Down Expand Up @@ -1399,7 +1403,7 @@ def fill_pad(pad, ylims, histos, ref_line=None):
pad.cd()
pad.SetLeftMargin(0.13)
pad.SetRightMargin(0.06)
lim_delta = (ylims[1] - ylims[0])
lim_delta = ylims[1] - ylims[0]
lim_min = ylims[0] - 0.1 * lim_delta
lim_max = ylims[1] + 0.1 * lim_delta
for h in histos:
Expand Down
17 changes: 10 additions & 7 deletions machine_learning_hep/fitting/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand All @@ -20,7 +20,7 @@
from array import array
from ctypes import c_double

#pylint: disable=too-many-lines, too-few-public-methods
#pylint: disable=too-many-lines, too-few-public-methods, consider-using-f-string, too-many-statements
from ROOT import TFile, TH1F, TF1, TCanvas, gStyle #pylint: disable=import-error, no-name-in-module

from machine_learning_hep.logger import get_logger
Expand All @@ -29,7 +29,7 @@
from machine_learning_hep.fitting.utils import save_fit, load_fit
from machine_learning_hep.fitting.fitters import FitAliHF, FitROOTGauss, FitSystAliHF

class MLFitParsFactory: # pylint: disable=too-many-instance-attributes, too-many-statements
class MLFitParsFactory: # pylint: disable=too-many-instance-attributes
"""
Managing MLHEP specific fit parameters and is used to collect and retrieve all information
required to initialise a (systematic) fit
Expand Down Expand Up @@ -670,17 +670,20 @@ def __call__(self, x_var, par):
return 0

if fbkg[ibin1] == "kLin":
fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 2)
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "pol1", fitlim[0], fitlim[1])
elif fbkg[ibin1] == "Pol2":
fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 3)
hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 3)
hmass.Fit("fit_func", '', '', fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "pol2", fitlim[0], fitlim[1])
elif fbkg[ibin1] == "kExpo":
fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 2)
bkgFunc = FitBkg()
fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
pars = fit_func.GetParameters()
bkg_func = TF1("fbkg", "expo", fitlim[0], fitlim[1])
Expand Down
46 changes: 19 additions & 27 deletions machine_learning_hep/multiprocesser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand Down Expand Up @@ -51,20 +51,16 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.dlper_pklsk = []
self.dlper_pklml = []
self.d_prefix = datap["multi"][self.mcordata].get("prefix_dir", "")
self.d_prefix_res = datap["mlapplication"][self.mcordata].get("prefix_dir_res", "")
for s in datap["multi"][self.mcordata]["unmerged_tree_dir"]:
self.dlper_root.append(self.d_prefix + s)
for s in datap["multi"][self.mcordata]["pkl"]:
self.dlper_pkl.append(self.d_prefix + s)
for s in datap["multi"][self.mcordata]["pkl_skimmed"]:
self.dlper_pklsk.append(self.d_prefix + s)
for s in datap["multi"][self.mcordata]["pkl_skimmed_merge_for_ml"]:
self.dlper_pklml.append(self.d_prefix + s)
self.d_pklml_mergedallp = self.d_prefix + \
datap["multi"][self.mcordata]["pkl_skimmed_merge_for_ml_all"]
self.d_pklevt_mergedallp = self.d_prefix + \
datap["multi"][self.mcordata]["pkl_evtcounter_all"]

self.d_prefix_app = datap["mlapplication"][self.mcordata].get("prefix_dir_app", "")
self.d_prefix_res = datap["analysis"][self.typean][self.mcordata].get("prefix_dir_res", "")

dp = datap["multi"][self.mcordata]
self.dlper_root = [self.d_prefix + p for p in dp["unmerged_tree_dir"]]
self.dlper_pkl = [self.d_prefix + p for p in dp["pkl"]]
self.dlper_pklsk = [self.d_prefix + p for p in dp["pkl_skimmed"]]
self.dlper_pklml = [self.d_prefix + p for p in dp["pkl_skimmed_merge_for_ml"]]
self.d_pklml_mergedallp = self.d_prefix + dp["pkl_skimmed_merge_for_ml_all"]
self.d_pklevt_mergedallp = self.d_prefix + dp["pkl_evtcounter_all"]
self.dlper_mcreweights = datap["multi"][self.mcordata]["mcreweights"]

#namefiles pkl
Expand Down Expand Up @@ -101,18 +97,14 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.lper_evtorig = \
[os.path.join(direc, self.n_evtorig) for direc in self.dlper_pkl]

self.dlper_reco_modapp = []
self.dlper_reco_modappmerged = []
self.d_results = []

for s in datap["mlapplication"][self.mcordata]["pkl_skimmed_dec"]:
self.dlper_reco_modapp.append(self.d_prefix_res + s)
for s in datap["mlapplication"][self.mcordata]["pkl_skimmed_decmerged"]:
self.dlper_reco_modappmerged.append(self.d_prefix_res + s)
for s in datap["analysis"][self.typean][self.mcordata]["results"]:
self.d_results.append(self.d_prefix_res + s)
self.d_resultsallp = \
self.d_prefix_res + datap["analysis"][self.typean][self.mcordata]["resultsallp"]
dp = datap["mlapplication"][self.mcordata]
self.dlper_reco_modapp = [self.d_prefix_app + p for p in dp["pkl_skimmed_dec"]]
self.dlper_reco_modappmerged = [self.d_prefix_app + p for p in dp["pkl_skimmed_decmerged"]]

dp = datap["analysis"][self.typean][self.mcordata]
self.d_results = [self.d_prefix_res + p for p in dp["results"]]
self.d_resultsallp = self.d_prefix_res + dp["resultsallp"]

self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"]
self.lpt_probcut = datap["mlapplication"]["probcutoptimal"]
self.f_evt_mergedallp = os.path.join(self.d_pklevt_mergedallp, self.n_evt)
Expand Down
10 changes: 5 additions & 5 deletions machine_learning_hep/processerdhadrons.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#############################################################################
## © Copyright CERN 2018. All rights not expressly granted are reserved. ##
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## Author: [email protected] ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
Expand Down Expand Up @@ -133,7 +133,7 @@ def process_histomass_single(self, index):
h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

fill_hist(h_invmass, df[self.v_invmass].to_numpy())
fill_hist(h_invmass, df[self.v_invmass])
myfile.cd()
h_invmass.Write()

Expand All @@ -149,9 +149,9 @@ def process_histomass_single(self, index):
self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
fill_hist(h_invmass_sig, df_sig[self.v_invmass].to_numpy())
fill_hist(h_invmass_bkg, df_bkg[self.v_invmass].to_numpy())
fill_hist(h_invmass_refl, df_refl[self.v_invmass].to_numpy())
fill_hist(h_invmass_sig, df_sig[self.v_invmass])
fill_hist(h_invmass_bkg, df_bkg[self.v_invmass])
fill_hist(h_invmass_refl, df_refl[self.v_invmass])
myfile.cd()
h_invmass_sig.Write()
h_invmass_bkg.Write()
Expand Down
14 changes: 8 additions & 6 deletions machine_learning_hep/steer_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,20 +140,22 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite
dirpklmltotdata = dirprefixdata + dp["pkl_skimmed_merge_for_ml_all"]

dp = data_param[case]["mlapplication"]["mc"]
dirprefixmcres = dp.get("prefix_dir_res", "")
dirpklskdecmc = [dirprefixmcres + p for p in dp["pkl_skimmed_dec"]]
dirpklskdec_mergedmc = [dirprefixmcres + p for p in dp["pkl_skimmed_decmerged"]]
dirprefixmcapp = dp.get("prefix_dir_app", "")
dirpklskdecmc = [dirprefixmcapp + p for p in dp["pkl_skimmed_dec"]]
dirpklskdec_mergedmc = [dirprefixmcapp + p for p in dp["pkl_skimmed_decmerged"]]

dp = data_param[case]["mlapplication"]["data"]
dirprefixdatares = dp.get("prefix_dir_res", "")
dirpklskdecdata = [dirprefixdatares + p for p in dp["pkl_skimmed_dec"]]
dirpklskdec_mergeddata = [dirprefixdatares + p for p in dp["pkl_skimmed_decmerged"]]
dirprefixdataapp = dp.get("prefix_dir_app", "")
dirpklskdecdata = [dirprefixdataapp + p for p in dp["pkl_skimmed_dec"]]
dirpklskdec_mergeddata = [dirprefixdataapp + p for p in dp["pkl_skimmed_decmerged"]]

dp = data_param[case]["analysis"][typean]["data"]
dirprefixdatares = dp.get("prefix_dir_res", "")
dirresultsdata = [dirprefixdatares + p for p in dp["results"]]
dirresultsdatatot = dirprefixdatares + dp["resultsallp"]

dp = data_param[case]["analysis"][typean]["mc"]
dirprefixmcres = dp.get("prefix_dir_res", "")
dirresultsmc = [dirprefixmcres + p for p in dp["results"]]
dirresultsmctot = dirprefixmcres + dp["resultsallp"]

Expand Down
Loading

0 comments on commit f29b4a8

Please sign in to comment.