Skip to content

Commit

Permalink
Prepare for Run 3 analysis (#816)
Browse files Browse the repository at this point in the history
* Prepare for run 3 analysis incl. HF jets

* Make pylint happier
  • Loading branch information
qgp authored Aug 4, 2023
1 parent 98110e4 commit ddb5106
Show file tree
Hide file tree
Showing 12 changed files with 952 additions and 118 deletions.
1 change: 0 additions & 1 deletion machine_learning_hep/analysis/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from machine_learning_hep.workflow.workflow_base import WorkflowBase
from machine_learning_hep.io import dump_yaml_from_dict


class Analyzer(WorkflowBase):
def __init__(self, datap, case, typean, period):
super().__init__(datap, case, typean, period)
Expand Down
57 changes: 57 additions & 0 deletions machine_learning_hep/analysis/analyzer_jets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#############################################################################
## © Copyright CERN 2023. All rights not expressly granted are reserved. ##
## ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
## Free Software Foundation, either version 3 of the License, or (at your ##
## option) any later version. This program is distributed in the hope that ##
## it will be useful, but WITHOUT ANY WARRANTY; without even the implied ##
## warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ##
## See the GNU General Public License for more details. ##
## You should have received a copy of the GNU General Public License ##
## along with this program. If not, see <https://www.gnu.org/licenses/>. ##
#############################################################################

import os
import munch # pylint: disable=import-error, no-name-in-module
from ROOT import TFile # pylint: disable=import-error, no-name-in-module

from machine_learning_hep.analysis.analyzer import Analyzer

class AnalyzerJets(Analyzer):
    """Analyzer for heavy-flavour jet analyses (Run 3 preparation).

    Resolves the input/output directories and file paths for the given
    analysis configuration and provides basic quality-assurance checks
    on the processor output.
    """
    # identifier used by the workflow machinery to select this analyzer type
    species = "analyzer"

    def __init__(self, datap, case, typean, period):
        """Set up configuration, directories and input-file paths.

        Args:
            datap: full analysis parameter dictionary (parsed database YAML)
            case: analysis case name
            typean: analysis type key into datap["analysis"]
            period: data-taking period index, or None for the merged
                all-period results
        """
        super().__init__(datap, case, typean, period)

        # attribute-style access to the configuration; cfg.ana points at
        # the section for this specific analysis type
        self.cfg = munch.munchify(datap)
        self.cfg.ana = munch.munchify(datap).analysis[typean]

        # output directories (per-period, or merged when period is None)
        self.d_resultsallpmc = datap["analysis"][typean]["mc"]["results"][period] \
            if period is not None else datap["analysis"][typean]["mc"]["resultsallp"]
        self.d_resultsallpdata = datap["analysis"][typean]["data"]["results"][period] \
            if period is not None else datap["analysis"][typean]["data"]["resultsallp"]

        # input directories (processor output); currently identical to the
        # output directories
        self.d_resultsallpmc_proc = self.d_resultsallpmc
        self.d_resultsallpdata_proc = self.d_resultsallpdata

        # input files: invariant-mass histograms (data and MC),
        # efficiency file and response file (MC only)
        n_filemass_name = datap["files_names"]["histofilename"]
        self.n_filemass = os.path.join(self.d_resultsallpdata_proc, n_filemass_name)
        self.n_filemass_mc = os.path.join(self.d_resultsallpmc_proc, n_filemass_name)
        self.n_fileeff = os.path.join(self.d_resultsallpmc_proc,
                                      datap["files_names"]["efffilename"])
        self.n_fileresp = os.path.join(self.d_resultsallpmc_proc,
                                       datap["files_names"]["respfilename"])

    def qa(self): # pylint: disable=too-many-branches, too-many-locals
        """Run basic quality assurance on the data mass file.

        Reads the event-normalisation histogram and reports the number of
        selected events.
        """
        self.logger.info("Running D0 jet qa")

        with TFile(self.n_filemass) as rfile:
            histonorm = rfile.Get("histonorm")
            if not histonorm:
                self.logger.critical('histonorm not found')
                # bail out: calling GetBinContent on a null ROOT object
                # below would crash if the logger does not abort here
                return
            p_nevents = histonorm.GetBinContent(1)
            self.logger.debug(f'Number of selected event: {p_nevents}')
8 changes: 3 additions & 5 deletions machine_learning_hep/analysis/analyzer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def initialize(self):
self.is_initialized = True


def analyze(self, *ana_steps):
def analyze(self, ana_steps):
"""
    Given a list of analyzers and analysis steps, do each step for each analyzer
Args:
Expand All @@ -96,10 +96,8 @@ def analyze(self, *ana_steps):

self.initialize()

self.logger.info("Run all registered analyzers of type %s for following analysis steps",
self.ana_class.__name__)
for step in ana_steps:
print(f" -> {step}")
self.logger.info("Run all registered analyzers of type %s for following analysis steps: %s",
self.ana_class.__name__, ana_steps)

# Collect potentially failed systematic steps
failed_steps = []
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ LcpKpi:
xlim:
- 0
- 0.0001

files_names:
namefile_unmerged_tree: AnalysisResults_trees.root
namefile_reco: AnalysisResultsReco.pkl
Expand Down Expand Up @@ -196,6 +197,7 @@ LcpKpi:
pkl_skimmed_merge_for_ml_all: /data2/MLhep/prod_LHC22b1b_MC/mltotmc
pkl_evtcounter_all: /data2/MLhep/prod_LHC22b1b_MC/evttotmc
mcreweights: [../Analyses]

ml:
evtsel: null
triggersel:
Expand Down
10 changes: 5 additions & 5 deletions machine_learning_hep/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@ class MLLoggerFormatter(logging.Formatter):

level_map = {
logging.DEBUG: (None, 'blue', False),
logging.INFO: (None, 'black', False),
logging.INFO: (None, 'green', False),
logging.WARNING: (None, 'yellow', False),
logging.ERROR: (None, 'red', False),
logging.ERROR: (None, 'orange', False),
logging.CRITICAL: ('red', 'white', True),
}
csi = '\x1b['
reset = '\x1b[0m'

# Define default format string
def __init__(self, fmt='%(levelname)s in %(pathname)s:%(lineno)d:\n%(message)s',
def __init__(self, fmt='%(levelname)s in %(pathname)s:%(lineno)d:\n%(message)s',
datefmt=None, style='%', color=False):
logging.Formatter.__init__(self, fmt, datefmt, style)
self.color = color
Expand Down Expand Up @@ -102,7 +102,7 @@ def configure_logger(debug, logfile=None):
logger.setLevel(logging.INFO)

sh = logging.StreamHandler()
formatter = MLLoggerFormatter(color=lambda : getattr(sh.stream, 'isatty', None)) # pylint: disable=C0326
formatter = MLLoggerFormatter(color=lambda : getattr(sh.stream, 'isatty', None))

sh.setFormatter(formatter)
logger.addHandler(sh)
Expand All @@ -123,5 +123,5 @@ def get_logger():
"""
Get the global logger for this package and set handler together with formatters.
"""
configure_logger(False, None)
# configure_logger(False, None)
return logging.getLogger("MachinelearningHEP")
5 changes: 5 additions & 0 deletions machine_learning_hep/multiprocesser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
from machine_learning_hep.processer import Processer # pylint: disable=unused-import
from machine_learning_hep.utilities import merge_method, mergerootfiles, get_timestamp_string
from machine_learning_hep.io import parse_yaml, dump_yaml_from_dict
from machine_learning_hep.logger import get_logger

class MultiProcesser: # pylint: disable=too-many-instance-attributes, too-many-statements
species = "multiprocesser"
logger = get_logger()

def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
self.case = case
self.datap = datap
Expand Down Expand Up @@ -174,6 +178,7 @@ def multi_histomass(self):
if self.p_useperiod[indexp] == 1:
self.process_listsample[indexp].process_histomass()
tmp_merged = f"/data/tmp/hadd/{self.case}_{self.typean}/mass/{get_timestamp_string()}/"
self.logger.debug('merging all')
mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged)

def multi_efficiency(self):
Expand Down
Loading

0 comments on commit ddb5106

Please sign in to comment.