From 4247ca85a42928326c35f36dd1d7cc2807b0ba63 Mon Sep 17 00:00:00 2001 From: gputnam Date: Fri, 15 Mar 2024 13:07:51 -0500 Subject: [PATCH] Python modules for applying weights. Bash scripts for generating files. --- .../pyana/dimuon-tools/combine_cohlike.py | 51 +++ .../pyana/dimuon-tools/makedf/reweight_coh.py | 91 ++++ .../dimuon-tools/nb/add_detsyst_2spectrum.py | 79 ++++ .../pyana/dimuon-tools/nb/add_detsyst_all.sh | 6 + .../pyana/dimuon-tools/nb/add_dfs.py | 11 + .../pyana/dimuon-tools/nb/add_dfs_fixpot.py | 17 + .../pyana/dimuon-tools/nb/blindemail.py | 34 ++ .../pyana/dimuon-tools/nb/branches.py | 130 ++++++ .../pyana/dimuon-tools/nb/make_spectrum_df.py | 407 ++++++++++++++++++ .../pyana/dimuon-tools/nb/makefakedata.py | 37 ++ .../pyana/dimuon-tools/nb/regen_spectra.sh | 58 +++ .../pyana/dimuon-tools/nb/reweight_coh.py | 112 +++++ .../pyana/dimuon-tools/nb/timewgt.py | 15 + .../pyana/dimuon-tools/nb/track_splitting.py | 211 +++++++++ .../pyana/dimuon-tools/rundfs.sh | 13 + 15 files changed, 1272 insertions(+) create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/branches.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/make_spectrum_df.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/makefakedata.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/regen_spectra.sh create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/reweight_coh.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/timewgt.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/track_splitting.py create mode 100644 sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/rundfs.sh diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py new file mode 100644 index 00000000..fda1eeb2 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py @@ -0,0 +1,51 @@ +import argparse +import pandas as pd +import numpy as np +import sys + +FNAL_COHLIKE_FILES = [ + "/icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df", + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-E_evt.df", + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-F_evt.df", + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-G_evt.df", + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-H_evt.df", +] + +POLARIS_COHLIKE_FILES = [ + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod2_evt.df", + "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod3_evt.df", +] + +OUTPUT = "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-all_nowgt_evt.df" + +# 
Neutrinos per POT, extracted from one of the coh-like files +NU_PER_POT_BASE = 1.0448039251186124e-16 + +NAMES = ["mch", "evt", "hdr", "mcnu"] +with pd.HDFStore(OUTPUT) as hdf: + for n in NAMES: + df = None + offset = 0 + for f in FNAL_COHLIKE_FILES: + thisdf = pd.read_hdf(f, n) + if thisdf.shape[0] > 0: + thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0) + if df is None: + df = thisdf + else: + df = pd.concat([df, thisdf]) + print(f, n) + offset += 1000000 + + for f in POLARIS_COHLIKE_FILES: + thisdf = pd.read_hdf(f, n) + if n == "hdr": # fix the POT for the polaris files + thisdf.pot = 1 / NU_PER_POT_BASE + if thisdf.shape[0] > 0: + thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0) + df = pd.concat([df, thisdf]) + del thisdf + print(f, n) + offset += 1000000 + + hdf.put(key=n, value=df, format="fixed") diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py new file mode 100644 index 00000000..60564183 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py @@ -0,0 +1,91 @@ +import uproot +import numpy as np +import pandas as pd +from scipy import linalg +import util + +np.random.seed(525600) # moments so dear +NUNIV = 1000 + +# Fit configuration file +fname = "/icarus/data/users/gputnam/AScale0_fix.min.root" +f = uproot.open(fname) + +# Load the configuration +covariance = f["covariance"].to_numpy()[0] +fit_result = f["fit_dials"].to_numpy()[0] +NVAR = fit_result.size + +# Hard code "default" uncertainty +default_uncertainty = np.array([0.5, 0.9, 0.65, 0.5, 0.5, 0.5, 0.5, 1, 1]) + +# error on A-scaling is set separately +Aerr = 0.91393 + +# add in error on A to covariance +covariance = covariance + np.diag(np.concatenate(([Aerr], np.zeros((NVAR-1,))))) + +# Generate the random throws for each parameter value + +# The GENIE reweight convention is that the values and error are reported relative to the +# "default uncertainty" in the fit, so we need to scale by that +rands = np.random.randn(NUNIV, NVAR) +SAMPLES = 1 + (rands@linalg.cholesky(covariance) + fit_result)*default_uncertainty + +# compute the A-scaling shift in each universe and the CV +A_samples = SAMPLES[:, 0] +Aexp_nom = 4/3. 
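+# Illustration of the machinery above: scipy's linalg.cholesky returns the
+# upper-triangular factor U with U.T @ U == covariance, so rands @ U gives
+# correlated Gaussian throws about the best-fit dials. A_samples then varies
+# the exponent of the assumed A**(4/3) scaling of the coherent cross section,
+# and A_scale_UNIV below is each universe's carbon-to-argon extrapolation
+# relative to the nominal exponent.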
+Ar_A = 40 # mass number of argon
+C_A = 12
+Aexp_UNIV = Aexp_nom*A_samples
+
+A_scale_UNIV = (Ar_A/C_A)**Aexp_UNIV / ((Ar_A/C_A)**Aexp_nom)
+
+# Compute the pion scaling
+
+# hard-code pion energy bins in x-sec reweighting
+PION_E_BINS = [0, 0.25, 0.5, 0.75, 1, 1.5, 2, 3, 6]
+
+PION_E_SCALES = SAMPLES[:, 1:].T
+PION_E_SCALES[:4] *= A_scale_UNIV
+
+MEAN_SCALES = np.mean(PION_E_SCALES, axis=1)
+
+# Also add in an MA associated weight
+MA_nom = 1
+MA_unc = 0.3
+MA_UNIV = MA_nom + MA_unc*np.random.randn(NUNIV)
+def Faxial(Q2, M):
+    return M**2 / (Q2 + M**2)
+
+# And an additive uncertainty on the |t| constant
+b_const_shift = 20 # 1/GeV^2
+b_UNIV = b_const_shift*np.random.randn(NUNIV)
+
+def coht(mcdf):
+    nuclE = mcdf.E - mcdf.p0.genE - mcdf.p1.genE
+    nuclP = mcdf.nu.P - mcdf.p0.genp - mcdf.p1.genp
+    t = np.abs(nuclE**2 - util.magdf(nuclP)**2)
+    return t
+
+PION_MASS = 0.139570
+def cohweight(mcdf, Q2wgt=True, bshiftwgt=True):
+    wgts = []
+    cccoh = (np.abs(mcdf.pdg) == 14) & (mcdf.iscc == 1) & (mcdf.genie_mode == 3)
+    pionKE = mcdf.p1.genE - PION_MASS
+
+    piwgt = pd.cut(pionKE, PION_E_BINS, labels=MEAN_SCALES).astype(float)
+    wgts.append(piwgt.fillna(1).rename("cv"))
+    for i in range(NUNIV):
+        thiswgt = pd.cut(pionKE, PION_E_BINS, labels=PION_E_SCALES[:, i]).astype(float)
+        if Q2wgt:
+            thiswgt *= Faxial(mcdf.Q2, MA_UNIV[i]) / Faxial(mcdf.Q2, MA_nom)
+        if bshiftwgt:
+            t = np.minimum(coht(mcdf), 0.1)
+            thiswgt *= np.exp(t*b_UNIV[i])
+        wgts.append(thiswgt.fillna(1).rename("univ%i" % i))
+
+    wgts = pd.concat(wgts, axis=1)
+    wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH
+
+    return wgts
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py
new file mode 100644
index 00000000..a16ffca1
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py
@@ -0,0 +1,79 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+# configured
+DETSYST_NU = 15.426
+DETSYST_DCY = 9.8711
+NUMISYST = 8.6
+
+NUNI = 100
+np.random.seed(103) # It's October 3rd
+mass_bins = np.array([0.21, 0.225, 0.245, 0.265, 0.29, 0.32, 0.35, 0.4, 0.5])
+
+parser = argparse.ArgumentParser(
+    prog="add_detsyst_2spectrum.py",
+    description="Add detector uncertainties to spectrum dataframe.",
+)
+parser.add_argument("filename")
+
+args = parser.parse_args()
+f = args.filename
+
+if not f.endswith("spectrum.df"):
+    print("Must run on spectrum file!")
+    sys.exit(1)
+
+spectrum_file = f.replace("spectrum.df", "spectrum_detsyst.df")
+print("Will save to: %s" % spectrum_file)
+
+DETSYST = DETSYST_DCY if "Higgs" in spectrum_file else DETSYST_NU
+
+df = pd.read_hdf(f, key="s")
+
+# Apply time-weighting
+if "timeweight" in df.columns:
+    df.weight *= df.timeweight
+    df.cvweight *= df.timeweight
+
+# Detector systematic uncertainty (normalization)
+detsyst_univ = 1 + np.random.normal(size=NUNI)*DETSYST/100
+
+numisyst_univ = 1 + np.random.normal(size=NUNI)*NUMISYST/100
+
+# Cathode crossing uncertainty
+cathode_univ = np.random.normal(size=NUNI)
+
+# Induction gap uncertainty
+gap_univ = np.random.normal(size=NUNI)
+
+# M.C. stat uncertainty
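+# (fractional error per mass bin inside the signal box, sqrt(sum(w^2))/sum(w),
+# thrown independently bin-to-bin below)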
+mcstat_uncs = []
+for mlo, mhi in zip(mass_bins[:-1], mass_bins[1:]):
+    when = (df.mass > mlo) & (df.mass < mhi) & (df.thnumi < 5)
+    mcstat_uncs.append(np.nan_to_num(np.sqrt((df.weight[when]**2).sum())/df.weight[when].sum()))
+mcstat = pd.cut(df.mass, mass_bins, labels=mcstat_uncs, ordered=False).astype(float).fillna(0)
+
+mcstat_univ = np.random.normal(size=(NUNI, len(mass_bins)-1))
+
+for i in range(NUNI):
+    # multiply the normalization, cathode-cross, and induction-gap-cross uncertainties
+    cathode_err_wgt = np.maximum(1 + cathode_univ[i]*df.cathode_weight_err, 0)
+    gap_err_wgt = np.maximum(1 + gap_univ[i]*df.gap_weight_err, 0)
+    df["wgt_det_univ_%i" % i] = np.maximum(detsyst_univ[i]*cathode_err_wgt*gap_err_wgt, 0)
+    df["wgt_all_univ_%i" % i] *= df["wgt_det_univ_%i" % i]
+
+    mcstat_univ_var = pd.cut(df.mass, mass_bins, labels=mcstat_univ[i], ordered=False).astype(float).fillna(0)
+    mcstat_univ_var[df.thnumi > 5] = 0 # assume ~0 MC stat uncertainty outside signal box
+    df["wgt_mcstat_univ_%i" % i] = np.maximum(1 + mcstat_univ_var * mcstat, 0)
+    df["wgt_all_univ_%i" % i] *= df["wgt_mcstat_univ_%i" % i]
+
+    # additional flux uncertainty
+    df["wgt_flux_univ_%i" % i] *= numisyst_univ[i]
+    df["wgt_all_univ_%i" % i] *= numisyst_univ[i]
+
+    df = df.copy() # undo fragmentation
+
+# Save
+df.to_hdf(spectrum_file, key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh
new file mode 100644
index 00000000..f29bab82
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh
@@ -0,0 +1,6 @@
+PROCESSDIR=/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra
+python add_detsyst_2spectrum.py $PROCESSDIR/F-Nu_loose_evt_spectrum.df
+for f in $PROCESSDIR/F*-Higgs_M*_loose_evt_spectrum.df
+do
+    python add_detsyst_2spectrum.py $f
+done
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py
new file mode 100644
index 00000000..bdc5f5b4
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py
@@ -0,0 +1,11 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+if not len(sys.argv) > 3:
+    print("Usage: python add_dfs.py <output>.df <input>.df [<input>.df ...]")
+    sys.exit(1)
+
+out = pd.concat([pd.read_hdf(f, "s") for f in sys.argv[2:]])
+out.to_hdf(sys.argv[1], key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py
new file mode 100644
index 00000000..e9afb773
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py
@@ -0,0 +1,17 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+if not len(sys.argv) > 3:
+    print("Usage: python add_dfs_fixpot.py <output>.df <input>.df [<input>.df ...]")
+    sys.exit(1)
+
+outdfs = [pd.read_hdf(f, "s") for f in sys.argv[2:]]
+pots = [df.pot.iloc[0] for df in outdfs]
+totpot = sum(pots)
+for i in range(len(outdfs)):
+    outdfs[i].weight *= pots[i] / totpot
+
+out = pd.concat(outdfs)
+out.to_hdf(sys.argv[1], key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py
new file mode 100644
index 00000000..f4a6b6af
--- /dev/null
+++ 
b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py @@ -0,0 +1,34 @@ +# Import smtplib for the actual sending function +import smtplib +# Import the email modules we'll need +from email.mime.text import MIMEText + +import random +import sys +import shutil + + +files = sys.argv[1:] +random.shuffle(files) +OUTDIR = "/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/blinded-datas/" +alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +me = "gray@putnam" +you = "joanli411@gmail.com" + +for i, f in enumerate(files): + fname = f.split("/")[-1] + src = f + dst = OUTDIR + alphabet[i] + ".df" + shutil.copyfile(src, dst) + + if fname == "thedata.df": # the data + msg = MIMEText("The real data is in dataset %s." % alphabet[i]) + msg['Subject'] = 'This is the blinding email. Dont tell anyone!' + msg['From'] = me + msg['To'] = you + + # Send the message via our own SMTP server, but don't include the + # envelope header. + s = smtplib.SMTP('localhost') + s.sendmail(me, [you], msg.as_string()) + s.quit() diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/branches.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/branches.py new file mode 100644 index 00000000..6194a3c8 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/branches.py @@ -0,0 +1,130 @@ +hdrbranches = [ + "rec.hdr.pot", + "rec.hdr.first_in_subrun", + "rec.hdr.run", + "rec.hdr.subrun", + "rec.hdr.ngenevt", + "rec.hdr.evt", + "rec.hdr.proc", + "rec.hdr.cluster", +] + +trueparticlenames = [ + "start_process", + "end_process", + "pdg", + "startE", + "start.x", "start.y", "start.z", + "end.x", "end.y", "end.z", + "genp.x", "genp.y", "genp.z", + "length", + "G4ID", + "cont_tpc", + "genE", + "interaction_id" +] + +pfpbranch = "rec.slc.reco.pfp." +trkbranch = pfpbranch + "trk." +shwbranch = pfpbranch + "shw." 
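+# These prefixes compose full CAF branch names, e.g.
+#   trkbranch + "len"        -> "rec.slc.reco.pfp.trk.len"
+#   pfpbranch + "trackScore" -> "rec.slc.reco.pfp.trackScore"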
+ +pfobranches = [ + pfpbranch + "pfochar.chgendfrac", + pfpbranch + "pfochar.chgfracspread", + pfpbranch + "pfochar.linfitdiff", + pfpbranch + "pfochar.linfitlen", + pfpbranch + "pfochar.linfitgaplen", + pfpbranch + "pfochar.linfitrms", + pfpbranch + "pfochar.openanglediff", + pfpbranch + "pfochar.pca2ratio", + pfpbranch + "pfochar.pca3ratio", + pfpbranch + "pfochar.vtxdist" +] + +pfpbranches = [ + pfpbranch + "parent_is_primary", + pfpbranch + "slcID", + pfpbranch + "trackScore", + pfpbranch + "parent", + pfpbranch + "id", + pfpbranch + "t0", +] + pfobranches + +pfp_daughter_branch = [ + pfpbranch + "daughters" +] + +trkbranches = [ + trkbranch + "producer", + trkbranch + "start.x", trkbranch + "start.y", trkbranch + "start.z", + trkbranch + "end.x", trkbranch + "end.y", trkbranch + "end.z", + trkbranch + "dir.x", trkbranch + "dir.y", trkbranch + "dir.z", + trkbranch + "len", + trkbranch + "rangeP.p_muon", + trkbranch + "mcsP.fwdP_muon", + trkbranch + "rangeP.p_pion", + trkbranch + "mcsP.fwdP_pion", + trkbranch + "bestplane", + trkbranch + "crthit.distance", + trkbranch + "crthit.hit.time", + trkbranch + "crthit.hit.pe", + trkbranch + "chi2pid.2.pid_ndof", + trkbranch + "chi2pid.2.chi2_muon", + trkbranch + "chi2pid.2.chi2_proton", +] + pfpbranches + +trkhitbranches = [ + trkbranch + "calo.2.points.dedx", + trkbranch + "calo.2.points.dqdx", + trkbranch + "calo.2.points.pitch", + trkbranch + "calo.2.points.integral", + trkbranch + "calo.2.points.rr", + trkbranch + "calo.2.points.wire", + trkbranch + "calo.2.points.tpc", + trkbranch + "calo.2.points.sumadc", + # trkbranch + "calo.2.points.width", + # trkbranch + "calo.2.points.mult", + # trkbranch + "calo.2.points.t", + # trkbranch + "calo.2.points.x", + # trkbranch + "calo.2.points.y", + # trkbranch + "calo.2.points.z", + # trkbranch + "calo.2.points.truth.e", + # trkbranch + "calo.2.points.truth.nelec", + # trkbranch + "calo.2.points.truth.pitch", + # trkbranch + "calo.2.points.truth.rr", +] + +for n in trueparticlenames: trkbranches.append(trkbranch + "truth.p." + n) + +slcbranches = [ + "rec.slc.is_clear_cosmic", + "rec.slc.vertex.x", "rec.slc.vertex.y", "rec.slc.vertex.z", + "rec.slc.self", + "rec.slc.tmatch.eff", + "rec.slc.tmatch.pur", + "rec.slc.tmatch.index", + "rec.slc.producer", + "rec.slc.nuid.crlongtrkdiry" +] + +mcbranches = [ + "rec.mc.nu.E", + "rec.mc.nu.position.x", + "rec.mc.nu.position.y", + "rec.mc.nu.position.z", + "rec.mc.nu.pdg", + "rec.mc.nu.iscc", + "rec.mc.nu.genie_mode" +] + +mcprimbranches = [ + "rec.mc.nu.prim.genE", + "rec.mc.nu.prim.pdg", + "rec.mc.nu.prim.genp.x", + "rec.mc.nu.prim.genp.y", + "rec.mc.nu.prim.genp.z", +] + +slc_mcbranches = ["rec.slc.truth." + ".".join(s.split(".")[3:]) for s in mcbranches] +slc_mcprimbranches = ["rec.slc.truth." 
+ ".".join(s.split(".")[3:]) for s in mcprimbranches] + diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/make_spectrum_df.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/make_spectrum_df.py new file mode 100644 index 00000000..93864040 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/make_spectrum_df.py @@ -0,0 +1,407 @@ +import sys +import os +import argparse +import uproot +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np +import matplotlib as mpl +import timewgt + +from util import * +import var +import cut +import data +import hist + +import importlib + +from pyanalib import panda_helpers + +parser = argparse.ArgumentParser( + prog="make_spectrum_df", + description="Convert an evt dataframe to a spectrum dataframe.", +) +parser.add_argument("filename") +parser.add_argument("-gl", "--gainlo", action="store_true", help="Gain Low Variation") +parser.add_argument("-gh", "--gainhi", action="store_true", help="Gain High Variation") +parser.add_argument("-cl", "--callo", action="store_true", help="dE/dx Low Variation") +parser.add_argument("-ch", "--calhi", action="store_true", help="dE/dx High Variation") +parser.add_argument("-ml", "--mcslo", action="store_true", help="MCS Low Variation") +parser.add_argument("-mh", "--mcshi", action="store_true", help="MCS High Variation") + +parser.add_argument("-l", "--loose", action="store_true", help="Loose thumi Cut") +parser.add_argument("-rc", "--remove_cohlike", action="store_true", help="Remove coh-like events.") +parser.add_argument("-csb", "--cut_signal_box", action="store_true", help="Remove events inside signal box region.") +parser.add_argument("-cst", "--correct_split_tracks", action="store_true", help="Apply track splitting corrections w/ uncertainty.") +parser.add_argument("-d", "--data", action="store_true", help="File is data.") +parser.add_argument("-o", "--overwrite", action="store_true", help="Whether to overwrite existing spectra file.") +parser.add_argument("-od", "--output-dir", help="Output directory location") + +NU_PER_POT_BASE = 1.0448039251186124e-16 +parser.add_argument("-fp", "--fix-pot", help="Whether to fix POT to Coh-like amount (%f)." % NU_PER_POT_BASE, action="store_true") + +args = parser.parse_args() + +# get the type of any variation +if args.gainlo: + vartype = "gainlo_" +elif args.gainhi: + vartype = "gainhi_" +elif args.callo: + vartype = "callo_" +elif args.calhi: + vartype = "calhi_" +elif args.mcslo: + vartype = "mcslo_" +elif args.mcshi: + vartype = "mcshi_" +else: # Nominal + vartype = "" + +if args.loose: + vartype += "loose_" + +if args.cut_signal_box: + vartype += "nosignal_" + + +f = args.filename + +if not args.filename.endswith("evt.df"): + print("Must run on spectrum file!") + sys.exit(1) + +spectrum_file = f.replace("evt.df", "%sevt_spectrum.df" % vartype) +if args.output_dir: + spectrum_file = os.path.join(args.output_dir, os.path.basename(spectrum_file)) +print("Will save to: %s" % spectrum_file) + +if os.path.isfile(spectrum_file) and not args.overwrite: + print("File already exists! 
Exiting normally.") + sys.exit(0) + +try: + mchdf = pd.read_hdf(f, key="mch") +except: + mchdf = pd.DataFrame() +if mchdf.index.nlevels > 2: + mchdf.index = mchdf.index.droplevel(2) + +hdr = pd.read_hdf(f, key="hdr") + +issignal = mchdf.shape[0] > 0 # MeVPrtl MC + +# Load the reconstructed events +evtdf = data.mc_dataset(f, "evt", mcnukey="mcnuwgt", isscalar=issignal).df.copy() if not args.data else data.onbeam_dataset(f, "evt").df.copy() + +# MeVPrtl truth information +# Simulation parameters +if issignal: + p_E = mchdf.E + p_E.name = "p_E" + + p_M = mchdf.M + p_M.name = "p_M" + + p_th = mchdf.C1 + p_th.name = "p_th" + + p_dist = dmagdf(mchdf.start, mchdf.enter) + p_dist.name = "p_dist" + + p_raydist = dmagdf(mchdf.enter, mchdf.exit) + p_raydist.name = "p_raydist" + + p_decaylength = mchdf.decay_length + p_decaylength.name = "p_decaylength" + + evtdf[p_E.name] = p_E + evtdf[p_M.name] = p_M + evtdf[p_th.name] = p_th + evtdf[p_raydist.name] = p_raydist + evtdf[p_dist.name] = p_dist + evtdf[p_decaylength.name] = p_decaylength + + evtdf["higgs"] = True + +else: # Neutrino MC or data + evtdf["p_E"] = np.nan + evtdf["p_M"] = np.nan + evtdf["p_th"] = np.nan + evtdf["p_dist"] = np.nan + evtdf["p_raydist"] = np.nan + evtdf["p_dist"] = np.nan + evtdf["p_decaylength"] = np.nan + + evtdf["higgs"] = False + +mcdf = pd.read_hdf(f, key="mcnu") +def cohlike(df): + nmu = df.iscc & (np.abs(df.pdg) == 14) + npi = df.npi + ns = df.nsm + df.nsp + + is_coh_like = (nmu + npi + ns >= 2) & (df.max_proton_ke < 0.05) + + is_coh_like = is_coh_like & InFV(df.position, 50) + + return is_coh_like + +if not args.data: + evtdf = evtdf.join(cohlike(mcdf).groupby(level=[0,1]).any().rename(("cohlike", "", "", "", "", ""))) +else: + evtdf["cohlike"] = False + + +if not args.data: + pot = np.sum(hdr.pot * hdr.first_in_subrun) if not args.fix_pot else hdr.shape[0] / NU_PER_POT_BASE + GOAL_POT = 2.41e20 + evtdf["scale"] = evtdf.wgt.cv*GOAL_POT / pot + evtdf["cvscale"] = evtdf.wgt.cv +else: + pot = 0 + evtdf["scale"] = 1 + evtdf["cvscale"] = 1 + +if issignal and "time" in mchdf.columns: # apply timing fix for scalars + wgt = timewgt.reweight(mchdf.time/1e3) + evtdf["timewgt"] = wgt +else: + evtdf["timewgt"] = 1 + +beam2det = np.array([ [0.921035925, 0.022715103, 0.388814672], [0, 0.998297825, -0.058321970], [-0.389477631, 0.053716629, 0.919468161]]) +beamorigin = np.array([4.503730e2, 80.153901e2, 795.112945e2]) +MUON_MASS = 0.1057 +PION_MASS = 0.13457 +BEAMDIR = beam2det.dot(beamorigin) / np.linalg.norm(beam2det.dot(beamorigin)) + +def recop(trk): + p = trk.rangeP.p_muon + p[~TrkInFV(trk.end)] = trk.mcsP.fwdP_muon + return p + +scalar_mom = evtdf.trunk.trk.dir.multiply(recop(evtdf.trunk.trk), axis=0) + \ + evtdf.branch.trk.dir.multiply(recop(evtdf.branch.trk), axis=0) + +scalar_dir = scalar_mom.divide(magdf(scalar_mom), axis=0) + +beamangle = np.arccos(scalar_dir.x*BEAMDIR[0] + scalar_dir.y*BEAMDIR[1] + scalar_dir.z*BEAMDIR[2])*180/np.pi + +trunk_beamcos = evtdf.trunk.trk.dir.x*BEAMDIR[0] + evtdf.trunk.trk.dir.y*BEAMDIR[1] + evtdf.trunk.trk.dir.z*BEAMDIR[2] +trunk_E = np.sqrt(recop(evtdf.trunk.trk)**2 + MUON_MASS**2) +branch_beamcos = evtdf.branch.trk.dir.x*BEAMDIR[0] + evtdf.branch.trk.dir.y*BEAMDIR[1] + evtdf.branch.trk.dir.z*BEAMDIR[2] +branch_E = np.sqrt(recop(evtdf.branch.trk)**2 + PION_MASS**2) +coh_t = (np.sin(beamangle)*magdf(scalar_mom))**2 + (trunk_E - recop(evtdf.trunk.trk)*trunk_beamcos + branch_E - recop(evtdf.branch.trk)*branch_beamcos)**2 + +openangle = np.arccos(dotdf(evtdf.trunk.trk.dir, 
evtdf.branch.trk.dir))*180/np.pi + +# OPTIMIZED +MAX_SHW_LEN_CUT = 15 +MAX_OTHR_TRK_LEN_CUT = 15 +OTHR_CHI2U_CUT = 45 +OTHR_CHI2P_CUT = 90 + +TRUNK_LEN_CUT = 25 +BRANCH_LEN_CUT = 25 +TRUNK_CHI2U_CUT = 10 # 15? +BRANCH_CHI2U_CUT = 10 +CHI2P_CUT = 95 +MCS_RANGE_COMP = 1.8 + +STUB_5mm_CUT = 20 +STUB_10mm_CUT = 30 +STUB_20mm_CUT = 35 +STUB_30mm_CUT = 45 + +OPENANGLE_CUT = 70 +UU_ANGLE_CUT = 5 if not args.loose else 15 + +# MUON ID VARIATIONS +if args.gainlo: + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2_glo.chi2u + chi2u_branch = evtdf.branch.trk.chi2pid.I2_glo.chi2u + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2_glo.chi2p + chi2p_branch = evtdf.branch.trk.chi2pid.I2_glo.chi2p +elif args.gainhi: + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2_ghi.chi2u + chi2u_branch = evtdf.branch.trk.chi2pid.I2_ghi.chi2u + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2_ghi.chi2p + chi2p_branch = evtdf.branch.trk.chi2pid.I2_ghi.chi2p +elif args.callo: + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2_callo.chi2u + chi2u_branch = evtdf.branch.trk.chi2pid.I2_callo.chi2u + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2_callo.chi2p + chi2p_branch = evtdf.branch.trk.chi2pid.I2_callo.chi2p +elif args.calhi: + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2_calhi.chi2u + chi2u_branch = evtdf.branch.trk.chi2pid.I2_calhi.chi2u + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2_calhi.chi2p + chi2p_branch = evtdf.branch.trk.chi2pid.I2_calhi.chi2p +elif not args.data: # Nominal + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2.chi2u + chi2u_branch = evtdf.branch.trk.chi2pid.I2.chi2u + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2.chi2p + chi2p_branch = evtdf.branch.trk.chi2pid.I2.chi2p +else: # different VAR name for data + chi2u_trunk = evtdf.trunk.trk.chi2pid.I2.chi2_muon + chi2u_branch = evtdf.branch.trk.chi2pid.I2.chi2_muon + chi2p_trunk = evtdf.trunk.trk.chi2pid.I2.chi2_proton + chi2p_branch = evtdf.branch.trk.chi2pid.I2.chi2_proton + +trunk_mcs_range = evtdf.trunk.trk.mcsP.fwdP_muon / evtdf.trunk.trk.rangeP.p_muon +branch_mcs_range = evtdf.branch.trk.mcsP.fwdP_muon / evtdf.branch.trk.rangeP.p_muon + +if args.mcslo: + trunk_mcs_range -= 0.03 + branch_mcs_range -= 0.03 +elif args.mcshi: + trunk_mcs_range += 0.03 + branch_mcs_range += 0.03 +else: # Nominal + pass + +# STUB ID VARIATIONS +if args.callo and not issignal: # only apply stub variations to neutrinos + stubl0_5_dedx = evtdf.stub.l0_5cm.dedx_callo*evtdf.stub.l0_5cm.length + stubl1_dedx = evtdf.stub.l1cm.dedx_callo*evtdf.stub.l1cm.length + stubl2_dedx = evtdf.stub.l2cm.dedx_callo*evtdf.stub.l2cm.length + stubl3_dedx = evtdf.stub.l3cm.dedx_callo*evtdf.stub.l3cm.length +elif args.calhi and not issignal: + stubl0_5_dedx = evtdf.stub.l0_5cm.dedx_calhi*evtdf.stub.l0_5cm.length + stubl1_dedx = evtdf.stub.l1cm.dedx_calhi*evtdf.stub.l1cm.length + stubl2_dedx = evtdf.stub.l2cm.dedx_calhi*evtdf.stub.l2cm.length + stubl3_dedx = evtdf.stub.l3cm.dedx_calhi*evtdf.stub.l3cm.length +else: # Nominal + stubl0_5_dedx = evtdf.stub.l0_5cm.dedx*evtdf.stub.l0_5cm.length + stubl1_dedx = evtdf.stub.l1cm.dedx*evtdf.stub.l1cm.length + stubl2_dedx = evtdf.stub.l2cm.dedx*evtdf.stub.l2cm.length + stubl3_dedx = evtdf.stub.l3cm.dedx*evtdf.stub.l3cm.length + +preselect = [ + SlcInFV(evtdf.slc.vertex), + TrkInFV(evtdf.trunk.trk.end), + TrkInFV(evtdf.branch.trk.end) +] + +objs = [ + np.isnan(evtdf.max_shw_len_primary) | (evtdf.max_shw_len_primary < MAX_SHW_LEN_CUT), + np.isnan(evtdf.max_othr_trk_len) | (evtdf.max_othr_trk_len < MAX_OTHR_TRK_LEN_CUT), + np.isnan(evtdf.max_othr_chi2_muon) | (evtdf.max_othr_chi2_muon < OTHR_CHI2U_CUT), + 
np.isnan(evtdf.min_othr_chi2_proton_all) | (evtdf.min_othr_chi2_proton_all > OTHR_CHI2P_CUT), + +] + +hasstub = \ + (stubl0_5_dedx > STUB_5mm_CUT) |\ + (stubl1_dedx > STUB_10mm_CUT) |\ + (stubl2_dedx > STUB_20mm_CUT) |\ + (stubl3_dedx > STUB_30mm_CUT) + +muid = [ + evtdf.trunk.trk.len > TRUNK_LEN_CUT, + evtdf.branch.trk.len > BRANCH_LEN_CUT, + chi2u_trunk < TRUNK_CHI2U_CUT, + chi2u_branch < BRANCH_CHI2U_CUT, + chi2p_trunk > CHI2P_CUT, + chi2p_branch > CHI2P_CUT, + trunk_mcs_range < MCS_RANGE_COMP, + branch_mcs_range < MCS_RANGE_COMP, +] + +kinematics = [ + openangle < OPENANGLE_CUT, + beamangle < UU_ANGLE_CUT, +] + +if args.cut_signal_box: + kinematics[-1] = kinematics[-1] & (beamangle > 5) + +stubs = [ + ~hasstub, +] + +def cut_all(l): + l0 = l[0] + for c in l[1:]: + l0 = l0 & c + return l0 + +cuts = preselect + objs + muid + stubs + kinematics + +passcut = cut_all(cuts) + +passcut_nokinematics = cut_all(preselect + muid + stubs) + +Etrunk = np.sqrt(MUON_MASS**2 + recop(evtdf.trunk.trk)**2) +Ebranch = np.sqrt(MUON_MASS**2 + recop(evtdf.branch.trk)**2) + +s_mass = np.sqrt(2)*np.sqrt(MUON_MASS**2 + Etrunk*Ebranch - recop(evtdf.trunk.trk)*recop(evtdf.branch.trk)*dotdf(evtdf.branch.trk.dir, evtdf.trunk.trk.dir)) + +# Compute track splitting variables and apply corrections +if args.correct_split_tracks and not args.data: + import track_splitting + cathode_cross_weight, cathode_cross_weight_err, gap_cross_weight, gap_cross_weight_err = track_splitting.correct(evtdf) +else: + cathode_cross_weight = pd.Series(1, evtdf.index) + cathode_cross_weight_err = pd.Series(0, evtdf.index) + gap_cross_weight = pd.Series(1, evtdf.index) + gap_cross_weight_err = pd.Series(0, evtdf.index) + +# Update weights +evtdf.scale = evtdf.scale*cathode_cross_weight*gap_cross_weight +evtdf.cvscale = evtdf.cvscale*cathode_cross_weight*gap_cross_weight + +spectradf = pd.DataFrame( + data={ + "mass": s_mass[passcut], + "openangle": openangle[passcut], + "thnumi": beamangle[passcut], + "tuu": coh_t[passcut], + "chi2u_trunk": chi2u_trunk[passcut], + "chi2u_branch": chi2u_branch[passcut], + "chi2p_trunk": chi2p_trunk[passcut], + "chi2p_branch": chi2p_branch[passcut], + "signal": evtdf.higgs[passcut], + "cohlike": evtdf.cohlike[passcut], + "weight": evtdf.scale[passcut], + "pot": pot, + "cvweight": evtdf.cvscale[passcut], + "timeweight": evtdf.timewgt[passcut], + "cathode_weight": cathode_cross_weight[passcut], + "cathode_weight_err": cathode_cross_weight_err[passcut], + "gap_weight": gap_cross_weight[passcut], + "gap_weight_err": gap_cross_weight_err[passcut], + "genie_mode": evtdf.slc.truth.genie_mode[passcut], + "nu_iscc": evtdf.slc.truth.iscc[passcut], + "nu_pdg": evtdf.slc.truth.pdg[passcut], + "nu_E": evtdf.slc.truth.E[passcut], + "nu_Q2": evtdf.slc.truth.Q2[passcut], + "nu_W": evtdf.slc.truth.w[passcut], + "p_E": evtdf.p_E[passcut], + "p_M": evtdf.p_M[passcut], + "p_th": evtdf.p_th[passcut], + "p_dist": evtdf.p_dist[passcut], + "p_raydist": evtdf.p_raydist[passcut], + "p_decaylength": evtdf.p_decaylength[passcut], + }, +) + +# systematic uncertainties +if not args.data: + systdfs = [] + for syst in ["g4", "xsec", "flux", "coh", "all"]: + systdf = evtdf.wgt[syst][passcut] + systdf.columns = [("wgt_%s_" % syst) + "_".join([r for r in c if r]) for c in systdf.columns] + systdfs.append(systdf) + + systdf = pd.concat(systdfs, axis=1) + + # Add in weights + spectradf = pd.concat([spectradf, systdf], axis=1).reset_index(drop=True) + +if args.remove_cohlike: + spectradf = spectradf[~spectradf.cohlike] + +# Save 
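+# (key "s" in fixed format, as read back by add_dfs.py, makefakedata.py, and
+# add_detsyst_2spectrum.py)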
+spectradf.to_hdf(spectrum_file, key="s") diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/makefakedata.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/makefakedata.py new file mode 100644 index 00000000..5a738ae2 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/makefakedata.py @@ -0,0 +1,37 @@ +import argparse +import pandas as pd +import numpy as np +import sys + +output_file = sys.argv[1] +input_files = sys.argv[2:] + +COHWEIGHT = np.random.random() + 0.5 +SIG_SCALE = np.random.random()*50 + +# print(COHWEIGHT, SIG_SCALE) + +outdfs = [] +for i, f in enumerate(input_files): + df = pd.read_hdf(f, "s") + + df.loc[df.genie_mode == 3, "weight"] *= COHWEIGHT + if i == 1: # signal MC + df.weight *= SIG_SCALE / df.weight.sum() + + draws = np.random.poisson(df.weight).astype(int) + # print(f, df.weight.sum(), draws.sum()) + + cols_2_save = [ + "mass", + "thnumi", + "openangle", + "tuu" + ] + + df = pd.DataFrame(np.repeat(df.values, draws, axis=0), columns=df.columns)[cols_2_save].astype(float) + # df["ifile"] = i + # print(df[df.thnumi < 5]) + outdfs.append(df) + +pd.concat(outdfs).reset_index(drop=True).to_hdf(output_file, key="s", format="fixed") diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/regen_spectra.sh b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/regen_spectra.sh new file mode 100644 index 00000000..cf9e6dd3 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/regen_spectra.sh @@ -0,0 +1,58 @@ +OUTDIR=/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/ + +python make_spectrum_df.py -od $OUTDIR -cst -rc /icarus/data/users/gputnam/DMCP2023G/mc-F/F-MCNuPhase2_evt.df +python make_spectrum_df.py -od $OUTDIR --loose -cst -rc /icarus/data/users/gputnam/DMCP2023G/mc-F/F-MCNuPhase2_evt.df + +for arg in gl gh cl ch ml mh +do + python make_spectrum_df.py -od $OUTDIR --loose -cst -${arg} /icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df & + python make_spectrum_df.py -od $OUTDIR -cst -${arg} /icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df & + wait +done + +for f in /icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike*evt.df +do + python make_spectrum_df.py -od $OUTDIR -cst --loose $f & + python make_spectrum_df.py -od $OUTDIR -cst $f & + wait +done + +for f in `ls /icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom*evt.df | grep -v polaris | grep -v nowgt` +do + python make_spectrum_df.py -od $OUTDIR -cst --loose $f & + python make_spectrum_df.py -od $OUTDIR -cst $f & + wait +done + +for f in `ls /icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom*evt.df | grep polaris | grep -v nowgt` +do + python make_spectrum_df.py -fp -od $OUTDIR -cst --loose $f & + python make_spectrum_df.py -fp -od $OUTDIR -cst $f & +done +wait + +python add_dfs_fixpot.py /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom-all_evt_spectrum.df /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom_evt_spectrum.df `ls /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/FB-CohLike_nom-* | grep -v loose` +python add_dfs_fixpot.py /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom-all_loose_evt_spectrum.df /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom_loose_evt_spectrum.df `ls /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/FB-CohLike_nom-* | grep loose` + +python add_dfs.py /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-Nu_loose_evt_spectrum.df 
/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom-all_loose_evt_spectrum.df /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-MCNuPhase2_loose_evt_spectrum.df +python add_dfs.py /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-Nu_evt_spectrum.df /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-CohLike_nom-all_evt_spectrum.df /icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/F-MCNuPhase2_evt_spectrum.df + +for M in 220 240 260 280 300 320 340 +do + for f in `ls /icarus/data/users/gputnam/DMCP2023G/mc-F/F2-Higgs_M*_evt.df | grep $M | grep -v _nom_` + do + python make_spectrum_df.py -od $OUTDIR -cst $f & + done + wait +done + +for f in /icarus/data/users/gputnam/DMCP2023G/mc-F-iter2/F2-Higgs_M*_nom_evt.df +do + python make_spectrum_df.py -od $OUTDIR -cst $f & + python make_spectrum_df.py -od $OUTDIR -cst --loose $f & + for arg in gl gh cl ch ml mh + do + python make_spectrum_df.py -od $OUTDIR -cst -${arg} $f & + done + wait +done diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/reweight_coh.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/reweight_coh.py new file mode 100644 index 00000000..e17b5b8b --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/reweight_coh.py @@ -0,0 +1,112 @@ +import uproot +import numpy as np +import pandas as pd +from scipy import linalg +import util + +np.random.seed(525600) # moments so dear +NUNIV = 100 + +# Fit configuration file +# fname = "/icarus/data/users/gputnam/AScale0_fix.min.root" # pre b-scale fix +fname = "/icarus/data/users/gputnam/fitresult.min.root" +f = uproot.open(fname) + +# Load the configuration +covariance = f["covariance"].to_numpy()[0] +fit_result = f["fit_dials"].to_numpy()[0] +NVAR = fit_result.size + +# Hard code "default" uncertainty +default_uncertainty = np.array([0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1]) + +# error on A-scaling is set separately +Aerr = 0.91393 + +# add in error on A to covariance +covariance = covariance + np.diag(np.concatenate(([Aerr], np.zeros((NVAR-1,))))) + +# Generate the random throws for each parameter value + +# The GENIE reweight convention is that the values and error are reported relative to the +# "default uncertainty" in the fit, so we need to scale by that +rands = np.random.randn(NUNIV, NVAR) +SAMPLES = 1 + (rands@linalg.cholesky(covariance) + fit_result)*default_uncertainty + +# compute the A-scaling shift in each universe and the CV +A_samples = SAMPLES[:, 0] +Aexp_nom = 4/3. 
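+# As in makedf/reweight_coh.py: each universe throws the exponent of the
+# assumed A**(4/3) scaling used to extrapolate the carbon fit to argon.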
+Ar_A = 40
+C_A = 12
+Aexp_UNIV = Aexp_nom*A_samples
+
+A_scale_UNIV = (Ar_A/C_A)**Aexp_UNIV / ((Ar_A/C_A)**Aexp_nom)
+
+# BS and RS b-values
+HBARC = 0.197326980 # GeV*fm
+NUCLEAR_RADIUS = 1.00 # fm, in GENIE
+NUCLEAR_RADIUS = NUCLEAR_RADIUS / HBARC # 1/GeV
+bRS_C = (1./3)*(NUCLEAR_RADIUS*(C_A**(1./3)))**2 # Rein-Sehgal, on Carbon
+bRS = (1./3)*(NUCLEAR_RADIUS*(Ar_A**(1./3)))**2 # Rein-Sehgal
+b_BS_PION_E_BINS = [0, 0.076, 0.080, 0.100, 0.148, 0.162, 0.226, 0.486, 0.584, 0.662, 0.776, 0.870, np.inf]
+b_BS_VALS_C = np.array([116, 109, 89.8, 91.0, 89.2, 80.8, 54.6, 55.2, 58.4, 60.5, 62.2, bRS_C]) # b values on carbon
+b_BS_VALS = b_BS_VALS_C * (Ar_A / C_A)**(2./3) # assume A-scaling to convert to argon
+
+# Compute the pion scaling
+
+# hard-code pion energy bins in x-sec reweighting
+PION_E_BINS = [0, 0.25, 0.5, 0.75, 1, 1.5, 2, 3, 6]
+
+PION_E_SCALES = SAMPLES[:, 1:].T
+PION_E_SCALES[:4] *= A_scale_UNIV
+
+MEAN_SCALES = np.mean(PION_E_SCALES, axis=1)
+
+# Also add in an MA associated weight
+MA_nom = 1
+MA_unc = 0.3
+MA_UNIV = MA_nom + MA_unc*np.random.randn(NUNIV)
+def Faxial(Q2, M):
+    return M**2 / (Q2 + M**2)
+
+# divide out the mean effect to get the variations
+PION_E_SCALES = (PION_E_SCALES.T / MEAN_SCALES).T
+
+def coht(mcdf):
+    nuclE = mcdf.E - mcdf.p0.genE - mcdf.p1.genE
+    t = np.abs(nuclE**2 - util.dmagdf(mcdf.momentum, mcdf.p0.genp+mcdf.p1.genp)**2)
+    return t
+
+PION_MASS = 0.139570
+def cohweight(mcdf, Q2wgt=True, fixb=True, douniv=True):
+    wgts = []
+    cccoh = (np.abs(mcdf.pdg) == 14) & (mcdf.iscc == 1) & (mcdf.genie_mode == 3)
+    pionKE = np.maximum(mcdf.p1.genE - PION_MASS, 0)
+
+    piwgt = pd.cut(pionKE, PION_E_BINS, labels=MEAN_SCALES).astype(float)
+    wgts.append(piwgt.fillna(1).rename("cv"))
+
+    # GENIE by default uses the Rein-Sehgal b's. Correct these to Berger-Sehgal
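+    # (b is the slope of the assumed dsigma/dt ~ exp(-b*t) dependence; the
+    # correction below reweights by exp(-t*(bBS - bRS)) with a (bBS/bRS)**2
+    # factor)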
+    fixb_wgt = 1
+    if fixb:
+        t = coht(mcdf)
+        bBS = pd.cut(pionKE, b_BS_PION_E_BINS, labels=b_BS_VALS).astype(float)
+        fixb_wgt = np.exp(-t*(bBS - bRS))*(bBS/bRS)**2
+        wgts[0] *= fixb_wgt
+
+    if not douniv:
+        wgts = pd.concat(wgts, axis=1)
+        wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH
+
+        return wgts
+
+    for i in range(NUNIV):
+        thiswgt = np.maximum(0, pd.cut(pionKE, PION_E_BINS, labels=PION_E_SCALES[:, i]).astype(float).fillna(0))
+        if Q2wgt:
+            thiswgt *= Faxial(mcdf.Q2, MA_UNIV[i]) / Faxial(mcdf.Q2, MA_nom)
+        wgts.append(thiswgt.fillna(1).rename("univ%i" % i))
+
+    wgts = pd.concat(wgts, axis=1)
+    wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH
+
+    return wgts
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/timewgt.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/timewgt.py
new file mode 100644
index 00000000..805a1c42
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/timewgt.py
@@ -0,0 +1,15 @@
+import numpy as np
+import pandas as pd
+
+def reweight(time):
+    goodtimes = (2, 3.5)
+    badtimes = (9.5, np.inf)
+
+    is_goodtime = (time > goodtimes[0]) & (time < goodtimes[1])
+    is_badtime = (time > badtimes[0]) & (time < badtimes[1])
+
+    timewgt_v = (is_goodtime.sum() + is_badtime.sum()) / is_goodtime.sum()
+    timewgt = pd.Series(1, time.index)
+    timewgt[is_goodtime] = timewgt_v
+    timewgt[is_badtime] = 0
+    return timewgt
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/track_splitting.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/track_splitting.py
new file mode 100644
index 00000000..c3dffc42
--- /dev/null
+++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/track_splitting.py
@@ -0,0 +1,211 @@
+import numpy as np
+import pandas as pd
+
+# CATHODE CROSSING
+correction_file = "/exp/icarus/app/users/jzettle/BSM_Systs/Weights_Output/dir_x_weights_cathode.txt"
+cathode_corrections = np.loadtxt(correction_file).T
+cathode_correction_W_split = cathode_corrections[1]
+cathode_correction_W_reco = cathode_corrections[3]
+cathode_correction_E_split = cathode_corrections[9]
+cathode_correction_E_reco = cathode_corrections[11]
+cathode_correction_bins = np.linspace(-1, 1, cathode_corrections.shape[1]+1)
+cathode_correction_err_W = 0.3
+cathode_correction_err_E = 0.2
+# TPCs 0-1 span the East cryostat and 2-3 the West, so itpc//2 identifies the cryostat
+def tpc_index(x):
+    itpc = (x*0).fillna(0).astype(int)
+    itpc[x > -210.29] += 1
+    itpc[x > 61.94] += 1
+    itpc[x > 210.29] += 1
+    return itpc
+
+# Z=0 INDUCTION GAP
+correction_file = "/exp/icarus/app/users/jzettle/BSM_Systs/Weights_Output/dir_z_weights_across_z.txt"
+gap_corrections = np.loadtxt(correction_file).T
+gap_correction_W_split = gap_corrections[1]
+gap_correction_W_reco = gap_corrections[2]
+gap_correction_E_split = gap_corrections[3]
+gap_correction_E_reco = gap_corrections[4]
+# don't actually apply corrections for split -- we can't fix these
+gap_correction_W_split[:] = 1
+gap_correction_E_split[:] = 1
+# large error since this is harder to trust
+gap_correction_err = 1
+gap_correction_bins = np.linspace(-1, 1, cathode_corrections.shape[1]+1)
+
+def correct(evtdf):
+    # CATHODE CROSSING
+    cathode_correction_trunk_var = evtdf.trunk.trk.dir.x
+    cathode_correction_branch_var = evtdf.branch.trk.dir.x
+
+    # TRUNK
+    true_trunk_crosses_cathode = tpc_index(evtdf.trunk.trk.truth.p.start.x) != tpc_index(evtdf.trunk.trk.truth.p.end.x)
+    true_same_cryo = (tpc_index(evtdf.trunk.trk.truth.p.start.x)//2) == (tpc_index(evtdf.trunk.trk.truth.p.end.x)//2)
+    
true_trunk_crosses_cathode = true_trunk_crosses_cathode & true_same_cryo + reco_trunk_crosses_cathode = tpc_index(evtdf.trunk.trk.start.x) != tpc_index(evtdf.trunk.trk.end.x) + cryoE = evtdf.trunk.trk.producer == 0 + + # CORRECTION + cathode_cross_trunk_weight = pd.Series(1, evtdf.index) + cathode_cross_trunk_weight[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & ~cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_W_split)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & ~cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_W_reco)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_E_split)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_E_reco)\ + .astype(float).fillna(0) + + # UNCERTAINTY + cathode_cross_trunk_weight_err = pd.Series(0, evtdf.index) + cathode_cross_trunk_weight_err[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & ~cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_W_split)*cathode_correction_err_W)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight_err[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & ~cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_W_reco)*cathode_correction_err_W)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight_err[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & ~reco_trunk_crosses_cathode & cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_E_split)*cathode_correction_err_E)\ + .astype(float).fillna(0) + cathode_cross_trunk_weight_err[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_trunk_var[true_trunk_crosses_cathode & reco_trunk_crosses_cathode & cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_E_reco)*cathode_correction_err_E)\ + .astype(float).fillna(0) + + # BRANCH + true_branch_crosses_cathode = tpc_index(evtdf.branch.trk.truth.p.start.x) != tpc_index(evtdf.branch.trk.truth.p.end.x) + true_same_cryo = (tpc_index(evtdf.branch.trk.truth.p.start.x)//2) == (tpc_index(evtdf.branch.trk.truth.p.end.x)//2) + true_branch_crosses_cathode = true_branch_crosses_cathode & true_same_cryo + reco_branch_crosses_cathode = tpc_index(evtdf.branch.trk.start.x) != tpc_index(evtdf.branch.trk.end.x) + cryoE = evtdf.branch.trk.producer == 0 + + # CORRECTION + cathode_cross_branch_weight = pd.Series(1, evtdf.index) + 
cathode_cross_branch_weight[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & ~cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_W_split)\ + .astype(float).fillna(0) + cathode_cross_branch_weight[true_branch_crosses_cathode & reco_branch_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & reco_branch_crosses_cathode & ~cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_W_reco)\ + .astype(float).fillna(0) + cathode_cross_branch_weight[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_E_split)\ + .astype(float).fillna(0) + cathode_cross_branch_weight[true_branch_crosses_cathode & reco_branch_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & reco_branch_crosses_cathode & cryoE], cathode_correction_bins, ordered=False, labels=cathode_correction_E_reco)\ + .astype(float).fillna(0) + + # UNCERTAINTY + cathode_cross_branch_weight_err = pd.Series(0, evtdf.index) + cathode_cross_branch_weight_err[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & ~cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_W_split)*cathode_correction_err_W)\ + .astype(float).fillna(0) + cathode_cross_branch_weight_err[true_branch_crosses_cathode & reco_branch_crosses_cathode & ~cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & reco_branch_crosses_cathode & ~cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_W_reco)*cathode_correction_err_W)\ + .astype(float).fillna(0) + cathode_cross_branch_weight_err[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & ~reco_branch_crosses_cathode & cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_E_split)*cathode_correction_err_E)\ + .astype(float).fillna(0) + cathode_cross_branch_weight_err[true_branch_crosses_cathode & reco_branch_crosses_cathode & cryoE] =\ + pd.cut(cathode_correction_branch_var[true_branch_crosses_cathode & reco_branch_crosses_cathode & cryoE], cathode_correction_bins, + ordered=False, labels=np.abs(1-cathode_correction_E_reco)*cathode_correction_err_E)\ + .astype(float).fillna(0) + + # Combine + cathode_cross_weight = cathode_cross_trunk_weight * cathode_cross_branch_weight + cathode_cross_weight_err = np.sqrt(cathode_cross_trunk_weight_err**2 + cathode_cross_branch_weight_err**2) + + # Z=0 INDUCTION GAP + gap_correction_trunk_var = evtdf.trunk.trk.dir.z + gap_correction_branch_var = evtdf.branch.trk.dir.z + + # TRUNK + true_trunk_crosses_gap = (evtdf.trunk.trk.truth.p.start.z >= 0) != (evtdf.trunk.trk.truth.p.end.z >= 0) + reco_trunk_crosses_gap = (evtdf.trunk.trk.start.z >= 0) != (evtdf.trunk.trk.end.z >= 0) + + # CORRECTION + gap_cross_trunk_weight = pd.Series(1, evtdf.index) + gap_cross_trunk_weight[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & ~cryoE], gap_correction_bins, 
ordered=False, labels=gap_correction_W_split)\ + .astype(float).fillna(0) + gap_cross_trunk_weight[true_trunk_crosses_gap & reco_trunk_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & reco_trunk_crosses_gap & ~cryoE], gap_correction_bins, ordered=False, labels=gap_correction_W_reco)\ + .astype(float).fillna(0) + gap_cross_trunk_weight[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & cryoE], gap_correction_bins, ordered=False, labels=gap_correction_E_split)\ + .astype(float).fillna(0) + gap_cross_trunk_weight[true_trunk_crosses_gap & reco_trunk_crosses_gap & cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & reco_trunk_crosses_gap & cryoE], gap_correction_bins, ordered=False, labels=gap_correction_E_reco)\ + .astype(float).fillna(0) + + # UNCERTAINTY + gap_cross_trunk_weight_err = pd.Series(0, evtdf.index) + gap_cross_trunk_weight_err[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & ~cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_W_split)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_trunk_weight_err[true_trunk_crosses_gap & reco_trunk_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & reco_trunk_crosses_gap & ~cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_W_reco)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_trunk_weight_err[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & ~reco_trunk_crosses_gap & cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_E_split)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_trunk_weight_err[true_trunk_crosses_gap & reco_trunk_crosses_gap & cryoE] =\ + pd.cut(gap_correction_trunk_var[true_trunk_crosses_gap & reco_trunk_crosses_gap & cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_E_reco)*gap_correction_err)\ + .astype(float).fillna(0) + + # BRANCH + true_branch_crosses_gap = (evtdf.branch.trk.truth.p.start.z >= 0) != (evtdf.branch.trk.truth.p.end.z >= 0) + reco_branch_crosses_gap = (evtdf.branch.trk.start.z >= 0) != (evtdf.branch.trk.end.z >= 0) + + # CORRECTION + gap_cross_branch_weight = pd.Series(1, evtdf.index) + gap_cross_branch_weight[true_branch_crosses_gap & ~reco_branch_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & ~reco_branch_crosses_gap & ~cryoE], gap_correction_bins, ordered=False, labels=gap_correction_W_split)\ + .astype(float).fillna(0) + gap_cross_branch_weight[true_branch_crosses_gap & reco_branch_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & reco_branch_crosses_gap & ~cryoE], gap_correction_bins, ordered=False, labels=gap_correction_W_reco)\ + .astype(float).fillna(0) + gap_cross_branch_weight[true_branch_crosses_gap & ~reco_branch_crosses_gap & cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & ~reco_branch_crosses_gap & cryoE], gap_correction_bins, ordered=False, labels=gap_correction_E_split)\ + .astype(float).fillna(0) + gap_cross_branch_weight[true_branch_crosses_gap & reco_branch_crosses_gap & cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & reco_branch_crosses_gap & cryoE], gap_correction_bins, ordered=False, 
labels=gap_correction_E_reco)\ + .astype(float).fillna(0) + + # UNCERTAINTY + gap_cross_branch_weight_err = pd.Series(0, evtdf.index) + gap_cross_branch_weight_err[true_branch_crosses_gap & ~reco_branch_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & ~reco_branch_crosses_gap & ~cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_W_split)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_branch_weight_err[true_branch_crosses_gap & reco_branch_crosses_gap & ~cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & reco_branch_crosses_gap & ~cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_W_reco)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_branch_weight_err[true_branch_crosses_gap & ~reco_branch_crosses_gap & cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & ~reco_branch_crosses_gap & cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_E_split)*gap_correction_err)\ + .astype(float).fillna(0) + gap_cross_branch_weight_err[true_branch_crosses_gap & reco_branch_crosses_gap & cryoE] =\ + pd.cut(gap_correction_branch_var[true_branch_crosses_gap & reco_branch_crosses_gap & cryoE], gap_correction_bins, + ordered=False, labels=np.abs(1-gap_correction_E_reco)*gap_correction_err)\ + .astype(float).fillna(0) + + # Combine + gap_cross_weight = gap_cross_trunk_weight * gap_cross_branch_weight + gap_cross_weight_err = np.sqrt(gap_cross_trunk_weight_err**2 + gap_cross_branch_weight_err**2) + + return cathode_cross_weight, cathode_cross_weight_err, gap_cross_weight, gap_cross_weight_err + diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/rundfs.sh b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/rundfs.sh new file mode 100644 index 00000000..1b9254f5 --- /dev/null +++ b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/rundfs.sh @@ -0,0 +1,13 @@ +OUTDIR=/icarus/data/users/gputnam/DMCP2023G/mc-FB +DF=evt + +for dir in "$@" +do + arrPath=(${dir//// }) + sPath=${arrPath[@]:7} + savename=${sPath// /_} + find $dir -name "*.flat.caf.root" | grep -v Blind | grep -v Prescaled > ${savename}.list + echo Making dataframe $OUTDIR/${savename}_${DF}.df with `wc -l < ${savename}.list` files + python rundf.py configs/${DF}df.py $OUTDIR/${savename}_${DF}.df ${savename}.list + rm ${savename}.list +done
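+
+# Example invocation (hypothetical input path -- the 8th path component onward
+# names the output):
+#   bash rundfs.sh /pnfs/icarus/persistent/users/gputnam/prodF/mc/SampleA
+# would write $OUTDIR/SampleA_evt.df using configs/evtdf.py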