-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python modules for applying weights. Bash scripts for generating files.
- Loading branch information
gputnam
committed
Mar 15, 2024
1 parent
ff5bde1
commit 4247ca8
Showing
15 changed files
with
1,272 additions
and
0 deletions.
There are no files selected for viewing
51 changes: 51 additions & 0 deletions
51
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import argparse | ||
import pandas as pd | ||
import numpy as np | ||
import sys | ||
|
||
FNAL_COHLIKE_FILES = [ | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df", | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-E_evt.df", | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-F_evt.df", | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-G_evt.df", | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-H_evt.df", | ||
] | ||
|
||
POLARIS_COHLIKE_FILES = [ | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod2_evt.df", | ||
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod3_evt.df", | ||
] | ||
|
||
OUTPUT = "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-all_nowgt_evt.df" | ||
|
||
# Neutrinos per POT, extracted from one of the coh-like files | ||
NU_PER_POT_BASE = 1.0448039251186124e-16 | ||
|
||
NAMES = ["mch", "evt", "hdr", "mcnu"] | ||
with pd.HDFStore(OUTPUT) as hdf: | ||
for n in NAMES: | ||
df = None | ||
offset = 0 | ||
for f in FNAL_COHLIKE_FILES: | ||
thisdf = pd.read_hdf(f, n) | ||
if thisdf.shape[0] > 0: | ||
thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0) | ||
if df is None: | ||
df = thisdf | ||
else: | ||
df = pd.concat([df, thisdf]) | ||
print(f, n) | ||
offset += 1000000 | ||
|
||
for f in POLARIS_COHLIKE_FILES: | ||
thisdf = pd.read_hdf(f, n) | ||
if n == "hdr": # fix the POT for the polaris files | ||
thisdf.pot = 1 / NU_PER_POT_BASE | ||
if thisdf.shape[0] > 0: | ||
thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0) | ||
df = pd.concat([df, thisdf]) | ||
del thisdf | ||
print(f, n) | ||
offset += 1000000 | ||
|
||
hdf.put(key=n, value=df, format="fixed") |
91 changes: 91 additions & 0 deletions
91
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import uproot | ||
import numpy as np | ||
import pandas as pd | ||
from scipy import linalg | ||
import util | ||
|
||
np.random.seed(525600) # moments so dear | ||
NUNIV = 1000 | ||
|
||
# Fit configuration file | ||
fname = "/icarus/data/users/gputnam/AScale0_fix.min.root" | ||
f = uproot.open(fname) | ||
|
||
# Load the configuration | ||
covariance = f["covariance"].to_numpy()[0] | ||
fit_result = f["fit_dials"].to_numpy()[0] | ||
NVAR = fit_result.size | ||
|
||
# Hard code "default" uncertainty | ||
default_uncertainty = np.array([0.5, 0.9, 0.65, 0.5, 0.5, 0.5, 0.5, 1, 1]) | ||
|
||
# error on A-scaling is set separately | ||
Aerr = 0.91393 | ||
|
||
# add in error on A to covariance | ||
covariance = covariance + np.diag(np.concatenate(([Aerr], np.zeros((NVAR-1,))))) | ||
|
||
# Generate the random throws for each parameter value | ||
|
||
# The GENIE reweight convention is that the values and error are reported relative to the | ||
# "default uncertainty" in the fit, so we need to scale by that | ||
rands = np.random.randn(NUNIV, NVAR) | ||
SAMPLES = 1 + (rands@linalg.cholesky(covariance) + fit_result)*default_uncertainty | ||
|
||
# compute the A-scaling shift in each universe and the CV | ||
A_samples = SAMPLES[:, 0] | ||
Aexp_nom = 4/3. | ||
Ar_A = 18 | ||
C_A = 12 | ||
Aexp_UNIV = Aexp_nom*A_samples | ||
|
||
A_scale_UNIV = (Ar_A/C_A)**Aexp_UNIV / ((Ar_A/C_A)**Aexp_nom) | ||
|
||
# Compute the pion scaling | ||
|
||
# hard-code pion energy bins in x-sec reweighting | ||
PION_E_BINS = [0, 0.25, 0.5, 0.75, 1, 1.5, 2, 3, 6] | ||
|
||
PION_E_SCALES = SAMPLES[:, 1:].T | ||
PION_E_SCALES[:4] *= A_scale_UNIV | ||
|
||
MEAN_SCALES = np.mean(PION_E_SCALES, axis=1) | ||
|
||
# Also add in an MA associated weight | ||
MA_nom = 1 | ||
MA_unc = 0.3 | ||
MA_UNIV = MA_nom + MA_unc*np.random.randn(NUNIV) | ||
def Faxial(Q2, M): | ||
return M**2 / (Q2 + M**2) | ||
|
||
# And an additave uncertainty on the |t| constant | ||
b_const_shift = 20 # 1/GeV^2 | ||
b_UNIV = b_const_shift*np.random.randn(NUNIV) | ||
|
||
def coht(mcdf): | ||
nuclE = mcdf.E - mcdf.p0.genE - mcdf.p1.genE | ||
nuclP = mcdf.nu.P - mcdf.p0.genp - mcdf.p1.genp | ||
t = np.abs(nuclE**2 - util.magdf(nuclP)**2) | ||
return t | ||
|
||
PION_MASS = 0.139570 | ||
def cohweight(mcdf, Q2wgt=True, bshiftwgt=True): | ||
wgts = [] | ||
cccoh = (np.abs(mcdf.pdg) == 14) & (mcdf.iscc == 1) & (mcdf.genie_mode == 3) | ||
pionKE = mcdf.p1.genE - PION_MASS | ||
|
||
piwgt = pd.cut(pionKE, PION_E_BINS, labels=MEAN_SCALES).astype(float) | ||
wgts.append(piwgt.fillna(1).rename("cv")) | ||
for i in range(NUNIV): | ||
thiswgt = pd.cut(pionKE, PION_E_BINS, labels=PION_E_SCALES[:, i]).astype(float) | ||
if Q2wgt: | ||
thiswgt *= Faxial(mcdf.Q2, MA_UNIV[i]) / Faxial(mcdf.Q2, MA_nom) | ||
if bshiftwgt: | ||
t = np.minimum(coht(mcdf), 0.1) | ||
thiswgt *= np.exp(t*b_UNIV[i]) | ||
wgts.append(thiswgt.fillna(1).rename("univ%i" % i)) | ||
|
||
wgts = pd.concat(wgts, axis=1) | ||
wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH | ||
|
||
return wgts |
79 changes: 79 additions & 0 deletions
79
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import argparse | ||
import pandas as pd | ||
import numpy as np | ||
import sys | ||
|
||
# configured | ||
DETSYST_NU = 15.426 | ||
DETSYST_DCY = 9.8711 | ||
NUMISYST = 8.6 | ||
|
||
NUNI = 100 | ||
np.random.seed(103) # It's October 3rd | ||
mass_bins = np.array([0.21, 0.225, 0.245, 0.265, 0.29, 0.32, 0.35, 0.4, 0.5]) | ||
|
||
parser = argparse.ArgumentParser( | ||
prog="add_detsyst_2spectrum.py", | ||
description="Add detector uncertainties to spectrum dataframe.", | ||
) | ||
parser.add_argument("filename") | ||
|
||
args = parser.parse_args() | ||
f = args.filename | ||
|
||
if not f.endswith("spectrum.df"): | ||
print("Must run on spectrum file!") | ||
sys.exit(1) | ||
|
||
spectrum_file = f.replace("spectrum.df", "spectrum_detsyst.df") | ||
print("Will save to: %s" % spectrum_file) | ||
|
||
DETSYST = DETSYST_DCY if "Higgs" in spectrum_file else DETSYST_NU | ||
|
||
df = pd.read_hdf(f, key="s") | ||
|
||
# Apply time-weighting | ||
if "timeweight" in df.columns: | ||
df.weight *= df.timeweight | ||
df.cvweight *= df.timeweight | ||
|
||
# Detector systematic uncertainty (normalization) | ||
detsyst_univ = 1 + np.random.normal(size=NUNI)*DETSYST/100 | ||
|
||
numisyst_univ = 1 + np.random.normal(size=NUNI)*NUMISYST/100 | ||
|
||
# Cathode crossing uncertainty | ||
cathode_univ = np.random.normal(size=NUNI) | ||
|
||
# Induction gap uncertainty | ||
gap_univ = np.random.normal(size=NUNI) | ||
|
||
# M.C. stat uncertainty | ||
mcstat_uncs = [] | ||
for mlo, mhi in zip(mass_bins[:-1], mass_bins[1:]): | ||
when = (df.mass > mlo) & (df.mass < mhi) & (df.thnumi < 5) | ||
mcstat_uncs.append(np.nan_to_num(np.sqrt((df.weight[when]**2).sum())/df.weight[when].sum())) | ||
mcstat = pd.cut(df.mass, mass_bins, labels=mcstat_uncs, ordered=False).astype(float).fillna(0) | ||
|
||
mcstat_univ = np.random.normal(size=(NUNI, len(mass_bins)-1)) | ||
|
||
for i in range(NUNI): | ||
# multiply the normalization, cathode-cross, and induction-gap-cross uncertainties | ||
cathode_err_wgt = np.maximum(1 + cathode_univ[i]*df.cathode_weight_err, 0) | ||
gap_err_wgt = np.maximum(1 + gap_univ[i]*df.gap_weight_err*df.gap_weight_err, 0) | ||
df["wgt_det_univ_%i" % i] = np.maximum(detsyst_univ[i]*cathode_err_wgt*gap_err_wgt, 0) | ||
df["wgt_all_univ_%i" % i] *= df["wgt_det_univ_%i" % i] | ||
|
||
mcstat_univ_var = pd.cut(df.mass, mass_bins, labels=mcstat_univ[i], ordered=False).astype(float).fillna(0) | ||
mcstat_univ_var[df.thnumi > 5] = 0 # assume ~0 MC stat uncertainty outside signal box | ||
df["wgt_mcstat_univ_%i" % i] = np.maximum(1 + mcstat_univ_var * mcstat, 0) | ||
df["wgt_all_univ_%i" % i] *= df["wgt_mcstat_univ_%i" % i] | ||
|
||
# additional flux uncertainty | ||
df["wgt_flux_univ_%i" % i] *= numisyst_univ[i] | ||
df["wgt_all_univ_%i" % i] *= numisyst_univ[i] | ||
|
||
df = df.copy() # undo fragmentation | ||
|
||
# Save | ||
df.to_hdf(spectrum_file, key="s") |
6 changes: 6 additions & 0 deletions
6
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
PROCESSDIR=/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra | ||
python add_detsyst_2spectrum.py $PROCESSDIR/F-Nu_loose_evt_spectrum.df | ||
for f in $PROCESSDIR/F*-Higgs_M*_loose_evt_spectrum.df | ||
do | ||
python add_detsyst_2spectrum.py $f | ||
done |
11 changes: 11 additions & 0 deletions
11
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import argparse | ||
import pandas as pd | ||
import numpy as np | ||
import sys | ||
|
||
if not len(sys.argv) > 3: | ||
print("Usage: python add_dfs.py <out>.df [<in>.df]") | ||
sys.exit(1) | ||
|
||
out = pd.concat([pd.read_hdf(f, "s") for f in sys.argv[2:]]) | ||
out.to_hdf(sys.argv[1], key="s") |
17 changes: 17 additions & 0 deletions
17
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import argparse | ||
import pandas as pd | ||
import numpy as np | ||
import sys | ||
|
||
if not len(sys.argv) > 3: | ||
print("Usage: python add_dfs_fixpot.py <out>.df [<in>.df]") | ||
sys.exit(1) | ||
|
||
outdfs = [pd.read_hdf(f, "s") for f in sys.argv[2:]] | ||
pots = [df.pot.iloc[0] for df in outdfs] | ||
totpot = sum(pots) | ||
for i in range(len(outdfs)): | ||
outdfs[i].weight *= pots[i] / totpot | ||
|
||
out = pd.concat(outdfs) | ||
out.to_hdf(sys.argv[1], key="s") |
34 changes: 34 additions & 0 deletions
34
sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Import smtplib for the actual sending function | ||
import smtplib | ||
# Import the email modules we'll need | ||
from email.mime.text import MIMEText | ||
|
||
import random | ||
import sys | ||
import shutil | ||
|
||
|
||
files = sys.argv[1:] | ||
random.shuffle(files) | ||
OUTDIR = "/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/blinded-datas/" | ||
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
me = "gray@putnam" | ||
you = "[email protected]" | ||
|
||
for i, f in enumerate(files): | ||
fname = f.split("/")[-1] | ||
src = f | ||
dst = OUTDIR + alphabet[i] + ".df" | ||
shutil.copyfile(src, dst) | ||
|
||
if fname == "thedata.df": # the data | ||
msg = MIMEText("The real data is in dataset %s." % alphabet[i]) | ||
msg['Subject'] = 'This is the blinding email. Dont tell anyone!' | ||
msg['From'] = me | ||
msg['To'] = you | ||
|
||
# Send the message via our own SMTP server, but don't include the | ||
# envelope header. | ||
s = smtplib.SMTP('localhost') | ||
s.sendmail(me, [you], msg.as_string()) | ||
s.quit() |
Oops, something went wrong.