Skip to content

Commit

Permalink
Python modules for applying weights. Bash scripts for generating files.
Browse files Browse the repository at this point in the history
  • Loading branch information
gputnam committed Mar 15, 2024
1 parent ff5bde1 commit 4247ca8
Show file tree
Hide file tree
Showing 15 changed files with 1,272 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import argparse
import pandas as pd
import numpy as np
import sys

FNAL_COHLIKE_FILES = [
"/icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df",
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-E_evt.df",
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-F_evt.df",
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-G_evt.df",
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-H_evt.df",
]

POLARIS_COHLIKE_FILES = [
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod2_evt.df",
"/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod3_evt.df",
]

OUTPUT = "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-all_nowgt_evt.df"

# Neutrinos per POT, extracted from one of the coh-like files
NU_PER_POT_BASE = 1.0448039251186124e-16

NAMES = ["mch", "evt", "hdr", "mcnu"]
with pd.HDFStore(OUTPUT) as hdf:
for n in NAMES:
df = None
offset = 0
for f in FNAL_COHLIKE_FILES:
thisdf = pd.read_hdf(f, n)
if thisdf.shape[0] > 0:
thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0)
if df is None:
df = thisdf
else:
df = pd.concat([df, thisdf])
print(f, n)
offset += 1000000

for f in POLARIS_COHLIKE_FILES:
thisdf = pd.read_hdf(f, n)
if n == "hdr": # fix the POT for the polaris files
thisdf.pot = 1 / NU_PER_POT_BASE
if thisdf.shape[0] > 0:
thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0)
df = pd.concat([df, thisdf])
del thisdf
print(f, n)
offset += 1000000

hdf.put(key=n, value=df, format="fixed")
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import uproot
import numpy as np
import pandas as pd
from scipy import linalg
import util

np.random.seed(525600) # moments so dear
NUNIV = 1000

# Fit configuration file
fname = "/icarus/data/users/gputnam/AScale0_fix.min.root"
f = uproot.open(fname)

# Load the configuration
covariance = f["covariance"].to_numpy()[0]
fit_result = f["fit_dials"].to_numpy()[0]
NVAR = fit_result.size

# Hard code "default" uncertainty
default_uncertainty = np.array([0.5, 0.9, 0.65, 0.5, 0.5, 0.5, 0.5, 1, 1])

# error on A-scaling is set separately
Aerr = 0.91393

# add in error on A to covariance
covariance = covariance + np.diag(np.concatenate(([Aerr], np.zeros((NVAR-1,)))))

# Generate the random throws for each parameter value

# The GENIE reweight convention is that the values and error are reported relative to the
# "default uncertainty" in the fit, so we need to scale by that
rands = np.random.randn(NUNIV, NVAR)
SAMPLES = 1 + (rands@linalg.cholesky(covariance) + fit_result)*default_uncertainty

# compute the A-scaling shift in each universe and the CV
A_samples = SAMPLES[:, 0]
Aexp_nom = 4/3.
Ar_A = 18
C_A = 12
Aexp_UNIV = Aexp_nom*A_samples

A_scale_UNIV = (Ar_A/C_A)**Aexp_UNIV / ((Ar_A/C_A)**Aexp_nom)

# Compute the pion scaling

# hard-code pion energy bins in x-sec reweighting
PION_E_BINS = [0, 0.25, 0.5, 0.75, 1, 1.5, 2, 3, 6]

PION_E_SCALES = SAMPLES[:, 1:].T
PION_E_SCALES[:4] *= A_scale_UNIV

MEAN_SCALES = np.mean(PION_E_SCALES, axis=1)

# Also add in an MA associated weight
MA_nom = 1
MA_unc = 0.3
MA_UNIV = MA_nom + MA_unc*np.random.randn(NUNIV)
def Faxial(Q2, M):
return M**2 / (Q2 + M**2)

# And an additave uncertainty on the |t| constant
b_const_shift = 20 # 1/GeV^2
b_UNIV = b_const_shift*np.random.randn(NUNIV)

def coht(mcdf):
nuclE = mcdf.E - mcdf.p0.genE - mcdf.p1.genE
nuclP = mcdf.nu.P - mcdf.p0.genp - mcdf.p1.genp
t = np.abs(nuclE**2 - util.magdf(nuclP)**2)
return t

PION_MASS = 0.139570
def cohweight(mcdf, Q2wgt=True, bshiftwgt=True):
wgts = []
cccoh = (np.abs(mcdf.pdg) == 14) & (mcdf.iscc == 1) & (mcdf.genie_mode == 3)
pionKE = mcdf.p1.genE - PION_MASS

piwgt = pd.cut(pionKE, PION_E_BINS, labels=MEAN_SCALES).astype(float)
wgts.append(piwgt.fillna(1).rename("cv"))
for i in range(NUNIV):
thiswgt = pd.cut(pionKE, PION_E_BINS, labels=PION_E_SCALES[:, i]).astype(float)
if Q2wgt:
thiswgt *= Faxial(mcdf.Q2, MA_UNIV[i]) / Faxial(mcdf.Q2, MA_nom)
if bshiftwgt:
t = np.minimum(coht(mcdf), 0.1)
thiswgt *= np.exp(t*b_UNIV[i])
wgts.append(thiswgt.fillna(1).rename("univ%i" % i))

wgts = pd.concat(wgts, axis=1)
wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH

return wgts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import argparse
import pandas as pd
import numpy as np
import sys

# configured
DETSYST_NU = 15.426
DETSYST_DCY = 9.8711
NUMISYST = 8.6

NUNI = 100
np.random.seed(103) # It's October 3rd
mass_bins = np.array([0.21, 0.225, 0.245, 0.265, 0.29, 0.32, 0.35, 0.4, 0.5])

parser = argparse.ArgumentParser(
prog="add_detsyst_2spectrum.py",
description="Add detector uncertainties to spectrum dataframe.",
)
parser.add_argument("filename")

args = parser.parse_args()
f = args.filename

if not f.endswith("spectrum.df"):
print("Must run on spectrum file!")
sys.exit(1)

spectrum_file = f.replace("spectrum.df", "spectrum_detsyst.df")
print("Will save to: %s" % spectrum_file)

DETSYST = DETSYST_DCY if "Higgs" in spectrum_file else DETSYST_NU

df = pd.read_hdf(f, key="s")

# Apply time-weighting
if "timeweight" in df.columns:
df.weight *= df.timeweight
df.cvweight *= df.timeweight

# Detector systematic uncertainty (normalization)
detsyst_univ = 1 + np.random.normal(size=NUNI)*DETSYST/100

numisyst_univ = 1 + np.random.normal(size=NUNI)*NUMISYST/100

# Cathode crossing uncertainty
cathode_univ = np.random.normal(size=NUNI)

# Induction gap uncertainty
gap_univ = np.random.normal(size=NUNI)

# M.C. stat uncertainty
mcstat_uncs = []
for mlo, mhi in zip(mass_bins[:-1], mass_bins[1:]):
when = (df.mass > mlo) & (df.mass < mhi) & (df.thnumi < 5)
mcstat_uncs.append(np.nan_to_num(np.sqrt((df.weight[when]**2).sum())/df.weight[when].sum()))
mcstat = pd.cut(df.mass, mass_bins, labels=mcstat_uncs, ordered=False).astype(float).fillna(0)

mcstat_univ = np.random.normal(size=(NUNI, len(mass_bins)-1))

for i in range(NUNI):
# multiply the normalization, cathode-cross, and induction-gap-cross uncertainties
cathode_err_wgt = np.maximum(1 + cathode_univ[i]*df.cathode_weight_err, 0)
gap_err_wgt = np.maximum(1 + gap_univ[i]*df.gap_weight_err*df.gap_weight_err, 0)
df["wgt_det_univ_%i" % i] = np.maximum(detsyst_univ[i]*cathode_err_wgt*gap_err_wgt, 0)
df["wgt_all_univ_%i" % i] *= df["wgt_det_univ_%i" % i]

mcstat_univ_var = pd.cut(df.mass, mass_bins, labels=mcstat_univ[i], ordered=False).astype(float).fillna(0)
mcstat_univ_var[df.thnumi > 5] = 0 # assume ~0 MC stat uncertainty outside signal box
df["wgt_mcstat_univ_%i" % i] = np.maximum(1 + mcstat_univ_var * mcstat, 0)
df["wgt_all_univ_%i" % i] *= df["wgt_mcstat_univ_%i" % i]

# additional flux uncertainty
df["wgt_flux_univ_%i" % i] *= numisyst_univ[i]
df["wgt_all_univ_%i" % i] *= numisyst_univ[i]

df = df.copy() # undo fragmentation

# Save
df.to_hdf(spectrum_file, key="s")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
PROCESSDIR=/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra
python add_detsyst_2spectrum.py $PROCESSDIR/F-Nu_loose_evt_spectrum.df
for f in $PROCESSDIR/F*-Higgs_M*_loose_evt_spectrum.df
do
python add_detsyst_2spectrum.py $f
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import argparse
import pandas as pd
import numpy as np
import sys

if not len(sys.argv) > 3:
print("Usage: python add_dfs.py <out>.df [<in>.df]")
sys.exit(1)

out = pd.concat([pd.read_hdf(f, "s") for f in sys.argv[2:]])
out.to_hdf(sys.argv[1], key="s")
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse
import pandas as pd
import numpy as np
import sys

if not len(sys.argv) > 3:
print("Usage: python add_dfs_fixpot.py <out>.df [<in>.df]")
sys.exit(1)

outdfs = [pd.read_hdf(f, "s") for f in sys.argv[2:]]
pots = [df.pot.iloc[0] for df in outdfs]
totpot = sum(pots)
for i in range(len(outdfs)):
outdfs[i].weight *= pots[i] / totpot

out = pd.concat(outdfs)
out.to_hdf(sys.argv[1], key="s")
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Import smtplib for the actual sending function
import smtplib
# Import the email modules we'll need
from email.mime.text import MIMEText

import random
import sys
import shutil


files = sys.argv[1:]
random.shuffle(files)
OUTDIR = "/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/blinded-datas/"
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
me = "gray@putnam"
you = "[email protected]"

for i, f in enumerate(files):
fname = f.split("/")[-1]
src = f
dst = OUTDIR + alphabet[i] + ".df"
shutil.copyfile(src, dst)

if fname == "thedata.df": # the data
msg = MIMEText("The real data is in dataset %s." % alphabet[i])
msg['Subject'] = 'This is the blinding email. Dont tell anyone!'
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server, but don't include the
# envelope header.
s = smtplib.SMTP('localhost')
s.sendmail(me, [you], msg.as_string())
s.quit()
Loading

0 comments on commit 4247ca8

Please sign in to comment.