Python modules for applying weights. Bash scripts for generating files.

SBNSoftware · Mar 15, 2024 · 4247ca8 · 4247ca8
1 parent ff5bde1
commit 4247ca8
Show file tree

Hide file tree

Showing 15 changed files with 1,272 additions and 0 deletions.
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/combine_cohlike.py
@@ -0,0 +1,51 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+FNAL_COHLIKE_FILES = [
+  "/icarus/data/users/gputnam/DMCP2023G/mc-F/F-CohLike_nom_evt.df",
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-E_evt.df",
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-F_evt.df",
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-G_evt.df",
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-H_evt.df",
+]
+
+POLARIS_COHLIKE_FILES = [
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod2_evt.df",
+  "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-polaris-prod3_evt.df",
+]
+
+OUTPUT = "/icarus/data/users/gputnam/DMCP2023G/mc-FB/FB-CohLike_nom-all_nowgt_evt.df"
+
+# Neutrinos per POT, extracted from one of the coh-like files
+NU_PER_POT_BASE = 1.0448039251186124e-16
+
+NAMES = ["mch", "evt", "hdr", "mcnu"]
+with pd.HDFStore(OUTPUT) as hdf:
+    for n in NAMES:
+        df = None
+        offset = 0
+        for f in FNAL_COHLIKE_FILES: 
+            thisdf = pd.read_hdf(f, n)
+            if thisdf.shape[0] > 0:
+                thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0)
+            if df is None:
+                df = thisdf
+            else:
+                df = pd.concat([df, thisdf])
+            print(f, n)
+            offset += 1000000
+
+        for f in POLARIS_COHLIKE_FILES: 
+            thisdf = pd.read_hdf(f, n)
+            if n == "hdr": # fix the POT for the polaris files
+                thisdf.pot = 1 / NU_PER_POT_BASE
+            if thisdf.shape[0] > 0:
+                thisdf.index = thisdf.index.set_levels(thisdf.index.levels[0] + offset, level=0)
+            df = pd.concat([df, thisdf])
+            del thisdf
+            print(f, n)
+            offset += 1000000
+
+        hdf.put(key=n, value=df, format="fixed")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/makedf/reweight_coh.py
@@ -0,0 +1,91 @@
+import uproot
+import numpy as np
+import pandas as pd
+from scipy import linalg
+import util
+
+np.random.seed(525600) # moments so dear
+NUNIV = 1000
+
+# Fit configuration file
+fname = "/icarus/data/users/gputnam/AScale0_fix.min.root"
+f = uproot.open(fname)
+
+# Load the configuration
+covariance = f["covariance"].to_numpy()[0]
+fit_result = f["fit_dials"].to_numpy()[0]
+NVAR = fit_result.size
+
+# Hard code "default" uncertainty
+default_uncertainty = np.array([0.5, 0.9, 0.65, 0.5, 0.5, 0.5, 0.5, 1, 1])
+
+# error on A-scaling is set separately
+Aerr = 0.91393
+
+# add in error on A to covariance
+covariance = covariance + np.diag(np.concatenate(([Aerr], np.zeros((NVAR-1,)))))
+
+# Generate the random throws for each parameter value
+
+# The GENIE reweight convention is that the values and error are reported relative to the
+# "default uncertainty" in the fit, so we need to scale by that
+rands = np.random.randn(NUNIV, NVAR)
+SAMPLES = 1 + (rands@linalg.cholesky(covariance) + fit_result)*default_uncertainty
+
+# compute the A-scaling shift in each universe and the CV
+A_samples = SAMPLES[:, 0]
+Aexp_nom = 4/3.
+Ar_A = 18
+C_A = 12
+Aexp_UNIV = Aexp_nom*A_samples
+
+A_scale_UNIV = (Ar_A/C_A)**Aexp_UNIV / ((Ar_A/C_A)**Aexp_nom)
+
+# Compute the pion scaling
+
+# hard-code pion energy bins in x-sec reweighting
+PION_E_BINS = [0, 0.25, 0.5, 0.75, 1, 1.5, 2, 3, 6]
+
+PION_E_SCALES = SAMPLES[:, 1:].T 
+PION_E_SCALES[:4] *= A_scale_UNIV
+
+MEAN_SCALES = np.mean(PION_E_SCALES, axis=1)
+
+# Also add in an MA associated weight
+MA_nom = 1
+MA_unc = 0.3
+MA_UNIV = MA_nom + MA_unc*np.random.randn(NUNIV)
+def Faxial(Q2, M):
+    return M**2 / (Q2 + M**2)
+
+# And an additave uncertainty on the |t| constant
+b_const_shift = 20 # 1/GeV^2
+b_UNIV = b_const_shift*np.random.randn(NUNIV)
+
+def coht(mcdf):
+    nuclE = mcdf.E - mcdf.p0.genE - mcdf.p1.genE
+    nuclP = mcdf.nu.P - mcdf.p0.genp - mcdf.p1.genp
+    t = np.abs(nuclE**2 - util.magdf(nuclP)**2)
+    return t
+
+PION_MASS = 0.139570
+def cohweight(mcdf, Q2wgt=True, bshiftwgt=True):
+    wgts = []
+    cccoh = (np.abs(mcdf.pdg) == 14) & (mcdf.iscc == 1) & (mcdf.genie_mode == 3)
+    pionKE = mcdf.p1.genE - PION_MASS
+
+    piwgt = pd.cut(pionKE, PION_E_BINS, labels=MEAN_SCALES).astype(float)
+    wgts.append(piwgt.fillna(1).rename("cv"))
+    for i in range(NUNIV):
+        thiswgt = pd.cut(pionKE, PION_E_BINS, labels=PION_E_SCALES[:, i]).astype(float)
+        if Q2wgt:
+            thiswgt *= Faxial(mcdf.Q2, MA_UNIV[i]) / Faxial(mcdf.Q2, MA_nom) 
+        if bshiftwgt:
+            t = np.minimum(coht(mcdf), 0.1)
+            thiswgt *= np.exp(t*b_UNIV[i])
+        wgts.append(thiswgt.fillna(1).rename("univ%i" % i))
+
+    wgts = pd.concat(wgts, axis=1)
+    wgts.loc[~cccoh, :] = 1 # don't reweight non-CC-COH
+
+    return wgts
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_2spectrum.py
@@ -0,0 +1,79 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+# configured
+DETSYST_NU = 15.426
+DETSYST_DCY = 9.8711
+NUMISYST = 8.6
+
+NUNI = 100
+np.random.seed(103) # It's October 3rd
+mass_bins = np.array([0.21, 0.225, 0.245, 0.265, 0.29, 0.32, 0.35, 0.4, 0.5])
+
+parser = argparse.ArgumentParser(
+    prog="add_detsyst_2spectrum.py",
+    description="Add detector uncertainties to spectrum dataframe.",
+)
+parser.add_argument("filename")
+
+args = parser.parse_args()
+f = args.filename
+
+if not f.endswith("spectrum.df"):
+    print("Must run on spectrum file!")
+    sys.exit(1)
+
+spectrum_file = f.replace("spectrum.df", "spectrum_detsyst.df")
+print("Will save to: %s" % spectrum_file)
+
+DETSYST = DETSYST_DCY if "Higgs" in spectrum_file else DETSYST_NU
+
+df = pd.read_hdf(f, key="s")
+
+# Apply time-weighting
+if "timeweight" in df.columns:
+    df.weight *= df.timeweight
+    df.cvweight *= df.timeweight
+
+# Detector systematic uncertainty (normalization)
+detsyst_univ = 1 + np.random.normal(size=NUNI)*DETSYST/100
+
+numisyst_univ = 1 + np.random.normal(size=NUNI)*NUMISYST/100
+
+# Cathode crossing uncertainty
+cathode_univ = np.random.normal(size=NUNI)
+
+# Induction gap uncertainty
+gap_univ = np.random.normal(size=NUNI)
+
+# M.C. stat uncertainty
+mcstat_uncs = []
+for mlo, mhi in zip(mass_bins[:-1], mass_bins[1:]):
+    when = (df.mass > mlo) & (df.mass < mhi) & (df.thnumi < 5)
+    mcstat_uncs.append(np.nan_to_num(np.sqrt((df.weight[when]**2).sum())/df.weight[when].sum()))
+mcstat = pd.cut(df.mass, mass_bins, labels=mcstat_uncs, ordered=False).astype(float).fillna(0)
+
+mcstat_univ = np.random.normal(size=(NUNI, len(mass_bins)-1)) 
+
+for i in range(NUNI):
+    # multiply the normalization, cathode-cross, and induction-gap-cross uncertainties
+    cathode_err_wgt = np.maximum(1 + cathode_univ[i]*df.cathode_weight_err, 0)
+    gap_err_wgt = np.maximum(1 + gap_univ[i]*df.gap_weight_err*df.gap_weight_err, 0)
+    df["wgt_det_univ_%i" % i] = np.maximum(detsyst_univ[i]*cathode_err_wgt*gap_err_wgt, 0)
+    df["wgt_all_univ_%i" % i] *= df["wgt_det_univ_%i" % i]
+
+    mcstat_univ_var = pd.cut(df.mass, mass_bins, labels=mcstat_univ[i], ordered=False).astype(float).fillna(0)
+    mcstat_univ_var[df.thnumi > 5] = 0 # assume ~0 MC stat uncertainty outside signal box 
+    df["wgt_mcstat_univ_%i" % i] = np.maximum(1 + mcstat_univ_var * mcstat, 0)
+    df["wgt_all_univ_%i" % i] *= df["wgt_mcstat_univ_%i" % i]
+
+    # additional flux uncertainty
+    df["wgt_flux_univ_%i" % i] *= numisyst_univ[i]
+    df["wgt_all_univ_%i" % i] *= numisyst_univ[i]
+
+    df = df.copy() # undo fragmentation
+
+# Save
+df.to_hdf(spectrum_file, key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_detsyst_all.sh
@@ -0,0 +1,6 @@
+PROCESSDIR=/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra
+python add_detsyst_2spectrum.py $PROCESSDIR/F-Nu_loose_evt_spectrum.df
+for f in $PROCESSDIR/F*-Higgs_M*_loose_evt_spectrum.df
+do
+  python add_detsyst_2spectrum.py $f
+done
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs.py
@@ -0,0 +1,11 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+if not len(sys.argv) > 3:
+    print("Usage: python add_dfs.py <out>.df [<in>.df]")
+    sys.exit(1)
+
+out = pd.concat([pd.read_hdf(f, "s") for f in sys.argv[2:]])
+out.to_hdf(sys.argv[1], key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/add_dfs_fixpot.py
@@ -0,0 +1,17 @@
+import argparse
+import pandas as pd
+import numpy as np
+import sys
+
+if not len(sys.argv) > 3:
+    print("Usage: python add_dfs_fixpot.py <out>.df [<in>.df]")
+    sys.exit(1)
+
+outdfs = [pd.read_hdf(f, "s") for f in sys.argv[2:]]
+pots = [df.pot.iloc[0] for df in outdfs]
+totpot = sum(pots)
+for i in range(len(outdfs)):
+    outdfs[i].weight *= pots[i] / totpot
+
+out = pd.concat(outdfs)
+out.to_hdf(sys.argv[1], key="s")
diff --git a/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py b/sbnana/SBNAna/icarus-analysis-villiage/pyana/dimuon-tools/nb/blindemail.py
@@ -0,0 +1,34 @@
+# Import smtplib for the actual sending function
+import smtplib
+# Import the email modules we'll need
+from email.mime.text import MIMEText
+
+import random
+import sys
+import shutil
+
+
+files = sys.argv[1:]
+random.shuffle(files)
+OUTDIR = "/icarus/data/users/gputnam/DMCP2023G/mc-F-spectra/blinded-datas/"
+alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+me = "gray@putnam"
+you = "[email protected]"
+
+for i, f in enumerate(files):
+    fname = f.split("/")[-1]
+    src = f
+    dst = OUTDIR + alphabet[i] + ".df"
+    shutil.copyfile(src, dst)
+
+    if fname == "thedata.df": # the data
+        msg = MIMEText("The real data is in dataset %s." % alphabet[i])
+        msg['Subject'] = 'This is the blinding email. Dont tell anyone!'
+        msg['From'] = me
+        msg['To'] = you
+
+        # Send the message via our own SMTP server, but don't include the
+        # envelope header.
+        s = smtplib.SMTP('localhost')
+        s.sendmail(me, [you], msg.as_string())
+        s.quit()