Skip to content

Commit

Permalink
Prepare to compare AUC for BNN and linmod
Browse files Browse the repository at this point in the history
  • Loading branch information
evenmm committed Apr 27, 2023
1 parent fd9b076 commit b652ca8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 11 deletions.
28 changes: 28 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
FROM ubuntu:20.04

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/Berlin

RUN apt update \
&& apt-get install -y \
g++ \
cmake \
libatlas-base-dev \
python3 \
python3-dev \
python3-pip \
swig \
git\
libhdf5-serial-dev\
&& ln -sf /usr/bin/swig4.0 /usr/bin/swig

RUN pip3 install python-libsbml>=5.17.0 numpy

COPY container_files.tar.gz /tmp

RUN pip3 install -U --upgrade pip wheel \
&& mkdir -p /tmp/container_files/ \
&& cd /tmp/container_files \
&& tar -xzf ../container_files.tar.gz

RUN pip3 install aesara arviz pymc==5.1.1 theano numpy matplotlib scipy pandas seaborn arviz jupyter
4 changes: 2 additions & 2 deletions run_jobs.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash
# Open multiple screens, send a command to each screen

screen_names=("test0" "test1" "test2" "test3" "test4" "test5")
job_names=("module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 0\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 1\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 2\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 3\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 4\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 python simdata_BNN.py 5\n")
screen_names=("x0" "x1" "x2" "x3" "x4" "x5")
job_names=("module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 0-4 python simdata_BNN.py 0\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 5-9 python simdata_BNN.py 1\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 10-14 python simdata_BNN.py 2\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 15-19 python simdata_BNN.py 3\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 20-24 python simdata_BNN.py 4\n" "module purge\n module load GCC/11.2.0\n module load CUDA/11.3.1 \n module load OpenMPI/4.1.1-GCC-11.2.0\n module load Python/3.9.6-GCCcore-11.2.0\n nice -n 10 taskset -c 25-29 python simdata_BNN.py 5\n")
number_of_jobs=${#screen_names[@]}

for (( job=0; job < $number_of_jobs; job++ ))
Expand Down
12 changes: 8 additions & 4 deletions simdata_BNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
rng = np.random.default_rng(RANDOM_SEED)
print(f"Running on PyMC v{pm.__version__}")
#SAVEDIR = "/data/evenmm/plots/"
SAVEDIR = "./plots/Bayesian_estimates_simdata_BNN/"
#SAVEDIR = "./plots/Bayesian_estimates_simdata_BNN/"
SAVEDIR = "./"

script_index = int(sys.argv[1])

Expand Down Expand Up @@ -65,13 +66,13 @@
with neural_net_model:
if ADADELTA:
print("------------------- ADADELTA INITIALIZATION -------------------")
advi_iterations = 50_000
advi_iterations = 100_000
advi = pm.ADVI()
tracker = pm.callbacks.Tracker(
mean=advi.approx.mean.eval, # callable that returns mean
std=advi.approx.std.eval, # callable that returns std
)
approx = advi.fit(advi_iterations, obj_optimizer=pm.adadelta(), obj_n_mc=5, callbacks=[tracker])
approx = advi.fit(advi_iterations, obj_optimizer=pm.adadelta(), obj_n_mc=50, callbacks=[tracker])
#approx = advi.fit(advi_iterations, obj_optimizer=pm.adagrad(), obj_n_mc=5, callbacks=[tracker])

# Plot ELBO and trace
Expand Down Expand Up @@ -120,5 +121,8 @@
plot_all_credible_intervals(idata, patient_dictionary, patient_dictionary_test, X_test, SAVEDIR, name, y_resolution, model_name=model_name, parameter_dictionary=parameter_dictionary, PLOT_PARAMETERS=True, parameter_dictionary_test=parameter_dictionary_test, PLOT_PARAMETERS_test=True, PLOT_TREATMENTS=False, MODEL_RANDOM_EFFECTS=MODEL_RANDOM_EFFECTS, CI_with_obs_noise=CI_with_obs_noise, PLOT_RESISTANT=True)
print("Finished!")

evaluation_time = 1000 # days
# At how many days to we want to classify people into recurrence / not recurrence:
# Here we make sure that all patients have observations at that time by taking the latest time where every patient has an observation
evaluation_time = measurement_times[:3][-1] + 1
print(evaluation_time)
pfs_auc(evaluation_time, patient_dictionary_test, N_patients_test, idata, X_test, model_name, MODEL_RANDOM_EFFECTS, CI_with_obs_noise, SAVEDIR, name)
21 changes: 16 additions & 5 deletions utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,6 @@ def predict_PFS(args): # Predicts observations of M protein
return p_recurrence #predicted_PFS, point_predicted_PFS

def pfs_auc(evaluation_time, patient_dictionary_test, N_patients_test, idata, X_test, model_name, MODEL_RANDOM_EFFECTS, CI_with_obs_noise, SAVEDIR, name):
import sklearn.metrics as metrics
N_patients_test = len(patient_dictionary_test)

recurrence_or_not = np.zeros(N_patients_test)
Expand All @@ -1318,10 +1317,7 @@ def pfs_auc(evaluation_time, patient_dictionary_test, N_patients_test, idata, X_
sample_shape = idata.posterior['psi'].shape # [chain, n_samples, dim]
N_samples = sample_shape[1]
# Posterior predictive CI for test data
if N_samples <= 10:
N_rand_eff_pred = 100 # Number of random intercept samples to draw for each idata sample when we make predictions
N_rand_obs_pred = 100 # Number of observation noise samples to draw for each parameter sample
elif N_samples <= 100:
if N_samples <= 100:
N_rand_eff_pred = 10 # Number of random intercept samples to draw for each idata sample when we make predictions
N_rand_obs_pred = 100 # Number of observation noise samples to draw for each parameter sample
elif N_samples <= 1000:
Expand All @@ -1336,8 +1332,22 @@ def pfs_auc(evaluation_time, patient_dictionary_test, N_patients_test, idata, X_
p_recurrence = np.zeros(N_patients_test)
for ii, elem in enumerate(args):
p_recurrence[ii] = predict_PFS(elem)

print("recurrence_or_not", recurrence_or_not)
print("p_recurrence", p_recurrence)

picklefile = open(SAVEDIR+name+'_recurrence_or_not', 'wb')
pickle.dump(recurrence_or_not, picklefile)
picklefile.close()

picklefile = open(SAVEDIR+name+'_p_recurrence', 'wb')
pickle.dump(p_recurrence, picklefile)
picklefile.close()

"""
# Commented out only because med-biostat2 does not have sklearn
# calculate the fpr and tpr for all thresholds of the classification
import sklearn.metrics as metrics
fpr, tpr, threshold = metrics.roc_curve(recurrence_or_not, p_recurrence) #(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)
Expand All @@ -1357,6 +1367,7 @@ def pfs_auc(evaluation_time, patient_dictionary_test, N_patients_test, idata, X_
plt.savefig(SAVEDIR+"AUC_"+str(N_patients_test)+"_test_patients_"+name+".pdf")
#plt.show()
plt.close()
"""

def plot_all_credible_intervals(idata, patient_dictionary, patient_dictionary_test, X_test, SAVEDIR, name, y_resolution, model_name, parameter_dictionary, PLOT_PARAMETERS, parameter_dictionary_test, PLOT_PARAMETERS_test, PLOT_TREATMENTS, MODEL_RANDOM_EFFECTS, CI_with_obs_noise=True, PARALLELLIZE=True, PLOT_RESISTANT=True, PLOT_MEASUREMENTS_test=False):
sample_shape = idata.posterior['psi'].shape # [chain, n_samples, dim]
Expand Down

0 comments on commit b652ca8

Please sign in to comment.