from utilities import *
import numpy as np
import pandas as pd
import arviz as az
import pymc as pm

# Initialize random number generator
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)
##############################
# Function argument shapes:
# X is an (N_patients, P) shaped pandas dataframe
# patient dictionary contains N_patients patients in the same order as X
def linear_model(X, patient_dictionary, name, N_samples=3000, N_tuning=3000,
                 target_accept=0.99, max_treedepth=10, psi_prior="lognormal",
                 FUNNEL_REPARAMETRIZATION=False, method="HMC", scaling=False,
                 t_max_prior_scale=1, likelihood_model="original",
                 model_rho_r_dependancy_on_rho_s=True):
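    """Build a hierarchical Bayesian model of M-protein trajectories in PyMC.

    Patient-specific growth parameters (rho_s, rho_r, pi_r) get linear predictors
    in X with regularized horseshoe shrinkage on the coefficients. Returns the
    constructed (unsampled) pm.Model; the sampler settings in the signature are
    not used inside this function and are left to the caller.
    """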
    assert not X.isna().any().any()
    assert not np.isinf(X).any().any()
    # Flatten the per-patient observations into one long-format dataframe
    rows = []
    for ii in range(len(patient_dictionary)):
        patient = patient_dictionary[ii]
        mprot = patient.Mprotein_values
        times = patient.measurement_times
        for jj in range(len(mprot)):
            rows.append({"patient_id": ii, "mprotein_value": mprot[jj], "time": times[jj]})
    df = pd.DataFrame(rows, columns=["patient_id", "mprotein_value", "time"])
    group_id = df["patient_id"].tolist()
    assert not np.isnan(group_id).any()
    assert not np.isinf(group_id).any()
    Y_flat_no_nans = np.array(df["mprotein_value"].tolist())
    assert min(Y_flat_no_nans) >= 0
    assert not np.isnan(Y_flat_no_nans).any()
    assert not np.isinf(Y_flat_no_nans).any()
    t_flat_no_nans = np.array(df["time"].tolist())
    #print(max(t_flat_no_nans)) # 1373 with IKEMA
    assert min(t_flat_no_nans) >= 0
    assert not np.isnan(t_flat_no_nans).any()
    assert not np.isinf(t_flat_no_nans).any()
    N_patients, P = X.shape
    N_patients = int(N_patients)
    P0 = min(10, max(int(P / 2), 1)) # A guess of the true number of nonzero parameters, needed to define the global shrinkage parameter
    assert P0 > 0
    assert P0 < P # The global shrinkage scale below divides by (P - P0)
    assert P > 0
    assert N_patients > 0
    X_not_transformed = X.copy()
    X = X.T # (P, N_patients), so that pm.math.dot(beta, X) yields one value per patient
    yi0 = np.zeros(N_patients)
    for ii in range(N_patients):
        yi0[ii] = max(patient_dictionary[ii].Mprotein_values[0], 1e-5)
    assert min(yi0) > 0 # Strictly greater than zero required because we log transform it for the log prior of psi
    assert not np.isnan(yi0).any()
    assert not np.isinf(yi0).any()
    if psi_prior not in ["lognormal", "normal"]:
        print("Unknown prior option specified for psi; using 'lognormal' prior")
        psi_prior = "lognormal"
    with pm.Model(coords={"predictors": X_not_transformed.columns.values}) as linear_model:
        # Observation noise (std)
        sigma_obs = pm.HalfNormal("sigma_obs", sigma=1)
        # alpha: intercepts of theta_rho_s and theta_rho_r (log scale) and theta_pi_r (logit scale)
        alpha = pm.Normal("alpha", mu=np.array([np.log(t_max_prior_scale*0.015), np.log(t_max_prior_scale*0.005), np.log(0.01/(1-0.01))]), sigma=1, shape=3)
        # beta (with horseshoe priors):
        # Global shrinkage prior
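        # (tau0 = P0/(P-P0) * sigma/sqrt(N) is the global scale recommended for the
        # regularized horseshoe in Piironen & Vehtari, 2017)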
        tau_rho_r = pm.HalfStudentT("tau_rho_r", nu=2, sigma=P0/(P-P0)*sigma_obs/np.sqrt(N_patients))
        tau_pi_r = pm.HalfStudentT("tau_pi_r", nu=2, sigma=P0/(P-P0)*sigma_obs/np.sqrt(N_patients))
        # Local shrinkage prior
        lam_rho_r = pm.HalfStudentT("lam_rho_r", nu=2, sigma=1, dims="predictors")
        lam_pi_r = pm.HalfStudentT("lam_pi_r", nu=2, sigma=1, dims="predictors")
        c2_rho_r = pm.InverseGamma("c2_rho_r", alpha=1, beta=0.1)
        c2_pi_r = pm.InverseGamma("c2_pi_r", alpha=1, beta=0.1)
        z_rho_r = pm.Normal("z_rho_r", mu=0.0, sigma=1.0, dims="predictors")
        z_pi_r = pm.Normal("z_pi_r", mu=0.0, sigma=1.0, dims="predictors")
        # Shrunken coefficients
        beta_rho_r = pm.Deterministic("beta_rho_r", z_rho_r * tau_rho_r * lam_rho_r * np.sqrt(c2_rho_r / (c2_rho_r + tau_rho_r**2 * lam_rho_r**2)), dims="predictors")
        beta_pi_r = pm.Deterministic("beta_pi_r", z_pi_r * tau_pi_r * lam_pi_r * np.sqrt(c2_pi_r / (c2_pi_r + tau_pi_r**2 * lam_pi_r**2)), dims="predictors")
        # Latent variables theta
        omega = pm.HalfNormal("omega", sigma=1, shape=3) # Patient variability in theta (std)
        if FUNNEL_REPARAMETRIZATION:
            # Reparametrized (non-centered) to escape/explore the funnel of Hell (https://twiecki.io/blog/2017/02/08/bayesian-hierchical-non-centered/).
            # Note: the rho_r dependency on rho_s (coef_rho_s) is only modelled in the centered branch below.
            theta_rho_s_offset = pm.Normal("theta_rho_s_offset", mu=0, sigma=1, shape=N_patients)
            theta_rho_r_offset = pm.Normal("theta_rho_r_offset", mu=0, sigma=1, shape=N_patients)
            theta_pi_r_offset = pm.Normal("theta_pi_r_offset", mu=0, sigma=1, shape=N_patients)
            theta_rho_s = pm.Deterministic("theta_rho_s", alpha[0] + theta_rho_s_offset * omega[0])
            theta_rho_r = pm.Deterministic("theta_rho_r", alpha[1] + pm.math.dot(beta_rho_r, X) + theta_rho_r_offset * omega[1])
            theta_pi_r = pm.Deterministic("theta_pi_r", alpha[2] + pm.math.dot(beta_pi_r, X) + theta_pi_r_offset * omega[2])
        else:
            # Original (centered) parametrization
            theta_rho_s = pm.Normal("theta_rho_s", mu=alpha[0], sigma=omega[0], shape=N_patients) # Individual random intercepts in theta, absorbing patient-level variation not explained by X
            if model_rho_r_dependancy_on_rho_s:
                ## Horseshoe-like prior (alternative, unused):
                #lam_rho_s = pm.HalfStudentT("lam_rho_s", nu=2, sigma=1)
                #z_rho_s = pm.Normal("z_rho_s", mu=0.0, sigma=1.0)
                #coef_rho_s = pm.Deterministic("coef_rho_s", z_rho_s * lam_rho_s)
                # Lasso (Laplace) prior on the rho_s -> rho_r coefficient
                lam = 9
                sig = pm.math.sqrt(pm.InverseGamma(name="sig", alpha=3, beta=1))
                coef_rho_s = pm.Laplace("coef_rho_s", mu=0, b=sig/lam)
                theta_rho_r = pm.Normal("theta_rho_r", mu=alpha[1] + pm.math.dot(beta_rho_r, X) + coef_rho_s * theta_rho_s, sigma=omega[1]) # Individual random intercepts in theta
            else:
                theta_rho_r = pm.Normal("theta_rho_r", mu=alpha[1] + pm.math.dot(beta_rho_r, X), sigma=omega[1]) # Individual random intercepts in theta
            theta_pi_r = pm.Normal("theta_pi_r", mu=alpha[2] + pm.math.dot(beta_pi_r, X), sigma=omega[2]) # Individual random intercepts in theta
        # psi: True M protein at time 0
        # 1) Normal. Fast convergence, but the tail may go negative
        if psi_prior=="normal":
            psi = pm.Normal("psi", mu=yi0, sigma=sigma_obs, shape=N_patients) # Informative: centered on the patient-specific yi0 with std equal to the observation noise sigma_obs
        # 2) Lognormal. Works if you give it time to converge
        elif psi_prior=="lognormal":
            xi = pm.HalfNormal("xi", sigma=1) # Spread of each patient's log psi around log(yi0)
            log_psi = pm.Normal("log_psi", mu=np.log(yi0), sigma=xi, shape=N_patients)
            psi = pm.Deterministic("psi", np.exp(log_psi))
        # Transformed latent variables
        rho_s = pm.Deterministic("rho_s", -np.exp(theta_rho_s)) # decay rate of the sensitive population (always negative)
        rho_r = pm.Deterministic("rho_r", np.exp(theta_rho_r)) # growth rate of the resistant population (always positive)
        pi_r = pm.Deterministic("pi_r", 1/(1+np.exp(-theta_pi_r))) # resistant fraction at time 0, squashed into (0, 1)
        if likelihood_model == "Kevins_first":
            print("Likelihood model in linear_model is: Kevin's model no. 1: Logistic growth in resistant cells; limited by total cell count")
            a_param = 1
            K_param_vector = a_param * psi[group_id]
            # b_param_vector = (a_param - pi_r[group_id]) / (1 - pi_r[group_id]), which equals 1 when a_param = 1
            R_zero = psi[group_id] * pi_r[group_id]
            rho_r_grouped = rho_r[group_id]
            psi_grouped = psi[group_id]
            pi_r_grouped = pi_r[group_id]
            rho_s_grouped = rho_s[group_id]
            # Numerator
            integral_of_S_from_0_to_t = psi[group_id] * (1 - pi_r[group_id])/rho_s[group_id] * (np.exp(rho_s[group_id] * t_flat_no_nans) - 1)
            resistant_part_upper = R_zero * np.exp(rho_r[group_id] * (K_param_vector * t_flat_no_nans - 1 * integral_of_S_from_0_to_t))
            # Denominator: the inner integral has no closed form, and scipy.integrate.quad
            # cannot operate on symbolic PyMC variables, so it is approximated with a
            # trapezoidal rule evaluated symbolically on a fixed grid.
            def exp_Kw_minus_rho_r_b_integral_S_dy_dw(w, rho_r_element, psi_element, pi_r_element, rho_s_element, K_param_vector_element, b_param_vector_element):
                return np.exp(K_param_vector_element * w - rho_r_element * b_param_vector_element * psi_element * (1 - pi_r_element)/rho_s_element * (np.exp(rho_s_element * w) - 1))
            def trapzf(t_index, n=20):
                # Trapezoidal rule on n equally spaced points in [0, t]
                x = np.linspace(0, t_flat_no_nans[t_index], n)
                y = exp_Kw_minus_rho_r_b_integral_S_dy_dw(x, rho_r_grouped[t_index], psi_grouped[t_index], pi_r_grouped[t_index], rho_s_grouped[t_index], K_param_vector[t_index], 1)
                h = t_flat_no_nans[t_index]/(n-1)
                return (h/2)*(y[1:]+y[:-1]).sum()
            integral_of_exp_Kw_minus_rho_r_b_integral_S_dy_dw_from_0_to_tt = pm.math.stack([trapzf(t_index) for t_index, _ in enumerate(t_flat_no_nans)])
            resistant_part_lower = 1 + R_zero * rho_r[group_id] * integral_of_exp_Kw_minus_rho_r_b_integral_S_dy_dw_from_0_to_tt
            resistant_part = resistant_part_upper / resistant_part_lower
            sensitive_part = np.exp(rho_s[group_id] * t_flat_no_nans)
            mu_Y = psi[group_id] * (pi_r[group_id]*resistant_part + (1-pi_r[group_id])*sensitive_part)
        elif likelihood_model == "Kevins_second":
            print("Likelihood model in linear_model is: Kevin's model no. 2: Logistic growth in resistant cells; only limited by sensitive cell count")
            # Kevin's second option, with S_zero = psi[group_id] * (1 - pi_r[group_id]) and a_param fixed to 1:
            #   resistant_part = exp(rho_r * S_zero * (a_param * t + (1 - exp(rho_s * t)) / rho_s))
            # The fraction (1 - exp(rho_s*t))/rho_s needs switches because rho_s is a negative number:
            # if rho_s is only tiny negative (>= -1e-10), fall back to t_flat_no_nans (the original model);
            # if rho_s is hugely negative (< -1e5), evaluate the fraction at -1e5 for numerical stability.
            fraction_with_rho_s_1 = pm.math.switch(rho_s[group_id] < -1e-10, (1 * t_flat_no_nans + (1 - np.exp(rho_s[group_id]*t_flat_no_nans))/rho_s[group_id]), t_flat_no_nans)
            fraction_with_rho_s = pm.math.switch(rho_s[group_id] < -1e5, (1 * t_flat_no_nans + (1 - np.exp(-1e5*t_flat_no_nans))/(-1e5)), fraction_with_rho_s_1)
            mu_Y = psi[group_id] * (pi_r[group_id]*np.exp(rho_r[group_id] * psi[group_id] * (1-pi_r[group_id]) * fraction_with_rho_s) + (1-pi_r[group_id])*np.exp(rho_s[group_id] * t_flat_no_nans))
        else:
            print("Likelihood model in linear_model is: Original model without any growth limit or interaction between subpopulations")
            # Original:
            mu_Y = psi[group_id] * (pi_r[group_id]*np.exp(rho_r[group_id]*t_flat_no_nans) + (1-pi_r[group_id])*np.exp(rho_s[group_id]*t_flat_no_nans))
        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Normal("Y_obs", mu=mu_Y, sigma=sigma_obs, observed=Y_flat_no_nans)
    return linear_model
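
# Example usage (a minimal sketch, not part of the original pipeline; assumes each
# patient object exposes Mprotein_values and measurement_times, as used above):
#
#   X = pd.DataFrame(...)                      # (N_patients, P) covariates
#   patient_dictionary = {0: patient_0, ...}   # same order as the rows of X
#   model = linear_model(X, patient_dictionary, name="demo")
#   with model:
#       idata = pm.sample(draws=3000, tune=3000, target_accept=0.99, random_seed=RANDOM_SEED)
#   az.summary(idata, var_names=["alpha", "beta_rho_r", "beta_pi_r"])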