-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_selection_COMMPASS.py
294 lines (268 loc) · 24.7 KB
/
model_selection_COMMPASS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# Purposes of this script:
# Load COMMPASS_patient_dictionary
# Add parameter estimates to the patients and create COMMPASS_patient_dictionary_with_estimates
# Find sections with the right drug combination and enough data to perform inference
# Perform inference of parameter set in each region
from utilities import *
warnings.simplefilter("ignore")
# Settings
N_iter = 1000 # separate minimzations of the least squares when fitting parameters
minimum_number_of_measurements = 3 # required number of M protein measurements for a period to be included in the dataset
threshold_for_closeness_for_M_protein_at_start = 60 # If M protein at period start is missing, it is imputed using the nearest measurement, but only if closer than this threshold number of days.
INCLUDE_SUBSEQUENT_DRUG_HOLIDAY = False #True # If a treatment is followed by a drug holiday, this decided if the holiday is included as part of the period
## Inference
# The length of ub and lb implicitly decides whether the effect of treatment is given a parameter or not.
# If len(ub) = 4, then it is assumed that only periods under treatment are considered
# If len(ub) = 5, then k_1 models the effect of the drug on the sensitive population
# Simple exponential growth model with 2 populations, where only one is affected by treatment
# The parameters we estimate are
# Y_0, pi_r, g_r, g_s, k_1, sigma
#lb = np.array([ 0, 0, 0.01, 0.00, 0.20]) #, 10e-6])
#ub = np.array([100, 1, 0.20, lb[4], 1.00]) #, 10e4])
#lb = np.array([ 0, 0, 0.00, -1e-0])
#ub = np.array([100, 1, 2e-1, 0.00])
# Y_0=50, pi_r=0.10, g_r=2e-3, g_s=1e-2, k_1=3e-2
# Bounds for model 1: 1 population, only resistant cells
# Y_0, g_r sigma
lb_1 = np.array([ 0, 0.001, 10e-6])
ub_1 = np.array([100, 0.20, 10e4])
# Bounds for model 2: 1 population, only sensitive cells
# Y_0, g_s, k_1, sigma
lb_2 = np.array([ 0, 0.001, 0.20, 10e-6])
ub_2 = np.array([100, lb_2[2], 1.00, 10e4])
# Bounds for model 3: sensitive and resistant population
# Y_0, pi_r, g_r, g_s, k_1, sigma
lb_3 = np.array([ 0, 0.001, 0.001, 0.001, 0.20, 10e-6])
ub_3 = np.array([100, 1-0.001, 0.20, lb_3[4], 1.00, 10e4])
# Load COMMPASS_patient_dictionary
picklefile = open('./binaries_and_pickles/COMMPASS_patient_dictionary', 'rb')
COMMPASS_patient_dictionary = pickle.load(picklefile)
picklefile.close()
picklefile = open('./binaries_and_pickles/unique_treat_counter', 'rb')
unique_treat_counter = pickle.load(picklefile)
picklefile.close()
negative_loglikelihoods_1 = []
negative_loglikelihoods_2 = []
negative_loglikelihoods_3 = []
bic_values_1 = []
bic_values_2 = []
bic_values_3 = []
aic_c_values_1 = []
aic_c_values_2 = []
aic_c_values_3 = []
chosen_models = []
# Find the treatment id of the required treatment, extract those treatment regions and perform inference there
def estimate_and_save_region_estimate(training_instance_id, period_start, period_end, minimum_number_of_measurements, dummy_measurement_times, dummy_Mprotein_values, dummy_Kappa_values, dummy_Lambda_values, how_many_regions, training_instance_dict, patient, Y_parameters, treatment_id_of_interest, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3):
# Check how many M protein values are within period, and find the observed values, times and drug periods in the period
valid_Mprotein = dummy_Mprotein_values[dummy_measurement_times>=period_start]
valid_Kappa = dummy_Kappa_values[dummy_measurement_times>=period_start]
valid_Lambda = dummy_Lambda_values[dummy_measurement_times>=period_start]
valid_times = dummy_measurement_times[dummy_measurement_times>=period_start]
valid_Mprotein = valid_Mprotein[valid_times<=period_end]
valid_Kappa = valid_Kappa[valid_times<=period_end]
valid_Lambda = valid_Lambda[valid_times<=period_end]
valid_times = valid_times[valid_times<=period_end]
# Only add as data instance to X and Y if there are enough:
if len(valid_times) >= minimum_number_of_measurements and max(valid_Mprotein) > 0:
print("Saving a case from", patient.name, "- treatment id", treatment_id_of_interest, "- training instance id", training_instance_id)
how_many_regions[treatment_id_of_interest] = how_many_regions[treatment_id_of_interest] + 1
# Note the time limits of this period: Last M protein measurement while still on treatment
last_measurement_time_on_treatment = valid_times[-1]
# Estimate parameters for a dummy patient within this interval
dummmy_patient = COMMPASS_Patient(measurement_times=valid_times, drug_dates=[], drug_history=[], treatment_history=this_history, Mprotein_values=valid_Mprotein, Kappa_values=valid_Kappa, Lambda_values=valid_Lambda, covariates=[], name="dummy")
this_patient = dummmy_patient
# Estimate parameters for model 1
# Model 1: exp rho t (2+1=3 parameters: Y0, rho, sigma)
this_estimate_m1 = estimate_drug_response_parameters_any_model(this_patient, lb_1, ub_1, N_iterations=N_iter) #, sigma_noise_std=1
array_x = np.array([this_estimate_m1.Y_0, this_estimate_m1.g_r, this_estimate_m1.sigma])
predictions = measure_Mprotein_noiseless(this_estimate_m1, this_patient.measurement_times, this_patient.treatment_history)
sumofsquares_model_1 = np.sum((this_patient.Mprotein_values - predictions)**2)
sample_variance_unadjusted_model_1 = sumofsquares_model_1/len(this_patient.measurement_times)
negative_loglikelihoods_1.append(negative_loglikelihood_any_model(array_x, this_patient)) #, sigma_noise_std=sample_variance_unadjusted_model_1))
#print(negative_loglikelihoods_1[training_instance_id])
# Estimate parameters for model 2
# Model 2: exp t(alpha - k) (3+1=4 parameters: Y0, alpha, K, sigma)
this_estimate_m2 = estimate_drug_response_parameters_any_model(this_patient, lb_2, ub_2, N_iterations=N_iter) #, sigma_noise_std=1
array_x = np.array([this_estimate_m2.Y_0, this_estimate_m2.g_s, this_estimate_m2.k_1, this_estimate_m2.sigma])
predictions = measure_Mprotein_noiseless(this_estimate_m2, this_patient.measurement_times, this_patient.treatment_history)
sumofsquares_model_2 = np.sum((this_patient.Mprotein_values - predictions)**2)
sample_variance_unadjusted_model_2 = sumofsquares_model_2/len(this_patient.measurement_times)
negative_loglikelihoods_2.append(negative_loglikelihood_any_model(array_x, this_patient)) #, sigma_noise_std=sample_variance_unadjusted_model_2))
#print(negative_loglikelihoods_2[training_instance_id])
# Estimate parameters for model 3
# Model 3: Both together. (5+1=6 parameters: Y0, pi, rho, alpha, K, sigma)
this_estimate_m3 = estimate_drug_response_parameters_any_model(this_patient, lb_3, ub_3, N_iterations=N_iter) #, sigma_noise_std=1
array_x = np.array([this_estimate_m3.Y_0, this_estimate_m3.pi_r, this_estimate_m3.g_r, this_estimate_m3.g_s, this_estimate_m3.k_1, this_estimate_m3.sigma])
predictions = measure_Mprotein_noiseless(this_estimate_m3, this_patient.measurement_times, this_patient.treatment_history)
sumofsquares_model_3 = np.sum((this_patient.Mprotein_values - predictions)**2)
sample_variance_unadjusted_model_3 = sumofsquares_model_3/len(this_patient.measurement_times)
negative_loglikelihoods_3.append(negative_loglikelihood_any_model(array_x, this_patient)) #, sigma_noise_std=sample_variance_unadjusted_model_3))
#print(negative_loglikelihoods_3[training_instance_id])
N_observations_this_period = len(dummmy_patient.measurement_times)
# BIC values: k*ln(N) + 2*negative loglikelihood
bic_values_1.append(len(lb_1)*np.log(N_observations_this_period) + 2*negative_loglikelihoods_1[training_instance_id])
bic_values_2.append(len(lb_2)*np.log(N_observations_this_period) + 2*negative_loglikelihoods_2[training_instance_id])
bic_values_3.append(len(lb_3)*np.log(N_observations_this_period) + 2*negative_loglikelihoods_3[training_instance_id])
#aic_c_values_1.append(2*len(lb_1) + 2*negative_loglikelihoods_1[training_instance_id] + 2*len(lb_1)*(len(lb_1)+1)/(N_observations_this_period - len(lb_1) - 1))
#aic_c_values_2.append(2*len(lb_2) + 2*negative_loglikelihoods_2[training_instance_id] + 2*len(lb_2)*(len(lb_2)+1)/(N_observations_this_period - len(lb_2) - 1))
#aic_c_values_3.append(2*len(lb_3) + 2*negative_loglikelihoods_3[training_instance_id] + 2*len(lb_3)*(len(lb_3)+1)/(N_observations_this_period - len(lb_3) - 1))
bic_values_this_patient = bic_values_1[training_instance_id], bic_values_2[training_instance_id], bic_values_3[training_instance_id]
#aic_c_values_this_patient = aic_c_values_1[training_instance_id], aic_c_values_2[training_instance_id], aic_c_values_3[training_instance_id]
model_choice = bic_values_this_patient.index(min(bic_values_this_patient)) + 1
chosen_models.append(model_choice)
parameter_estimates_this_patients = [this_estimate_m1, this_estimate_m2, this_estimate_m3]
this_estimate = parameter_estimates_this_patients[model_choice-1]
#this_estimate = estimate_drug_response_parameters(dummmy_patient, lb, ub, N_iterations=N_iter)
training_instance_dict[training_instance_id] = [patient.name, period_start, period_end, treatment_id_of_interest, last_measurement_time_on_treatment, this_estimate]
# Add estimates to Y_parameters
Y_parameters.append(this_estimate) # training_instance_id is position in Y_parameters
#patient.add_parameter_estimate(this_estimate, (period_start, period_end), dummmy_patient) # NOt really used, it is in training_instance_dict
training_instance_id = training_instance_id + 1
# Plotting treatment region with estimate (using model 3 estimate)
#for index, param_set in enumerate(patient.parameter_estimates):
estimated_parameters = this_estimate # patient.parameter_estimates[index]
plot_patient = dummmy_patient # patient.dummmy_patients[index]
#savename = "./COMMPASS_estimate_plots/treatment_id_"+str(treatment_id_of_interest)+"/Treatment_"+str(treatment_id_of_interest)+"_"+patient.name+"_at_time"+str(period_start)+"Y_0="+str(estimated_parameters.Y_0)+", pi_r="+str(estimated_parameters.pi_r)+", g_r="+str(estimated_parameters.g_r)+", g_s="+str(estimated_parameters.g_s)+", k_1="+str(estimated_parameters.k_1)+", sigma="+str(estimated_parameters.sigma)+".png"
savename = "./COMMPASS_estimate_plots/model_selection/model_"+str(model_choice)+"/Model_"+str(model_choice)+"_id_"+str(training_instance_id)+"_Treatment_"+str(treatment_id_of_interest)+"_"+patient.name+"_at_time"+str(period_start)+"Y_0="+str(estimated_parameters.Y_0)+", pi_r="+str(estimated_parameters.pi_r)+", g_r="+str(estimated_parameters.g_r)+", g_s="+str(estimated_parameters.g_s)+", k_1="+str(estimated_parameters.k_1)+", sigma="+str(estimated_parameters.sigma)+".png"
plot_title = patient.name
plot_treatment_region_with_estimate(estimated_parameters, plot_patient, estimated_parameters=[], PLOT_ESTIMATES=False, plot_title=plot_title, savename=savename)
return training_instance_id, how_many_regions, training_instance_dict, patient, Y_parameters, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3
# A training instance is a pair of history covariates X and estimated parameters Y
# Define minimum number of measurements for including period as training instance to X and Y
print("\nFinding right regions and estimating parameters...")
# Iterate through patients
# Identify valid periods and estimate parameters there: training_instance_dict = (training_instance_id, start, end, treatment_id). Y: (training_instance_id, parameters).
# Find patients with periods that satisfy criteria of enough M protein measurements under chosen treatment
# 1) Feature extract history up until period, put in X
# 2) Estimate parameters in period, put in Y
training_instance_id = 0
training_instance_dict = {} # A dictionary mapping training_instance_id to the patient name and the start and end of the interval with the treatment of interest
Y_parameters = []
#treatment_id_of_interest = 15 # Dex+Len+Bor #COMMPASS_patient_dictionary["MMRF_1293"].treatment_history[5].id
# Iterate over all patients, look at their treatment periods one by one and check if it qualifies as a training item
start_time = time.time()
how_many_regions = np.zeros(unique_treat_counter)
for name, patient in COMMPASS_patient_dictionary.items():
if len(patient.measurement_times) > minimum_number_of_measurements:
treatment_id_list = [1,2,3,7,10,13,15,16] #[38,62,66,110] are other combinations with the same drugs (but 66 only has 6 patients). # 30 for patient 1727 # Subset of treatment ids we choose to include. [15,16,3,10,7,1,13,2]: #range(1,unique_treat_counter):
for outer_index, outer_treatment in enumerate(patient.treatment_history): # Outer loop so we pass each of them only once
if outer_treatment.id in treatment_id_list:
treatment_id_of_interest = outer_treatment.id
# New, simpler and faster version when not including treatment holidays
# Check if period satisfies, if so then add it as a training case
# *Just check the current period instead of looping, it is faster*
# *Then remove treatment_id_list popping at end
# Currently the pop remains as there were duplicates: Now there is at most one case per patient
# Find periods of interest by looking through patient history
period_start = np.nan
period_end = np.nan
valid_interval = False
dummy_measurement_times = np.array([])
dummy_Mprotein_values = np.array([])
dummy_Kappa_values = np.array([])
dummy_Lambda_values = np.array([])
this_history = np.array([])
for index, treatment in enumerate(patient.treatment_history[outer_index:]): # Inner loop to check what happens after this exact treatment from the outer loop
if valid_interval == False:
if treatment.id == treatment_id_of_interest:
# We found the start
valid_interval = True
period_start = treatment.start
this_history = np.array([treatment])
# Find the M protein value closest in time to the start of the treatment
distances_to_treatment_start = patient.measurement_times - np.repeat(treatment.start, len(patient.measurement_times))
abs_distances_to_treatment_start = abs(distances_to_treatment_start)
closest_index = np.argmin(abs_distances_to_treatment_start)
if (not abs_distances_to_treatment_start[closest_index] == 0) and (min(abs_distances_to_treatment_start) <= threshold_for_closeness_for_M_protein_at_start):
# Add that value as M protein value at treatment start if there is nothing there
if distances_to_treatment_start[closest_index] > 0: # Treatment.start comes before the value that was closest
dummy_measurement_times = np.concatenate((patient.measurement_times[0:closest_index], [treatment.start], patient.measurement_times[closest_index:]))
dummy_Mprotein_values = np.concatenate((patient.Mprotein_values[0:closest_index], [patient.Mprotein_values[closest_index]], patient.Mprotein_values[closest_index:]))
dummy_Kappa_values = np.concatenate((patient.Kappa_values[0:closest_index], [patient.Kappa_values[closest_index]], patient.Kappa_values[closest_index:]))
dummy_Lambda_values = np.concatenate((patient.Lambda_values[0:closest_index], [patient.Lambda_values[closest_index]], patient.Lambda_values[closest_index:]))
else: # Treatment.start comes after the value that was closest. Indexing is ok, even in empty space
dummy_measurement_times = np.concatenate((patient.measurement_times[0:closest_index+1], [treatment.start], patient.measurement_times[closest_index+1:]))
dummy_Mprotein_values = np.concatenate((patient.Mprotein_values[0:closest_index+1], [patient.Mprotein_values[closest_index]], patient.Mprotein_values[closest_index+1:]))
dummy_Kappa_values = np.concatenate((patient.Kappa_values[0:closest_index+1], [patient.Kappa_values[closest_index]], patient.Kappa_values[closest_index+1:]))
dummy_Lambda_values = np.concatenate((patient.Lambda_values[0:closest_index+1], [patient.Lambda_values[closest_index]], patient.Lambda_values[closest_index+1:]))
#elif treatment.id == 0:
# # The last M protein under this treatment might be the start
# # Find time of last M protein value within period
# valid_Mprotein = patient.Mprotein_values[patient.measurement_times>=treatment.start]
# valid_times = patient.measurement_times[patient.measurement_times>=treatment.start]
# valid_Mprotein = valid_Mprotein[valid_times<=treatment.end]
# valid_times = valid_times[valid_times<=treatment.end]
# if len(valid_times) > 0:
# valid_interval = True
# period_start = valid_times[-1]
# zero_treatment = Treatment(period_start, treatment.end, treatment.id)
# this_history = np.array([zero_treatment])
# # Continute to next
else:
# Continue looking for the start
pass
else: # if valid interval == True, then we are looking for 0 or the end
if treatment.id == treatment_id_of_interest:
this_history = np.append(this_history,[treatment])
pass
elif treatment.id == 0 and INCLUDE_SUBSEQUENT_DRUG_HOLIDAY == True:
# Only after the id of interest. We are extending the period by a drug holiday, then ending the period at the end of this treatment
if patient.name == "MMRF_1793" and treatment_id_of_interest == 15:
treatment = Treatment(treatment.start, 1770, treatment.id)
this_history = np.append(this_history,[treatment])
valid_interval = False
period_end = treatment.end
# Estimate parameters, add to Y and to the patient using the common function
training_instance_id, how_many_regions, training_instance_dict, patient, Y_parameters, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3 = estimate_and_save_region_estimate(training_instance_id, period_start, period_end, minimum_number_of_measurements, dummy_measurement_times, dummy_Mprotein_values, dummy_Kappa_values, dummy_Lambda_values, how_many_regions, training_instance_dict, patient, Y_parameters, treatment_id_of_interest, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3)
dummy_measurement_times = np.array([])
dummy_Mprotein_values = np.array([])
dummy_Kappa_values = np.array([])
dummy_Lambda_values = np.array([])
else:
# We found the end at the beginning of this foreign treatment id
valid_interval = False
# Check if we captured the treatment of interest and not only zero:
if treatment_id_of_interest in [element.id for element in this_history]:
period_end = treatment.start
# Estimate parameters, add to Y and to the patient using the common function
training_instance_id, how_many_regions, training_instance_dict, patient, Y_parameters, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3 = estimate_and_save_region_estimate(training_instance_id, period_start, period_end, minimum_number_of_measurements, dummy_measurement_times, dummy_Mprotein_values, dummy_Kappa_values, dummy_Lambda_values, how_many_regions, training_instance_dict, patient, Y_parameters, treatment_id_of_interest, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3)
dummy_measurement_times = np.array([])
dummy_Mprotein_values = np.array([])
dummy_Kappa_values = np.array([])
dummy_Lambda_values = np.array([])
# After the last treatment, if ending on a valid interval, this is the end
if valid_interval == True:
# Check if we captured the treatment of interest and not only zero:
if treatment_id_of_interest in [element.id for element in this_history]:
period_end = patient.treatment_history[-1].end
# Estimate parameters, add to Y and to the patient using the common function
training_instance_id, how_many_regions, training_instance_dict, patient, Y_parameters, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3 = estimate_and_save_region_estimate(training_instance_id, period_start, period_end, minimum_number_of_measurements, dummy_measurement_times, dummy_Mprotein_values, dummy_Kappa_values, dummy_Lambda_values, how_many_regions, training_instance_dict, patient, Y_parameters, treatment_id_of_interest, negative_loglikelihoods_1, negative_loglikelihoods_2, negative_loglikelihoods_3, bic_values_1, bic_values_2, bic_values_3, aic_c_values_1, aic_c_values_2, aic_c_values_3)
treatment_id_list.remove(treatment_id_of_interest)
end_time = time.time()
time_duration = end_time - start_time
print("Time elapsed:", time_duration)
#COMMPASS_patient_dictionary["MMRF_1293"].print()
#print("Treatment id of interest:", treatment_id_of_interest)
#print("Number of regions with", minimum_number_of_measurements, "or more M protein measurements:", how_many_regions[treatment_id_of_interest])
#np.save("./binaries_and_pickles/training_instance_dict.npy", training_instance_dict)
#print(len(training_instance_dict.values()))
###np.save("Y_parameters.npy", np.array(Y_parameters))
####print("Sort(enumerate(how_many_regions))", Sort(enumerate(how_many_regions)))
###counts_regions = np.flip(Sort(enumerate(how_many_regions)))
###for element in counts_regions[counts_regions[:,0]>1]:
### patient_count = element[0]
### treatment_id = element[1]
### drug_names = get_drug_names_from_treatment_id_COMMPASS(treatment_id, treatment_id_to_drugs_dictionary_COMMPASS)
### print("N =", patient_count, " id = ", treatment_id, " drugs:", drug_names)
# Plot estimate with observed values
#estimated_parameters_patient_2 = Parameters(Y_0=best_x[0], pi_r=best_x[1], g_r=best_x[2], g_s=best_x[3], k_1=best_x[4], sigma=global_sigma)
#plot_treatment_region_with_estimate(parameters_patient_2, patient_2, estimated_parameters=estimated_parameters_patient_2, PLOT_ESTIMATES=True, plot_title="Patient 2")
##create a pickle file
#picklefile = open('./binaries_and_pickles/Y_parameters', 'wb')
##pickle the dictionary and write it to file
#pickle.dump(np.array(Y_parameters), picklefile)
##close the file
#picklefile.close()
#picklefile = open('./binaries_and_pickles/COMMPASS_patient_dictionary_with_estimates', 'wb')
#pickle.dump(COMMPASS_patient_dictionary, picklefile)
#picklefile.close()