-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCRC_real_data_analysis.R
968 lines (822 loc) · 60.3 KB
/
CRC_real_data_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
source('./src/dependencies.R')
source('./src/estimation_functions.R')
params_save_name <- '2,8,32,128,256,512,1024,2,6,10_1,1,1,1,1,1,1,2,2,2'
load(paste0("params/parameters_CRC_real_data_", params_save_name, ".RData"))
parameters_CRC_real_data$bootstrap_M <- 200
parameters_CRC_real_data$save_name <- params_save_name
parameters_CRC_real_data$num_cores <- 1
with(parameters_CRC_real_data, {
colon_ncdb <- read.csv('./colon_ncdb.csv') %>%
mutate(X = surg_approach_open,
Y = death_90) %>% dplyr::select(-c(surg_approach_open, death_90))
set.seed(202848)
colon_ncdb_split <- split_data (colon_ncdb, 2)
colon_ncdb_1 <- colon_ncdb_split[[1]]
colon_ncdb_2 <- colon_ncdb_split[[2]]
###########
## split 1
###########
basis_1 <- lapply(1:length(K), function(k) {
create_b_spline_basis(
data = colon_ncdb_1,
continuous_vars = c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg'),
binary_vars = names(colon_ncdb_1)[!(names(colon_ncdb_1) %in%
c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg', 'X', 'Y'))],
knots = knots[[k]], boundary_knots = boundary_knots[[k]],
degree_of_interactions = degree_of_interactions[k],
polynomial_degree = polynomial_degree[k]
)
})
###########
## split 2
###########
basis_2 <- lapply(1:length(K), function(k) {
create_b_spline_basis(
data = colon_ncdb_2,
continuous_vars = c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg'),
binary_vars = names(colon_ncdb_2)[!(names(colon_ncdb_2) %in%
c('distance', 'age', 'tumor_size', 'days_from_dx_to_def_surg', 'X', 'Y'))],
knots = knots[[k]], boundary_knots = boundary_knots[[k]],
degree_of_interactions = degree_of_interactions[k],
polynomial_degree = polynomial_degree[k]
)
})
##########################################################
## compute estimates in estimation data
##########################################################
###########
## split 1
###########
prob_trt1_stacked_classifier_1 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_trt_2,
params_random_forest = params_random_forest_trt_2,
params_knn = params_knn_trt_2,
params_lasso = params_lasso_trt_2,
params_glm = params_glm_trt_2,
meta_model = meta_model_trt_2$meta_model,
meta_model_formula = meta_model_trt_2$meta_model_formula,
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_stacked_classifier_1 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_2,
params_random_forest = params_random_forest_outcome_2,
params_knn = params_knn_outcome_2,
params_lasso = params_lasso_outcome_2,
params_glm = params_glm_outcome_2,
meta_model = meta_model_outcome_2$meta_model,
meta_model_formula = meta_model_outcome_2$meta_model_formula,
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_stacked_classifier_1 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_2,
params_random_forest = params_random_forest_outcome_2,
params_knn = params_knn_outcome_2,
params_lasso = params_lasso_outcome_2,
params_glm = params_glm_outcome_2,
meta_model = meta_model_outcome_2$meta_model,
meta_model_formula = meta_model_outcome_2$meta_model_formula,
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_boosted_tree_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params = params_boosted_tree_trt_2,
model = "boosted_tree",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_boosted_tree_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_2,
model = "boosted_tree",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_boosted_tree_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_2,
model = "boosted_tree",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_random_forest_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params = params_random_forest_trt_2,
model = "random_forest",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_random_forest_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_2,
model = "random_forest",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_random_forest_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_2,
model = "random_forest",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_knn_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params = params_knn_trt_2,
model = "knn",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_knn_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_2,
model = "knn",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_knn_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_2,
model = "knn",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_lasso_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params = params_lasso_trt_2,
model = "lasso",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_lasso_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_2,
model = "lasso",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_lasso_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_2,
model = "lasso",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_glm_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_2 %>% dplyr::select(X) %>% {.[[1]]},
params = params_glm_trt_2,
model = "glm",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_glm_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_2,
model = "glm",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_glm_1 <- estimate_prob_individual_model(covariates_df = colon_ncdb_2 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_2 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_2,
model = "glm",
predict_data = colon_ncdb_1 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
estimate_AIPW_stacked_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_stacked_classifier_1,
prob_outcome1_trt1 = prob_outcome1_trt1_stacked_classifier_1,
prob_outcome1_trt0 = prob_outcome1_trt0_stacked_classifier_1
)
estimate_var_AIPW_stacked_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_stacked_classifier_1,
prob_outcome1_trt1 = prob_outcome1_trt1_stacked_classifier_1,
prob_outcome1_trt0 = prob_outcome1_trt0_stacked_classifier_1
)
estimate_AIPW_boosted_tree_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_boosted_tree_1,
prob_outcome1_trt1 = prob_outcome1_trt1_boosted_tree_1,
prob_outcome1_trt0 = prob_outcome1_trt0_boosted_tree_1
)
estimate_var_AIPW_boosted_tree_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_boosted_tree_1,
prob_outcome1_trt1 = prob_outcome1_trt1_boosted_tree_1,
prob_outcome1_trt0 = prob_outcome1_trt0_boosted_tree_1
)
estimate_AIPW_random_forest_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_random_forest_1,
prob_outcome1_trt1 = prob_outcome1_trt1_random_forest_1,
prob_outcome1_trt0 = prob_outcome1_trt0_random_forest_1
)
estimate_var_AIPW_random_forest_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_random_forest_1,
prob_outcome1_trt1 = prob_outcome1_trt1_random_forest_1,
prob_outcome1_trt0 = prob_outcome1_trt0_random_forest_1
)
estimate_AIPW_knn_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_knn_1,
prob_outcome1_trt1 = prob_outcome1_trt1_knn_1,
prob_outcome1_trt0 = prob_outcome1_trt0_knn_1
)
estimate_var_AIPW_knn_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_knn_1,
prob_outcome1_trt1 = prob_outcome1_trt1_knn_1,
prob_outcome1_trt0 = prob_outcome1_trt0_knn_1
)
estimate_AIPW_lasso_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_lasso_1,
prob_outcome1_trt1 = prob_outcome1_trt1_lasso_1,
prob_outcome1_trt0 = prob_outcome1_trt0_lasso_1
)
estimate_var_AIPW_lasso_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_lasso_1,
prob_outcome1_trt1 = prob_outcome1_trt1_lasso_1,
prob_outcome1_trt0 = prob_outcome1_trt0_lasso_1
)
estimate_AIPW_glm_1 <-
estimate_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_glm_1,
prob_outcome1_trt1 = prob_outcome1_trt1_glm_1,
prob_outcome1_trt0 = prob_outcome1_trt0_glm_1
)
estimate_var_AIPW_glm_1 <-
estimate_var_AIPW(
trt = colon_ncdb_1$X,
outcome = colon_ncdb_1$Y,
prob_trt1 = prob_trt1_glm_1,
prob_outcome1_trt1 = prob_outcome1_trt1_glm_1,
prob_outcome1_trt0 = prob_outcome1_trt0_glm_1
)
###########
## split 2
###########
prob_trt1_stacked_classifier_2 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_trt_1,
params_random_forest = params_random_forest_trt_1,
params_knn = params_knn_trt_1,
params_lasso = params_lasso_trt_1,
params_glm = params_glm_trt_1,
meta_model = meta_model_trt_1$meta_model,
meta_model_formula = meta_model_trt_1$meta_model_formula,
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_stacked_classifier_2 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_1,
params_random_forest = params_random_forest_outcome_1,
params_knn = params_knn_outcome_1,
params_lasso = params_lasso_outcome_1,
params_glm = params_glm_outcome_1,
meta_model = meta_model_outcome_1$meta_model,
meta_model_formula = meta_model_outcome_1$meta_model_formula,
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_stacked_classifier_2 <- estimate_prob_stacked_classifier(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params_boosted_tree = params_boosted_tree_outcome_1,
params_random_forest = params_random_forest_outcome_1,
params_knn = params_knn_outcome_1,
params_lasso = params_lasso_outcome_1,
params_glm = params_glm_outcome_1,
meta_model = meta_model_outcome_1$meta_model,
meta_model_formula = meta_model_outcome_1$meta_model_formula,
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_boosted_tree_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params = params_boosted_tree_trt_1,
model = "boosted_tree",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_boosted_tree_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_1,
model = "boosted_tree",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_boosted_tree_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_boosted_tree_outcome_1,
model = "boosted_tree",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_random_forest_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params = params_random_forest_trt_1,
model = "random_forest",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_random_forest_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_1,
model = "random_forest",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_random_forest_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_random_forest_outcome_1,
model = "random_forest",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_knn_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params = params_knn_trt_1,
model = "knn",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_knn_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_1,
model = "knn",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_knn_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_knn_outcome_1,
model = "knn",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_lasso_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params = params_lasso_trt_1,
model = "lasso",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_lasso_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_1,
model = "lasso",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_lasso_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_lasso_outcome_1,
model = "lasso",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
prob_trt1_glm_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y, X)),
label_vector = colon_ncdb_1 %>% dplyr::select(X) %>% {.[[1]]},
params = params_glm_trt_1,
model = "glm",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y, X)))
prob_outcome1_trt1_glm_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_1,
model = "glm",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 1))
prob_outcome1_trt0_glm_2 <- estimate_prob_individual_model(covariates_df = colon_ncdb_1 %>% dplyr::select(-c(Y)),
label_vector = colon_ncdb_1 %>% dplyr::select(Y) %>% {.[[1]]},
params = params_glm_outcome_1,
model = "glm",
predict_data = colon_ncdb_2 %>% dplyr::select(-c(Y)) %>% mutate(X = 0))
estimate_AIPW_stacked_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_stacked_classifier_2,
prob_outcome1_trt1 = prob_outcome1_trt1_stacked_classifier_2,
prob_outcome1_trt0 = prob_outcome1_trt0_stacked_classifier_2
)
estimate_var_AIPW_stacked_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_stacked_classifier_2,
prob_outcome1_trt1 = prob_outcome1_trt1_stacked_classifier_2,
prob_outcome1_trt0 = prob_outcome1_trt0_stacked_classifier_2
)
estimate_AIPW_boosted_tree_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_boosted_tree_2,
prob_outcome1_trt1 = prob_outcome1_trt1_boosted_tree_2,
prob_outcome1_trt0 = prob_outcome1_trt0_boosted_tree_2
)
estimate_var_AIPW_boosted_tree_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_boosted_tree_2,
prob_outcome1_trt1 = prob_outcome1_trt1_boosted_tree_2,
prob_outcome1_trt0 = prob_outcome1_trt0_boosted_tree_2
)
estimate_AIPW_random_forest_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_random_forest_2,
prob_outcome1_trt1 = prob_outcome1_trt1_random_forest_2,
prob_outcome1_trt0 = prob_outcome1_trt0_random_forest_2
)
estimate_var_AIPW_random_forest_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_random_forest_2,
prob_outcome1_trt1 = prob_outcome1_trt1_random_forest_2,
prob_outcome1_trt0 = prob_outcome1_trt0_random_forest_2
)
estimate_AIPW_knn_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_knn_2,
prob_outcome1_trt1 = prob_outcome1_trt1_knn_2,
prob_outcome1_trt0 = prob_outcome1_trt0_knn_2
)
estimate_var_AIPW_knn_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_knn_2,
prob_outcome1_trt1 = prob_outcome1_trt1_knn_2,
prob_outcome1_trt0 = prob_outcome1_trt0_knn_2
)
estimate_AIPW_lasso_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_lasso_2,
prob_outcome1_trt1 = prob_outcome1_trt1_lasso_2,
prob_outcome1_trt0 = prob_outcome1_trt0_lasso_2
)
estimate_var_AIPW_lasso_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_lasso_2,
prob_outcome1_trt1 = prob_outcome1_trt1_lasso_2,
prob_outcome1_trt0 = prob_outcome1_trt0_lasso_2
)
estimate_AIPW_glm_2 <-
estimate_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_glm_2,
prob_outcome1_trt1 = prob_outcome1_trt1_glm_2,
prob_outcome1_trt0 = prob_outcome1_trt0_glm_2
)
estimate_var_AIPW_glm_2 <-
estimate_var_AIPW(
trt = colon_ncdb_2$X,
outcome = colon_ncdb_2$Y,
prob_trt1 = prob_trt1_glm_2,
prob_outcome1_trt1 = prob_outcome1_trt1_glm_2,
prob_outcome1_trt0 = prob_outcome1_trt0_glm_2
)
##########################################################
## compute IF22 (for risk difference) with Gram matrix computed from
# 1) training sample (including shrinkage versions), and
# 3) estimation sample
##########################################################
###########
## split 1
###########
HOIF_stacked_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_stacked_classifier_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_stacked_classifier_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_stacked_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_stacked_classifier_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_stacked_classifier_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_stacked_eff_1 <- HOIF_stacked_eff1_1-HOIF_stacked_eff0_1
HOIF_stacked_eff_1[1,] <- HOIF_stacked_eff1_1[1,]
var_HOIF_stacked_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_stacked_classifier_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_stacked_classifier_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_stacked_classifier_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_stacked_classifier_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_boosted_tree_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_boosted_tree_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_boosted_tree_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_boosted_tree_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_boosted_tree_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_boosted_tree_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_boosted_tree_eff_1 <- HOIF_boosted_tree_eff1_1-HOIF_boosted_tree_eff0_1
HOIF_boosted_tree_eff_1[1,] <- HOIF_boosted_tree_eff1_1[1,]
var_HOIF_boosted_tree_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_boosted_tree_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_boosted_tree_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_boosted_tree_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_boosted_tree_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_random_forest_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_random_forest_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_random_forest_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_random_forest_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_random_forest_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_random_forest_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_random_forest_eff_1 <- HOIF_random_forest_eff1_1-HOIF_random_forest_eff0_1
HOIF_random_forest_eff_1[1,] <- HOIF_random_forest_eff1_1[1,]
var_HOIF_random_forest_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_random_forest_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_random_forest_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_random_forest_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_random_forest_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_knn_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_knn_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_knn_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_knn_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_knn_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_knn_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_knn_eff_1 <- HOIF_knn_eff1_1-HOIF_knn_eff0_1
HOIF_knn_eff_1[1,] <- HOIF_knn_eff1_1[1,]
var_HOIF_knn_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_knn_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_knn_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_knn_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_knn_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_lasso_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_lasso_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_lasso_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_lasso_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_lasso_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_lasso_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_lasso_eff_1 <- HOIF_lasso_eff1_1-HOIF_lasso_eff0_1
HOIF_lasso_eff_1[1,] <- HOIF_lasso_eff1_1[1,]
var_HOIF_lasso_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_lasso_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_lasso_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_lasso_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_lasso_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_glm_eff1_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_glm_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_glm_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
num_cores = 1
)
HOIF_glm_eff0_1 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_glm_1, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_glm_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = 1
)
HOIF_glm_eff_1 <- HOIF_glm_eff1_1-HOIF_glm_eff0_1
HOIF_glm_eff_1[1,] <- HOIF_glm_eff1_1[1,]
var_HOIF_glm_eff_1 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_1$X, pred = prob_trt1_glm_1, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, pred = 1-prob_trt1_glm_1, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt1_glm_1, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_1$X, outcome = colon_ncdb_1$Y, pred = prob_outcome1_trt0_glm_1, type = 'outcome'),
basis = rep(basis_1, 3),
sigma = c(sigma_2_eff1, sigma_1_eff1[1:10]),
sigma2 = c(sigma_2_eff0, sigma_1_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
###########
## split 2
###########
HOIF_stacked_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_stacked_classifier_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_stacked_classifier_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
num_cores = 1
)
HOIF_stacked_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_stacked_classifier_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_stacked_classifier_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = 1
)
HOIF_stacked_eff_2 <- HOIF_stacked_eff1_2-HOIF_stacked_eff0_2
HOIF_stacked_eff_2[1,] <- HOIF_stacked_eff1_2[1,]
var_HOIF_stacked_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_stacked_classifier_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_stacked_classifier_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_stacked_classifier_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_stacked_classifier_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_boosted_tree_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_boosted_tree_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_boosted_tree_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
num_cores = 1
)
HOIF_boosted_tree_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_boosted_tree_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_boosted_tree_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = 1
)
HOIF_boosted_tree_eff_2 <- HOIF_boosted_tree_eff1_2-HOIF_boosted_tree_eff0_2
HOIF_boosted_tree_eff_2[1,] <- HOIF_boosted_tree_eff1_2[1,]
var_HOIF_boosted_tree_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_boosted_tree_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_boosted_tree_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_boosted_tree_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_boosted_tree_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_random_forest_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_random_forest_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_random_forest_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
num_cores = 1
)
HOIF_random_forest_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_random_forest_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_random_forest_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = 1
)
HOIF_random_forest_eff_2 <- HOIF_random_forest_eff1_2-HOIF_random_forest_eff0_2
HOIF_random_forest_eff_2[1,] <- HOIF_random_forest_eff1_2[1,]
var_HOIF_random_forest_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_random_forest_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_random_forest_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_random_forest_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_random_forest_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_knn_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_knn_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_knn_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
num_cores = 1
)
HOIF_knn_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_knn_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_knn_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = 1
)
HOIF_knn_eff_2 <- HOIF_knn_eff1_2-HOIF_knn_eff0_2
HOIF_knn_eff_2[1,] <- HOIF_knn_eff1_2[1,]
var_HOIF_knn_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_knn_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_knn_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_knn_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_knn_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_lasso_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_lasso_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_lasso_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
tr_corr = F,
num_cores = 1
)
HOIF_lasso_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_lasso_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_lasso_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
tr_corr = F,
num_cores = 1
)
HOIF_lasso_eff_2 <- HOIF_lasso_eff1_2-HOIF_lasso_eff0_2
HOIF_lasso_eff_2[1,] <- HOIF_lasso_eff1_2[1,]
var_HOIF_lasso_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_lasso_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_lasso_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_lasso_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_lasso_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
HOIF_glm_eff1_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_glm_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_glm_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
num_cores = 1
)
HOIF_glm_eff0_2 <- compute_HOIF_sequence(
a_resid = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_glm_2, type = 'trt'),
y_resid = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_glm_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = 1
)
HOIF_glm_eff_2 <- HOIF_glm_eff1_2-HOIF_glm_eff0_2
HOIF_glm_eff_2[1,] <- HOIF_glm_eff1_2[1,]
var_HOIF_glm_eff_2 <- compute_var_HOIF_sequence(a_resid = compute_resid(trt = colon_ncdb_2$X, pred = prob_trt1_glm_2, type = 'trt'),
a_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, pred = 1-prob_trt1_glm_2, type = 'trt'),
y_resid = compute_resid(trt = colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt1_glm_2, type = 'outcome'),
y_resid2 = compute_resid(trt = 1-colon_ncdb_2$X, outcome = colon_ncdb_2$Y, pred = prob_outcome1_trt0_glm_2, type = 'outcome'),
basis = c(basis_2, basis_2, basis_2),
sigma = c(sigma_1_eff1, sigma_2_eff1[1:10]),
sigma2 = c(sigma_1_eff0, sigma_2_eff0[1:10]),
num_cores = num_cores,
M = bootstrap_M)
##########################################################
## save
##########################################################
results <- list(
estimate_AIPW_stacked_1 = estimate_AIPW_stacked_1,
estimate_var_AIPW_stacked_1 = estimate_var_AIPW_stacked_1,
estimate_AIPW_boosted_tree_1 = estimate_AIPW_boosted_tree_1,
estimate_var_AIPW_boosted_tree_1 = estimate_var_AIPW_boosted_tree_1,
estimate_AIPW_random_forest_1 = estimate_AIPW_random_forest_1,
estimate_var_AIPW_random_forest_1 = estimate_var_AIPW_random_forest_1,
estimate_AIPW_knn_1 = estimate_AIPW_knn_1,
estimate_var_AIPW_knn_1 = estimate_var_AIPW_knn_1,
estimate_AIPW_lasso_1 = estimate_AIPW_lasso_1,
estimate_var_AIPW_lasso_1 = estimate_var_AIPW_lasso_1,
estimate_AIPW_glm_1 = estimate_AIPW_glm_1,
estimate_var_AIPW_glm_1 = estimate_var_AIPW_glm_1,
estimate_AIPW_stacked_2 = estimate_AIPW_stacked_2,
estimate_var_AIPW_stacked_2 = estimate_var_AIPW_stacked_2,
estimate_AIPW_boosted_tree_2 = estimate_AIPW_boosted_tree_2,
estimate_var_AIPW_boosted_tree_2 = estimate_var_AIPW_boosted_tree_2,
estimate_AIPW_random_forest_2 = estimate_AIPW_random_forest_2,
estimate_var_AIPW_random_forest_2 = estimate_var_AIPW_random_forest_2,
estimate_AIPW_knn_2 = estimate_AIPW_knn_2,
estimate_var_AIPW_knn_2 = estimate_var_AIPW_knn_2,
estimate_AIPW_lasso_2 = estimate_AIPW_lasso_2,
estimate_var_AIPW_lasso_2 = estimate_var_AIPW_lasso_2,
estimate_AIPW_glm_2 = estimate_AIPW_glm_2,
estimate_var_AIPW_glm_2 = estimate_var_AIPW_glm_2,
HOIF_stacked_eff_1 = HOIF_stacked_eff_1,
var_HOIF_stacked_eff_1 = var_HOIF_stacked_eff_1,
HOIF_boosted_tree_eff_1 = HOIF_boosted_tree_eff_1,
var_HOIF_boosted_tree_eff_1 = var_HOIF_boosted_tree_eff_1,
HOIF_random_forest_eff_1 = HOIF_random_forest_eff_1,
var_HOIF_random_forest_eff_1 = var_HOIF_random_forest_eff_1,
HOIF_knn_eff_1 = HOIF_knn_eff_1,
var_HOIF_knn_eff_1 = var_HOIF_knn_eff_1,
HOIF_lasso_eff_1 = HOIF_lasso_eff_1,
var_HOIF_lasso_eff_1 = var_HOIF_lasso_eff_1,
HOIF_glm_eff_1 = HOIF_glm_eff_1,
var_HOIF_glm_eff_1 = var_HOIF_glm_eff_1,
HOIF_stacked_eff_2 = HOIF_stacked_eff_2,
var_HOIF_stacked_eff_2 = var_HOIF_stacked_eff_2,
HOIF_boosted_tree_eff_2 = HOIF_boosted_tree_eff_2,
var_HOIF_boosted_tree_eff_2 = var_HOIF_boosted_tree_eff_2,
HOIF_random_forest_eff_2 = HOIF_random_forest_eff_2,
var_HOIF_random_forest_eff_2 = var_HOIF_random_forest_eff_2,
HOIF_knn_eff_2 = HOIF_knn_eff_2,
var_HOIF_knn_eff_2 = var_HOIF_knn_eff_2,
HOIF_lasso_eff_2 = HOIF_lasso_eff_2,
var_HOIF_lasso_eff_2 = var_HOIF_lasso_eff_2,
HOIF_glm_eff_2 = HOIF_glm_eff_2,
var_HOIF_glm_eff_2 = var_HOIF_glm_eff_2
)
save(results, file=paste0("./output/CRC_real_data_analysis_", save_name, ".RData"))
})