-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnalysis.R
288 lines (238 loc) · 10.2 KB
/
Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
###################################################################
# Identifying drugs that were dispensed but not prescribed
###################################################################
# Reporting script: it restores a saved workspace and then prints or plots
# objects from it, section by section.
# NOTE(review): setwd() and rm(list=ls()) change the caller's session (working
# directory and workspace contents). Both load() calls in this file use
# absolute paths, so the setwd() looks redundant -- confirm before removing.
setwd("/Volumes/SLS Mock Data")
rm(list=ls()) # remove all variables from workspace
# Packages are attached in this order; later attachments mask earlier ones.
# NOTE(review): MASS is attached after dplyr, so an unqualified select() would
# resolve to MASS::select. No select() call is visible in this script.
library(readr)
library(dplyr)
library(lubridate)
library(stringr)
library(MASS)
library(survival)
library(survminer)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(cowplot)
# Restore the saved objects that every section up to the benchmarking
# analysis reads (presumably written by a separate linkage script -- confirm).
load("/Volumes/SLS Mock Data/results_version_final_internal_x.RData")
###################################################################
# Methods
###################################################################
# Each bare name below is evaluated at top level so that R auto-prints the
# value restored by load(). Under source() without echo/print.eval nothing
# would be displayed.
# Number of rows in the prescribing records
length_PRESCR1F
# Number of unique people in the prescribing records
length_PRESCR1F_people
# Date range of the prescribing records (earliest, then latest)
PRESC_date_min
PRESC_date_max
# The same four figures for the dispensing data: rows, unique people,
# earliest date, latest date
length_DISPENSF
length_DISPENSF_people
DISP_date_min
DISP_date_max
# Number of unique drug descriptions
length_unique_CMDRGSYN
###################################################################
# Data Cleaning
###################################################################
# Number of unique drug descriptions (same object as printed at the end of
# the Methods section, repeated here for reference)
length_unique_CMDRGSYN
# Check OXIS
# NOTE(review): the content of check_oxis is not visible here; presumably a
# spot check of how OXIS descriptions were classified -- confirm.
check_oxis
# Number of unique asthma-related drug descriptions, then that number as a
# percentage of all unique drug descriptions
length_unique_CMDRGSYN_asthma
length_unique_CMDRGSYN_asthma*100/length_unique_CMDRGSYN
# Number of asthma-related descriptions that were dropped, then as a
# percentage of the asthma-related descriptions
length_unique_CMDRGSYN_asthma_drop
length_unique_CMDRGSYN_asthma_drop*100/length_unique_CMDRGSYN_asthma
# Keywords used to drop records. For every keyword the loaded workspace is
# expected to hold an object called "<KEYWORD>_freq"; the loop prints the
# keyword followed by that object.
keywords <- c(
  "NASAL", "NOSE", "NOSTRIL", "NASULE", "HAYFEVER",
  "EYE", "EAR", "DROP", "TONGUE",
  "FOAM", "ENEMA", "RECTAL",
  "GASTRO", "MODIFIED",
  "CREAM", "APPLY", "SKIN", "ULCER", "OINTMENT", "PATCH",
  "CAPSULE", "SACHET", "SPRAY",
  "AZELASTINE", "NASONEX", "FLIXONASE", "ANORA ELLIPTA",
  "SUMATRIPTAN", "AVAMYS", "RHINOCORT", "NASOBEC", "NASOFAN"
)
for (keyword in keywords) {
  # Name of the stored frequency object for this keyword. It may be a
  # non-syntactic name (e.g. it contains a space), hence get() by string.
  freq_object <- paste0(keyword, "_freq")
  print(keyword)
  print(get(freq_object))
}
# Manual check that nothing was excluded that should have been kept
# (prints the stored `exclusions` object for eyeballing)
exclusions
# Records retained after the keyword exclusions: prescribing, then dispensing
length_PRESCR1F_asthma
length_DISPENSF_asthma
# Duplicate removal, prescribing records: number dropped, number remaining,
# and the remaining records as a percentage of those retained above
length_PRESCR1F_asthma_drop
length_PRESCR1F_asthma_final
length_PRESCR1F_asthma_final*100/length_PRESCR1F_asthma
# Duplicate removal, dispensing records: same three figures
length_DISPENSF_asthma_drop
length_DISPENSF_asthma_final
length_DISPENSF_asthma_final*100/length_DISPENSF_asthma
###################################################################
# Matching
###################################################################
# Total number of candidate links
candidate_links
# Candidate links dropped by dates: count, percentage of all candidate
# links, and the number remaining afterwards
dates_dropped
dates_dropped*100/candidate_links
dates_remaining
# Distribution of our weights for all candidate links, before weight-based exclusion.
# Each table is rescaled to percentages of dates_remaining; useNA = "ifany"
# adds an NA category whenever missing weights are present.
# NOTE(review): components a-d appear to correspond to the four panels of the
# Appendix D figure (brand name, dose strength, quantity, dates) -- confirm.
table(candidate_weight_a, useNA = "ifany")*100/dates_remaining
table(candidate_weight_b, useNA = "ifany")*100/dates_remaining
table(candidate_weight_c, useNA = "ifany")*100/dates_remaining
table(candidate_weight_d, useNA = "ifany")*100/dates_remaining
table(candidate_weight_disp, useNA = "ifany")*100/dates_remaining
# Histogram of candidate_weight_disp for the candidate links
hist(candidate_weight_disp)
# Candidate links dropped by weight: count and percentage of those that
# survived the date filter; then the links retained, as a count and as a
# percentage of all candidate links
weight_dropped
weight_dropped*100/dates_remaining
candidate_links_retained
candidate_links_retained*100/candidate_links
# Disabled output, kept for reference:
# no_candidates_disp
# no_candidates_disp*100/length_DISPENSF_asthma_final
#
# no_candidates_presc
# no_candidates_presc*100/length_PRESCR1F_asthma_final
# NOTE(review): presumably uniqueness checks on the prescribing and dispensing
# record IDs after the merge -- confirm against the script that creates them.
p1ID_unique_merge_check
DID_unique_merge_check
# Number of matches, then as a percentage of the de-duplicated prescribing
# records and of the de-duplicated dispensing records
matches
matches*100/length_PRESCR1F_asthma_final
matches*100/length_DISPENSF_asthma_final
# Weights of the matched records, as percentages of the number of matches
table(match_weight_a_m, useNA = "ifany")*100/matches
table(match_weight_b_m, useNA = "ifany")*100/matches
table(match_weight_c_m, useNA = "ifany")*100/matches
table(match_weight_d_m, useNA = "ifany")*100/matches
table(match_weight_disp, useNA = "ifany")*100/matches
# Histogram of match_weight_disp for the matched records
hist(match_weight_disp)
###################################################################
# Sensitivity analysis
###################################################################
# Candidate links retained in the sensitivity analysis, and that number as a
# percentage of the links retained in the main analysis (so 100 minus this
# figure is the share additionally dropped by weight)
candidate_links_retained_sens
candidate_links_retained_sens*100/candidate_links_retained
# Matches in the sensitivity analysis, and as a percentage of the main matches
matches_sens
matches_sens*100/matches
# Two-sample t-test comparing the weights of the main-analysis matches with
# matches_weights_sens (t.test() with two vectors and default settings)
t.test(match_weight_disp,matches_weights_sens)
###################################################################
# Quality Assurance - unmatched records
###################################################################
# NOTE(review): sum(x == "...") has no na.rm, so an NA in the match column
# would turn these figures into NA -- confirm the column is complete.
# Records labelled "Presc not Disp": count, then percentage of the
# de-duplicated prescribing records
sum(results$match=="Presc not Disp")
sum(results$match=="Presc not Disp")*100/length_PRESCR1F_asthma_final
# Records labelled "Disp not Presc": count, then percentage of the
# de-duplicated dispensing records
sum(results$match=="Disp not Presc")
sum(results$match=="Disp not Presc")*100/length_DISPENSF_asthma_final
# Quality assurance - sensitivity analysis
# The same two percentages computed from results_sens
sum(results_sens$match=="Presc not Disp")*100/length_PRESCR1F_asthma_final
sum(results_sens$match=="Disp not Presc")*100/length_DISPENSF_asthma_final
# Number of records with a missing dose strength
sum(is.na(results$dose_strength))
# Row percentages (margin=1: within each match category) of records with a
# missing dose strength, a missing primary quantity, and both missing.
# Proportions are rounded to 2 decimals before scaling, giving whole percentages.
round(prop.table(table(results$match,is.na(results$dose_strength)),margin=1),2)*100
round(prop.table(table(results$match,is.na(results$QUANTITY_primary)),margin=1),2)*100
round(prop.table(table(results$match,is.na(results$QUANTITY_primary) & is.na(results$dose_strength)),margin=1),2)*100
# Tally records per person (random_ID) and match category, then spread the
# categories into columns: one row per person, NA where a person has no
# records in a category.
counts <- group_by(results, random_ID, match)
counts <- count(counts)
counts <- spread(counts, match, n)

# A missing tally means the person had no such records, so count it as zero.
na_to_zero <- function(tally) ifelse(is.na(tally), 0, tally)
counts$DNP <- na_to_zero(counts$`Disp not Presc`)
counts$PND <- na_to_zero(counts$`Presc not Disp`)

# Linear regression of each person's "dispensed not prescribed" count on
# their "prescribed not dispensed" count.
dnp_on_pnd <- lm(DNP ~ PND, data = counts)
summary(dnp_on_pnd)
###################################################################
# Unclaimed medications
###################################################################
# what percentage went unclaimed?
# (perc_unclaimed and unclaimed_CMDRGSYN are restored by load(); the latter
# presumably breaks the figure down by drug description -- confirm.)
perc_unclaimed
unclaimed_CMDRGSYN

# Per-person share claimed: 1 - unclaimed / prescribed, summarised over people.
summary(1 - person_unclaimed$unclaimed_total / person_unclaimed$presc_total)
# Percentage of people for whom more than half of prescriptions were unclaimed.
sum(person_unclaimed$unclaimed_total / person_unclaimed$presc_total > 0.5) * 100 /
  nrow(person_unclaimed)

# ICS and LABA together or sep?
# Columns `0` and `1` hold counts per group; `1` is used as the number of
# "successes" and `0` + `1` as the number of trials in prop.test().
# NOTE(review): presumably `1` = claimed and `0` = unclaimed -- confirm.
claiming_ICS_LABA
sum(claiming_ICS_LABA$`0` + claiming_ICS_LABA$`1`)
prop.test(claiming_ICS_LABA$`1`, claiming_ICS_LABA$`0` + claiming_ICS_LABA$`1`)

# Summary Stats
# Number of non-missing initiation values, then their distribution.
sum(!is.na(initiation))
summary(initiation)
# Distribution with 23391 extra values fixed at 178 appended to the observed
# (non-missing) values.
# NOTE(review): 23391 is hard-coded; presumably it is the number of
# never-claimed prescriptions (compare with sum(is.na(initiation))) -- confirm
# and derive it from the data instead of typing it in.
summary(c(initiation[which(!is.na(initiation))], rep(178, 23391))) # 178 days is 6-months
# Percentage of all entries (missing ones included in the denominator) with
# an initiation value above 7, 14, 21 and 30.
# TRUE is spelled out: T is an ordinary variable that a loaded workspace
# could overwrite, whereas TRUE is a reserved word.
sum(initiation > 7, na.rm = TRUE) * 100 / length(initiation)
sum(initiation > 14, na.rm = TRUE) * 100 / length(initiation)
sum(initiation > 21, na.rm = TRUE) * 100 / length(initiation)
sum(initiation > 30, na.rm = TRUE) * 100 / length(initiation)

# survival for time to claiming
# Single-group survival curve (~ 1) of time to claiming, with event == 1 as
# the event indicator, drawn for days 0-21 with one axis break per day.
# `plot` here is a data frame restored by load(), not the base plot() function.
ggsurvplot(survfit(Surv(initiation_days_x, event == 1) ~ 1, data = plot),
           data = plot, ggtheme = theme_bw(),
           legend = "none", axes.offset = FALSE, break.time.by = 1, ylim = c(0, 1),
           xlim = c(0, 21), xlab = "Days since prescription issued",
           ylab = "Probability prescription \nremains unclaimed",
           font.x = 16, font.y = 16, font.tickslab = 14, size = 2)
# Column percentages (margin=2: within each event value) of records whose
# initiation_days_x equals 21.
round(prop.table(table(plot$initiation_days_x == 21, plot$event), margin = 2) * 100, 2)

# Stored Cox model object and its coefficients rounded to 3 decimals.
cox1
round(cox1$coefficients, 3)
###################################################################
# Appendix D Figures
###################################################################
# The percentages hard-coded in df_1..df_4 below were presumably transcribed
# from the output of these tables (kept here, disabled, for reference):
# table(candidate_weight_a, useNA = "ifany")*100/dates_remaining
# table(candidate_weight_b, useNA = "ifany")*100/dates_remaining
# table(candidate_weight_c, useNA = "ifany")*100/dates_remaining
# table(candidate_weight_d, useNA = "ifany")*100/dates_remaining
#
# table(match_weight_a_m, useNA = "ifany")*100/matches
# table(match_weight_b_m, useNA = "ifany")*100/matches
# table(match_weight_c_m, useNA = "ifany")*100/matches
# table(match_weight_d_m, useNA = "ifany")*100/matches

# One row per (points level, series): the first half of `values` belongs to
# the candidate links, the second half to the final matches.
df_1 <- data.frame(
  points = rep(c("0", "10", "20"), 2),
  values = c(6.3, 0, 93.7, 2.8, 0, 97.2),
  matches = rep(c("Candidates", "Matches"), each = 3)
)
df_2 <- data.frame(
  points = rep(c("0", "10", "35"), 2),
  values = c(4.8, 18.1, 77.2, 0, 9, 91),
  matches = rep(c("Candidates", "Matches"), each = 3)
)
df_3 <- data.frame(
  points = rep(c("0", "10", "15", "35"), 2),
  values = c(4.2, 9.8, 4.9, 81.1, 0, 0, 1.5, 98.5),
  matches = rep(c("Candidates", "Matches"), each = 4)
)
df_4 <- data.frame(
  points = rep(c("0", "10"), 2),
  values = c(67.2, 32.8, 1.3, 98.7),
  matches = rep(c("Candidates", "Matches"), each = 2)
)

# Draw one panel: dodged bars of percentage by points, one bar per series.
#   panel_data  data frame with columns points, values and matches
#   title       panel title
#   y_lab       y-axis label (" " leaves the axis visually unlabelled)
# Returns a ggplot object with its legend hidden.
weight_bar_plot <- function(panel_data, title, y_lab = "Percentage") {
  ggplot(data = panel_data, aes(x = points, y = values, fill = matches)) +
    geom_bar(stat = "identity", color = "black", position = position_dodge()) +
    theme_minimal() +
    scale_fill_manual(values = c("#999999", "#E69F00")) +
    labs(x = "Points", y = y_lab, title = title) +
    theme(legend.position = "none", text = element_text(size = 20))
}

plot_1 <- weight_bar_plot(df_1, "Brand Name")
# Only this panel pins the y-axis to 0-100 (its largest bar is 91).
plot_2 <- weight_bar_plot(df_2, "Dose Strength", y_lab = " ") + ylim(0, 100)
plot_3 <- weight_bar_plot(df_3, "Quantity")
# A blank legend title, so the extracted legend shows only the two keys.
plot_4 <- weight_bar_plot(df_4, "Dates", y_lab = " ") + labs(fill = " ")

# Extract a single legend from a copy of the last panel with the legend
# switched on; the panels themselves stay legend-free. get_legend is
# namespace-qualified because ggpubr (attached via survminer) exports a
# function of the same name.
shared_legend <- cowplot::get_legend(plot_4 + theme(legend.position = "bottom"))

# Empty filler so the legend row has two cells like the panel rows.
blank_panel <- ggplot() + geom_blank(aes(1, 1)) + cowplot::theme_nothing()

# 3 x 2 layout: a thin legend row on top, then the four panels.
grid.arrange(
  shared_legend, blank_panel, plot_1, plot_2, plot_3, plot_4,
  nrow = 3, ncol = 2,
  widths = c(2.7, 2.7), heights = c(0.3, 2.5, 2.5)
)
###################################################################
# Benchmarking Analysis
###################################################################
# Start from an empty workspace so that nothing from the main analysis
# leaks into this section, then restore the benchmarking results.
# NOTE(review): this also removes every object created earlier in the script.
rm(list=ls()) # remove all variables from workspace
load("/Volumes/SLS Mock Data/results_version_final_benchmarking_internal_27Aug2019.RData")
# Denominators: stored prescription and dispensing counts
prescriptions
dispensings
# First benchmark: stored value, then as a percentage of prescriptions and
# of dispensings
first_benchmark
first_benchmark*100/prescriptions
first_benchmark*100/dispensings
# Second benchmark: same three figures
second_benchmark
second_benchmark*100/prescriptions
second_benchmark*100/dispensings
# Third benchmark uses its own denominators.
# NOTE(review): the "_cc" suffix presumably means complete cases -- confirm.
prescriptions_cc
dispensings_cc
third_benchmark
third_benchmark*100/prescriptions_cc
third_benchmark*100/dispensings_cc