-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path12_Plasmid_traits.R
367 lines (300 loc) · 13.9 KB
/
12_Plasmid_traits.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
####################################################################################################################
# Plasmid rumen network analysis
#
# Script 12: Effect of plasmid traits (mobility and antimicrobial resistance) on network structure
#
#
#
# Script tested for R version 4.1.1
####################################################################################################################
####################################################################################################################
# SCRIPT SET-UP
####################################################################################################################
# Set working directory to wherever your files are located
# Load the necessary packages:
library(tidyverse)
# Starting files:
# File linking plasmid node ID's to original names, created in script 03_Network_setup.R
load("plas.2k.name.node.id.Rda")
# Degree of physical nodes, created in script 04_Basic_network_statistics.R
load("deg.str.2k.all.phys.Rda")
# Cow per plasmid created in script 04_Basic_network_statistics.R
load("cows.per.plasmid.Rda")
# Infomap data created in script 08_Infomap_analysis_full_network.R
load("plas_mods.df.Rda")
# Subnetwork data created in script 11_Subnetwork_analysis.R
load("hgt.netdat.et.Rda")
load("distdisp.netdat.et.Rda")
load("recdisp.netdat.et.Rda")
####################################################################################################################
####################################################################################################################
# Section 1: FORMATTING THE STARTING FILES
####################################################################################################################
# Read the file with the annotations:
ann.dat1 <- read.csv("nr_annotations_min1000.csv", header=TRUE,na.strings=c("","NA"))
# Minor formatting of the file:
ann.dat2 <- ann.dat1 %>%
# Get rid of prefix before node name
separate(gene, c("prefix", "gene"), "gene_\\d+_\\d+_\\d+_") %>%
select(-prefix) %>%
dplyr::rename(plasmid_name = gene)
# Filter to keep only data from the plasmids included in our analysis (1344 plasmids)
ann.dat3 <- plas.2k.name.node.id %>%
left_join(., ann.dat2, by="plasmid_name") %>%
select(node_id, plasmid_length, final) %>%
distinct() %>%
mutate(presence = ifelse(final==0, 0, 1))
####################################################################################################################
####################################################################################################################
# Section 2: FORMATTING THE STARTING FILES FOR MOB GENE ANALYSIS
####################################################################################################################
# Identify plasmids with mob-related genes
mob.types1 <- ann.dat3 %>%
filter(grepl("mob|PRE|recombinase|relaxase", final)) %>%
pivot_wider(names_from = final, values_from = presence, values_fill=0)
# Plasmids without mob-related genes:
no.mob <- ann.dat3 %>%
filter(!(node_id %in% mob.types1$node_id)) %>%
select(node_id, plasmid_length) %>%
distinct() %>%
# Create mob variables to correspond with mob data:
mutate(PRE = 0,
mob = 0,
recombinase = 0,
relaxase =0)
# Label plasmids as having mob genes or not (presence / absence)
# Use both binary and categorical variable (useful for plotting)
mob.dat.an <- mob.types1 %>%
bind_rows(., no.mob) %>%
# Make category for mob and conj:
mutate(mob.pres = ifelse(mob > 0, "Present","Absent"))
####################################################################################################################
####################################################################################################################
# Section 3: JOIN MOB DATA TO RELEVANT DATA SETS
####################################################################################################################
# Join mob data to relevant data sets:
# Degree data:
mob.deg.df <- deg.str.2k.all.phys %>%
select(node_id, degree, strength) %>%
ungroup() %>%
full_join(., mob.dat.an, by="node_id")
# Plasmid-cow distribution data:
cows.per.plasmid.mob <- cows.per.plasmid %>%
left_join(., mob.dat.an)
# Infomap data, plasmid-level:
plas_flow.mob <- plas_mods.df %>%
select(node_id, flow) %>%
distinct() %>%
left_join(., mob.dat.an)
# Infomap data, module-level:
plas_mods.mob <- plas_mods.df %>%
left_join(., mob.dat.an, by="node_id") %>%
group_by(module) %>%
summarise(n.state.nodes=n(),
n.phys.nodes = n_distinct(node_id),
n.layers = n_distinct(layer_id),
mean.size=mean(plasmid_length),
mob.sum = sum(mob),
mod.flow=sum(flow)) %>%
mutate(mob.pres.cat = ifelse(mob.sum > 0, "Present", "Absent"),
mob.pres.num = ifelse(mob.sum > 0, 1,0),
prop.mob = mob.sum/n.state.nodes)
####################################################################################################################
####################################################################################################################
# Section 4: COMPARE EFFECT OF MOB GENES ON PLASMID-LEVEL MEASURES
####################################################################################################################
# Test for effect of mob genes on degree centrality:
# Wilcoxon on degree:
wilcox.test(mob.deg.df$degree ~ mob.deg.df$mob, paired=F)
# Compare the median degree for plasmids with and without mob genes
mob.deg.df%>%
group_by(mob)%>%
summarise(Mean = mean(degree),Median=median(degree))
# Test for effect of mob genes on the number of cows a plasmid is found in
wilcox.test(cows.per.plasmid.mob$num.cows ~ cows.per.plasmid.mob$mob.pres)
# Not significant
# Compare the median number of plasmids with/without mob genes cows are found in
cows.per.plasmid.mob%>%
group_by(mob.pres)%>%
summarise(Median=median(num.cows))
# Test for effect of mob genes on flow per plasmid:
wilcox.test(plas_flow.mob$flow ~ plas_flow.mob$mob, paired=F)
# Compare median flow for plasmids with and without mob genes
plas_flow.mob%>%
group_by(mob)%>%
summarise(Median=median(flow))
####################################################################################################################
####################################################################################################################
# Section 5: COMPARE EFFECT OF MOB GENES ON MODULE-LEVEL MEASURES
####################################################################################################################
# Test if mob presence influences module size and flow
#State nodes
wilcox.test(plas_mods.mob$n.state.nodes ~ plas_mods.mob$mob.pres.cat,paired=F)
# Compare median state nodes for modules with and without mob genes
plas_mods.mob%>%
group_by(mob.pres.cat)%>%
summarise(Median=median(n.state.nodes))
# Physical nodes
wilcox.test(plas_mods.mob$n.phys.nodes ~ plas_mods.mob$mob.pres.cat, paired=F)
# Compare median physical nodes for modules with and without mob genes
plas_mods.mob%>%
group_by(mob.pres.cat)%>%
summarise(Median=median(n.phys.nodes))
# Layers
wilcox.test(plas_mods.mob$n.layers ~ plas_mods.mob$mob.pres.cat)
# Compare layers for modules with and without mob genes
plas_mods.mob%>%
group_by(mob.pres.cat)%>%
summarise(Median=median(n.layers), Mean=mean(n.layers))
# Flow:
wilcox.test(plas_mods.mob$mod.flow ~ plas_mods.mob$mob.pres.cat)
# Compare median flow for modules with and without mob genes
plas_mods.mob%>%
group_by(mob.pres.cat)%>%
summarise(Median=median(mod.flow))
# Median values for each characteristic:
median.mods <- plas_mods.mob%>%
group_by(mob.pres.cat)%>%
summarise(median.st=median(n.state.nodes),
median.phys=median(n.phys.nodes),
median.layers=median(n.layers),
median.flow=median(mod.flow))
####################################################################################################################
####################################################################################################################
# Section 6: DATA FORMATTING FOR AMR GENES
####################################################################################################################
# Identify plasmids with AMR-related genes
amr.types1 <- ann.dat3 %>%
filter(grepl("penicillin-binding protein|beta-lactamase|tetracycline resistance|tetracycline resistance protein", final)) %>%
pivot_wider(names_from = final, values_from = presence, values_fill=0) %>%
mutate(tet.res = ifelse(`tetracycline resistance` > 0 | `tetracycline resistance protein` > 0, 1, 0),
# Note confirm that "beta-lactamase" are in columns 3 and 4
beta.lact = ifelse(.[[3]] > 0 | .[[4]] > 0, 1, 0),
res="Present") %>%
dplyr::rename(pen.bind = `penicillin-binding protein`) %>%
select(-starts_with("tetracycline"), -starts_with("beta-"))
# Plasmids without AMR-related genes:
no.amr <- ann.dat3 %>%
filter(!(node_id %in% amr.types1$node_id)) %>%
select(node_id, plasmid_length) %>%
distinct() %>%
# Create mob variables to correspond with mob data:
mutate(pen.bind = 0,
tet.res = 0,
beta.lact = 0,
res ="Absent")
# Bind data with and without mob genes:
amr.dat.an <- amr.types1 %>%
bind_rows(., no.amr)
####################################################################################################################
####################################################################################################################
# Section 7: IDENTIFY COWS AND MODULES CONTAINTING AMR PLASMIDS
####################################################################################################################
# Join AMR data to module data in order to check which cows and modules AMR plasmids are in
plas_mods.amr <- plas_mods.df %>%
left_join(., amr.dat.an, by="node_id")
# Modules with any resistance:
# Manual check to identify cows and modules (since there are only 22 state nodes with
# any of the three resistance types)
plas_mods.amr2 <- plas_mods.amr %>%
filter(res=="Present")
####################################################################################################################
####################################################################################################################
# Section 8: AMR PLASMID SUBNETWORKS
####################################################################################################################
# Beta lactam resistant plasmids:
beta.list <- c(108, 289, 3952, 4543, 5983, 6702, 7146, 7573)
# Recent dispersal, plasmids from
beta.recdisp.fr <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., recdisp.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# Recent dispersal, plasmids to
beta.recdisp.to <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., recdisp.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
beta.recdisp <- beta.recdisp.fr %>%
bind_rows(beta.recdisp.to) %>%
distinct() %>%
filter(node_from %in% beta.list & node_to %in% beta.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
# Distant dispersal, plasmids from
beta.distdisp.fr <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., distdisp.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# Distant dispersal, plasmids to
beta.distdisp.to <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., distdisp.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
beta.distdisp <- beta.distdisp.fr %>%
bind_rows(beta.distdisp.to) %>%
distinct() %>%
filter(node_from %in% beta.list & node_to %in% beta.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
# HGT, plasmids from
beta.hgt.fr <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., hgt.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# HGT, plasmids to
beta.hgt.to <- amr.types1 %>%
filter(beta.lact==1) %>%
left_join(., hgt.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
beta.hgt <- beta.hgt.fr %>%
bind_rows(beta.hgt.to) %>%
distinct() %>%
filter(node_from %in% beta.list & node_to %in% beta.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
# Repeat for tetracycline resistance
# Tetracycline resistant plasmids
tet.list <- c(1293, 3066, 3455)
# Recent dispersal, plasmids from
tet.recdisp.fr <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., recdisp.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# Recent dispersal, plasmids to
tet.recdisp.to <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., recdisp.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
tet.recdisp <- tet.recdisp.fr %>%
bind_rows(tet.recdisp.to) %>%
distinct() %>%
filter(node_from %in% tet.list & node_to %in% tet.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
# Distant dispersal, plasmids from
tet.distdisp.fr <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., distdisp.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# Distant dispersal, plasmids to
tet.distdisp.to <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., distdisp.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
tet.distdisp <- tet.distdisp.fr %>%
bind_rows(tet.distdisp.to) %>%
distinct() %>%
filter(node_from %in% tet.list & node_to %in% tet.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
# HGT, plasmids from
tet.hgt.fr <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., hgt.netdat.et, by = c("node_id"="node_from"), keep=TRUE)
# HGT plasmids to
tet.hgt.to <- amr.types1 %>%
filter(tet.res==1) %>%
left_join(., hgt.netdat.et, by = c("node_id"="node_to"), keep = TRUE)
# Join together
tet.hgt <- tet.hgt.fr %>%
bind_rows(tet.hgt.to) %>%
distinct() %>%
filter(node_from %in% tet.list & node_to %in% tet.list) %>%
distinct() %>%
select(node_from, node_to, weight, everything())
####################################################################################################################