-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path13_Gillespie_model_setup.R
186 lines (149 loc) · 6.43 KB
/
13_Gillespie_model_setup.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
####################################################################################################################
# Plasmid rumen network analysis
#
# Script 13: Set up Gillespie stochastic simulations code for gene dispersion in a network
#
#
# Script tested for R version 4.1.1
####################################################################################################################
####################################################################################################################
# SCRIPT SET-UP
####################################################################################################################
# Set working directory to wherever your files are located
# Load the necessary packages:
library(tidyverse)
library(igraph)
# Starting files:
# Edgelist with edgeweights created in script 03_Network_setup.R
load("net.dat2k.ew.Rda")
# Degree of physical nodes, created in script 04_Basic_network_statistics.R
load("deg.str.2k.all.phys.Rda")
# Infomap data created in script 08_Infomap_analysis_full_network.R
load("plas_mods.df.Rda")
# Statistics on infomap modules created in script 08_Infomap_analysis_full_network.R
load("plas_mods.stats.Rda")
####################################################################################################################
####################################################################################################################
# Section 1: CREATE GILLESPIE FUNCTION
####################################################################################################################
# Gillespie function:
gillespie <- function(boolean, # Your state nodes with or without gene
sim.matrix, # Matrix defining the flow
pars, # Parameters of the model
time_v){ #And a vector of times when things happen
# Rates
p <- pars$success_probability # For gene transfer
cont <- pars$contact_rate # Rate at which plasmids contact (NOTE that I see this as a total rate, not a per capita one)
loss <- pars$loss_rate # Gene loss per capita rate (NOTE that this changes when we gain or loose genes)
# States: note that the state of the system is encoded in the boolean but it
# will be handy to track the number of genes in the (meta)population.
G <- sum(boolean)
n_plasmids <- length(boolean) # Needed for later
# Propensities
r1 <- function(){cont} # Two plasmids collide
r2 <- function(){loss * G} # Losing a gene
# Vector of propensities
rates <- c(r1(), r2())
# Names of events
events <- c("f1", "f2")
# Events: each one actualizes the corresponding states and propensities.
f1 <- function(){ #Two plasmids collide
x1 <- sample.int(n_plasmids, 1)
x2 <- sample.int(n_plasmids, 1)
if(boolean[x1] != boolean[x2]){
gain <- runif(1) < (p * sim.matrix[x1, x2])
if (gain) boolean[x1] <<- T; boolean[x2] <<- T; G <<- G + 1; rates[2] <<- r2()
}
}
f2 <- function(){ #A gene is loss at random
index <- which(boolean)
lost <- sample.int(G, 1)
boolean[index[lost]] <<- F
G <<- G - 1
rates[2] <<- r2()
rm(index); rm(lost)
}
# Initialize variables
t <- time_v[1]
out <- matrix(nrow = n_plasmids, ncol = length(time_v))
out[, 1] <- boolean
# Note: a little fraction of time is out of sync. Correct when everything else is settled.
# Size of error depends on contact rate (higher contact rate = lower error)
# Start simulations
for(i in 2:length(time_v)){
while (t < time_v[i]){
# Sampling
dt <- stats::rexp(1, sum(rates))
z <- sample.int(length(rates), 1, prob = rates)
do.call(events[z], args = list())
t <- t + dt
}
out[, i] <- boolean #saving the states
}
out
}
####################################################################################################################
####################################################################################################################
# Section 2: CREATE A MATRIX FROM THE EXTENDED EDGELIST FOR THE GILLESPIE MODEL
####################################################################################################################
# Create state-node labels (layer-plasmid names combination)
pre.mat1 <- net.dat2k.ew %>%
rowwise() %>%
mutate(fr_node = paste("N",node_from,sep = "")) %>%
mutate(fr_lay = paste("L",layer_from,sep = "")) %>%
mutate(fr_grp = paste(fr_node, fr_lay, sep = "_")) %>%
mutate(to_node = paste("N",node_to,sep = "")) %>%
mutate(to_lay = paste("L",layer_to,sep = "")) %>%
mutate(to_grp = paste(to_node, to_lay, sep = "_"))
# Select relevant columns for creating the matrix
pre.mat2 <- pre.mat1 %>%
ungroup() %>%
select(fr_grp, to_grp, weight)
# Rename columns
pre.st.fr <- pre.mat1 %>%
ungroup() %>%
select(layer_from, node_from, fr_grp) %>%
distinct() %>%
rename(layer_id=layer_from,
node_id=node_from,
st.node.id=fr_grp)
# Create a graph from the dataframe
graph1 <- graph_from_data_frame(pre.mat2, directed=F)
# Create an adjacency matrix from the graph
mat <- get.adjacency(graph1, sparse = FALSE, attr='weight', type="both")
# Save the matrix:
save(mat, file="mat.Rda")
# Create a list of state nodes and bind them to their metadata (characteristics of plasmids and their modules)
st.node.list <- pre.mat1 %>%
ungroup() %>%
select(layer_to, node_to, to_grp) %>%
distinct() %>%
rename(layer_id=layer_to,
node_id=node_to,
st.node.id=to_grp) %>%
bind_rows(., pre.st.fr) %>%
left_join(., deg.str.2k.all.phys, by="node_id") %>%
select(-strength, -edge_type) %>%
left_join(., plas_mods.df) %>%
left_join(., plas_mods.stats)
# Create variable for the matrix order of each state node
st.node.order <- as.data.frame(mat) %>%
rownames_to_column(., var="st.node.id") %>%
select(st.node.id) %>%
# Give a position variable
rownames_to_column(., var="mat.order") %>%
mutate(mat.order=as.numeric(mat.order))
# Bind metadata to the ordered state node list with matrix order variable
st.node.list.ord <- st.node.list %>%
left_join(., st.node.order) %>%
mutate(layer_id=as.factor(layer_id)) %>%
arrange(mat.order) %>%
distinct()
# Save the data
save(st.node.list.ord, file="st.node.list.ord.Rda")
# Create a list of the matrix order variable and cow layer, e.g. identify
# which cow each matrix position corresponds to
mat.ids <- st.node.list.ord %>%
select(mat.order, layer_id)
# Save the data
save(mat.ids, file="mat.ids.Rda")