-
Notifications
You must be signed in to change notification settings - Fork 0
/
ananse_graph_analysis.rmd
750 lines (600 loc) · 23.2 KB
/
ananse_graph_analysis.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
---
title: "Ptychodera Cisreg Development: Cis-regulatory network Graph Analysis"
author: "Alberto Perez-Posada"
date: "4/01/2023"
output:
html_document: default
pdf_document: default
editor_options:
markdown:
wrap: sentence
---
```{r setup, include=FALSE}
dir <- '/home/ska/aperpos/projects/ptychodera_cisreg_development/'
fcha <- function(){ gsub("-","",Sys.Date()) }
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = dir)
options(scipen=999)
Sys.setenv(VROOM_CONNECTION_SIZE=5000072)
```
## About
In this markdown, we will analyse the networks of TF/target gene cis-regulatory interactinos generated using ANANSE.
We will load these networks as tabular data that we will transform into `igraph` graph objects, that we can parse and annotate using our information on TF classes and other similar functional annotation that we have carried out over the course of this project.
## Load libraries
```{r libraries, message=FALSE, warning=FALSE}
library(DESeq2)
library(limma)
library(ComplexHeatmap)
library(circlize)
library(ggplot2)
library(igraph)
```
## Load functions
```{r load_functions, message = FALSE}
source("code/r_code/r_functions/sourcefolder.R")
sourceFolder(
"code/r_code/r_functions",
recursive = TRUE
)
sourceFolder(
"code/r_code/r_general",
recursive = TRUE
)
```
## Load previous Data
We will need some of the information we have generated in our previous analyses such as stage-specific cluster information and Transcription factor annotation.
```{r}
# RNA clusters
load("outputs/rda/stage_specific_clusters.rda")
# Transcription Factor information
load("outputs/rda/TF_annotation.rda")
```
## Load and prepare information for graph analysis
Here we will load all the remaining information of our interest and we will transform it into a format compliant to some of the functions we have made to annotate the graphs; basically, tabular data with gene id <--> annotation category.
```{r}
# Transcription factors
allTFclasses_col <- c(topclasses_col,otherclasses_col)
pfla_tfs_graph_analysis <-
merge(
pfla_tfs,
data.frame(
TFclass =
c(names(topclasses_col),names(otherclasses_col)),
col =
c(topclasses_col,otherclasses_col)
),
by.x = 2,
by.y = 1
)[,c(2,1,3)]
colnames(pfla_tfs_graph_analysis) <- c("id","TFclass","col")
# Effector genes
pfla_tfeg <- data.frame(
id = allgenes,
TFEG = ifelse(allgenes %in% pfla_tfs$id, "TF", "EG")
)
# Functional categories (COG)
pfla_funcat <- read.table(
"/home/ska/aperpos/Def_Pfla/outputs/eggnog/20221014_ptyFlav3_CYi_longest.cds.fa.transdecoder.pep_eggnog_meNOG_all.emapper.annotations.COG.tsv",
col.names = c("id","funcat")
)
# Trans-developmental / housekeeping
pfla_td_nohk <-
unique(
read.table(
"/home/ska/aperpos/Def_Pfla/outputs/identif_tf_transdev/transdev/20210507_transdev_expanded_exclusive_nonhk.tsv",
header=F,
stringsAsFactors = F)[,1]
)
pfla_hk_notd <-
unique(
read.table(
"/home/ska/aperpos/Def_Pfla/outputs/identif_housekeeping_genes/20210507_housekeep_custom_exclusive_nontd.tsv",
header=F,
stringsAsFactors=F)[,1]
)
pfla_tdhk <- data.frame(
id = allgenes,
TDHK =
ifelse(allgenes %in% pfla_td_nohk, "td",
ifelse(allgenes %in% pfla_hk_notd, "hk",
"none")
)
)
```
Finally, we add them into a list that our function for annotation will iterate through. We will call this the **list of attributes**
```{r}
pfla_attributes_list <-
list(
pfla_tfeg,
pfla_tfs_graph_analysis,
pfla_funcat,
pfla_tdhk
)
```
We will also load the GO annotation of Ptychodera to use it later on in the functions.
```{r}
#gene universe
gene_universe <- allgenes
# gene-GO mappings
pfla_id_GO <-
readMappings(
"outputs/functional_annotation/go_blast2go/GO_annotation.txt"
)
```
Likewise, we will also load the EggNOG annotation of gene names for more clarity in the plots of the graphs.
```{r}
pfla_genenames <-
read.delim2(
file = "/home/ska/aperpos/Def_Pfla/outputs/eggnog/venog_all/ptyFlav3_CYi_longest.cds.fa.transdecoder.pep_eggnog_veNOG_all.emapper.annotations",
skip = 3,
header = TRUE
)[,c(1,5,13)]
pfla_genenames <- pfla_genenames[pfla_genenames$predicted_gene_name != "",]
```
## Load ANANSE outputs
Here we will use the function `LoadNetworkData` that reads the .network file replacing instances of em dash and other ANANSE-related quirks. The resulting dataframe has three columns: "tf" (transcription factor), "tg" (target gene), and "prob" (probability). We manually add back underscores to the gene ids because ANANSE required the gene ids in a different format.
We do this for both the Early Blastula (EB) and Late Gastrula (LG) networks.
```{r, warningss = FALSE, message = FALSE}
# EARLY BLASTULA
pfla_EB_nw <- LoadNetworkData("/home/ska/aperpos/Def_Pfla/outputs/ananse/20210901_EB.network")
pfla_EB_nw$tf <- gsub("TCONS", "TCONS_", pfla_EB_nw$tf)
pfla_EB_nw$tg <- gsub("TCONS", "TCONS_", pfla_EB_nw$tg)
# LATE GASTRULA
pfla_LG_nw <- LoadNetworkData("/home/ska/aperpos/Def_Pfla/outputs/ananse/20210908_LG.network")
pfla_LG_nw$tf <- gsub("TCONS", "TCONS_", pfla_LG_nw$tf)
pfla_LG_nw$tg <- gsub("TCONS", "TCONS_", pfla_LG_nw$tg)
```
Here it is what it looks like.
```{r}
head(pfla_EB_nw)
```
## Early Blastula
The first thing we will do is filter the network and keep only those interactions whose values are in the top 5% percentile. We will use this later on to quantify and measure the In- and Out-Degree of each gene, among other things.
```{r}
pfla_EB_nw2 <- FilterNetwork(pfla_EB_nw,q=0.95)
```
We generate an `igraph` object using our custom wrapper `GenerateNetwork` that also filter by the same number of interactions, since we provide `q = 0.95`.
```{r}
pfla_EB_graph <- GenerateNetwork(pfla_EB_nw,q = 0.95)
```
Using our list of attributes, we add this information to all the pertinent nodes (== genes) of our network using our ParseNetwork function.
```{r}
pfla_EB_parsenetwork <- ParseNetwork(pfla_EB_graph, pfla_attributes_list)
```
The resulting object can be subsetted to extract the new network with attributes on one object and the data frame of attributes on the other.
```{r}
pfla_EB_graph2 <- pfla_EB_parsenetwork[[1]]
pfla_EB_df_attr <- pfla_EB_parsenetwork[[2]]
```
The data frame of attributes is a data.frame object sorted in the same order as nodes are indexed in the igraph object. The content of this data frame is, for every node that has an attribute, a row with the id of this node, the index of this node, and a bunch of attributes (color, whether it is a trans-developmental gene, TF class, functional category, etc.)
```{r}
head(pfla_EB_df_attr)
```
Finally, we use our wrapper `NetworkStats`.
This wrapper performs a number of calculations using our igraph object and the tabular data frame, including also the gene ontology of target genes, and also reports the top central genes, the centrality of different kinds of genes by attribute (e.g. centrality of genes by TF class).
The inputs of the function are:
- nw: the network represented as a tabular factor/target/prob
- graph: the igraph object representing the network
- att: a data frame containing information about the nodes of the network (e.g., the gene class, the gene function category, etc.)
- N: the number of top nodes to select
- C: a parameter to select the connected component of the graph to analyze
- gene_universe: the universe of genes to consider in the gene ontology (GO) analysis
- id2go: a mapping between gene IDs and GO IDs
The function outputs a list containing various statistics and metrics of the network, such as the number of genes, the number of active TFs, the top emitters and receivers of the network, the number of self-regulated TFs, the size of the connected components of the graph, the centrality of the TFs and the functional categories, and the gene ontology information.
```{r}
pfla_EB_stats <- NetworkStats(pfla_EB_nw2, pfla_EB_graph2, pfla_EB_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
```
## Late Gastrula
Next we do the same for the Late Gastrula Network. We start by filtering the Network:
```{r}
pfla_LG_nw2 <- FilterNetwork(pfla_LG_nw,q=0.95)
```
Then we also generate an `igraph` network using the same percentile.
```{r}
pfla_LG_graph <- GenerateNetwork(pfla_LG_nw,q = 0.95)
```
We parse this graph using our list of attributes
```{r}
pfla_LG_parsenetwork <- ParseNetwork(pfla_LG_graph, pfla_attributes_list)
pfla_LG_graph2 <- pfla_LG_parsenetwork[[1]]
pfla_LG_df_attr <- pfla_LG_parsenetwork[[2]]
```
And finally we calculate the stats and metrics of this network:
```{r}
pfla_LG_stats <- NetworkStats(pfla_LG_nw2, pfla_LG_graph2, pfla_LG_df_attr, N = 10, gene_universe = gene_universe, id2go = pfla_id_GO)
```
## Comparing the networks
We will have a look at the output of these wrappers a bit later. We will now compare these two networks using the NetworkStats and the filtered tab-format networks that we have generated.
Part of this comparison involves defining a subset of genes of interest, which we load here.
```{r}
genes_postgr <-
rownames(pfla_rna_dev)[
pfla_rna_dev$cID > 12
]
```
And another part is loading the results of running ANANSE influence, which takes into account differential gene expression to infer the responsible factors for changes in the networks of cis-regulatory interactions between two states (in our case, the transition from early blastula to late gastrula).
```{r}
ananse_EB_to_LG <-
read.table("/home/ska/aperpos/Def_Pfla/outputs/ananse/20210909_ANANSE_EG_EB",header=T) #change this path
ananse_EB_to_LG$factor <-
sub("TCONS","TCONS_",ananse_EB_to_LG$factor)
```
The `compareNetworks` wrapper compares two networks, nw_a and nw_b, and their respective graphs, graph_a and graph_b, and outputs various metrics, visualizations, and data frames.
The function requires the following arguments:
- `nw_a`: A data frame or network object for network A.
- `nw_b`: A data frame or network object for network B.
- `graph_a`: A graph object for network A.
- `graph_b`: A graph object for network B.
- `stats_a`: A list of statistics for network A. Generated using NetworkStats() as explained above.
- `stats_b`: A list of statistics for network B. Generated using NetworkStats() as explained above.
- `influence`: if present, it outputs subgraphs of, and visualisations of, the top factors explaining the transition (as estimated by ANANSE influence).
- `name_network_a`: The name of network A. The default is "a".
- `name_network_b`: The name of network B. The default is "b".
- `col_a`: The color of network A in the Venn diagram. The default is "darkorange".
- `col_b`: The color of network B in the Venn diagram. The default is "purple".
- `geneset_interest`: if present, it compares the percentage of a set of genes of interest as targets in each of the networks.
- `top`: The proportion of top targets in each network to consider. The default is 0.9.
- `tfs`: Not used in the function.
- `gene_universe`: A vector of all genes.
- `id2go`: A data frame containing gene ontology (GO) annotations.
The function performs the following tasks:
- Identifies the exclusive and common targets in each network, and creates a Venn diagram of the targets.
- Determines the GO terms for the exclusive and common targets and returns the topGO Fisher's test (elim method vs the whole set of ptychodera genes)
- Calculates various metrics for the two networks, such as the number of genes, active transcription factors (TFs), connections per TF and target gene (TG), and self-regulated TFs, and creates a data frame of the calculated metrics.
- Merges the InVsOut (In-degree vs. Out-degree) behavior of genes in both networks.
- Calculates the centrality of common genes in both networks, and creates a data frame of the centrality metrics.
- Calculates the centrality of TFs in both networks, and creates a data frame of the centrality metrics.
- Calculates the centrality of trans-dev (trans-developmental) genes in both networks, and creates a data frame of the centrality metrics.
- It generates sub-graphs of each of the networks, with the transcription factors found to have differences in centrality between each network.
- Some of the output plots are:
- barplots of the metrics,
- venn diagram of shared target genes across networks,
- scatter plots of centrality of common genes across networks, including TFs, and trans-dev genes,
- plots of foldchanges of centrality of genes across networks, including TFs, and trans-dev genes,
- plots of foldchanges of in-outdegree gene behavior across networks,
- sub-graphs of top central tfs on each network,
- scatter plot of top factors from ANANSE influence,
- sub graph of the network b factor using the top factors from ANANSE influence.
```{r}
pdf(file = "graphics/EB_LG_comparenetworks.pdf",he=8,wi=10)
EB_vs_LG <- compareNetworks(
name_network_a = "EB",
name_network_b = "LG",
nw_a = pfla_EB_nw2,
nw_b = pfla_LG_nw2,
top = 0.9,
stats_a = pfla_EB_stats,
stats_b = pfla_LG_stats,
graph_a = pfla_EB_graph2,
graph_b = pfla_LG_graph2,
influence = ananse_EB_to_LG,
tfs = pfla_tfs_graph_analysis,
geneset_interest = genes_postgr,
id2go = pfla_id_GO,
gene_universe = gene_universe,
col_a = "#efaa90",
col_b = "#fbcf99"
)
dev.off()
```
## Network plots
We can use the wrapper `NetworkPlots` to generate ALL of these plots together. The `CompareNetworks` wrapper will also generate plots into a file if we run the whole function inside a call to a pdf device, as done above.
```{r}
pdf(file = "graphics/EB_graph_plots.pdf",he=20,wi=20)
NetworkPlots(
stats = pfla_EB_stats,
nw = pfla_EB_nw,
tfcol = allTFclasses_col,
layout = TRUE,
pdf = F
)
dev.off()
```
```{r}
pdf(file = "graphics/LG_graph_plots.pdf",he=20,wi=20)
NetworkPlots(
stats = pfla_LG_stats,
nw = pfla_LG_nw,
tfcol = allTFclasses_col,
layout = TRUE,
pdf = F
)
dev.off()
```
Although these plots have been saved in disk, for clarity, we will explore these analyses by re-plotting them here. We will define the constants of colors.
```{r}
col_a = "#efaa90"
col_b = "#fbcf99"
```
We start by having a look at the number of connections per target gene. This is done by counting the number of instances of each gene in the "target" column of the tabular networks.
```{r}
# num connections per target gene
boxplot(
main = "connections per\ntarget gene",
list(
c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
),
names = c("\nEarly\nBlastula", "\nLate\nGastrula"),
col = c(col_a,col_b),
sub = paste0(
"Wilcox p.value ",
wilcox.test(
x = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_a),
y = c(EB_vs_LG$connections_per_tgt_gene$connects_per_tgt_gene_b)
)$p.value
)
)
```
We can also have a look at the number of TFs that are self-regulated on each network by counting the number of times a given TF gene appears as a target of itself in the tabular networks.
```{r}
# Number of self-reg TFs
barplot(
main = "Number of\nself-regulating TFs",
height = c(
EB_vs_LG$comparison_table$a[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"],
EB_vs_LG$comparison_table$b[EB_vs_LG$comparison_table$metric == "Num Self-Regulated TFs"]
),
names.arg = c("Early\nBlastula", "Late\nGastrula"),
col = c(col_a,col_b),
)
```
How different are these networks at the target level? We can dig into this by looking at how overlapped these networks are, and at the enriched GO terms in the exclusive and commonly-shared target genes of each network.
```{r}
# number of shared targets
x <-
list(
EB = c(
EB_vs_LG$target_genes$targets_common_ab,
EB_vs_LG$target_genes$targets_exclusive_a
),
LG = c(
EB_vs_LG$target_genes$targets_common_ab,
EB_vs_LG$target_genes$targets_exclusive_b
)
)
# Helper function to display Venn diagram
display_venn <- function(x, ...){
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...)
grid.draw(venn_object)
}
# Display the plot directly in R:
display_venn(
x,
# Circles
lwd = 2,
lty = 'blank',
fill = c(col_a, col_b),
# Numbers
cex = .9,
fontface = "italic",
# Set names
category.names = c("EB" , "LG"),
cat.cex = 1,
cat.pos = 180,
cat.fontface = "bold",
cat.default.pos = "outer"
)
```
Here we have the enriched GO terms for the target genes exclusive to EB, the target genes exclusive to LG, and the common ones.
```{r}
# GO barplots or just the words for them
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_a
EB_vs_LG$GOs_targets$GOtable$targets_exclusive_b
EB_vs_LG$GOs_targets$GOtable$targets_common_ab
```
How are these changes in target genes achieved? What is happening at the factor level?
For this we can look at the changes in centrality for different genes across networks.
```{r}
# plots of correlation changes in centrality tfs and transdev
par(mfrow = c(1,2))
plot(
main = "Changes in TF centrality across networks",
x = EB_vs_LG$tf_centrality_across_networks$a,
y = EB_vs_LG$tf_centrality_across_networks$b,
xlab = "Early Blastula",
ylab = "Late Gastrula",
pch = 19,
cex = 0.75,
col = alpha("#E58745",0.25)
)
plot(
main = "Foldchange in centrality of TFs",
sort(
EB_vs_LG$tf_centrality_across_networks$b /
EB_vs_LG$tf_centrality_across_networks$a
),
col = alpha("#E58745",0.5),
ylab = "foldchange centrality"
)
par(mfrow = c(1,1))
```
In this case we see a large number of genes tend to acquire higher levels of centrality in LG compared to EB, as shown in the scatter plot and the plot of the foldchange.
And the same about trans-dev genes:
```{r}
par(mfrow = c(1,2))
# same in transdev
plot(
main = "Changes in trans-dev centrality across networks",
x = EB_vs_LG$transdev_centrality_across_networks$a,
y = EB_vs_LG$transdev_centrality_across_networks$b,
xlab="Early Blastula",
ylab = "Late Gastrula",
pch = 19,
cex = 0.75,
col = alpha("#1A9A83",0.25)
)
plot(
main = "Foldchange in centrality of trans-devs",
sort(
EB_vs_LG$transdev_centrality_across_networks$b /
EB_vs_LG$transdev_centrality_across_networks$a
),
col = alpha("#1A9A83",0.5),
ylab = "foldchange centrality"
)
par(mfrow = c(1,1))
```
This is also reflected at the InvsOut-Degree of genes, what we call "gene behaviour", and how it changes between networks. This can be visualised as cumulative density functions and/or boxplots.
```{r}
eb_inout_ratio <-
pfla_EB_stats$In_Out_per_Gene$ratio[
pfla_EB_stats$In_Out_per_Gene$ratio > 0
]
lg_inout_ratio <-
pfla_LG_stats$In_Out_per_Gene$ratio[
pfla_LG_stats$In_Out_per_Gene$ratio > 0
]
plot(
seq(1:length(eb_inout_ratio)),
eb_inout_ratio,
main = "Gene behavior\n(types & amount\nof connections)",
col = col_a,
type = "l",
xlab = "",
ylab = "% emitting connections",
lwd = 2,
ylim = c(0,1)
)
lines(
seq(1:79),
lg_inout_ratio[1:79],
main = "Gene behavior\n(types & amt of connections)",
col = col_b,
lwd = 2
)
```
To have a better idea of what are these TF genes, and since we saw that there is a TF switch at the transcriptional level during development (see previous markdowns), we can showcase the changes in the network structure by looking at the centrality of the different genes by TF class.
In this case we use the function `plot_centrailty_TFclass` that grabs a named list with teh centrality values of TF genes grouped by class and creates a barplot. Colors are assigned based on the named vector with TFclasses and color.
```{r}
## Centrality per TF class
par(mfrow = c(2,1))
plot_centrality_TFclass(
s = pfla_EB_stats$Centrality_per_TFclass,
f = allTFclasses_col,
# sub_s = topclasses,
main = "Centrality per TF Class - EB",
ylim = c(0.00005,0.00012)
)
plot_centrality_TFclass(
s = pfla_LG_stats$Centrality_per_TFclass,
f = allTFclasses_col,
# sub_s = topclasses,
main = "Centrality per TF Class - LG",
ylim = c(0.00005,0.00012)
)
par(mfrow = c(1,1))
```
Another way to check for differences between target genes in networks is looking at the presence of certain genes of interest in each. For example, we can check how many genes of larval development are being targeted in each of the networks. We see there is a large different in the proportion of larval genes that are embedded in the LG network compared to the EB network. This is hinting at the aforementioned switch towards larval development already taking place at the cis-regulatory level during gastrulation.
```{r}
# number of post-gastrulation genes in network
barplot(
main="Post-gastrulation genes\nin network",
height=c(
EB_vs_LG[[11]][1]/length(EB_vs_LG$target_genes$targets_exclusive_a)*100,
EB_vs_LG[[11]][2]/length(EB_vs_LG$target_genes$targets_exclusive_b)*100
),
col = c(
col_a,
col_b
),
names=c("EB", "LG"),
ylab="gene percent",
ylim = c(0,65),
las=1
)
```
Finally, to check the differences between the networks of these developmental stages, we can use the output of ANANSE influence to check for the top factors. Below we re-run the code used to generate the influence plot in `compareNetworks()`.
```{r}
# influence
influ_tbl <- ananse_EB_to_LG
influ_tbl <- merge(influ_tbl,pfla_tfs_graph_analysis,by.x=1,by.y=1,all.x=T)
influ_tbl$TFclass[is.na(influ_tbl$TFclass)] <- " "
influ_tbl$col[is.na(influ_tbl$col)] <- "gray"
influ_tbl <- influ_tbl [rev(order(influ_tbl$sumScaled)),]
rownames(influ_tbl) <- NULL
# 'translate' the gene ids to known gene names using a small function
influ_tbl$genename <-
translate_ids(x = influ_tbl$factor,dict = pfla_genenames)
```
Here the plot of ANANSE influence. Colors indicate different TF classes. Where available, gene names have been added.
```{r}
plot(
influ_tbl$factor_fc,
influ_tbl$sumScaled,
pch=19,
col=influ_tbl$col,
bg="black",
xlab="log2fold change of TF",
ylab="ANANSE influence score",
main="Main factors",
bty="n",
xlim=c(0,max(influ_tbl$factor_fc)+1)
)
text(
influ_tbl$factor_fc[1:20],
influ_tbl$sumScaled[1:20],
influ_tbl$genename[1:20],
pos = 3,
cex=0.85
)
```
And here is the plot of the graph of these TFs:
```{r}
V(EB_vs_LG$influence_graph)$genename <-
translate_ids(V(EB_vs_LG$influence_graph)$name,pfla_genenames)
E(EB_vs_LG$influence_graph)$width <-
category_by_quantile(
E(EB_vs_LG$influence_graph)$prob,
newvalues = c(0.2,1,2,5)
)
plot(
main = "Influence network",
EB_vs_LG$influence_graph,
vertex.color = V(EB_vs_LG$influence_graph)$col,
vertex.label = V(EB_vs_LG$influence_graph)$genename,
edge.width = E(EB_vs_LG$influence_graph)$width,
edge.arrow.size = 0.2,
edge.color = rgb(0,0,0,0.1),
layout = layout_with_fr(EB_vs_LG$influence_graph)
)
```
## Genes in In Situs
Here there will be some code to subset the graphs and get the genes of each developmental layer.
```{r}
```
## Networks from Other Organisms
Here there will be code to retrieve the subgraphs of genes from GRNs in other organisms.
```{r}
```
## Save the Data
We will save the data now:
```{r}
save(
# Attributes
pfla_attributes_list,
pfla_tfs_graph_analysis,
pfla_tdhk,
pfla_tfeg,
pfla_funcat,
# Early Blastula
pfla_EB_nw,
pfla_EB_nw2,
pfla_EB_parsenetwork,
pfla_EB_graph,
pfla_EB_graph2,
pfla_EB_stats,
# Late Gastrula
pfla_LG_nw,
pfla_LG_nw2,
pfla_LG_parsenetwork,
pfla_LG_graph,
pfla_LG_graph2,
pfla_LG_stats,
# Network comparison
EB_vs_LG,
file = "outputs/rda/graph_analysis.rda"
)
```