-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathModel_fitting.R
826 lines (662 loc) · 42 KB
/
Model_fitting.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
##### This script is written for fixing calibration bias for old duplicates, and to perform model-fitting
### To generate dataset with no or less calibration bias for old duplicates, we used the original scripts of Dunn et al. (2018), and modified few steps
### The original scripts of Dunn et al. (2018) are available at https://github.com/caseywdunn/comparative_expression_2017.
### We used our own scripts, and functions to reanalyze the data, and to perform model fitting in a maximum likelihood, and in a Bayesian frameworks
# The following should be installed from github with the specified
# devtools command. You will need to install devtools first.
library(treeio) # devtools::install_github("GuangchuangYu/treeio")
library(ggtree) # devtools::install_github("GuangchuangYu/ggtree")
library(hutan) # devtools::install_github("caseywdunn/hutan")
# The rest can be installed from CRAN
library(ape)
library(digest)
library(geiger)
library(ggrepel)
library(gridExtra)
library(magrittr)
library(foreach)
library(doParallel)
library(phytools)
library(stringr)
library(tidyverse)
library(caper)
library(mvMORPH)
library(OUwie)
library(bayou)
## Setting working directory
setwd("/Users/admin/Desktop/nwork/TMRR/")
## Functions required
source("functions_Dunn.R") ## Original function script provided by Dunn et al.(2018)
source("functions_TM_new.R") ## Functions written for this study
set.seed(23456)
## Setting system computational parameters
cores <- detectCores() - 1
if (cores < 1) {
cores = 1
}
## Register parallel workers for %dopar%
registerDoParallel(cores)
## To test the ortholog conjecture simulation model, the fold change rates following duplication with respect to speciation
dup_adjust <- 2
## Compara gene trees file was downloaded from ftp://ftp.ensembl.org/pub/release-75/emf/ensembl-compara/homologies/
gene_tree_name <- "Compara.75.protein.nhx.emf"
## Tau values of species were taken from Rscript.R of Kryuchkova-Mostacci and Robinson-Rechavi (2016)
expression_file_names <- c(
"ChickenBrawandTScomparisonTable_9_6chPC.txt",
"ChimpBrawandTScomparisonTable_9_6cmPC.txt",
"GorillaBrawandTScomparisonTable_9_6gPC.txt",
"HumBrawandTScomparisonTable_9_6hPC.txt",
"MacacaBrawandTScomparisonTable_9_6mcPC.txt",
"MusBrawandTScomparisonTable_9_6mPC.txt",
"OpossumBrawandTScomparisonTable_9_6oPC.txt",
"PlatypusBrawandTScomparisonTable_9_6pPC.txt"
)
## Directory for reanalyses
tmrr_directory <- "tmrr/"
expression_file_names %<>% paste(tmrr_directory, ., sep="")
gene_tree_name %<>% paste( tmrr_directory, ., sep="")
## The minimum number of genes with expression data for each gene tree, as used by Dunn et al. (2018)
min_genes_with_expression <- 4
## These clade dates are taken from the study of Kryuchkova-Mostacci and Robinson-Rechavi (2016), which are originally obtained from http://timetree.org/
calibration_times <- data.frame(
age =
c(20, 92, 167, 9, 42, 74, 296, 535, 104, 937, 29, 722, 441, 65,
15, 1215, 414, 371, 162, 25, 74, 77, 86, 7),
clade =
c("Hominoidea", "Euarchontoglires", "Mammalia", "Homininae",
"Simiiformes", "Primates", "Amniota", "Vertebrata", "Eutheria",
"Bilateria", "Catarrhini", "Chordata", "Euteleostomi",
"Haplorrhini", "Hominidae", "Opisthokonta", "Sarcopterygii",
"Tetrapoda", "Theria", "Murinae", "Sciurognathi", "Rodentia",
"Glires", "Hominini"
),
stringsAsFactors=FALSE
)
## The 7 speciation time points used by Dunn et al.(2018)
focal_calibrations_clades <-
c("Hominini", "Homininae", "Catarrhini", "Euarchontoglires", "Theria",
"Mammalia", "Amniota")
## Reading the gene tree file as a vector of strings, one per line
lines <- readLines(gene_tree_name)
tree_lines <- lines[grepl( "^\\(", lines, perl = TRUE )]
## Parse each string to a treeio::treedata object, adding a label column for node names
gene_trees <- foreach(tree_line=tree_lines) %dopar%
parse_gene_trees(tree_line)
## Total number of gene trees
n_gene_trees <- length(gene_trees)
## We added index to all the gene trees
Index<-0
for(Index in 1:length(gene_trees))
{
gene_trees[[Index]]@data$index.tree<-Index
}
## Parsing the tree annotations, these are lines that start with SEQ
tip_annotations <-
lines[grepl("^SEQ", lines, perl = TRUE)] %>%
str_replace(" \\(\\d+ of \\d+\\)", "") %>%
str_replace("(([^ ]+ ){8})([^ ]+) ([^ ]+)", "\\1\\3_\\4") %>%
str_c(collapse="\n") %>%
read_delim(delim=" ", col_names = FALSE) %>%
rename(Ensembl.Gene.ID = X8) %>%
rename(species = X2)
## Selecting required columns
tip_annotations<- tip_annotations[c(2,8,9)]
## Free up memory
rm(lines)
rm(tree_lines)
## Reading the expression data for each species and combine into a single tibble,
expression <-
lapply(
expression_file_names,
read.table,
stringsAsFactors=FALSE,
header=TRUE
) %>%
bind_rows() %>%
left_join(tip_annotations, by="Ensembl.Gene.ID")
## Adding the expression data to the tips of the trees
## Annotate each tree by joining the corresponding expression data to the
## @data object.
add_expression_to_tree <- function(tree){
tree@data %<>%
left_join(expression, by = c("G" = "Ensembl.Gene.ID"))
return(tree)
}
gene_trees_annotated <- foreach(tree=gene_trees) %dopar%
add_expression_to_tree(tree)
## Cleaning memory
#rm(gene_trees)
############ New ways to time calibrate trees fixed by TMRR ###########
## Fixing clade names following Dunn et al. (2018) before calibration
gene_trees_annotated <- lapply(gene_trees_annotated,fix_hominini)
calibration_times$label<- calibration_times$clade
## Modifyting duplication node labels
#gene_trees_annotated1 = lapply(gene_trees_annotated, modify_label)
gene_trees_annotated1 <- lapply(gene_trees_annotated, tree_nodedepth)
## Time calibrating the trees before pruning unlike Dunn et al. (2018)
count<-0
Time_calibrated_trees <- lapply(gene_trees_annotated1,
tree_calibrate,
timeframe=calibration_times,
model="correlated")
## Successfully calibrated trees
gene_trees_calibrated_new<-Time_calibrated_trees[!is.na(Time_calibrated_trees)]
gene_trees_calibrated_new<-gene_trees_calibrated_new[!sapply(gene_trees_calibrated_new, is.null)]
## Pruning trees, i.e., removing tips from the trees that do not have expression data
gene_trees_prune_new <- foreach( tree=gene_trees_calibrated_new ) %dopar%
drop_empty_tips(
tree,
min_genes_with_expression=min_genes_with_expression
)
gene_trees_prune_new<-gene_trees_prune_new[!is.na(gene_trees_prune_new)]
gene_trees_prune_new<-gene_trees_prune_new[!sapply(gene_trees_prune_new, is.null)]##passed 7336 trees
## Remodifying the previously modified duplication label
gene_trees_prune_new <- lapply(gene_trees_prune_new, remodify_label)
## Storing node age using the function provided by Dunn et al. (2018)
gene_trees_prune_calibrated <- foreach(tree=gene_trees_prune_new) %dopar%
store_node_age(tree)
gene_trees_calibrated_now<-gene_trees_prune_calibrated ##passed 7336 trees
## Remove trees with no speciation events, i.e., pure duplication trees
gene_trees_calibrated_now <-
gene_trees_calibrated_now[
unlist( lapply( gene_trees_calibrated_now, get_n_speciation ) ) > 0
] ## Passed 7297 trees, and removed 39 (7336-7297) pure duplication trees
## Calculating phylogenetic independent contrasts using the function provided by Dunn et al.(2018)
gene_trees_pic_new <- add_pics_to_trees(gene_trees_calibrated_now) ##passed 7297 trees
## Adding node heights to each gene tree
gene_trees_pic_new<-lapply(gene_trees_pic_new,tree_height)
summary_all_trees<-bind_rows(mclapply(gene_trees_pic_new,summary_function,mc.cores = cores))
summary_all_trees<-summary_all_trees[which(!is.na(summary_all_trees$pic)),]
length(summary_all_trees$pic[which(summary_all_trees$Event=="Duplication")]) ##n=15274
length(summary_all_trees$pic[which(summary_all_trees$Event=="Speciation")]) ##n=52081
## Total 52081 speciation events, and 15274 duplication events
## Cleaning memory
rm(Time_calibrated_trees)
rm(gene_trees_prune_calibrated)
rm(gene_trees_annotated1)
rm(expression)
rm(tip_annotations)
#save.image("Dunn_tree_index_mapping_new.rda")
## Keeping gene trees with at least one duplication, and one speciation events
## This is necessary to understand the evolutionary trajectories of different events for the same gene
trees_of_interest<-mclapply(gene_trees_pic_new,tree_with_duplication, mc.cores = cores)
trees_of_interest<-trees_of_interest[!is.na(trees_of_interest)]
trees_of_interest<-lapply(trees_of_interest,tree_height) #passed 4288 trees
## Median number of tips for the retained gene trees
tree_info<-bind_rows(lapply(trees_of_interest, tree_data_statistics,"all"))
median(tree_info$tip_num) ## n= 15
## Summmary statistics of events
trees_of_interest_PIC <- add_pics_to_trees( trees_of_interest ) ## 4288 trees
summary_4288_trees<-bind_rows(mclapply(trees_of_interest_PIC,summary_function,mc.cores = cores))
summary_4288_trees<-summary_4288_trees[which(!is.na(summary_4288_trees$pic)),]
length(summary_4288_trees$pic[which(summary_4288_trees$Event=="Duplication")]) ##n=15274 duplication
length(summary_4288_trees$pic[which(summary_4288_trees$Event=="Speciation")]) ##n=38882 speciation
length(summary_4288_trees$pic[which(is.na(summary_4288_trees$Event))]) ##n=15201 NA event
## Total 38882 speciation events, and 15274 duplication events (i.e. 13199 speciation events are removed by removal of pure speciation trees)
# Loaded the previously stored data generated by using "manuscript_kernel.R" of Dunn et al.(2018) to compare
load("/Users/admin/Desktop/nwork/simulation/manuscript_dunn.RData")
################### Model fitting using mvMORPH, and OUwie after painting speciation, young and old duplication branches separately for each tree #####################
trees_of_interest_modified<-lapply(trees_of_interest,modify_tiplabel) # Changing tip Ensembl protein ID to Ensembl Gene ID
## Painting trees before model fitting
count<-0
trees_of_interest_painted<-lapply(trees_of_interest_modified,paint_tree_mod_new,"Empirical")
trees_of_interest_painted1<- trees_of_interest_painted[!sapply(trees_of_interest_painted, is.null)]
trees_of_interest_painted1<-trees_of_interest_painted1[!is.na(trees_of_interest_painted1)]
#################### Model fitting analyses ####################
i<-0
Tree_model_fit_summary<-bind_rows(lapply(trees_of_interest_painted1,model_fitting_genetree_for_Tau,trait="Tau"))
## Adding model parameters to the "phylo" slots of the trees, collected from the best fit model
i<-0
Tree_added_parameters<-lapply(trees_of_interest_painted1,add_model_parameters_info,Tree_model_fit_summary)
Tree_added_parameters<-mclapply(Tree_added_parameters,tree_added_events_info,mc.cores=cores)
### Counting total speciation and duplication events of 4288 trees
## Classifying duplication events into young (age <= 296 My, maximum speciation age), and old (age > 296 My) for further analyses
Tree_all<-Tree_added_parameters[c(1:4288)]
table_event<-NULL
for(i in 1:length(Tree_all))
{
table_event<-bind_rows(table_event,tibble(index=i,
n_speciation=Tree_all[[i]]@phylo$n_speciation,
n_duplication_young=Tree_all[[i]]@phylo$n_duplication_young,
n_duplication_old=Tree_all[[i]]@phylo$n_duplication_old))
}
sum(table_event$n_speciation) #n=38882
sum(table_event$n_duplication_young) #n=8556
sum(table_event$n_duplication_old) ##n=6718
## Total 38882 speciation events, and 15274 (8556 young + 6718 old) duplication events
## Out of 4288 trees, 1630 trees have both young and old duplication events
table_event_shared<-table_event[which(table_event$n_duplication_young > 0 & table_event$n_duplication_old > 0),]
sum(table_event_shared$n_speciation) # n=19698
sum(table_event_shared$n_duplication_young) # n=4653
sum(table_event_shared$n_duplication_old) ## n=4387
## Total 19698 speciation events, and 9040 (4653 young + 4387 old) duplication events for trees having both type of duplicates
## Summary statistics of model fitting by maximum likelihood approach
# BM1:308,
# BMM:704,
# OU1:2874,
# OUM:370,
# 32 trees failed to fit any model
## save.image("Model_fitting_TMRR.Rdata")
################ Adaptive optimum (θ) estimation for OUM trees ###########
index_OUM<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model %in% c("OUM"))]
Tree_OUM<-Tree_added_parameters[c(index_OUM)]##370 trees
table_OUM<-NULL
for(i in 1:length(Tree_OUM))
{
table_OUM<-bind_rows(table_OUM,tibble(index=i,spe_evol_rate=Tree_OUM[[i]]@phylo$fitted_parameters$sig2_speciation,
spe_alpha=Tree_OUM[[i]]@phylo$fitted_parameters$alpha_speciation,
theta_spe=Tree_OUM[[i]]@phylo$fitted_parameters$theta_speciation,
dupy_evol_rate=Tree_OUM[[i]]@phylo$fitted_parameters$sig2_duplication_young,
dupy_alpha=Tree_OUM[[i]]@phylo$fitted_parameters$alpha_duplication_young,
theta_dupy=Tree_OUM[[i]]@phylo$fitted_parameters$theta_duplication_young,
dupo_evol_rate=Tree_OUM[[i]]@phylo$fitted_parameters$sig2_duplication_old,
dupo_alpha=Tree_OUM[[i]]@phylo$fitted_parameters$alpha_duplication_old,
theta_dupo=Tree_OUM[[i]]@phylo$fitted_parameters$theta_duplication_old,
n_speciation=Tree_OUM[[i]]@phylo$n_speciation,
n_duplication_young=Tree_OUM[[i]]@phylo$n_duplication_young,
n_duplication_old=Tree_OUM[[i]]@phylo$n_duplication_old))
}
## Analyses for young duplication
table_OUM_with_young_dup<-table_OUM[c(2:7,11:12)]
table_OUM_with_young_dup<-table_OUM_with_young_dup[complete.cases(table_OUM_with_young_dup),]
median(table_OUM_with_young_dup$theta_spe) ## 0.4075981 (n=2690)
median(table_OUM_with_young_dup$theta_dupy) ##0.7387739 (n=842)
wilcox.test(table_OUM_with_young_dup$theta_spe,table_OUM_with_young_dup$theta_dupy,paired = T) #p-value = 8.591e-10
## Analyses for old duplication
table_OUM_with_old_dup<-table_OUM[c(2:4,8:10,11,13)]
table_OUM_with_old_dup<-table_OUM_with_old_dup[complete.cases(table_OUM_with_old_dup),]
median(table_OUM_with_old_dup$theta_spe) ## 0.4221139 (n=4152)
median(table_OUM_with_old_dup$theta_dupo) ##0.9175527 (n=847)
wilcox.test(table_OUM_with_old_dup$theta_spe,table_OUM_with_old_dup$theta_dupo,paired = T) # p-value = 0.0002459
################ Multi rate Brownian trees (BMM) for evolutionary rate (σ2) analyses ###########
index_BMM<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model%in% c("BMM"))]
Tree_BMM<-Tree_added_parameters[c(index_BMM)] ## 704 trees
#bmPlot(Tree_BMM[[2]]@phylo$painted_tree,type="threshold",thresholds=c(0,1))
table_BMM<-NULL
for(i in 1:length(Tree_BMM))
{
table_BMM<-bind_rows(table_BMM,tibble(index=i,spe_evol_rate=Tree_BMM[[i]]@phylo$fitted_parameters$sig2_speciation,
dupy_evol_rate=Tree_BMM[[i]]@phylo$fitted_parameters$sig2_duplication_young,
dupo_evol_rate=Tree_BMM[[i]]@phylo$fitted_parameters$sig2_duplication_old,
n_speciation=Tree_BMM[[i]]@phylo$n_speciation,
n_duplication_young=Tree_BMM[[i]]@phylo$n_duplication_young,
n_duplication_old=Tree_BMM[[i]]@phylo$n_duplication_old))
}
##Analyses for young duplication
table_BMM_with_young_dup<-table_BMM[c(2:3,5:6)]
table_BMM_with_young_dup<-table_BMM_with_young_dup[complete.cases(table_BMM_with_young_dup),]
median(table_BMM_with_young_dup$spe_evol_rate) ## 0.00009075896 (n=4642)
median(table_BMM_with_young_dup$dupy_evol_rate) ##0.0001435775 (n=1742)
wilcox.test(table_BMM_with_young_dup$spe_evol_rate,table_BMM_with_young_dup$dupy_evol_rate,paired = T) #p-value = 5.044e-12
##Analyses for old duplication
table_BMM_with_old_dup<-table_BMM[c(2,4,5,7)]
table_BMM_with_old_dup<-table_BMM_with_old_dup[complete.cases(table_BMM_with_old_dup),]
median(table_BMM_with_old_dup$spe_evol_rate) ##0.0001734904 (n=5356)
median(table_BMM_with_old_dup$dupo_evol_rate) ##2.061154e-09 (n=1295)
wilcox.test(table_BMM_with_old_dup$spe_evol_rate,table_BMM_with_old_dup$dupo_evol_rate,paired = T) #p-value < 2.2e-16
############################ Randomization tests ####################################
set.seed(23456)
################# Analysis on event randomized tree ################
## First we shuffle the internal node events
Randomized_Event_trees<- mclapply(trees_of_interest_painted,shuffling_event,mc.cores = cores)
## Repainting the event randomized tree according to the newly assigned internal events
count<-0
Event_randomized_trees_painted<-lapply(Randomized_Event_trees,paint_tree_mod_new,"Random")
################### Event randomized BMM trees###################
## Collecting BM trees with randomized events
df_BMM<-Tree_model_fit_summary[which(Tree_model_fit_summary$best_fit_model %in% c("BMM")),]
index_BMM<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model%in% c("BMM"))]
Tree_randomized_event_BMM<-Event_randomized_trees_painted[c(index_BMM)]## 704 trees
## Adding model parameters by fitting the BMM model
i<-0
Randomized_BMM_tree_added_parameter<-lapply(Tree_randomized_event_BMM,add_model_parameters_info2,df_BMM)
Randomized_BMM_tree_added_parameter2<-Randomized_BMM_tree_added_parameter[!is.na(Randomized_BMM_tree_added_parameter)]
Randomized_BMM_tree_added_parameter3<-mclapply(Randomized_BMM_tree_added_parameter2,tree_added_events_info,mc.cores=cores)
## Analyses
table_randomized_event_BMM<-NULL
for(x in 1:length(Randomized_BMM_tree_added_parameter3))
{
print(x)
table_randomized_event_BMM<-bind_rows(table_randomized_event_BMM,tibble(index=x,spe_evol_rate=Randomized_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_speciation,
dupy_evol_rate=Randomized_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_duplication_young,
dupo_evol_rate=Randomized_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_duplication_old,
n_speciation=Randomized_BMM_tree_added_parameter3[[x]]@phylo$n_speciation,
n_duplication_young=Randomized_BMM_tree_added_parameter3[[x]]@phylo$n_duplication_young,
n_duplication_old=Randomized_BMM_tree_added_parameter3[[x]]@phylo$n_duplication_old))
}
## Analyses on young duplicates
table_randomized_event_BMM_young_dup<-table_randomized_event_BMM[c(2:3,5:6)]
table_randomized_event_BMM_young_dup<-table_randomized_event_BMM_young_dup[complete.cases(table_randomized_event_BMM_young_dup),]
median(table_randomized_event_BMM_young_dup$spe_evol_rate) ## 0.0001663995 (n=3215)
median(table_randomized_event_BMM_young_dup$dupy_evol_rate) ## 0.00008534398 (n=1438)
wilcox.test(table_randomized_event_BMM_young_dup$spe_evol_rate,table_randomized_event_BMM_young_dup$dupy_evol_rate,paired = T) #p-value = 0.02783
## Analyses on old duplicates
table_randomized_event_BMM_old_dup<-table_randomized_event_BMM[c(2,4,5,7)]
table_randomized_event_BMM_old_dup<-table_randomized_event_BMM_old_dup[complete.cases(table_randomized_event_BMM_old_dup),]
median(table_randomized_event_BMM_old_dup$spe_evol_rate) ## 0.0001762469 (n=2788)
median(table_randomized_event_BMM_old_dup$dupo_evol_rate) ## 2.061154e-09 (n=800)
wilcox.test(table_randomized_event_BMM_old_dup$spe_evol_rate,table_randomized_event_BMM_old_dup$dupo_evol_rate,paired = T) #p-value < 2.2e-16
################### Event randomized OUM trees###################
## Collecting OUM trees with randomized events using the index of the OUM trees
df_OUM<-Tree_model_fit_summary[which(Tree_model_fit_summary$best_fit_model %in% c("OUM")),]
index_OU<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model%in% c("OUM"))]
Tree_randomized_event_OUM<-Event_randomized_trees_painted[c(index_OUM)]## 370 trees
## Adding model parameters by fitting the OUM model
i<-0
Randomized_OUM_tree_added_parameter<-lapply(Tree_randomized_event_OUM,add_model_parameters_info2,df_OUM)
Randomized_OUM_tree_added_parameter2<-Randomized_OUM_tree_added_parameter[!is.na(Randomized_OUM_tree_added_parameter)]
Randomized_OUM_tree_added_parameter3<-mclapply(Randomized_OUM_tree_added_parameter2,tree_added_events_info,mc.cores=cores)
## Analyses
table_OUM_random<-NULL
for(i in 1:length(Randomized_OUM_tree_added_parameter3))
{
table_OUM_random<-bind_rows(table_OUM_random,tibble(index=i,spe_alpha=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_speciation,
spe_evol_rate=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_speciation,
theta_spe=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_speciation,
dupy_alpha=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_duplication_young,
dupy_evol_rate=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_duplication_young,
theta_dupy=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_duplication_young,
dupo_alpha=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_duplication_old,
dupo_evol_rate=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_duplication_old,
theta_dupo=Randomized_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_duplication_old,
n_speciation=Randomized_OUM_tree_added_parameter3[[i]]@phylo$n_speciation,
n_duplication_young=Randomized_OUM_tree_added_parameter3[[i]]@phylo$n_duplication_young,
n_duplication_old=Randomized_OUM_tree_added_parameter3[[i]]@phylo$n_duplication_old))
}
## With young duplication
table_OUM_random_with_young_dup<-table_OUM_random[c(2:7,11:12)]
table_OUM_random_with_young_dup<-table_OUM_random_with_young_dup[complete.cases(table_OUM_random_with_young_dup),]
median(table_OUM_random_with_young_dup$theta_spe) ## 0.5047096 (n=1872)
median(table_OUM_random_with_young_dup$theta_dupy) ## 0.5339038 (n=698)
wilcox.test(table_OUM_random_with_young_dup$theta_spe,table_OUM_random_with_young_dup$theta_dupy,paired = T) #p-value = 0.7483
## With old duplication
table_OUM_random_with_old_dup<-table_OUM_random[c(2:4,8:10,11,13)]
table_OUM_random_with_old_dup<-table_OUM_random_with_old_dup[complete.cases(table_OUM_random_with_old_dup),]
median(table_OUM_random_with_old_dup$theta_spe) ## 0.5093067 (n=2081)
median(table_OUM_random_with_old_dup$theta_dupo) ## 0.660308 (n=482)
wilcox.test(table_OUM_random_with_old_dup$theta_spe,table_OUM_random_with_old_dup$theta_dupo,paired = T) #p-value = 0.7339
#save.image("Model_fitting_TMRR.Rdata")
################# Analysis on Tau randomized tree ################
set.seed( 23456 )
## First we shuffle the tip data
Randomized_Tau_trees<- mclapply(trees_of_interest_painted,shuffling_tauR,mc.cores = cores)
################### Tau randomized BMM trees###################
## Collecting BM trees with randomized Tau using the index of the BM trees
df_BMM<-Tree_model_fit_summary[which(Tree_model_fit_summary$best_fit_model %in% c("BMM")),]
index_BMM<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model%in% c("BMM"))]
Tree_randomized_Tau_BMM<-Randomized_Tau_trees[c(index_BMM)]## 704 trees
## Adding model parameters by fitting the BMM trees
i<-0
Randomized_Tau_BMM_tree_added_parameter<-lapply(Tree_randomized_Tau_BMM,add_model_parameters_info2,df_BMM)
Randomized_Tau_BMM_tree_added_parameter2<-Randomized_Tau_BMM_tree_added_parameter[!is.na(Randomized_Tau_BMM_tree_added_parameter)]
Randomized_Tau_BMM_tree_added_parameter3<-mclapply(Randomized_Tau_BMM_tree_added_parameter2,tree_added_events_info,mc.cores=cores)
## Analyses
table_randomized_Tau_BMM<-NULL
for(x in 1:length(Randomized_Tau_BMM_tree_added_parameter3))
{
print(x)
table_randomized_Tau_BMM<-bind_rows(table_randomized_Tau_BMM,tibble(index=x,spe_evol_rate=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_speciation,
dupy_evol_rate=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_duplication_young,
dupo_evol_rate=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$fitted_parameters$sig2_duplication_old,
n_speciation=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$n_speciation,
n_duplication_young=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$n_duplication_young,
n_duplication_old=Randomized_Tau_BMM_tree_added_parameter3[[x]]@phylo$n_duplication_old))
}
## Analyses on young duplicates
table_randomized_Tau_BMM_young_dup<-table_randomized_Tau_BMM[c(2:3,5:6)]
table_randomized_Tau_BMM_young_dup<-table_randomized_Tau_BMM_young_dup[complete.cases(table_randomized_Tau_BMM_young_dup),]
median(table_randomized_Tau_BMM_young_dup$spe_evol_rate) ## 0.0006957318 (n=4618)
median(table_randomized_Tau_BMM_young_dup$dupy_evol_rate) ## 0.0002236885 (n=1723)
wilcox.test(table_randomized_Tau_BMM_young_dup$spe_evol_rate,table_randomized_Tau_BMM_young_dup$dupy_evol_rate,paired = T,) #p-value = 1.388e-13
## Analyses on old duplicates
table_randomized_Tau_BMM_old_dup<-table_randomized_Tau_BMM[c(2,4,5,7)]
table_randomized_Tau_BMM_old_dup<-table_randomized_Tau_BMM_old_dup[complete.cases(table_randomized_Tau_BMM_old_dup),]
median(table_randomized_Tau_BMM_old_dup$spe_evol_rate) ##0.000908805 (n=5337)
median(table_randomized_Tau_BMM_old_dup$dupo_evol_rate) ##2.529964e-10 (n=1291)
wilcox.test(table_randomized_Tau_BMM_old_dup$spe_evol_rate,table_randomized_Tau_BMM_old_dup$dupo_evol_rate,paired = T) #p-value < 2.2e-16
############## Tau randomized OUM trees ##############
## Collecting OU trees with randomized events using the index of the OU trees
df_OUM<-Tree_model_fit_summary[which(Tree_model_fit_summary$best_fit_model %in% c("OUM")),]
index_OUM<-Tree_model_fit_summary$tree_index[which(Tree_model_fit_summary$best_fit_model%in% c("OUM"))]
Tree_randomized_Tau_OUM<-Randomized_Tau_trees[c(index_OUM)]## 370 trees
## Adding model parameters by fitting the OUM trees
i<-0
Randomized_Tau_OUM_tree_added_parameter<-lapply(Tree_randomized_Tau_OUM,add_model_parameters_info2,df_OUM)
Randomized_Tau_OUM_tree_added_parameter2<-Randomized_Tau_OUM_tree_added_parameter[!is.na(Randomized_Tau_OUM_tree_added_parameter)]
Randomized_Tau_OUM_tree_added_parameter3<-mclapply(Randomized_Tau_OUM_tree_added_parameter2,tree_added_events_info,mc.cores=cores)
table_OUM_Tau_random<-NULL
for(i in 1:length(Randomized_Tau_OUM_tree_added_parameter3))
{
table_OUM_Tau_random<-bind_rows(table_OUM_Tau_random,tibble(index=i,spe_alpha=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_speciation,
spe_evol_rate=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_speciation,
theta_spe=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_speciation,
dupy_alpha=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_duplication_young,
dupy_evol_rate=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_duplication_young,
theta_dupy=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_duplication_young,
dupo_alpha=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$alpha_duplication_old,
dupo_evol_rate=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$sig2_duplication_old,
theta_dupo=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$fitted_parameters$theta_duplication_old,
n_speciation=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$n_speciation,
n_duplication_young=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$n_duplication_young,
n_duplication_old=Randomized_Tau_OUM_tree_added_parameter3[[i]]@phylo$n_duplication_old))
}
## With young duplication
table_OUM_Tau_random_with_young_dup<-table_OUM_Tau_random[c(2:7,11:12)]
table_OUM_Tau_random_with_young_dup<-table_OUM_Tau_random_with_young_dup[complete.cases(table_OUM_Tau_random_with_young_dup),]
median(table_OUM_Tau_random_with_young_dup$theta_spe) ## 0.5292699 (n=2690)
median(table_OUM_Tau_random_with_young_dup$theta_dupy) ## 0.5524303 (n=842)
wilcox.test(table_OUM_Tau_random_with_young_dup$theta_spe,table_OUM_Tau_random_with_young_dup$theta_dupy,paired = T) #p-value = 0.9754
## With old duplication
table_OUM_Tau_random_with_old_dup<-table_OUM_Tau_random[c(2:4,8:10,11,13)]
table_OUM_Tau_random_with_old_dup<-table_OUM_Tau_random_with_old_dup[complete.cases(table_OUM_Tau_random_with_old_dup),]
median(table_OUM_Tau_random_with_old_dup$theta_spe) ## 0.5391824 (n=4152)
median(table_OUM_Tau_random_with_old_dup$theta_dupo) ## 1.46883e-11 (n=847)
wilcox.test(table_OUM_Tau_random_with_old_dup$theta_spe,table_OUM_Tau_random_with_old_dup$theta_dupo,paired = T) #p-value = 1.355e-07
#save.image("Model_fitting_TMRR.Rdata")
####### Cleaning memory
rm(lines)
rm(tree_lines)
rm(i)
rm(x)
rm(Index)
rm(index_BMM)
rm(index_OU)
rm(index_OUM)
rm(df_BMM)
rm(df_OUM)
rm(Randomized_BMM_tree_added_parameter2)
rm(Randomized_BMM_tree_added_parameter3)
rm(Randomized_OUM_tree_added_parameter2)
rm(Randomized_OUM_tree_added_parameter3)
rm(Randomized_Tau_BMM_tree_added_parameter2)
rm(Randomized_Tau_BMM_tree_added_parameter3)
rm(Randomized_Tau_OUM_tree_added_parameter2)
rm(Randomized_Tau_OUM_tree_added_parameter3)
#save.image("Model_fitting_TMRR.Rdata")
################################## Baysian model fitting to identify multi peak OU or OUM trees #################
## We used bayou R package for this purpose
## We consider a strict posterior probability cutoff of >= 0.7 to identify regime, i.e. θ shift
## Considering all OU Trees
count<-0
result_bayou_OU<-NULL
result_bayou_OU<-bind_rows(lapply(Tree_OU,bayou_analysis))
## Considering OUM Trees
count<-0
result_bayou_OUM<-NULL
result_bayou_OUM<-bind_rows(lapply(Tree_OUM,bayou_analysis))
##################### Optima shift analyses on 370 OUM trees ###################
## Analysis considering all branches of the OU trees
result_bayou_OUM_new<-result_bayou_OUM[result_bayou_OUM$pp >= 0.7,]
length(unique(result_bayou_OUM_new$index)) ## 206 out of 370 (55.7%)
Baysian_index<-unique(result_bayou_OUM_new$index)
Trees_OUM_new<-Tree_OUM[c(Baysian_index)]
rm(Baysian_index)
## To obtain total numbers of events
table_OUM_0.7pp<-NULL
for(i in 1:length(Trees_OUM_new))
{
table_OUM_0.7pp<-bind_rows(table_OUM_0.7pp,tibble(index=i,
n_speciation=Trees_OUM_new[[i]]@phylo$n_speciation,
n_duplication_young=Trees_OUM_new[[i]]@phylo$n_duplication_young,
n_duplication_old=Trees_OUM_new[[i]]@phylo$n_duplication_old))
}
rm(i)
sum(table_OUM_0.7pp$n_speciation) ## 2779 speciation events
sum(table_OUM_0.7pp$n_duplication_young) ## 486 young duplication events
sum(table_OUM_0.7pp$n_duplication_old) ## 548 old duplication events
## Regime shift analyses considering the whole tree
OUM_all<-proportion_calculation(Tree_OUM,"all",result_bayou_OUM_new)
median(na.omit(OUM_all$shift_prob_dup)) #0.125
median(na.omit(OUM_all$shift_prob_spe)) #0
mean(na.omit(OUM_all$shift_prob_dup)) #0.509
mean(na.omit(OUM_all$shift_prob_spe)) #0.232
pvalue_regime_shift_all_OUM<- wilcox.test(OUM_all$shift_prob_spe,OUM_all$shift_prob_dup,paired = T)$p.value #p-value = 1.45e-20
## Regime shift analysis considering young duplication and speciation branches of the OUM trees
result_bayoum1<-result_bayou_OUM_new[result_bayou_OUM_new$node_age<=296,]
OUM_young<-proportion_calculation(Tree_OUM,"young",result_bayoum1)
median(na.omit(OUM_young$shift_prob_dup)) #0.083
median(na.omit(OUM_young$shift_prob_spe)) #0.028
mean(na.omit(OUM_young$shift_prob_dup)) #0.138
mean(na.omit(OUM_young$shift_prob_spe)) #0.05
pvalue_regime_shift_OUM_young<- wilcox.test(OUM_young$shift_prob_spe,OUM_young$shift_prob_dup,paired = T)$p.value #p-value = 8.26e-4
## Regime shift analysis considering old duplication and speciation branches of the OUM trees
result_bayoum2<-NULL
result_bayou_OUM_spe<-result_bayou_OUM_new[result_bayou_OUM_new$Event=="Speciation",]
result_bayou_OUM_dup_old<-result_bayou_OUM_new[result_bayou_OUM_new$Event=="Duplication" & result_bayou_OUM_new$node_age > 296,]
result_bayoum2<-rbind(result_bayou_OUM_spe,result_bayou_OUM_dup_old)
OUM_old<-proportion_calculation(Tree_OUM,"old",result_bayoum2)
median(na.omit(OUM_old$shift_prob_dup)) #0.167
median(na.omit(OUM_old$shift_prob_spe)) #0
mean(na.omit(OUM_old$shift_prob_dup)) #0.173
mean(na.omit(OUM_old$shift_prob_spe)) #0.028
pvalue_regime_shift_OUM_old<- wilcox.test(OUM_old$shift_prob_spe,OUM_old$shift_prob_dup,paired = T)$p.value #p-value = 7.26e-40
## Calculating the shift rates/My for OUM trees
## Step 1: Estimating length of the branch where optimum shift was detected
for(i in 1:nrow(result_bayou_OUM_new))
{
tre<-NULL
index<-NULL
edgelength<-NULL
index<-result_bayou_OUM_new$index[i]
tre<-Tree_OUM[[index]]@phylo
result_bayou_OUM_new$edgelength[i]<-tre$edge.length[which(tre$edge[,1]==result_bayou_OUM_new$node1[i] & tre$edge[,2]==result_bayou_OUM_new$node2[i])]
}
## Step 2: Based on the relative location provided by Bayesian approach, we estimated the time when optimum shifts took place
result_bayou_OUM_new$shift_age<-result_bayou_OUM_new$node_age-(result_bayou_OUM_new$edgelength*result_bayou_OUM_new$rel.location)
## Shift rates calculation
result_bayou_OUM_new$shifts_per_my<-1/result_bayou_OUM_new$shift_age
## For young duplicates
result_bayou_OUM_young<-result_bayou_OUM_new[result_bayou_OUM_new$node_age <= 296,]
optima_shifts_per_my_young_duplicates_OUM<-result_bayou_OUM_young$shifts_per_my[which(result_bayou_OUM_young$Event=="Duplication")]
optima_shifts_per_my_speciation_OUM<- result_bayou_OUM_young$shifts_per_my[which(result_bayou_OUM_young$Event=="Speciation")]
median(optima_shifts_per_my_young_duplicates_OUM) #0.032
median(optima_shifts_per_my_speciation_OUM) ##0.0121
wilcox.test(optima_shifts_per_my_young_duplicates_OUM,optima_shifts_per_my_speciation_OUM) #p-value = 2.341e-11
## For old duplicates
result_bayou_OUM_old<-NULL
result_bayou_OUM_old<-result_bayou_OUM_new[result_bayou_OUM_new$Event=="Speciation",]
result_bayou_OUM_old<-rbind(result_bayou_OUM_old,result_bayou_OUM_new[result_bayou_OUM_new$Event=="Duplication" & result_bayou_OUM_new$node_age > 296,])
result_bayou_OUM_old<-na.omit(result_bayou_OUM_old)
optima_shifts_per_my_old_duplicates_OUM<-result_bayou_OUM_old$shifts_per_my[which(result_bayou_OUM_old$Event=="Duplication")]
optima_shifts_per_my_speciation_OUM<- result_bayou_OUM_old$shifts_per_my[which(result_bayou_OUM_old$Event=="Speciation")]
median(optima_shifts_per_my_old_duplicates_OUM) #0.0025
median(optima_shifts_per_my_speciation_OUM) ##0.012
wilcox.test(optima_shifts_per_my_old_duplicates_OUM,optima_shifts_per_my_speciation_OUM) #p-value < 2.2e-16
##################### Optima shift analyses on 3244 OU (OU1+OUM) trees #############
## Analysis considering all branches of the OU trees
result_bayou_OU_new<-result_bayou_OU[result_bayou_OU$pp >= 0.7,]
## Unique OU ID with pp > 0.1
length(unique(result_bayou_OU$index)) ## 2676 out of 3244 (82.49%)
## Unique OU ID with pp >= 0.7
length(unique(result_bayou_OU_new$index)) ## 1101 out of 3244 (33.94%)
Multioptima_Baysian_index<-unique(result_bayou_OU_new$index)
Trees_OUM_Baysian<-Tree_OU[c(Multioptima_Baysian_index)]
rm(Multioptima_Baysian_index)
## To obtain total numbers of events
table_Baysian_OU<-NULL
for(i in 1:length(Trees_OUM_Baysian))
{
table_Baysian_OU<-bind_rows(table_Baysian_OU,tibble(index=i,
n_speciation=Trees_OUM_Baysian[[i]]@phylo$n_speciation,
n_duplication_young=Trees_OUM_Baysian[[i]]@phylo$n_duplication_young,
n_duplication_old=Trees_OUM_Baysian[[i]]@phylo$n_duplication_old))
}
rm(i)
sum(table_Baysian_OU$n_speciation) ## 13824 speciation events
sum(table_Baysian_OU$n_duplication_young) ## 3027 young duplication events
sum(table_Baysian_OU$n_duplication_old) ## 2814 old duplication events
## Regime shift analyses considering all events of a tree
OU_all<-proportion_calculation(Tree_OU,"all",result_bayou_OU_new)
median(na.omit(OU_all$shift_prob_dup)) #0.1
median(na.omit(OU_all$shift_prob_spe)) #0.023
mean(na.omit(OU_all$shift_prob_dup)) #0.128
mean(na.omit(OU_all$shift_prob_spe)) #0.049
pvalue_regime_shift_all_OU<- wilcox.test(OU_all$shift_prob_spe,OU_all$shift_prob_dup,paired = T)$p.value #p-value = 7.56e-55
## Analysis considering young duplication and speciation branches of the OU trees
result_bayou1<-result_bayou_OU_new[result_bayou_OU_new$node_age<=296,]
OU_young<-proportion_calculation(Tree_OU,"young",result_bayou1)
median(na.omit(OU_young$shift_prob_dup)) #0.045
median(na.omit(OU_young$shift_prob_spe)) #0.031
mean(na.omit(OU_young$shift_prob_dup)) #0.147
mean(na.omit(OU_young$shift_prob_spe)) #0.066
pvalue_regime_shift_OU_young<- wilcox.test(OU_young$shift_prob_spe,OU_young$shift_prob_dup,paired = T)$p.value #p-value = 3.43e-12
## Analysis considering old duplication and speciation branches of the OU trees
result_bayou2<-NULL
result_bayou_OU_spe<-result_bayou_OU_new[result_bayou_OU_new$Event=="Speciation",]
result_bayou_OU_dup_old<-result_bayou_OU_new[result_bayou_OU_new$Event=="Duplication" & result_bayou_OU_new$node_age > 296,]
result_bayou2<-rbind(result_bayou_OU_spe,result_bayou_OU_dup_old)
OU_old<-proportion_calculation(Tree_OU,"old",result_bayou2)
median(na.omit(OU_old$shift_prob_dup)) #0.1
median(na.omit(OU_old$shift_prob_spe)) #0.026
mean(na.omit(OU_old$shift_prob_dup)) #0.119
mean(na.omit(OU_old$shift_prob_spe)) #0.039
pvalue_regime_shift_OU_old<- wilcox.test(OU_old$shift_prob_spe,OU_old$shift_prob_dup,paired = T)$p.value #p-value = 7.26e-40
## Calculating the regime shift rates/My
## Step 1: Estimating length of the branch where optimum shift was detected
for(i in 1:nrow(result_bayou_OU_new))
{
tre<-NULL
index<-NULL
edgelength<-NULL
index<-result_bayou_OU_new$index[i]
tre<-Tree_OU[[index]]@phylo
result_bayou_OU_new$edgelength[i]<-tre$edge.length[which(tre$edge[,1]==result_bayou_OU_new$node1[i] & tre$edge[,2]==result_bayou_OU_new$node2[i])]
}
## Step 2: Based on the relative location provided by Bayesian approach, we estimated the time when optimum shifts took place
result_bayou_OU_new$shift_age<-result_bayou_OU_new$node_age-(result_bayou_OU_new$edgelength*result_bayou_OU_new$rel.location)
## Rate calculations
result_bayou_OU_new$shifts_per_my<-1/result_bayou_OU_new$shift_age
## For young duplicates
result_bayou_OU_young<-result_bayou_OU_new[result_bayou_OU_new$node_age <= 296,]
optima_shifts_per_my_young_duplicates<-result_bayou_OU_young$shifts_per_my[which(result_bayou_OU_young$Event=="Duplication")]
optima_shifts_per_my_speciation<- result_bayou_OU_young$shifts_per_my[which(result_bayou_OU_young$Event=="Speciation")]
median(optima_shifts_per_my_young_duplicates) #0.0309
median(optima_shifts_per_my_speciation) ##0.0129
wilcox.test(optima_shifts_per_my_young_duplicates,optima_shifts_per_my_speciation) #p-value = 2.341e-11
## For old duplicates
result_bayou_OU_old<-NULL
result_bayou_OU_old<-result_bayou_OU_new[result_bayou_OU_new$Event=="Speciation",]
result_bayou_OU_old<-rbind(result_bayou_OU_old,result_bayou_OU_new[result_bayou_OU_new$Event=="Duplication" & result_bayou_OU_new$node_age > 296,])
result_bayou_OU_old<-na.omit(result_bayou_OU_old)
optima_shifts_per_my_old_duplicates<-result_bayou_OU_old$shifts_per_my[which(result_bayou_OU_old$Event=="Duplication")]
optima_shifts_per_my_speciation<- result_bayou_OU_old$shifts_per_my[which(result_bayou_OU_old$Event=="Speciation")]
median(optima_shifts_per_my_old_duplicates) #0.0023
median(optima_shifts_per_my_speciation) ##0.0129
wilcox.test(optima_shifts_per_my_old_duplicates,optima_shifts_per_my_speciation) #p-value < 2.2e-16
duplication_shifts_per_my<-result_bayou_OU_new$shifts_per_my[which(result_bayou_OU_new$Event=="Duplication")]
speciation_shifts_per_my<- result_bayou_OU_new$shifts_per_my[which(result_bayou_OU_new$Event=="Speciation")]
median(duplication_shifts_per_my) ## 0.0034
median(speciation_shifts_per_my) ## 0.013
wilcox.test(duplication_shifts_per_my,speciation_shifts_per_my)
#save.image("Model_fitting_TMRR.Rdata")
## To identify trees for which OU1 was the preferred model by Maximum likelihood framework
df<-NULL
for(i in 1:nrow(result_bayou_OU_new))
{
index<-NULL
index<-result_bayou_OU_new$index[i]
if(Tree_OU[[index]]@phylo$best_fit_model=="OU1")
{
df<-append(df,index)
}
}