-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBlodgettProject_16Scode.R
1648 lines (1435 loc) · 76.2 KB
/
BlodgettProject_16Scode.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
library(decontam)
library(dada2)
library(phyloseq)
library(ggplot2)
library(data.table)
library(tidyverse)
library(vegan)
library(reshape2)
####
##
#
### SOME USEFUL/IMPORTANT DATA OBJECTS
#
# phyloseq object before decontam:
ps
# phyloseq object output from decontam:
ps2
# complete taxonomy table extracted from decontam output:
head(tax.df)
# complete sequence table extracted from decontam output:
head(seq.df, 2)
# complete OTU table extracted from decontam output (rownames = seqID):
otu.df[1:5,1:5]
# complete OTU table extracted from decontam output (column1 = seqID):
otu.df.rownames[1:5,1:5]
# transposed OTU table:
otu.dft[1:5,1:5]
# OTU table with taxonomic classes appended to the end of the table
OTUtax.df[1:5, 1:5]
OTUtax.df[1:5, 662:672]
# OTUtax table, but with obscure phyla removed
OTUtax.df.sub[1:5, 662:672]
# Metadata
head(MetadataWithReads)
#######################################################################################
#### Package installation and Overview Summary of Pipeline ####
###############################################################
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("dada2")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("ShortRead")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Biostrings")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("decontam")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("phyloseq")
#################################################
#### CREATE PHYLOSEQ OBJECT and RUN DECONTAM ####
#################################################
##
##
# https://benjjneb.github.io/decontam/vignettes/decontam_intro.html#identifying-contaminants-in-marker-gene-and-metagenomics-data
##
##
library(phyloseq)
library(ggplot2)
library(decontam)
library(data.table)
library(tidyverse)
library(vegan)
## MASTER METADATA TABLE
SampleMetadata <- fread("~/Desktop/Box Sync/All16S_Processed/SampleMetadata_Blodgett_UPDATED3.csv")
head(SampleMetadata)
## WARNING -- the sequencing facility changed some of our sample names...
## here's how to fix it so that our Metadata Table matches the seq facility output:
SampleMetadata$SeqID <- gsub("rp", "REP", SampleMetadata$SeqID) #find and replace "rep" with "rp" to match sequencing facility output...
SampleMetadata$SeqID <- gsub("p", "P", SampleMetadata$SeqID) #find and replace "p" with "nothing"P" to match sequencing facility output...
## CORRECT DATE SNAFU! ("replace any '8-Oct-20' with '8-Oct-19'")
SampleMetadata$Date <- gsub("8-Oct-20", "8-Oct-19", SampleMetadata$Date)
## CORRECT POOLED LABEL SNAFU! ("replace any 'p1-5' with 'p1-3'")
SampleMetadata$depth <- gsub("p1-5", "p1-3", SampleMetadata$depth)
write.csv(SampleMetadata, "~/Desktop/Blodgett Data & Manuscript/Blodgett_16S_Comm/SampleMetadata_Blodgett_UPDATED.csv")
#NOTE that there were a handful more edits made in Excel
#USE THIS TABLE:
SampleMetadata <- fread("~/Desktop/Box Sync/All16S_Processed/SampleMetadata_Blodgett_UPDATED3.csv")
head(SampleMetadata)
##16S Table, Fixed
SampleMetadata <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/16S_SampleMetadata.csv")
SampleMetadata <- SampleMetadata[-c(651:nrow(SampleMetadata)),]
SampleMetadata <- column_to_rownames(SampleMetadata, var = "SeqID")
head(SampleMetadata)
class(SampleMetadata)
## decontam uses dada2 outputs converted into phyloseq objects, and phyloseq requires OTU and Taxa tables as matrices
#
# Load OTU table:
seqtab.nochim <- as.matrix(read.delim("~/Desktop/Box Sync/All16S_Processed/QIIMEobjects/exported-feature-table/feature-table.tsv", sep = "\t", header = TRUE, skip = 1, row.names = 1))
seqtab.nochim[1:5,1:2] #since there's a million columns, this is better than head()
seqtab.nochim.t <- t(seqtab.nochim)
seqtab.nochim.t[1:5,1:2]
# Load Taxonomy Table:
taxa <- as.matrix(fread("~/Desktop/Box Sync/All16S_Processed/QIIMEobjects/TaxonomySep.csv",
header = TRUE), rownames=1)
head(taxa)
# Extract the SeqIDs from the OTUtable
samplenames <- data.table(SeqID = row.names(seqtab.nochim.t))
# add read counts per sample to the Sample Metadata Table.. (reads output by DADA2 in the post-chimera-removal sanity check)
reads <- fread("~/Desktop/Box Sync/All16S_Processed/QIIMEobjects/stats-metadata_3C.csv")
head(reads)
colnames(reads) <- c("SeqID", "dada2_reads", "nochim_reads")
reads <- mutate(reads, SeqID = str_replace_all(SeqID, "-", "."))
class(reads$nochim_reads)
class(reads$dada2_reads)
reads$nochim_reads <- as.numeric(reads$nochim_reads)
max(reads$nochim_reads)
min(reads$nochim_reads)
# subset metadata table, add reads counts, and create table for phyloseq
dat1 <- merge(samplenames, SampleMetadata, by="SeqID", all.x=TRUE)
dat2 <- merge(dat1, reads, by="SeqID", all.x=TRUE)
samp16S <- column_to_rownames(dat2, var="SeqID") #rownames = SeqID will be required for phyloseq
test.ps <- phyloseq(sample_data(samp16S))
#plot the number of reads by sample to get an general idea of what the data look like
ggplot(data=dat2, aes(x=SeqID, y=nochim_reads, color=Who)) +
geom_point() +
scale_color_manual(values = c("#00ff95", "#ff9500", "#9500ff", "#ff006a")) +
scale_y_continuous(expand= c(0,0), breaks=seq(0, 130000, 10000), limits=c(0, 130000)) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))
ggplot(data=dat2, aes(x=SeqID, y=nochim_reads, color=SamplePool)) +
geom_point() +
scale_color_manual(values = c("#00ff95", "#ff9500", "#9500ff")) +
scale_y_continuous(expand= c(0,0), breaks=seq(0, 130000, 10000), limits=c(0, 130000)) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=1))
##
# CREATE PHYLOSEQ OBJECT
ps <- phyloseq(otu_table(seqtab.nochim.t, taxa_are_rows=FALSE), #matrix (rownames = SeqIDs, colnames = ASVs, values = abundance of each ASV in each SeqID)
tax_table(taxa), #matrix (rownames = ASVs, colnames = taxonomic levels, values = taxonomic classification for each ASV)
sample_data(samp16S)) #data.frame, (rownames = SeqIDs, colnames & values = additional info for each SeqID)
# check that nothing catastrophic happened when creating the phyloseq object, table dimensions should match:
dim(tax_table(ps)) #phyloseq object
dim(taxa) #original
(otu_table(ps))[1:5, 1:5] #phyloseq object
dim(seqtab.nochim) #original
dim(sample_data(ps)) #phyloseq object
head(samp16S) #original
head(tax_table(ps))
otu <- otu_table(ps)
class(otu)
otu.t <- t(otu)
otu.t[1:5, 1:5]
## ASVs are currently identified by their DNA sequence, which is cumbersome
## Move this DNA sequence to the refseq slot of the phyloseq object
## and give each ASV a simpler name like ASV1, ASV2, etc.
dna <- Biostrings::DNAStringSet(taxa_names(ps2copy))
names(dna) <- taxa_names(ps2copy)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("OTU", seq(ntaxa(ps)))
ps
# To run decontam, we will need a column ("is.neg") in the Metadata Table;
# control samples are indicated as "TRUE" and all other samples are "FALSE"
# Create the is.neg column:
sample_data(ps)$is.neg <- sample_data(ps)$Who == "Control"
head(sample_data(ps)) #check that it looks correct
view(sample_data(ps))
#
## RUN DECONTAM!
# outputs a table of stats for all taxa and decontam's decision about whether or not each taxon is a contaminant
# play around with changing the "threshold" value..
contamdf.prev <- isContaminant(ps, method="prevalence", neg="is.neg", threshold = 0.1)
# Summary table: how many true contaminants were identified?
table(contamdf.prev$contaminant)
#tresh0.1 = 190contaminants, tresh0.05 = 93contaminants
#for 16S, tresh0.5 = 103contaminants, tresh0.1 = 361contaminants, tresh0.05 = 34contaminants
# plot a histogram of the # of ASVs (y-axis) for each decontam score (a.k.a. P-value) on the x-axis
# ASVs are rows in the contamdf.prev object
ggplot(contamdf.prev, aes(x=p)) +
geom_histogram(binwidth=0.01, fill="purple")+
scale_x_continuous(breaks=seq(0, 1, 0.1))
#there isn't much of a difference between 0 and 0.55...
#moving forward with 0.1, since that's the recommended threshold
# ...though I could concievably increase the threshold substantially...
# thresh0.5 = 746contaminants (14470 non-contminants)
### NOTES!
## "method=prevalence" means contaminants are identified by increased prevalence in negative controls vs. samples
# default: threshold = 0.1 ...defines the threshold for deciding what is more prevalent in the negative controls than the soil samples
# default: detailed = TRUE ...returns a data.frame with diagnostic info on the contamination decision
### Some notes about the threshold argument from Davis et al 2018:
## For P=0.5, ASVs would be classified as contaminants if present in a higher fraction of controls than samples
## The P-value is the result of 2x2 chi-squared test (many samples), or Fisher's Exact Test (few samples)
## this P-value is also refered to as a "score" or "decontam score"
## Each ASV is assigned a P-value/score
## In general, the threshold P value should be less-than-or-equal-to 0.5
## higher scores = likely not a contaminant
unique(sample_data(ps)$Who)
sum(sample_data(ps)$Who == "Control") #46 #for 16S = 42
sum(sample_data(ps)$Who == "Neemonika") #325 #for 16S = 316
sum(sample_data(ps)$Who == "Neemonika", sample_data(ps)$Who =="Phillip", sample_data(ps)$Who =="PhillipPool") #618 ...618+46=664 #for 16S = 608
# Make phyloseq object of presence-absence in negative controls and true samples
ps.pa <- transform_sample_counts(ps, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Who == "Control", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Who == c("Phillip", "PhillipPool", "Neemonika"), ps.pa)
#Warning: In sample_data(ps.pa)$Who == c("Phillip", "PhillipPool", "Neemonika") : "longer object length is not a multiple of shorter object length"
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev$contaminant)
# Plot ASV presence in negative controls vs. positive samples and color by whether or not they were ID's as a contaminant
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant))+
geom_point()+
#geom_abline(intercept = 0, slope = 1)+ #add a line showing the threshold for equal representation in controls and samples
xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")+
ggtitle("Threshold = 0.1")
# REMOVE CONTAMINANTS FROM DATASET
contam <- rownames_to_column(contamdf.prev, var = "rowname") #move rownames (ASVs) to a column that data.table can work with
contam.dt <- data.table(contam) #convert to a data.table
badTaxa <- contam.dt[contaminant == "TRUE", rowname] #use data.table to generate a list of contaminant ASVs
goodTaxa <- setdiff(taxa_names(ps), badTaxa) #list of all ASVs minus the contaminant ASVs
ps2 <- prune_taxa(goodTaxa, ps) #create a new phyloseq object that only contains the good (non-contaminant) ASVs!
#check out the difference:
ps #15216 taxa #16S = 95178
ps2 #15026 taxa #16S = 94817
95178 - 94817 # =361, which is equivalent to the number of taxa that decontam identified as bad! this worked! hooray!
########################################
##### ORDINATION PLOTS & PERMANOVA #####
#########################################
library(vegan)
library(phyloseq)
library(ggplot2)
library(data.table)
## NMDS represents as well as possible the ordering relationships among objects in a small number of axes
# NMDS does not preserve the exact dissimilarities among objects
# The user sets the number of axes,
# the the NMDS algorithm fits the data into the specified number or axes
# Ouput stress value describes the fit of the data to the axes
# The goal is to minimize stress (different number of axes will change stress)
# a random R ecology blog says stress >0.3 is bad, stress <0.01 great
#...it seems the default for phyloseq is 2 axes?
##
# PCA assumes data are normally distributed, but is more infomative than NMDS because the axes are meaningful
# The axes are meaningful in PCA because the exact dissimilarities among objects is maintained,
# and there can be as many (or as few) different axes as are needed to account for all the variation in the data
# RDA is specific type of PCA, in which a regression is preformed prior to the PCA:
# "In RDA, one can truly say that the axes explain or model (in the statistical sense) the variation of the..
# ..dependent matrix. Furthermore, a global hypothesis (H0) of absence of linear relationship between Y and X can..
# ..be tested in RDA; this is not the case in PCA"
# Running the rda() function without an environmental variable matrix is just a plain old PCA
# Hellinger Transformation can be used to normalize data prior to doing a PCA
# PERMANOVA can then be used to identify whether or not there is a significant difference between treatments
# View the entire summary of a PCA with the summary() command... like this:
pca <- rda(data)
summary(pca)
#### HELLLINGER-TRANSFORMATION + PCA
# subset data
ps.F240pp <- prune_samples(sample_data(ps2)$Date== "2020-02-17" &
sample_data(ps2)$Plot== "240" &
sample_data(ps2)$Who== "Phillip", ps2)
#ps.F240pp <- prune_samples(ps2, Who== "Phillip", Plot== "240", Date== "2/17/20")
ps.F240pp
ps2.F240pp <- prune_samples(sample_data(ps2)$Who== "Phillip", ps2)
unique (sample_data(ps.F240pp)$depth)
unique (sample_data(ps.F240pp)$Who)
##ps2.F240 <- prune_samples(sample_data(ps2)$Date== "17-Feb-20" & sample_data(ps2)$Plot== "240", ps2)
##ps2.F240 #54 samples, 7622 taxa
# copy phyloseq object for Hellinger Transformation
ps.F240pp.hel <- ps.F240pp
# Hellinger Transformation
otu_table(ps.F240pp.hel) <-otu_table(decostand(otu_table(ps.F240pp.hel), method = "hellinger"), taxa_are_rows=FALSE)
# PCA
ps.F240pp.hel.pca <- rda(otu_table(ps.F240pp.hel))
# Broken Stick Model
screeplot(ps.F240pp.hel.pca, bstick = TRUE,
npcs = length(ps.F240pp.hel.pca$CA$eig),
main = "Hellinger PCA - February (no burn control) comp240") #1500x600
# PCA plot by Rep
plot_ordination(ps.F240pp.hel, ps.F240pp.hel.pca, color="Rep")+
stat_ellipse(geom="polygon", aes(fill=Rep), alpha=0.2) +
geom_point(size=2) +
geom_text(label=sample_data(ps.F240pp.hel)$depth, color="black", nudge_x = 0.02, nudge_y = 0.02)+
ggtitle("Hellinger PCA - February (no burn control) comp240") #600x500
#PERMANOVA to test for a statistically significant differences between reps:
adonis(otu_table(ps.F240pp.hel) ~ sample_data(ps.F240pp)$Rep)
# PCA plot by depth
plot_ordination(ps.F240pp.hel, ps.F240pp.hel.pca, color="depth")+
geom_mark_hull(concavity = 5,expand=0,radius=0,aes(fill=depth))+
geom_point(size=2) +
theme_classic()+
scale_color_manual(breaks=c("1","2","3","4", "5", "10", "20", "p1-3", "p1-10"),
values=c("#ff3b9d", "#ff3b6c", "#ff3b3b", "#ff6c3b", "#ff9d3b", "#ffce3b", "#ceff3b", "#0077ee", "black", "pink"))+
scale_fill_manual(breaks=c("1","2","3","4", "5", "10", "20", "p1-3", "p1-10"),
values=c("#ff3b9d", "#ff3b6c", "#ff3b3b", "#ff6c3b", "#ff9d3b", "#ffce3b", "#ceff3b", "#0077ee", "black", "pink"))+
ggtitle("Hellinger PCA - February (no burn control) comp240") #600x500
#PERMANOVA to test for a statistically significant differences between reps:
adonis(otu_table(ps.F240pp.hel) ~ sample_data(ps.F240pp)$depth)
# subset data
ps.F321pp <- prune_samples(sample_data(ps2)$Date== "2020-02-17" &
sample_data(ps2)$Plot== "321west", ps2)
ps.F321pp #53 samples, 7622 taxa
unique (sample_data(ps.F321pp)$depth)
# copy phyloseq object for Hellinger Transformation
ps.F321pp.hel <- ps.F321pp
# Hellinger Transformation
otu_table(ps.F321pp.hel) <-otu_table(decostand(otu_table(ps.F321pp.hel), method = "hellinger"), taxa_are_rows=FALSE)
# PCA
ps.F321pp.hel.pca <- rda(otu_table(ps.F321pp.hel))
# Broken Stick Modle:
screeplot(ps.F321pp.hel.pca, bstick = TRUE,
npcs = length(ps.F321pp.hel.pca$CA$eig),
main = "Hellinger PCA - February (post-burn) comp321") #1500x600
# PCA plot by Rep:
plot_ordination(ps.F321pp.hel, ps.F321pp.hel.pca, color="Rep")+
stat_ellipse(geom="polygon", aes(fill=Rep), alpha=0.2) +
geom_point(size=2) +
geom_text(label=sample_data(ps.F321pp.hel)$depth, color="black", nudge_x = 0.02, nudge_y = 0.02)+
ggtitle("Hellinger PCA - February (post-burn) comp321") #600x500
# PERMANOVA to test for a statistically significant differences between reps:
adonis(otu_table(ps.F321pp.hel) ~ sample_data(ps.F321pp)$Rep)
# PCA plot by depth:
plot_ordination(ps.F321pp.hel, ps.F321pp.hel.pca, color="depth")+
geom_mark_hull(concavity = 5,expand=0,radius=0,aes(fill=depth))+
geom_point(size=2) +
theme_classic()+
scale_color_manual(breaks=c("1","2","3","4", "5", "10", "20", "p1-3", "p1-10"),
values=c("#ff3b9d", "#ff3b6c", "#ff3b3b", "#ff6c3b", "#ff9d3b", "#ffce3b", "#ceff3b", "#0077ee", "black", "pink", "purple"))+
scale_fill_manual(breaks=c("1","2","3","4", "5", "10", "20", "p1-3", "p1-10"),
values=c("#ff3b9d", "#ff3b6c", "#ff3b3b", "#ff6c3b", "#ff9d3b", "#ffce3b", "#ceff3b", "#0077ee", "black", "pink", "purple"))+
ggtitle("Hellinger PCA - February (post-burn) comp321") #600x500
### For Blodgett Data ###
ps.BS <- prune_samples(sample_data(ps2)$Plot== "240" &
sample_data(ps2)$Who== "Neemonika" |
sample_data(ps2)$Plot== "400" &
sample_data(ps2)$Who== "Neemonika" |
sample_data(ps2)$Plot== "321west" &
sample_data(ps2)$Who== "Neemonika" |
sample_data(ps2)$Plot== "321east" &
sample_data(ps2)$Who== "Neemonika", ps2)
ps.BS
unique (sample_data(ps.BS)$Who)
# copy phyloseq object for Hellinger Transformation
ps.BS.hel <- ps.BS
# Hellinger Transformation
otu_table(ps.BS.hel) <-otu_table(decostand(otu_table(ps.BS.hel), method = "hellinger"), taxa_are_rows=FALSE)
# PCA
ps.BS.hel.pca <- rda(otu_table(ps.BS.hel))
# Broken Stick Model
screeplot(ps.BS.hel.pca, bstick = TRUE,
npcs = length(ps.BS.hel.pca$CA$eig),
main = "Hellinger PCA - All Blodgett Samples") #1500x600
### Define the order in which the Plot variable should be plotted:
sample_data(ps.BS.hel)$Plot <- factor(sample_data(ps.BS.hel)$Plot, levels=c("240", "321west", "321east", "400"))
### PCA plot with ellipses:
library(ggplot2)
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Plot")+
stat_ellipse(geom="polygon", aes(fill=Plot), alpha=0.05)+
#scale_color_gradient2(midpoint=6, low="green", mid="orange", high="blue")+ ##for plotting continuous variables!
scale_fill_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
labels=c("1c", "2c", "lo", "hi"))+
scale_color_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
labels=c("1c", "2c", "lo", "hi"))+
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Hellinger PCA - 16S")
### Export as .png or .tiff: 600x500
### Export as .pdf TBD
#PERMANOVA to test for a statistically significant differences between reps:
adonis(otu_table(ps.BS.hel) ~ sample_data(ps.BS)$Plot)
# PCA plot by depth
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="depth")+
stat_ellipse(geom="polygon", aes(fill=Plot), alpha=0.05)+
geom_point(size=2) +
scale_color_manual(breaks=c("0-3cm","3-6cm","10cm"),
values=c("#ff0102", "#0077ee", "#00cd9a","#ffce3b"))+
scale_fill_manual(breaks=c("0-3cm","3-6cm","10cm"),
values=c("#ff0102", "#0077ee", "#00cd9a", "#ffce3b"))+
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Depth") #600x500
#PERMANOVA to test for a statistically significant differences between reps:
adonis(otu_table(ps.BS.hel) ~ sample_data(ps.BS)$Rep)
# PCA plot by Fire
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Fire")+
stat_ellipse(geom="polygon", aes(fill=Fire), alpha=0.05)+
#scale_color_gradient2(midpoint=6, low="green", mid="orange", high="blue")+ ##for plotting continuous variables!
scale_fill_manual(values=c("#00cd9a", "#ff0102"),
labels=c("Pre-Fire", "Post-Fire"))+
scale_color_manual(values=c("#00cd9a", "#ff0102"),
labels=c("Pre-Fire", "Post-Fire"))+
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Burned vs. Unburned")
#PERMANOVA
adonis(otu_table(ps.BS.hel) ~ sample_data(ps.BS)$Fire)
## Oval plots
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Season")+
stat_ellipse(geom="polygon", aes(fill=Plot), alpha=0.05)+
#scale_color_gradient2(midpoint=6, low="green", mid="orange", high="blue")+ ##for plotting continuous variables!
scale_fill_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
labels=c("1c", "2c", "lo", "hi"))+
scale_color_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
labels=c("1c", "2c", "lo", "hi"))+
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Hellinger PCA - All Neemonika (3-6cm samples omitted)")
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Season")+
stat_ellipse(geom="polygon", aes(fill=Season), alpha=0.2)+
geom_point(size=2) +
scale_color_manual(breaks=c("Autumn","Winter","Spring","Summer"),
values=c("#ff6c3b", "#0077ee", "#00cd9a", "#ff3b9d"))+
scale_fill_manual(breaks=c("Autumn","Winter","Spring","Summer"),
values=c("#ff6c3b", "#0077ee", "#00cd9a", "#ff3b9d"))+
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Season")
##Depth
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="depth")+
stat_ellipse(geom="polygon", aes(fill=depth), alpha=0.2)+
geom_point(size=2) +
theme_classic()+
scale_color_manual(breaks=c("0-3cm","3-6cm","10cm"),
values=c("#ff0102", "#0077ee", "#00cd9a","#ffce3b"))+
scale_fill_manual(breaks=c("0-3cm","3-6cm","10cm"),
values=c("#ff0102", "#0077ee", "#00cd9a", "#ffce3b"))+
ggtitle("Depth")
unique (sample_data(ps2.BS)$depth)
### PCA plot with gradient colors
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="CtempDailyAvg")+
#stat_ellipse(geom="polygon", aes(fill=pH), alpha=0.2)+
scale_color_gradient2(midpoint=10, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Avg. Daily Air Temp (C)") #600x500
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Precip4wkSum_in")+
#stat_ellipse(geom="polygon", aes(fill=pH), alpha=0.2)+
scale_color_gradient2(midpoint=10, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Avg. Precip - 4wkSum") #600x500
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="pH")+
#stat_ellipse(geom="polygon", aes(fill=pH), alpha=0.2)+
#stat_ellipse(geom="polygon", aes(fill=Fire), alpha=0.2)+
scale_color_gradient2(midpoint=6, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("pH") #600x500
#permanova
adonis(otu_table(ps.BS.hel) ~ sample_data(ps2.Neemonika)$pH)
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="Precip4wkSum_in")+
#stat_ellipse(geom="polygon", aes(fill=Fire), alpha=0.2)+
scale_color_gradient2(midpoint=15, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Avg. Weekly Preciptation") #600x500
#permanova
adonis(otu_table(ps.BS.hel) ~ sample_data(ps2.Neemonika)$Precip4wkSum_in)
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="numdate")+
#stat_ellipse(geom="polygon", aes(fill=pH), alpha=0.2)+
scale_color_gradient2(midpoint=18000, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Blodgett Samples by Time") #600x500
#permanova
adonis(otu_table(ps.BS.hel) ~ sample_data(ps2.Neemonika)$numdate)
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="DaysSinceBurn")+
scale_color_gradient2(midpoint=100, low="green", mid="orange", high="blue")+
geom_point(size=2) +
theme_classic()+
theme(axis.text = element_text(color = "black", size=14))+
theme(axis.title = element_text(color = "black", size=14))+
theme(legend.text = element_text(size=14, color="black"))+
theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Time after Treatment") #600x500
#permanova
adonis(otu_table(ps.BS.hel) ~ sample_data(ps2.Neemonika)$DaysSinceBurn)
##
plot_ordination(ps.BS.hel, ps.BS.hel.pca, color="pH")+
#stat_ellipse(geom="polygon", aes(fill=Plot), alpha=0.05)+
scale_color_gradient2(midpoint=6, low="green", mid="orange", high="blue")+ ##for plotting continuous variables!
geom_point(size=4) +
#scale_fill_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
# labels=c("1c", "2c", "lo", "hi"))+
#scale_color_manual(values=c("#009acd", "#00cd9a", "#ff7f01", "#ff0102"),
# labels=c("1c", "2c", "lo", "hi"))+
theme_classic()+
#theme(axis.text = element_text(color = "black", size=14))+
#theme(axis.title = element_text(color = "black", size=14))+
#theme(legend.text = element_text(size=14, color="black"))+
#theme(legend.title = element_text(size=14, face="bold", color="black"))+
ggtitle("Hellinger PCA - pH")
### Export as .png or .tiff: 600x500
### Export as .pdf TBD
### NUMDATES!
#
# Autumn2018 = 17816-17886 **321E burned on 17820
# Winter2019 = 17887-17976 **400 burned on 17898
# Spring2019 = 17977-18068
# Summer2019 = 18069-18160
# Autumn2019 = 18161-18251
# Winter2020 = 18252-18342 **321W burned on 18308
#
#
###################################
## Diversity Metrics across time ##
###################################
#
# subset ps2 (decontam output) for only Neemonika data,
# up until the day that 321W burned, and exclude comp380:
ps2.Neemonika <- prune_samples(sample_data(ps2)$Who == "Neemonika" & #only Neemonika data (no Phillip or Controls)
sample_data(ps2)$Plot != "380" &
sample_data(ps2)$numdate < 18308, ps2) #exclude dates on/after 321W burn
#ps2.Neemonika <- prune_samples(sample_data(ps2)$Who == "Neemonika" & #only Neemonika data (no Phillip or Controls)
# sample_data(ps2)$Plot != "380" &
# sample_data(ps2)$depth != "3-6cm" & #exclude comp380 data
# sample_data(ps2)$numdate < 18308, ps2) #exclude dates on/after 321W burn
# Extract OTU table from phyloseq into a data.frame:
otu.df <- as.data.frame(otu_table(ps2.Neemonika))
otu.df[1:5,1:5] # rownames=SeqIDs, colnames=ASVs
dim(otu.df) ##255 samples.
SampleMetadata <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/16S_SampleData_decontam_620.csv")
# Create Metadata tables for each Plot, which will be useful for subsetting the otu.df table!
Metadata321W <- SampleMetadata[Who =="Neemonika" & Plot == "321west" & numdate < 18308]
Metadata321E <- SampleMetadata[Who =="Neemonika" & Plot == "321east" & numdate < 18308]
Metadata400 <- SampleMetadata[Who =="Neemonika" & Plot == "400" & numdate < 18308]
Metadata240 <- SampleMetadata[Who =="Neemonika" & Plot == "240" & numdate < 18308]
MetadataBS <- SampleMetadata[Who =="Neemonika" & Plot != "380" & numdate < 18308]
head(MetadataBS)
dim(MetadataBS)
#
# Subset otu_table for only the rownames that match the SeqIDs in MetadataBS:
otu_tableBSasv <- otu.df[rownames(otu.df) %in% MetadataBS$SeqID, ]
otu_tableBSasv[1:10, 1:5] #all SeqIDs should be from comp400!
dim(otu_tableBSasv) #255 samples.
# Calculate diversity metric with the vegan package
ShanDivBS <- as.data.table(diversity(otu_tableBSasv, index="shannon")) #index options = shannon, simpson, or invsimpson
ShanDivBS[ ,Date:=MetadataBS$numdate]
ShanDivBS[ ,Plot:=MetadataBS$Plot]
ggplot(ShanDivBS, aes(x=numdate, y=V1, color=Plot))+
geom_point()+
geom_smooth(aes(fill=Plot))+
theme_minimal()+
theme(axis.text.x = element_text(hjust=1, vjust=0.5, angle=90, color = "black"))+
scale_x_continuous(breaks=seq(17890, 18310, 30), limits=c(17890, 18310))+
xlab("Time")+
ylab("Shannon Diversity Index")+
ggtitle("Blodgett 16S Community Shannon Diversity")
SimpDivBS <- as.data.table(diversity(otu_tableBSasv, index="simpson"))
SimpDivBS[ ,Date:=MetadataBS$numdate]
SimpDivBS[ ,Plot:=MetadataBS$Plot]
ggplot(SimpDivBS, aes(x=Date, y=V1, color=Plot))+
geom_point()+
geom_smooth(aes(fill=Plot))+
theme_minimal()+
theme(axis.text.x = element_text(hjust=1, vjust=0.5, angle=90, color = "black"))+
scale_x_continuous(breaks=seq(17890, 18310, 30), limits=c(17890, 18310))+
xlab("Time")+
ylab("Simpson Diversity Index")+
ggtitle("Blodgett 16S Community Simpson Diversity")
#calculate species RICHNESS
# Where ASV abundance value is greater than 0, give it a 1, then sum the 1's by sample:
otu_tableBSasv.rich <- apply(otu_tableBSasv > 0,1,sum)
otu_tableBSasv.rich.dt <- as.data.table(otu_tableBSasv.rich)
otu_tableBSasv.rich.dt[ ,Plot:=MetadataBS$Plot]
otu_tableBSasv.rich.dt[ ,Date:=MetadataBS$numdate]
names(otu_tableBSasv.rich.dt)[1] <- "Richness"
ggplot(otu_tableBSasv.rich.dt, aes(x=Date, y=Richness, color=Plot))+
geom_point()+
geom_smooth(aes(fill=Plot))+
theme_minimal()+
theme(axis.text.x = element_text(hjust=1, vjust=0.5, angle=90, color = "black"))+
scale_x_continuous(breaks=seq(17890, 18310, 30), limits=c(17890, 18310))+
xlab("Time")+
ylab("Richness")+
ggtitle("Blodgett 16S Community Richness")
#calculate species EVENNESS
otu_tableBSasv.even <- as.data.table(diversity(otu_tableBSasv, index="simpson"))/log(otu_tableBSasv.rich)
otu_tableBSasv.even[ ,Date:=MetadataBS$numdate]
otu_tableBSasv.even[ ,Plot:=MetadataBS$Plot]
names(otu_tableBSasv.even)[1] <- "Evenness"
ggplot(otu_tableBSasv.even, aes(x=Date, y=Evenness, color=Plot))+
geom_point()+
geom_smooth(aes(fill=Plot))+
theme_minimal()+
theme(axis.text.x = element_text(hjust=1, vjust=0.5, angle=90, color = "black"))+
scale_x_continuous(breaks=seq(17890, 18310, 30), limits=c(17890, 18310))+
xlab("Time")+
ylab("Evenness")+
ggtitle("Blodgett 16S Community Evenness")
##############################
####### TITAN Analysis #######
##############################
library(TITAN2)
# TITAN2 requires two inputs:
# (1) OTU table -- data.frame: rownames = samples, colnames=OTUs
View(glades.taxa) #example
# (2) Evironmental gradient -- data.frame: rownames = samples, single column gradient variable
View(glades.env) #example
########################################
####### COMBINED TITAN Analysis #######
#######################################
library(TITAN2)
# TITAN2 requires two inputs:
# (1) OTU table -- data.frame: rownames = samples, colnames=OTUs
View(glades.taxa) #example
# (2) Evironmental gradient -- data.frame: rownames = samples, single column gradient variable
View(glades.env) #example
SampleMetadata <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/16S_SampleData_decontam_620.csv")
SampleMetadataITS <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/ITS_Data/Blodgett_CommData_Decontam-selected/ITS_ps2_SampleData_decontamTITAN.csv")
SampleMetadata$SeqID <-gsub("\\..*","",SampleMetadata$SeqID)
SampleMetadataITS$SeqID <-gsub("\\-.*","",SampleMetadataITS$SeqID)
view(SampleMetadata)
view(SampleMetadataITS)
dim(SampleMetadata)
dim(SampleMetadataITS)
AllSampleMetadata <- cbind(SampleMetadata, SampleMetadataITS, by="SeqID")
#Change Variant IDs to OTUs
#taxa_names(ps2) <- paste0("OTU", seq(ntaxa(ps2)))
# Subset phyloseq object:
#ps2.Neemonika <- prune_samples(sample_data(ps2)$Who == "Neemonika" &
# sample_data(ps2)$Plot != "380" &
# sample_data(ps2)$numdate < 18308, ps2)
# Extract OTU table:
#otu.df <- as.data.frame(otu_table(ps2.Neemonika))
#otu.df[1:5,1:5]
#otu2.df <- as.data.frame(otu_table(ps3.Neemonika))
#otu2.df[1:5,1:5]
#Stringtable <- colSums(otu.df)
#OTUtable <- colSums(otu2.df)
#Stringtable.df <- as.data.frame(Stringtable)
#OTUtable.df <- as.data.frame(OTUtable)
#Stringtable.df <- rownames_to_column(Stringtable.df, var= "String")
#OTUtable.df <- rownames_to_column(OTUtable.df, var= "OTU")
#COMBO <- cbind(Stringtable.df, OTUtable.df)
#if(COMBO$Stringtable == COMBO$OTUtable)
#{
# print("Column A and B are identical")
#}
#identical(COMBO[['Stringtable']],COMBO[['OTUtable']])
#write.csv(COMBO, file = "~/Desktop/Box Sync/Blodgett_16S_Comm/16S_OTU_String_Combo.csv")
#Read ITS/16S OTU table:
otu <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/TITAN_16S+ITS/otu_table_16SITS.csv")
otu.df <- as.data.frame(otu)
otu.df$V1 <- NULL
colnames(otu.df)[1] <- "OTU"
otu.df[1:5,1:5]
dim(otu.df) #255 rows, 110139 columns
otu.df <- column_to_rownames(otu.df, var = "OTU")
#Read ITS/16S Taxa table
tax <- fread("~/Desktop/Box Sync/Blodgett_16S_Comm/TITAN_16S+ITS/tax_table_16SITS.csv", header = TRUE)
tax.df <- as.data.frame(tax)
tax.df[2:5, 1:8]
tax.df$V1 <- NULL
dim(tax.df) #110138 rows, 8 columns
colnames(tax.df)[1] <- "OTU"
rownames(tax.df) <- tax.df[,1]
tax.df$OTU <- NULL
#tax.df <- tax.df[,-1]
head(SampDat.dt)
rownames(SampDat.dt) <- SampDat.dt[,2]
ps <- phyloseq(otu_table(otu.dft, taxa_are_rows=TRUE), #matrix (rownames = SeqIDs, colnames = ASVs, values = abundance of each ASV in each SeqID)
tax_table(tax.df), #matrix (rownames = ASVs, colnames = taxonomic levels, values = taxonomic classification for each ASV)
sample_data(AllSampleMetadata)) #data.frame, (rownames = SeqIDs, colnames & values = additional info for each SeqID)
# Extract Taxonomy table:
#tax.df <- as.data.frame(tax_table(ps2.Neemonika))
#tax.df[1:5,1:5]
#transpose OTU table:
otu.dft <- as.data.frame(t(otu.df))
otu.dft[1:5,1:5]
# Merge taxa info onto OTU table by matching rownames
OTUtax <- as.data.table(merge(otu.dft, tax.df, by=0))
dim(OTUtax) #110138 rows, 264 columns
OTUtax[1:5,257:264] #taxonomy should be at the end of the table
# Create the .taxa table for TITAN; with Genus appened onto ASVs,
# which will make the TITAN output plots a little easier to look at...
# So the y-axis will be labeled like "Pyronema_ASV257" instead of just "ASV257"
# Note that you can choose whatever taxonomic level you want for this!
# I feel Genus is most informative for the Fungi,
# but maybe Phylum or a different level is more informative for bacteria... ?!
#
# Split the "Genus" column to get rid of the "g__" before every genus name
OTUtax[ ,c("JustGenus")] <- copy(OTUtax$Genus)
OTUtax$JustGenus <- gsub("_", "-", OTUtax$JustGenus) # replace all "_" with "-" in the JustGenus column
dim(OTUtax) #110138, 265
head(OTUtax)
OTUtax[1:5,257:265]
OTUtax[1:5, 1:5]
# Create column with GenusASV info (i.e. "Pyronema_ASV257")
OTUtax <- unite(OTUtax, GenusOTU, c(JustGenus, Row.names), sep="_", remove=FALSE)
# Massage table into the format that TITAN is expecting:
#remove excess columns (by number) so that all that remains are the sample columns and the GenusASV column
dim(OTUtax) #110138,266
OTUtax[1:5,258:266] #note which columns are non-numeric, taxonomy-related columns...
OTUtax[1:5,1:5] #note that GenusASV is column#1
OTUtax[,c(2,258:266)] <- list(NULL) #remove all taxonomy columns
dim(OTUtax) #110138, 256
OTUtax[1:5,250:256] #check to make sure taxonomy columns were successfully removed
OTUtax[1:5, 1:5] #check to make sure GenusASV column is still there
OTUtax <- column_to_rownames(OTUtax, var="GenusOTU") #move GenusASV to the rownames
#transpose the table
OTUtax.t <- as.data.frame(t(OTUtax))
OTUtax.t[1:5, 1:5] #check that rownames=SeqIDs and colnames=GenusASV
OTUtax.t.rn <- rownames_to_column(OTUtax.t, var="SeqID") #move rownames to a column
OTUtax.t.rn[1:5, 1:5] ## THIS is the table that should be used as the basis for .taxa argumens in TITAN!
#
#
#
# Run TITAN on each Plot separately!
#
Metadata321W <- AllSampleMetadata[Who =="Neemonika" & Plot == "321west" & numdate < 18308]
Metadata321E <- AllSampleMetadata[Who =="Neemonika" & Plot == "321east" & numdate < 18308]
Metadata400 <- AllSampleMetadata[Who =="Neemonika" & Plot == "400" & numdate < 18308]
Metadata240 <- AllSampleMetadata[Who =="Neemonika" & Plot == "240" & numdate < 18308]
MetadataBS <- AllSampleMetadata[Who =="Neemonika" & Plot != "380" & numdate < 18308]
########### comp400 TITAN!
#
# (1) OTU Table -- Subset OTUtax.t.rn table for only the column names that match the SeqIDs in Metadata400
BS400.taxa <- OTUtax.t.rn[OTUtax.t.rn$SeqID %in% Metadata400$SeqID,] #67 samples, 110139
BS400.taxa[1:5,1:5]
rownames(BS400.taxa) <- c() #explicitly assign nothing to the rownames, so that column_to_rownames has a blank slate to work with...
BS400.taxa <- column_to_rownames(BS400.taxa, var="SeqID")
dim(BS400.taxa) #67, 110138 before filtering / 31, 110138 omitting 3-6cm
# remove OTUs that occur in only 3 or fewer samples (TITAN requirement)
BS400.taxa <- BS400.taxa[colSums(BS400.taxa > 0) >= 3 ] # 6449 after filtering / 3151 filtered for omitted samples
# "BS400.taxa > 0" creates a TRUE/FALSE table for abundance values >0,
# colSums() adds up all the values in each column (TRUE = 1, FALSE = 0)
# subset the table for where their are more than 3 TRUE (non-zero) values!
dim(BS400.taxa)
# (2) Evironmental gradient -- data.frame: rownames = samples, single column gradient variable
# Subset the Metadata400 table to create the .env argument:
BS400.env <- Metadata400[ ,c("SeqID", "numdate")]
head(BS400.env)
BS400.env <- unique(BS400.env)
dim(BS400.env)
BS400.env <- column_to_rownames(BS400.env, var="SeqID")
###check that everything looks right before launching TITAN
# .env argument should be numeric:
class(BS400.env$numdate)
# if it's not numeric, tell it to be numeric:
BS400.env$numdate <- as.numeric(BS400.env$numdate)
# .env and .taxa tables should have equivallent rownames:
head(BS400.env)
BS400.taxa[1:5,1:5]
# both tables should have the same number of rows (first number)
dim(BS400.env)
dim(BS400.taxa)
# Run TITAN2 with default parameters:
# WARNING - it can take hours to run!
#example:: glades.titan <- titan(glades.env, glades.taxa)
BS400.titan <- titan(BS400.env, BS400.taxa)
# start @ 1110, end @
# start @ 2:59pm, end @ 4:28! after 500 permutations
# MAKE NOTE THE OUTPUTS! ...(so you don't have to run it again!)
#
# "100% occurrence detected 1 times (0.1% of taxa)"
#
# "Proportion of pure and reliable taxa = 0.0576"
##EXPLORE THE RESULTS OF TITAN!
#
# overview of what's contained within out TITAN object:
summary(BS400.titan)
# Each item within the TITAN object can be accessed as if it were a column
# ...for example: BS400.titan$sumz.cp
#
# Length Class Mode
#sppmax 24096 -none- numeric ### table of change points by taxa, dim = 1506 x 16
#sumz.cp 24 -none- numeric ### table of observed change points ("cp"), with selected quantiles, dim = 4 x 6
#env 80 -none- numeric ### input env table, dim = 80 x 1
#taxa 120480 -none- numeric ### input taxa table, dim = 80 x 1506
#envcls 71 -none- numeric ### "A vector of candidate partitions derived from subtracting ’minSplt’ from ’env’"
#srtEnv 80 -none- numeric ### sorted version of input env table
#ivzScores 427704 -none- numeric ### "matrix containing group membership, z scores, IndVals, and p values for each taxon at every candidate partition in ’envcls’"
#ivz 142 -none- numeric ### sum z+ and sum z- scores for each value in encvls
#ivz.f 142 -none- numeric ### Filtered sum z+ and sum z- scores for each value in encvls
#maxSumz 1000 -none- numeric ### "matrix of environmental values at sum(z-) and sum(z+) maxima across all bootstrap replicates
#maxFsumz 1000 -none- numeric ### "matrix of environmental values at filtered sum(z-) and sum(z+) maxima across all bootstrap replicates"
#metricArray 3012000 -none- numeric ### "An array of group membership, env change points, z scores, and p values for passing to ’plot.IVecdf’"
#arguments 10 -none- numeric ### arguments used in TITAN function call
# summary table of observed change points ("cp"), with selected quantiles around the cp
BS400.titan$sumz.cp
#output for ASVs:
# cp 0.05 0.10 0.50 0.90 0.95
#sumz- 17904 17904.0 17904.0 18006 18127.0 18127 ##negatively responding taxa, "17904" = Jan2019 = the first post-fire sample!
#sumz+ 18330 18267.0 18286.0 18305 18317.5 18330 ##positively responding taxa, "18330" = March2020
#fsumz- 18024 17988.0 17988.0 18024 18040.5 18057 ##filtered sumz-, "18024" = May2019
#fsumz+ 18305 18191.5 18191.5 18267 18286.0 18305 ##filtered sumz+, "18304" = Feb2020
#output for OTUs:
# cp 0.05 0.10 0.50 0.90 0.95
#sumz- 18006 17904.00 17904 18006 18127 18127.0
#sumz+ 18286 18071.50 18086 18206 18305 18305.0
#fsumz- 18006 17988.00 17988 18006 18024 18040.5
#fsumz+ 18267 18084.55 18127 18239 18267 18286.0
# Table of TITAN results for all taxa
# Zscores, IndVal scores, p-values, etc...
head(BS400.titan$sppmax)
dim(BS400.titan$sppmax) #6449x16 for combined data, 3151x16 for omitted
# SAVE THIS TABLE!
BS400.titan.dt <- as.data.table(BS400.titan$sppmax)
# Results table doesn't have informative rownames, so re-attach ASVs:
# transpose taxa table and then move rownames to the results table
BS400.taxa <- as.data.frame(t(BS400.taxa)) #BS.taxa was the input to TITAN that can also be seen as BS.titan$taxa
head(BS400.taxa)
BS400.taxa.rn <- rownames_to_column(BS400.taxa, var="OTU")
BS400.taxa.rn[1:5, 1:2]
# Paste OTUs onto the TITAN results table:
merge1 <- cbind(BS400.taxa.rn$OTU, BS400.titan.dt)
head(merge1)
names(merge1)[1] <- "GenusOTU"
# split the GenusASV column to make an ASV column that can be matched with the ASV column in tax.df
merge1[ , c("Genus", "OTU"):=tstrsplit(GenusOTU, "_", fixed=TRUE)]
#merge1[ ,c("OTU")] <- grep(pattern = "ASV*", merge1$GenusOTU)
tax.df[1:5, 1:5]
tax.df.rn <- rownames_to_column(tax.df, var="OTU")
tax.df.rn[1:5, 1:5]
# merge taxonomy table with results table:
BS400.titan.AllResults <- merge(merge1, tax.df.rn, by="OTU")
head(BS400.titan.AllResults)
# EXPORT RESULTS to your computer!
write.csv(BS400.titan.AllResults, "~/Desktop/Box Sync/Blodgett_16S_Comm/TITAN_16S+ITS_0-3cmONLY/TITAN400_ITS+16S.csv")
# filter the table for only the results that were ultimately plotted = where the value in the "filter" column is >0
BS400.titan.TopResults <- BS400.titan.AllResults[filter>0]
colnames(BS400.titan.TopResults)
# ienv.cp- environmental change point for each taxon based on IndVal maximum (used if imax = TRUE)
# zenv.cp- environmental change point for each taxon based on z maximum (default, imax = FALSE)
# freq- number of non-zero abundance values per taxon
# maxgrp- 1 if z- (negative response); 2 if z+ (positive response)
# IndVal-Dufrene and Legendre 1997 IndVal statistic, scaled 0-100%
# obsiv.prob- probability of an equal or larger IndVal from random permutation. **squishy! don't trust!
# zscore- IndVal z score
# 5%, 10%, 50%, 90%, 95%- change point quantiles among bootstrap replicates
# purity - proportion of replicates matching observed maxgrp assignment
# reliability - proportion of replicate obsiv.prob values < = 0.05
# z.median- median score magnitude across all bootstrap replicates
# filter- logical (if >0) indicating whether each taxa met purity and reliability criteria, value indicates maxgrp assignment.
#
#
#
#PLOT RESULTS!
#
# Summary plot of total z scores...
plot_sumz_density(BS400.titan)
# illustrate the filtered sumz's...
plot_sumz(BS400.titan, xmin = 17800, xmax = 18400, col1 = "blue", col2 = "orange")
#
#plot z-scores of indicator taxa for the +/- change points! NEW METHOD (colored distributions)
plot_taxa_ridges(BS400.titan, xlabel = "Time")
#plot plot z-scores of indicator taxa for the +/- change points! OLD METHOD (B&W circle+line "boxplots")
plot_taxa(BS400.titan, prob95 = TRUE, xlabel = "Time", xmin = 17800, xmax = 18400, leg.x = 0.1, leg.y = 10)
#plot Z-score with ggplot2!!