diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0747222 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +.Rhistory + diff --git a/DESCRIPTION b/DESCRIPTION index 80f506c..b5c025f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: biomehorizon -Title: Plot Microbiome Time Series -Version: 0.0.0.9000 +Title: Plot Microbiome Time Series +Version: 1.0.0.0000 Authors@R: person("Isaac", "Fink", email = "isaacfink21@gmail.com", role = c("aut", "cre")) -Description: This package provides a toolset for visualizing longitudinal microbiome data with a horizon - plot. A horizon plot provides a compact method to display change in the distribution of microbial taxa - over time. This method is valuable for highlighting changes in individual taxa, but more importantly, +Description: This package provides a toolset for visualizing longitudinal microbiome data with a horizon + plot. A horizon plot provides a compact method to display change in the distribution of microbial taxa + over time. This method is valuable for highlighting changes in individual taxa, but more importantly, emphasizes broad trends among groups of taxa. This package can also be used to visualize other types of longitudinal data. Depends: R (>= 3.5.2) @@ -16,8 +16,7 @@ Imports: magrittr Suggests: RColorBrewer -License: What license is it under? 
+License: MIT Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.1 - +RoxygenNote: 7.1.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d2299fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +Copyright 2021 Regents of the University of Minnesota + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +End license text. diff --git a/R/BiomeHorizon.R b/R/BiomeHorizon.R index aff5a7e..e7b9860 100644 --- a/R/BiomeHorizon.R +++ b/R/BiomeHorizon.R @@ -172,46 +172,47 @@ #' @examples #' # Pass just the OTU table to prepanel, and it will assume all samples belong #' # to the same subject. -#' prepanel(otusample) +#' prepanel(otudata = otusample_diet) #' #' # Supplement metadata and a subject name, and it will select samples from #' # just one subject (this is what you should do with more than one subject). 
-#' prepanel(otusample, metadatasample, subj="subject_1") +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01") #' #' # Pass taxonomydata to prepanel if you want to label facets by taxonomy #' # rather than by OTU ID. -#' prepanel(otusample, metadatasample, taxonomysample, subj="subject_1", -#' facetLabelsByTaxonomy=TRUE) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' taxonomydata = taxonomysample_diet, subj="MCTs01", facetLabelsByTaxonomy=TRUE) #' #' # OTU filtering using both a prevalence and an abundance standard (default) -#' prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=75, -#' thresh_abundance=0.75) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +#' thresh_prevalence=75, thresh_abundance=0.75) #' #' # OTU filtering using just an abundance standard -#' prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=NA, -#' thresh_abundance=0.75) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +#' thresh_prevalence=NA, thresh_abundance=0.75) #' #' # If an OTU's average abundance reaches a high enough threshold, override #' # other standards and include it in analysis -#' prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=90, -#' thresh_abundance=0.75, thresh_abundance_override=1.5) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +#' thresh_prevalence=90, thresh_abundance=0.75, thresh_abundance_override=1.5) #' #' # Filter OTUs where >2% samples are NA values -#' prepanel(otusample, metadatasample, subj="subject_2", thresh_NA=2) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +#' thresh_NA=2) #' #' # You can also manually select OTUs by OTU ID -#' prepanel(otusample, metadatasample, subj="subject_2", -#' 
otulist=c("otu_1000","otu_1243","otu_1530","otu_6821","otu_7737")) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +#' otulist=c("taxon 1", "taxon 2", "taxon 10", "taxon 14")) #' #' # Manual selection can be used to specify the order OTUs will appear on #' # the horizon plot. For example, these two datasets have identical OTUs, but #' # they are ordered differently. -#' params <- prepanel(otusample, metadatasample, subj="subject_1", -#' thresh_prevalence=95, thresh_abundance=1.5) +#' params <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' subj="MCTs01", thresh_prevalence=95, thresh_abundance=1.5, +#' otulist=c("taxon 1", "taxon 2", "taxon 10", "taxon 14")) #' params[[1]]$otuid -#' params <- prepanel(otusample, metadatasample, subj="subject_1", -#' otulist=c("otu_2526","otu_1530", "otu_7737", "otu_6821", "otu_3773", -#' "otu_2457", "otu_1243", "otu_2378")) +#' params <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' subj="MCTs01", otulist=c("taxon 10", "taxon 2", "taxon 1", "taxon 14")) #' params[[1]]$otuid #' #' # The origin and band.thickness variables can be set to either a numeric @@ -219,11 +220,12 @@ #' # on its sample values. 
#' #' # Use a fixed origin of 5% for all OTU subpanels -#' prepanel(otusample, metadatasample, subj="subject_1", origin=5) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' subj="MCTs01", origin=5) #' #' # Evaluate a different origin for each OTU subpanel using a custom function -#' prepanel(otusample, metadatasample, subj="subject_1", -#' origin=function(y){mad(y, na.rm=TRUE)}) +#' prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' subj="MCTs01", origin=function(y){mad(y, na.rm=TRUE)}) #' #' @import dplyr #' @importFrom magrittr %>% @@ -462,9 +464,9 @@ prepanel <- function(otudata, metadata=NA, taxonomydata=NA, otudata <- otudata %>% dplyr::filter(otuid == singleVarOTU) } - # Covert otudata format for single variable analysis + # Convert otudata format for single variable analysis. Remove extra metadata columns if(!is.na(singleVarOTU)) { - samplenames <- metadata %>% dplyr::select(-collection_date) %>% dplyr::group_by(subject) %>% dplyr::mutate(row_id=1:dplyr::n()) %>% dplyr::ungroup() %>% tidyr::spread(subject,sample) %>% dplyr::select(-row_id) %>% t() %>% as.data.frame() %>% tibble::rowid_to_column("otuid") + samplenames <- metadata %>% dplyr::select(subject, sample) %>% dplyr::group_by(subject) %>% dplyr::mutate(row_id=1:dplyr::n()) %>% dplyr::ungroup() %>% tidyr::spread(subject,sample) %>% dplyr::select(-row_id) %>% t() %>% as.data.frame() %>% tibble::rowid_to_column("otuid") ids <- samplenames$otuid samplenames <- samplenames %>% dplyr::select(-otuid) otudata <- matrix(otudata[1,][c(as.matrix(samplenames))],nrow(samplenames)) %>% as.data.frame() %>% dplyr::mutate(otuid=ids) %>% dplyr::select(otuid,everything()) @@ -730,17 +732,18 @@ prepanel <- function(otudata, metadata=NA, taxonomydata=NA, #' #' @examples #' # Basic plot form. By default, samples are plotted next to each other. 
-#' plist <- prepanel(otusample, metadatasample, taxonomysample, subj = "subject_4") +#' plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' taxonomydata = taxonomysample_diet, subj = "MCTs16") #' horizonplot(plist) #' #' # For irregularly spaced time series, you can "regularize" the data to create #' # an accurate timescale. #' -#' # Adjust data to regular time intervals each 100 days. This will interpolate -#' # new data points for each OTU at day = 1, 101, 201, etc. based on values +#' # Adjust data to regular time intervals each 1 day. This will interpolate +#' # new data points for each OTU at day = 1, 2, 3 etc. based on values #' # at previous and subsequent timepoints. -#' plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", -#' regularInterval = 100) +#' plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' subj = "MCTs16", regularInterval = 1) #' horizonplot(plist) #' #' # If the data has large gaps of time without samples, interpolating data @@ -750,14 +753,15 @@ prepanel <- function(otudata, metadata=NA, taxonomydata=NA, #' # data will be regularized separately on both sides of the break in two #' # different facets. 
#' -#' # Set maximum time without samples to 200 days -#' plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", -#' regularInterval = 100, maxGap = 200) +#' # Set maximum time without samples to 75 days +#' plist <- prepanel(otudata = otusample_baboon, metadata = metadatasample_baboon, +#' subj = "Baboon_388", regularInterval = 25, maxGap = 75) #' horizonplot(plist) #' #' # Remove facets with less than 5 samples -#' plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", -#' regularInterval = 100, maxGap = 200, minSamplesPerFacet = 5) +#' plist <- prepanel(otudata = otusample_baboon, metadata = metadatasample_baboon, +#' subj = "Baboon_388", regularInterval = 25, maxGap = 75, +#' minSamplesPerFacet = 5) #' horizonplot(plist) #' #' @import ggplot2 @@ -998,7 +1002,7 @@ horizonplot <- function(parameterList, aesthetics=horizonaes()) { geom_area(aes(x = as.numeric(day), y = value, fill=band), position="identity", color=col.outline) + scale_fill_manual(values=col.bands,breaks=names(col.bands)[c((2*nbands):(1+nbands),nbands:1)],labels=c(paste("+",nbands:1,sep=""),(-1):(-1*nbands))) + theme_bw() + - theme(axis.text.x=element_text(size=16), axis.text.y=element_blank(), axis.ticks.y=element_blank(), panel.grid=element_blank(), panel.border=element_rect(color=col.border), strip.text.y=element_text(angle=180), panel.spacing.y=unit(0, units="cm"), legend.position=legendPosition) + + theme(axis.text.x=element_text(size=16), axis.text.y=element_blank(), axis.ticks.y=element_blank(), panel.grid=element_blank(), panel.border=element_rect(color=col.border), strip.text.y.left=element_text(angle=0), panel.spacing.y=unit(0, units="cm"), legend.position=legendPosition) + scale_y_continuous(expand = c(0,0)) + scale_x_continuous(expand = c(0,0)) + # remove margins between plot and panel xlab(ifelse(is.na(timestamps), "Sample", "Day")) + ylab(element_blank()) @@ -1048,8 +1052,9 @@ horizonplot <- function(parameterList, 
aesthetics=horizonaes()) { #' horizon plot, but if you want to add other aesthetics not included in this #' function, you can do so by appending them to the horizon plot object using #' the \code{+} operator. e.g. to add a gray background in the plotting area:\cr -#' \code{horizonplot(prepanel(otusample, metadatasample, taxonomysample, -#' subj="subject_1")) + theme(panel.background = element_rect(fill="gray90"))} +#' \code{horizonplot(prepanel(otudata = otusample_diet, +#' metadata = metadatasample_diet, taxonomydata = taxonomysample_diet, +#' subj = "MCTs01")) + theme(panel.background = element_rect(fill="gray90"))} #' #' @param title character. The text for the title. #' @param subtitle character. The text for the subtitle, displayed below the title. @@ -1089,7 +1094,8 @@ horizonplot <- function(parameterList, aesthetics=horizonaes()) { #' to apply the aesthetics. #' #' @examples -#' plist <- prepanel(otusample, metadatasample, taxonomysample, subj = "subject_2") +#' plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +#' taxonomydata = taxonomysample_diet, subj = "MCTs01") #' #' # By default, the function is called with no arguments to use default aesthetics #' horizonplot(plist, horizonaes()) diff --git a/R/metadatasample-data.R b/R/metadatasample-data.R deleted file mode 100644 index 791dd0f..0000000 --- a/R/metadatasample-data.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Metadata on samples from OTU table -#' -#' Metadata providing subject names and collection dates of samples from the -#' OTU table. 
-#' -#' @docType data -#' -#' @usage data(metadatasample) -#' -#' @format A data frame with 461 rows and 3 variables: -#' \describe{ -#' \item{sample}{sample ID corresponding to a variable name from \code{otusample}} -#' \item{subject}{subject name} -#' \item{collection_date}{date the sample was collected, in the format yyyy-mm-dd} -#' } -#' -#' @keywords datasets -"metadatasample" diff --git a/R/metadatasample_baboon-data.R b/R/metadatasample_baboon-data.R new file mode 100644 index 0000000..f0340db --- /dev/null +++ b/R/metadatasample_baboon-data.R @@ -0,0 +1,22 @@ +#' Metadata on samples from OTU table of wild baboon data +#' +#' Metadata providing subject names and collection dates of samples from the +#' OTU table. +#' +#' @docType data +#' +#' @usage data(metadatasample_baboon) +#' +#' @format A data frame with 276 rows and 7 variables: +#' \describe{ +#' \item{subject}{subject name} +#' \item{sample}{sample ID corresponding to a variable name from \code{otusample_baboon}} +#' \item{collection_date}{number of days into the study the sample was collected} +#' \item{sex}{if subject was male or female} +#' \item{season}{if sample was collected in the wet or dry season} +#' \item{rain_month_mm}{amount of rainfall in mm for month prior to sample collection} +#' \item{diet_PC1}{a measure of dietary composition for a sample} +#' } +#' +#' @keywords datasets +"metadatasample_baboon" diff --git a/R/metadatasample_diet-data.R b/R/metadatasample_diet-data.R new file mode 100644 index 0000000..bf644fc --- /dev/null +++ b/R/metadatasample_diet-data.R @@ -0,0 +1,19 @@ +#' Metadata on samples from OTU (or other lowest taxonomic level) table of human diet data +#' +#' Metadata providing subject names and collection dates of samples from the +#' OTU table. 
+#' +#' @docType data +#' +#' @usage data(metadatasample_diet) +#' +#' @format A data frame with 483 rows and 4 variables: +#' \describe{ +#' \item{subject}{subject name in character format} +#' \item{sample}{sample ID in character format corresponding to a variable name from \code{otusample_diet}} +#' \item{collection_date}{number of days in numeric format into the study the sample was collected} +#' \item{supplement}{metadata variable in character format indicating if subject was given EVOO or MCT as a dietary supplement on days 10-17} +#' } +#' +#' @keywords datasets +"metadatasample_diet" diff --git a/R/otusample-data.R b/R/otusample-data.R deleted file mode 100644 index dbc1d3a..0000000 --- a/R/otusample-data.R +++ /dev/null @@ -1,16 +0,0 @@ -#' Sample OTU table -#' -#' Sample OTU table with 6 subjects. The first variable contains OTU IDs. Each -#' subsequent column represents a unique sample collected at a specific time -#' from one of six subjects' microbiomes, as well as two blanks. Entries -#' represent number of reads per sample per OTU, out of a total of 30,000 reads -#' for each sample. -#' -#' @docType data -#' -#' @usage data(otusample) -#' -#' @format A data frame with 8814 rows and 1783 variables. -#' -#' @keywords datasets -"otusample" diff --git a/R/otusample_baboon-data.R b/R/otusample_baboon-data.R new file mode 100644 index 0000000..6551f2f --- /dev/null +++ b/R/otusample_baboon-data.R @@ -0,0 +1,14 @@ +#' Sample OTU table +#' +#' Sample OTU-format table with 6 subjects. The first variable contains OTU IDs. +#' Each subsequent column represents a unique sample collected at a specific time +#' from one of 6 subjects' microbiomes. Entries represent number of reads per sample per OTU. +#' +#' @docType data +#' +#' @usage data(otusample_baboon) +#' +#' @format A data frame with 2922 rows and 277 variables. 
+#' +#' @keywords datasets +"otusample_baboon" diff --git a/R/otusample_diet-data.R b/R/otusample_diet-data.R new file mode 100644 index 0000000..ce27229 --- /dev/null +++ b/R/otusample_diet-data.R @@ -0,0 +1,16 @@ +#' Sample OTU table +#' +#' Sample OTU-format table with 34 subjects. Sequences are from metagenomic data, not 16S, +#' so the first variable contains unique IDs assigned to each microbial sequence rather than OTUs. +#' Each subsequent column represents a unique sample collected at a specific time +#' from one of 34 subjects' microbiomes. Entries represent number of reads per sample +#' per microbial taxon. +#' +#' @docType data +#' +#' @usage data(otusample_diet) +#' +#' @format A data frame with 4583 rows and 484 variables. +#' +#' @keywords datasets +"otusample_diet" diff --git a/R/taxonomysample-data.R b/R/taxonomysample-data.R deleted file mode 100644 index 89af986..0000000 --- a/R/taxonomysample-data.R +++ /dev/null @@ -1,16 +0,0 @@ -#' Taxonomy information for OTUs -#' -#' Taxonomy information for each OTU listed in the sample OTU table. The first -#' variable contains OTU IDs, as listed in \code{otusample}, and subsequent -#' columns provide taxonomic clasification up to Genus, or the most specific -#' level possible for a given OTU. OTUs that are classified more broadly have -#' \code{NA} values for narrower taxonomic levels that do not apply. -#' -#' @docType data -#' -#' @usage data(taxonomysample) -#' -#' @format A data frame with 8814 rows and 7 variables. -#' -#' @keywords datasets -"taxonomysample" diff --git a/R/taxonomysample_baboon-data.R b/R/taxonomysample_baboon-data.R new file mode 100644 index 0000000..18c5fff --- /dev/null +++ b/R/taxonomysample_baboon-data.R @@ -0,0 +1,16 @@ +#' Taxonomy information for OTUs +#' +#' Taxonomy information for each OTU ID listed in the sample OTU table. 
The first +#' variable contains OTU IDs, as listed in \code{otusample_baboon}, and subsequent +#' columns provide taxonomic classification up to Genus, or the most specific +#' level possible for a given taxon. Taxon IDs that are classified more broadly have +#' \code{NA} values for narrower taxonomic levels that do not apply. +#' +#' @docType data +#' +#' @usage data(taxonomysample_baboon) +#' +#' @format A data frame with 2922 rows and 7 variables. +#' +#' @keywords datasets +"taxonomysample_baboon" diff --git a/R/taxonomysample_diet-data.R b/R/taxonomysample_diet-data.R new file mode 100644 index 0000000..5d5f3c8 --- /dev/null +++ b/R/taxonomysample_diet-data.R @@ -0,0 +1,16 @@ +#' Taxonomy information for OTUs +#' +#' Taxonomy information for each taxon ID listed in the sample OTU table. The first +#' variable contains taxon IDs, as listed in \code{otusample_diet}, and the subsequent +#' column provides taxonomic classification up to Genus, or the most specific +#' level possible for a given taxon. Taxon IDs that are classified more broadly have +#' \code{NA} values for narrower taxonomic levels that do not apply. +#' +#' @docType data +#' +#' @usage data(taxonomysample_diet) +#' +#' @format A data frame with 4583 rows and 2 variables. +#' +#' @keywords datasets +"taxonomysample_diet" diff --git a/README.md b/README.md new file mode 100644 index 0000000..e2bfe02 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# BiomeHorizon + +This package provides a toolset for visualizing longitudinal microbiome data with a horizon plot. A horizon plot provides a compact method to display change in the distribution of microbial taxa over time. This method is valuable for highlighting changes in individual taxa, but more importantly, emphasizes broad trends among groups of taxa. This package can also be used to visualize other types of longitudinal data. + +Documentation available in the package, plus our [online tutorial](https://blekhmanlab.github.io/biomehorizon/). 
+ +## Installation + +```r +install.packages("devtools") + +devtools::install_github("blekhmanlab/biomehorizon") +library(biomehorizon) +``` diff --git a/assets/pics/.DS_Store b/assets/pics/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/assets/pics/.DS_Store differ diff --git a/assets/pics/plot_basic.png b/assets/pics/plot_basic.png new file mode 100644 index 0000000..4cf5d59 Binary files /dev/null and b/assets/pics/plot_basic.png differ diff --git a/assets/pics/plot_bt.png b/assets/pics/plot_bt.png new file mode 100644 index 0000000..fc73dff Binary files /dev/null and b/assets/pics/plot_bt.png differ diff --git a/assets/pics/plot_bt_fixed.png b/assets/pics/plot_bt_fixed.png new file mode 100644 index 0000000..e62e048 Binary files /dev/null and b/assets/pics/plot_bt_fixed.png differ diff --git a/assets/pics/plot_bt_fixed_val10.png b/assets/pics/plot_bt_fixed_val10.png new file mode 100644 index 0000000..1bab42c Binary files /dev/null and b/assets/pics/plot_bt_fixed_val10.png differ diff --git a/assets/pics/plot_bt_fixed_val2.png b/assets/pics/plot_bt_fixed_val2.png new file mode 100644 index 0000000..ac43d53 Binary files /dev/null and b/assets/pics/plot_bt_fixed_val2.png differ diff --git a/assets/pics/plot_by_subject.png b/assets/pics/plot_by_subject.png new file mode 100644 index 0000000..b6d36e4 Binary files /dev/null and b/assets/pics/plot_by_subject.png differ diff --git a/assets/pics/plot_colbands.png b/assets/pics/plot_colbands.png new file mode 100644 index 0000000..5e8b113 Binary files /dev/null and b/assets/pics/plot_colbands.png differ diff --git a/assets/pics/plot_custom_labels.png b/assets/pics/plot_custom_labels.png new file mode 100644 index 0000000..d78f636 Binary files /dev/null and b/assets/pics/plot_custom_labels.png differ diff --git a/assets/pics/plot_customaes.png b/assets/pics/plot_customaes.png new file mode 100644 index 0000000..9723899 Binary files /dev/null and b/assets/pics/plot_customaes.png differ 
diff --git a/assets/pics/plot_horizonaes.png b/assets/pics/plot_horizonaes.png new file mode 100644 index 0000000..ae93868 Binary files /dev/null and b/assets/pics/plot_horizonaes.png differ diff --git a/assets/pics/plot_irregular_data.png b/assets/pics/plot_irregular_data.png new file mode 100644 index 0000000..47cb9a8 Binary files /dev/null and b/assets/pics/plot_irregular_data.png differ diff --git a/assets/pics/plot_manual_selection.png b/assets/pics/plot_manual_selection.png new file mode 100644 index 0000000..cdfee36 Binary files /dev/null and b/assets/pics/plot_manual_selection.png differ diff --git a/assets/pics/plot_max_gap.png b/assets/pics/plot_max_gap.png new file mode 100644 index 0000000..8393fb5 Binary files /dev/null and b/assets/pics/plot_max_gap.png differ diff --git a/assets/pics/plot_min_samples.png b/assets/pics/plot_min_samples.png new file mode 100644 index 0000000..5e7003a Binary files /dev/null and b/assets/pics/plot_min_samples.png differ diff --git a/assets/pics/plot_missing_data.png b/assets/pics/plot_missing_data.png new file mode 100644 index 0000000..bcdc5f3 Binary files /dev/null and b/assets/pics/plot_missing_data.png differ diff --git a/assets/pics/plot_nbands.png b/assets/pics/plot_nbands.png new file mode 100644 index 0000000..cebf445 Binary files /dev/null and b/assets/pics/plot_nbands.png differ diff --git a/assets/pics/plot_origin.png b/assets/pics/plot_origin.png new file mode 100644 index 0000000..9aae404 Binary files /dev/null and b/assets/pics/plot_origin.png differ diff --git a/assets/pics/plot_origin_bt_fixed.png b/assets/pics/plot_origin_bt_fixed.png new file mode 100644 index 0000000..fdb47aa Binary files /dev/null and b/assets/pics/plot_origin_bt_fixed.png differ diff --git a/assets/pics/plot_origin_fixed.png b/assets/pics/plot_origin_fixed.png new file mode 100644 index 0000000..56295dc Binary files /dev/null and b/assets/pics/plot_origin_fixed.png differ diff --git a/assets/pics/plot_rm_xlab_legend.png 
b/assets/pics/plot_rm_xlab_legend.png new file mode 100644 index 0000000..19894c1 Binary files /dev/null and b/assets/pics/plot_rm_xlab_legend.png differ diff --git a/assets/pics/plot_taxonomy_labels.png b/assets/pics/plot_taxonomy_labels.png new file mode 100644 index 0000000..6b461c7 Binary files /dev/null and b/assets/pics/plot_taxonomy_labels.png differ diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000..23f6dc8 Binary files /dev/null and b/data/.DS_Store differ diff --git a/data/metadatasample.rda b/data/metadatasample.rda deleted file mode 100644 index 647064f..0000000 Binary files a/data/metadatasample.rda and /dev/null differ diff --git a/data/metadatasample_baboon.rda b/data/metadatasample_baboon.rda new file mode 100644 index 0000000..76354c8 Binary files /dev/null and b/data/metadatasample_baboon.rda differ diff --git a/data/metadatasample_diet.rda b/data/metadatasample_diet.rda new file mode 100644 index 0000000..0627030 Binary files /dev/null and b/data/metadatasample_diet.rda differ diff --git a/data/otusample.rda b/data/otusample.rda deleted file mode 100644 index 6e6d58b..0000000 Binary files a/data/otusample.rda and /dev/null differ diff --git a/data/otusample_baboon.rda b/data/otusample_baboon.rda new file mode 100644 index 0000000..7f0ad70 Binary files /dev/null and b/data/otusample_baboon.rda differ diff --git a/data/otusample_diet.rda b/data/otusample_diet.rda new file mode 100644 index 0000000..87dc561 Binary files /dev/null and b/data/otusample_diet.rda differ diff --git a/data/taxonomysample.rda b/data/taxonomysample.rda deleted file mode 100644 index ad80a10..0000000 Binary files a/data/taxonomysample.rda and /dev/null differ diff --git a/data/taxonomysample_baboon.rda b/data/taxonomysample_baboon.rda new file mode 100644 index 0000000..f462422 Binary files /dev/null and b/data/taxonomysample_baboon.rda differ diff --git a/data/taxonomysample_diet.rda b/data/taxonomysample_diet.rda new file mode 100644 index 
0000000..745c040 Binary files /dev/null and b/data/taxonomysample_diet.rda differ diff --git a/man/.DS_Store b/man/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/man/.DS_Store differ diff --git a/man/horizonaes.Rd b/man/horizonaes.Rd index e9062be..b8e3b2b 100644 --- a/man/horizonaes.Rd +++ b/man/horizonaes.Rd @@ -4,10 +4,20 @@ \alias{horizonaes} \title{Add Custom Aesthetics to the Horizon Plot} \usage{ -horizonaes(title = NA, subtitle = NA, xlabel = NA, ylabel = NA, - showColorLegend = TRUE, showLegendLabels = TRUE, - legendPosition = "right", legendTitle = NA, showPlotLabels = TRUE, - col.bands = NA, col.outline = "#CCCCCC", col.border = "#CCCCCC") +horizonaes( + title = NA, + subtitle = NA, + xlabel = NA, + ylabel = NA, + showColorLegend = TRUE, + showLegendLabels = TRUE, + legendPosition = "right", + legendTitle = NA, + showPlotLabels = TRUE, + col.bands = NA, + col.outline = "#CCCCCC", + col.border = "#CCCCCC" +) } \arguments{ \item{title}{character. The text for the title.} @@ -74,11 +84,13 @@ This function provides an easy way to add the most common aesthetics to the horizon plot, but if you want to add other aesthetics not included in this function, you can do so by appending them to the horizon plot object using the \code{+} operator. e.g. 
to add a gray background in the plotting area:\cr -\code{horizonplot(prepanel(otusample, metadatasample, taxonomysample, -subj="subject_1")) + theme(panel.background = element_rect(fill="gray90"))} +\code{horizonplot(prepanel(otudata = otusample_diet, +metadata = metadatasample_diet, taxonomydata = taxonomysample_diet, +subj = "MCTs01")) + theme(panel.background = element_rect(fill="gray90"))} } \examples{ -plist <- prepanel(otusample, metadatasample, taxonomysample, subj = "subject_2") +plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +taxonomydata = taxonomysample_diet, subj = "MCTs01") # By default, the function is called with no arguments to use default aesthetics horizonplot(plist, horizonaes()) diff --git a/man/horizonplot.Rd b/man/horizonplot.Rd index e3c518e..0cba72e 100644 --- a/man/horizonplot.Rd +++ b/man/horizonplot.Rd @@ -102,17 +102,18 @@ A common problem faced in visualizing time series data is plotting data \examples{ # Basic plot form. By default, samples are plotted next to each other. -plist <- prepanel(otusample, metadatasample, taxonomysample, subj = "subject_4") +plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +taxonomydata = taxonomysample_diet, subj = "MCTs16") horizonplot(plist) # For irregularly spaced time series, you can "regularize" the data to create # an accurate timescale. -# Adjust data to regular time intervals each 100 days. This will interpolate -# new data points for each OTU at day = 1, 101, 201, etc. based on values +# Adjust data to regular time intervals each 1 day. This will interpolate +# new data points for each OTU at day = 1, 2, 3 etc. based on values # at previous and subsequent timepoints. 
-plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", - regularInterval = 100) +plist <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, + subj = "MCTs16", regularInterval = 1) horizonplot(plist) # If the data has large gaps of time without samples, interpolating data @@ -122,14 +123,15 @@ horizonplot(plist) # data will be regularized separately on both sides of the break in two # different facets. -# Set maximum time without samples to 200 days -plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", - regularInterval = 100, maxGap = 200) +# Set maximum time without samples to 75 days +plist <- prepanel(otudata = otusample_baboon, metadata = metadatasample_baboon, + subj = "Baboon_388", regularInterval = 25, maxGap = 75) horizonplot(plist) # Remove facets with less than 5 samples -plist <- prepanel(otusample, metadatasample, taxonomysample, subj="subject_4", - regularInterval = 100, maxGap = 200, minSamplesPerFacet = 5) +plist <- prepanel(otudata = otusample_baboon, metadata = metadatasample_baboon, + subj = "Baboon_388", regularInterval = 25, maxGap = 75, + minSamplesPerFacet = 5) horizonplot(plist) } diff --git a/man/metadatasample.Rd b/man/metadatasample.Rd deleted file mode 100644 index b260918..0000000 --- a/man/metadatasample.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metadatasample-data.R -\docType{data} -\name{metadatasample} -\alias{metadatasample} -\title{Metadata on samples from OTU table} -\format{A data frame with 461 rows and 3 variables: -\describe{ - \item{sample}{sample ID corresponding to a variable name from \code{otusample}} - \item{subject}{subject name} - \item{collection_date}{date the sample was collected, in the format yyyy-mm-dd} -}} -\usage{ -data(metadatasample) -} -\description{ -Metadata providing subject names and collection dates of samples from the -OTU table. 
-} -\keyword{datasets} diff --git a/man/metadatasample_baboon.Rd b/man/metadatasample_baboon.Rd new file mode 100644 index 0000000..21ad908 --- /dev/null +++ b/man/metadatasample_baboon.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadatasample_baboon-data.R +\docType{data} +\name{metadatasample_baboon} +\alias{metadatasample_baboon} +\title{Metadata on samples from OTU table of wild baboon data} +\format{ +A data frame with 276 rows and 7 variables: +\describe{ + \item{subject}{subject name} + \item{sample}{sample ID corresponding to a variable name from \code{otusample_baboon}} + \item{collection_date}{number of days into the study the sample was collected} + \item{sex}{if subject was male or female} + \item{season}{if sample was collected in the wet or dry season} + \item{rain_month_mm}{amount of rainfall in mm for month prior to sample collection} + \item{diet_PC1}{a measure of dietary composition for a sample} +} +} +\usage{ +data(metadatasample_baboon) +} +\description{ +Metadata providing subject names and collection dates of samples from the +OTU table. 
+} +\keyword{datasets} diff --git a/man/metadatasample_diet.Rd b/man/metadatasample_diet.Rd new file mode 100644 index 0000000..957f854 --- /dev/null +++ b/man/metadatasample_diet.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadatasample_diet-data.R +\docType{data} +\name{metadatasample_diet} +\alias{metadatasample_diet} +\title{Metadata on samples from OTU (or other lowest taxonomic level) table of human diet data} +\format{ +A data frame with 483 rows and 4 variables: +\describe{ + \item{subject}{subject name in character format} + \item{sample}{sample ID in character format corresponding to a variable name from \code{otusample_diet}} + \item{collection_date}{number of days in numeric format into the study the sample was collected} + \item{supplement}{metadata variable in character format indicating if subject was given EVOO or MCT as a dietary supplement on days 10-17} +} +} +\usage{ +data(metadatasample_diet) +} +\description{ +Metadata providing subject names and collection dates of samples from the +OTU table. +} +\keyword{datasets} diff --git a/man/otusample.Rd b/man/otusample.Rd deleted file mode 100644 index 276105d..0000000 --- a/man/otusample.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/otusample-data.R -\docType{data} -\name{otusample} -\alias{otusample} -\title{Sample OTU table} -\format{A data frame with 8814 rows and 1783 variables.} -\usage{ -data(otusample) -} -\description{ -Sample OTU table with 6 subjects. The first variable contains OTU IDs. Each -subsequent column represents a unique sample collected at a specific time -from one of six subjects' microbiomes, as well as two blanks. Entries -represent number of reads per sample per OTU, out of a total of 30,000 reads -for each sample. 
-} -\keyword{datasets} diff --git a/man/otusample_baboon.Rd b/man/otusample_baboon.Rd new file mode 100644 index 0000000..8c77327 --- /dev/null +++ b/man/otusample_baboon.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/otusample_baboon-data.R +\docType{data} +\name{otusample_baboon} +\alias{otusample_baboon} +\title{Sample OTU table} +\format{ +A data frame with 2922 rows and 277 variables. +} +\usage{ +data(otusample_baboon) +} +\description{ +Sample OTU-format table with 6 subjects. The first variable contains OTU IDs. +Each subsequent column represents a unique sample collected at a specific time +from one of 6 subjects' microbiomes. Entries represent number of reads per sample per OTU. +} +\keyword{datasets} diff --git a/man/otusample_diet.Rd b/man/otusample_diet.Rd new file mode 100644 index 0000000..e5e263c --- /dev/null +++ b/man/otusample_diet.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/otusample_diet-data.R +\docType{data} +\name{otusample_diet} +\alias{otusample_diet} +\title{Sample OTU table} +\format{ +A data frame with 4583 rows and 484 variables. +} +\usage{ +data(otusample_diet) +} +\description{ +Sample OTU-format table with 34 subjects. Sequences are from metagenomic data, not 16S, +so the first variable contains unique IDs assigned to each microbial sequence rather than OTUs. +Each subsequent column represents a unique sample collected at a specific time +from one of 34 subjects' microbiomes. Entries represent number of reads per sample +per microbial taxon. 
+} +\keyword{datasets} diff --git a/man/prepanel.Rd b/man/prepanel.Rd index b3a1294..ad7c608 100644 --- a/man/prepanel.Rd +++ b/man/prepanel.Rd @@ -4,13 +4,28 @@ \alias{prepanel} \title{Preliminary Data Cleaning and Preperation} \usage{ -prepanel(otudata, metadata = NA, taxonomydata = NA, - thresh_prevalence = 80, thresh_abundance = 0.5, - thresh_abundance_override = NA, thresh_NA = 5, - regularInterval = NA, maxGap = NA, minSamplesPerFacet = 2, - otulist = NA, subj = NA, singleVarOTU = NA, band.thickness = NA, - origin = NA, facetLabelsByTaxonomy = FALSE, customFacetLabels = NA, - interpolate_NA = TRUE, formatStep = FALSE, nbands = 4) +prepanel( + otudata, + metadata = NA, + taxonomydata = NA, + thresh_prevalence = 80, + thresh_abundance = 0.5, + thresh_abundance_override = NA, + thresh_NA = 5, + regularInterval = NA, + maxGap = NA, + minSamplesPerFacet = 2, + otulist = NA, + subj = NA, + singleVarOTU = NA, + band.thickness = NA, + origin = NA, + facetLabelsByTaxonomy = FALSE, + customFacetLabels = NA, + interpolate_NA = TRUE, + formatStep = FALSE, + nbands = 4 +) } \arguments{ \item{otudata}{Data frame representing OTU Table. Assumes first column @@ -207,46 +222,47 @@ timepoint across multiple individuals, rather than multiple OTUs or taxa. \examples{ # Pass just the OTU table to prepanel, and it will assume all samples belong # to the same subject. -prepanel(otusample) +prepanel(otudata = otusample_diet) # Supplement metadata and a subject name, and it will select samples from # just one subject (this is what you should do with more than one subject). -prepanel(otusample, metadatasample, subj="subject_1") +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01") # Pass taxonomydata to prepanel if you want to label facets by taxonomy # rather than by OTU ID.
-prepanel(otusample, metadatasample, taxonomysample, subj="subject_1", -facetLabelsByTaxonomy=TRUE) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +taxonomydata = taxonomysample_diet, subj="MCTs01", facetLabelsByTaxonomy=TRUE) # OTU filtering using both a prevalence and an abundance standard (default) -prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=75, -thresh_abundance=0.75) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +thresh_prevalence=75, thresh_abundance=0.75) # OTU filtering using just an abundance standard -prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=NA, -thresh_abundance=0.75) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +thresh_prevalence=NA, thresh_abundance=0.75) # If an OTU's average abundance reaches a high enough threshold, override # other standards and include it in analysis -prepanel(otusample, metadatasample, subj="subject_2", thresh_prevalence=90, -thresh_abundance=0.75, thresh_abundance_override=1.5) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +thresh_prevalence=90, thresh_abundance=0.75, thresh_abundance_override=1.5) # Filter OTUs where >2\% samples are NA values -prepanel(otusample, metadatasample, subj="subject_2", thresh_NA=2) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +thresh_NA=2) # You can also manually select OTUs by OTU ID -prepanel(otusample, metadatasample, subj="subject_2", -otulist=c("otu_1000","otu_1243","otu_1530","otu_6821","otu_7737")) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, subj="MCTs01", +otulist=c("taxon 1", "taxon 2", "taxon 10", "taxon 14")) # Manual selection can be used to specify the order OTUs will appear on # the horizon plot. For example, these two datasets have identical OTUs, but # they are ordered differently.
-params <- prepanel(otusample, metadatasample, subj="subject_1", -thresh_prevalence=95, thresh_abundance=1.5) +params <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +subj="MCTs01", thresh_prevalence=95, thresh_abundance=1.5, +otulist=c("taxon 1", "taxon 2", "taxon 10", "taxon 14")) params[[1]]$otuid -params <- prepanel(otusample, metadatasample, subj="subject_1", -otulist=c("otu_2526","otu_1530", "otu_7737", "otu_6821", "otu_3773", -"otu_2457", "otu_1243", "otu_2378")) +params <- prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +subj="MCTs01", otulist=c("taxon 10", "taxon 2", "taxon 1", "taxon 14")) params[[1]]$otuid # The origin and band.thickness variables can be set to either a numeric @@ -254,10 +270,11 @@ params[[1]]$otuid # on its sample values. # Use a fixed origin of 5\% for all OTU subpanels -prepanel(otusample, metadatasample, subj="subject_1", origin=5) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +subj="MCTs01", origin=5) # Evaluate a different origin for each OTU subpanel using a custom function -prepanel(otusample, metadatasample, subj="subject_1", -origin=function(y){mad(y, na.rm=TRUE)}) +prepanel(otudata = otusample_diet, metadata = metadatasample_diet, +subj="MCTs01", origin=function(y){mad(y, na.rm=TRUE)}) } diff --git a/man/taxonomysample.Rd b/man/taxonomysample.Rd deleted file mode 100644 index 937cf42..0000000 --- a/man/taxonomysample.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/taxonomysample-data.R -\docType{data} -\name{taxonomysample} -\alias{taxonomysample} -\title{Taxonomy information for OTUs} -\format{A data frame with 8814 rows and 7 variables.} -\usage{ -data(taxonomysample) -} -\description{ -Taxonomy information for each OTU listed in the sample OTU table.
The first -variable contains OTU IDs, as listed in \code{otusample}, and subsequent -columns provide taxonomic clasification up to Genus, or the most specific -level possible for a given OTU. OTUs that are classified more broadly have -\code{NA} values for narrower taxonomic levels that do not apply. -} -\keyword{datasets} diff --git a/man/taxonomysample_baboon.Rd b/man/taxonomysample_baboon.Rd new file mode 100644 index 0000000..aabfba5 --- /dev/null +++ b/man/taxonomysample_baboon.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/taxonomysample_baboon-data.R +\docType{data} +\name{taxonomysample_baboon} +\alias{taxonomysample_baboon} +\title{Taxonomy information for OTUs} +\format{ +A data frame with 2922 rows and 7 variables. +} +\usage{ +data(taxonomysample_baboon) +} +\description{ +Taxonomy information for each OTU ID listed in the sample OTU table. The first +variable contains OTU IDs, as listed in \code{otusample_baboon}, and subsequent +columns provide taxonomic classification up to Genus, or the most specific +level possible for a given taxon. Taxon IDs that are classified more broadly have +\code{NA} values for narrower taxonomic levels that do not apply. +} +\keyword{datasets} diff --git a/man/taxonomysample_diet.Rd b/man/taxonomysample_diet.Rd new file mode 100644 index 0000000..8b1b6cc --- /dev/null +++ b/man/taxonomysample_diet.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/taxonomysample_diet-data.R +\docType{data} +\name{taxonomysample_diet} +\alias{taxonomysample_diet} +\title{Taxonomy information for OTUs} +\format{ +A data frame with 4583 rows and 2 variables. +} +\usage{ +data(taxonomysample_diet) +} +\description{ +Taxonomy information for each taxon ID listed in the sample OTU table.
The first +variable contains taxon IDs, as listed in \code{otusample_diet}, and the subsequent +column provides taxonomic classification up to Genus, or the most specific +level possible for a given taxon. Taxon IDs that are classified more broadly have +\code{NA} values for narrower taxonomic levels that do not apply. +} +\keyword{datasets}