From 98d4ebf8be953e5b7326ae04e2457f96dafc404a Mon Sep 17 00:00:00 2001 From: lcolladotor Date: Tue, 26 Mar 2024 13:33:53 -0400 Subject: [PATCH] v0.99.33 -- enable `merge_rse_metrics()` to handle `ERCCsumLogErr` via an approximation. Details of the approximation were discussed at https://jhu-genomics.slack.com/archives/C018ZDB8QLU/p1710454284251579 as part of the `qsvaR` project. Resolve #3. --- DESCRIPTION | 4 +-- NEWS.md | 7 ++++ R/merge_rse_metrics.R | 74 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 38de78b..ec743b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: jaffelab Title: Commonly used functions by the Jaffe lab -Version: 0.99.32 -Date: 2021-12-13 +Version: 0.99.33 +Date: 2024-03-26 Authors@R: c(person("Leonardo", "Collado-Torres", role = c("aut", "cre"), email = "lcolladotor@gmail.com", comment = c(ORCID = "0000-0003-2140-308X")), diff --git a/NEWS.md b/NEWS.md index b072722..69a9886 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# jaffelab 0.99.33 + +* `merge_rse_metrics()` can now also handle `ERCCsumLogErr`. Note that its an +approximation given properties of logarithms. To compute the actual correct +values, you would need access to the original output ERCC quantification output +files. + # jaffelab 0.99.32 * Added the `skipLines` argument to `junctionCount` to resolve diff --git a/R/merge_rse_metrics.R b/R/merge_rse_metrics.R index c18c920..859c9a4 100644 --- a/R/merge_rse_metrics.R +++ b/R/merge_rse_metrics.R @@ -8,13 +8,13 @@ #' @param rse A [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class] #' object with 'concordMapRate', 'overallMapRate', 'mitoRate', #' 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', -#' 'mitoMapped', 'totalMapped' as either NumericList() or IntegerList() objects -#' in the colData() columns. +#' 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' as either NumericList() or +#' IntegerList() objects in the colData() columns. #' #' @return A [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class] #' with merged columns for 'concordMapRate', 'overallMapRate', 'mitoRate', #' 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', -#' 'mitoMapped', 'totalMapped'. +#' 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' #' #' @export #' @author Leonardo Collado-Torres, Andrew E Jaffe @@ -52,7 +52,8 @@ #' numReads = rand_gen(1e6 + 6e5), #' numUnmapped = rand_gen(5e5), #' mitoMapped = rand_gen(1e5), -#' totalMapped = rand_gen(1e6 + 1e5) +#' totalMapped = rand_gen(1e6 + 1e5), +#' ERCCsumLogErr = rand_gen(-28.5, 4, 6) #' ) #' rse <- SummarizedExperiment( #' assays = SimpleList(counts = counts), @@ -61,6 +62,41 @@ #' colData(rse) #' rse_merged <- merge_rse_metrics(rse) #' colData(rse_merged) +#' +#' ## Use the BSP2 real data +#' local_bsp2 <- tempfile("BSP2_rse_gene.Rdata") +#' download.file( +#' "https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_gene_unfiltered.Rdata", +#' destfile = local_bsp2, +#' mode = "wb" +#' ) +#' load(local_bsp2, verbose = TRUE) +#' # lobstr::obj_size(rse_gene) +#' # 872.12 MB +#' bsp2_rse_merged <- merge_rse_metrics(rse_gene) +#' colData(bsp2_rse_merged) +#' +#' ## Compare the ERCCsumLogErr approximation against the mean of the +#' ## error values. Note that you would actually have to load the actual +#' ## ERCC quantification output files in order to compute the correct +#' ## ERCCsumLogErr values. +#' bsp2_rse_merged$mean_ERCCsumLogErr <- +#' sapply(rse_gene$ERCCsumLogErr, mean) +#' with( +#' colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ], +#' plot( +#' mean_ERCCsumLogErr, +#' ERCCsumLogErr, +#' ylab = "Approximate ERCCsumLogErr" +#' ) +#' ) +#' abline(a = 0, b = 1, col = "red") +#' with( +#' colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ], +#' summary(mean_ERCCsumLogErr - ERCCsumLogErr) +#' ) +#' + merge_rse_metrics <- function(rse) { stopifnot(is(rse, "RangedSummarizedExperiment")) stopifnot( @@ -101,5 +137,35 @@ merge_rse_metrics <- function(rse) { rse$numUnmapped <- sapply(rse$numUnmapped, sum) rse$mitoMapped <- sapply(rse$mitoMapped, sum) rse$totalMapped <- sapply(rse$totalMapped, sum) + + if("ERCCsumLogErr" %in% colnames(colData(rse))) { + message(Sys.time(), " attempting to approximate ERCCsumLogErr.") + stopifnot(is(rse$ERCCsumLogErr, "NumericList")) + + # ## Read in the ERCC expected concentration + # spikeIns <- read.delim("https://raw.githubusercontent.com/LieberInstitute/SPEAQeasy/master/Annotation/ERCC/ercc_actual_conc.txt", + # as.is = TRUE, row.names = 2 + # ) + # + # ## Identify the number of ERCC sequences + # n_erccs <- nrow(spikeIns) + # + # ## Calculate the ERCC expected concentration + # ercc_expected_conc <- sum(log2(10 * spikeIns[, "concentration.in.Mix.1..attomoles.ul."] + 1)) + # dput(ercc_expected_conc) + # dput(n_erccs) + # + ## Use pre-computed values + ercc_expected_conc <- 643.072778500804 + n_erccs <- 92L + + ## Then compute the approximate result starting from a NumericList input + rse$ERCCsumLogErr <- sapply(rse$ERCCsumLogErr, function(sumlogerr) { + log2(mean((2 ^ ( + sumlogerr + ercc_expected_conc + )) ^ (1 / n_erccs)) ^ n_erccs) - ercc_expected_conc + }) + } + return(rse) }