Skip to content

Commit

Permalink
v0.99.33 -- enable merge_rse_metrics() to handle ERCCsumLogErr vi…
Browse files Browse the repository at this point in the history
…a an approximation.

Details of the approximation were discussed at https://jhu-genomics.slack.com/archives/C018ZDB8QLU/p1710454284251579 as part of the `qsvaR` project.

Resolve #3.
  • Loading branch information
lcolladotor committed Mar 26, 2024
1 parent 3d7635c commit 98d4ebf
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 6 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: jaffelab
Title: Commonly used functions by the Jaffe lab
Version: 0.99.32
Date: 2021-12-13
Version: 0.99.33
Date: 2024-03-26
Authors@R: c(person("Leonardo", "Collado-Torres",
role = c("aut", "cre"), email = "[email protected]",
comment = c(ORCID = "0000-0003-2140-308X")),
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# jaffelab 0.99.33

* `merge_rse_metrics()` can now also handle `ERCCsumLogErr`. Note that its an
approximation given properties of logarithms. To compute the actual correct
values, you would need access to the original output ERCC quantification output
files.

# jaffelab 0.99.32

* Added the `skipLines` argument to `junctionCount` to resolve
Expand Down
74 changes: 70 additions & 4 deletions R/merge_rse_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
#' @param rse A [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class]
#' object with 'concordMapRate', 'overallMapRate', 'mitoRate',
#' 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped',
#' 'mitoMapped', 'totalMapped' as either NumericList() or IntegerList() objects
#' in the colData() columns.
#' 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' as either NumericList() or
#' IntegerList() objects in the colData() columns.
#'
#' @return A [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class]
#' with merged columns for 'concordMapRate', 'overallMapRate', 'mitoRate',
#' 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped',
#' 'mitoMapped', 'totalMapped'.
#' 'mitoMapped', 'totalMapped', 'ERCCsumLogErr'
#'
#' @export
#' @author Leonardo Collado-Torres, Andrew E Jaffe
Expand Down Expand Up @@ -52,7 +52,8 @@
#' numReads = rand_gen(1e6 + 6e5),
#' numUnmapped = rand_gen(5e5),
#' mitoMapped = rand_gen(1e5),
#' totalMapped = rand_gen(1e6 + 1e5)
#' totalMapped = rand_gen(1e6 + 1e5),
#' ERCCsumLogErr = rand_gen(-28.5, 4, 6)
#' )
#' rse <- SummarizedExperiment(
#' assays = SimpleList(counts = counts),
Expand All @@ -61,6 +62,41 @@
#' colData(rse)
#' rse_merged <- merge_rse_metrics(rse)
#' colData(rse_merged)
#'
#' ## Use the BSP2 real data
#' local_bsp2 <- tempfile("BSP2_rse_gene.Rdata")
#' download.file(
#' "https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_gene_unfiltered.Rdata",
#' destfile = local_bsp2,
#' mode = "wb"
#' )
#' load(local_bsp2, verbose = TRUE)
#' # lobstr::obj_size(rse_gene)
#' # 872.12 MB
#' bsp2_rse_merged <- merge_rse_metrics(rse_gene)
#' colData(bsp2_rse_merged)
#'
#' ## Compare the ERCCsumLogErr approximation against the mean of the
#' ## error values. Note that you would actually have to load the actual
#' ## ERCC quantification output files in order to compute the correct
#' ## ERCCsumLogErr values.
#' bsp2_rse_merged$mean_ERCCsumLogErr <-
#' sapply(rse_gene$ERCCsumLogErr, mean)
#' with(
#' colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
#' plot(
#' mean_ERCCsumLogErr,
#' ERCCsumLogErr,
#' ylab = "Approximate ERCCsumLogErr"
#' )
#' )
#' abline(a = 0, b = 1, col = "red")
#' with(
#' colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
#' summary(mean_ERCCsumLogErr - ERCCsumLogErr)
#' )
#'

merge_rse_metrics <- function(rse) {
stopifnot(is(rse, "RangedSummarizedExperiment"))
stopifnot(
Expand Down Expand Up @@ -101,5 +137,35 @@ merge_rse_metrics <- function(rse) {
rse$numUnmapped <- sapply(rse$numUnmapped, sum)
rse$mitoMapped <- sapply(rse$mitoMapped, sum)
rse$totalMapped <- sapply(rse$totalMapped, sum)

if("ERCCsumLogErr" %in% colnames(colData(rse))) {
message(Sys.time(), " attempting to approximate ERCCsumLogErr.")
stopifnot(is(rse$ERCCsumLogErr, "NumericList"))

# ## Read in the ERCC expected concentration
# spikeIns <- read.delim("https://raw.githubusercontent.com/LieberInstitute/SPEAQeasy/master/Annotation/ERCC/ercc_actual_conc.txt",
# as.is = TRUE, row.names = 2
# )
#
# ## Identify the number of ERCC sequences
# n_erccs <- nrow(spikeIns)
#
# ## Calculate the ERCC expected concentration
# ercc_expected_conc <- sum(log2(10 * spikeIns[, "concentration.in.Mix.1..attomoles.ul."] + 1))
# dput(ercc_expected_conc)
# dput(n_erccs)
#
## Use pre-computed values
ercc_expected_conc <- 643.072778500804
n_erccs <- 92L

## Then compute the approximate result starting from a NumericList input
rse$ERCCsumLogErr <- sapply(rse$ERCCsumLogErr, function(sumlogerr) {
log2(mean((2 ^ (
sumlogerr + ercc_expected_conc
)) ^ (1 / n_erccs)) ^ n_erccs) - ercc_expected_conc
})
}

return(rse)
}

0 comments on commit 98d4ebf

Please sign in to comment.