From 92de3ce25510a908134064cedb6d050f239f0def Mon Sep 17 00:00:00 2001 From: Ines Scheller Date: Thu, 5 Oct 2023 17:57:16 +0200 Subject: [PATCH] functionality to test twopass mode for rho fit --- R/pvalsNzscore.R | 81 +++++++++++++++++++++++++++++++++++++++++++++++- R/updateRho.R | 42 ++++++++++++++++++++++--- 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/R/pvalsNzscore.R b/R/pvalsNzscore.R index be256009..4d0394e5 100644 --- a/R/pvalsNzscore.R +++ b/R/pvalsNzscore.R @@ -44,7 +44,10 @@ calculateZscore <- function(fds, type=currentType(fds), logit=TRUE){ #' @export calculatePvalues <- function(fds, type=currentType(fds), implementation="PCA", BPPARAM=bpparam(), - distributions=c("betabinomial"), capN=5*1e5){ + distributions=c("betabinomial"), capN=5*1e5, + twoPassMode=FALSE, twoPassFDRcutoff=1e-3, + twoPassRhoRange=c(-30, 30)){ + distributions <- tolower(distributions) distributions <- match.arg(distributions, several.ok=TRUE, choices=c("betabinomial", "binomial", "normal")) @@ -109,6 +112,13 @@ calculatePvalues <- function(fds, type=currentType(fds), pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5) pVals(fds, dist="BetaBinomial", level="junction", withDimnames=FALSE) <- pvals + if(isTRUE(twoPassMode)){ + fds <- calculateTwoPassPvalues(fds, type=type, + twoPassFDRcutoff = twoPassFDRcutoff, + implementation=implementation, + BPPARAM=BPPARAM, distribution="BetaBinomial", + rhoRange=twoPassRhoRange) + } } if("binomial" %in% distributions){ @@ -120,6 +130,13 @@ calculatePvalues <- function(fds, type=currentType(fds), pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5) pVals(fds, dist="Binomial", level="junction", withDimnames=FALSE) <- pvals + if(isTRUE(twoPassMode)){ + fds <- calculateTwoPassPvalues(fds, type=type, + twoPassFDRcutoff = twoPassFDRcutoff, + implementation=implementation, + BPPARAM=BPPARAM, distribution="Binomial", + rhoRange=twoPassRhoRange) + } } if("normal" %in% distributions){ @@ -132,6 +149,14 @@ calculatePvalues <- function(fds, type=currentType(fds), pvals <- 2 * pmin(pval, 1 - pval, 0.5) pVals(fds, dist="Normal", level="junction", withDimnames=FALSE) <- pvals + if(isTRUE(twoPassMode)){ + fds <- calculateTwoPassPvalues(fds, type=type, + twoPassFDRcutoff = twoPassFDRcutoff, + implementation=implementation, + BPPARAM=BPPARAM, distribution="Normal", + rhoRange=twoPassRhoRange) + } + } fds @@ -555,3 +580,57 @@ calculatePadjValuesOnSubset <- function(fds, genesToTest, subsetName, return(fds) } +calculateTwoPassPvalues <- function(fds, type=currentType(fds), + twoPassFDRcutoff = 1e-3, implementation="PCA", + distribution="BetaBinomial", rhoRange=c(-30, 30), + BPPARAM=bpparam()){ + + distribution <- match.arg(distribution, several.ok=FALSE, + choices=c("BetaBinomial", "Binomial", "Normal")) + + # get FDR across all samples for each intron + p <- pVals(fds, type=type, level="junction", dist=distribution) + fdrAcrossSamples <- t(apply(p, 1, p.adjust, method="BY")) + + # get samples which should be excluded from rho fit + exMat <- fdrAcrossSamples <= twoPassFDRcutoff + + # get introns with more than 1 sample to exclude from rho fit + exSum <- rowSums(exMat) + idxToReestimate <- which(exSum > 0) + + if(length(idxToReestimate) == 0){ + warning("There were no introns for which the two-pass mode could ", + "be applied.") + return(fds) + } + + # re-estimate rho + fdsSub <- fds[idxToReestimate,] + dontWriteHDF5(fdsSub) <- TRUE + yAll <- predictY(fds, noiseAlpha=currentNoiseAlpha(fds)) # on the full fds + message(date(), ": Updating rho values in two-pass mode ...") + fdsSub <- updateRhoTwoPass(fdsSub, exclusionMat=exMat[idxToReestimate,], + y=yAll[idxToReestimate,], type=type, + rhoRange=rhoRange, verbose=TRUE, BPPARAM=BPPARAM) + message(date(), ": Finished two-pass mode update of rho values") + rhoTwoPass <- rho(fdsSub, type=type) + + # re-assign updated rho values to fds object + rho <- rho(fds, type=type) + rho[idxToReestimate] <- rhoTwoPass + rho(fds, type=type) <- rho + + # re-compute nominal junction-level pvalues and assign to fds object + message(date(), ": Re-calculating p-values after rho update ...") + fdsSub <- calculatePvalues(fds=fdsSub, type=type, + implementation=implementation, + distributions=distribution, BPPARAM=BPPARAM, + twoPassMode=FALSE) + p[idxToReestimate,] <- pVals(fdsSub, type=type, level="junction", + dist=distribution) + pVals(fds, type=type, level="junction", dist=distribution) <- p + + return(fds) +} + diff --git a/R/updateRho.R b/R/updateRho.R index 628b21fd..2b3d1a44 100644 --- a/R/updateRho.R +++ b/R/updateRho.R @@ -27,10 +27,15 @@ updateRho <- function(fds, type, rhoRange, BPPARAM, verbose){ return(fds) } -estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4){ - ki <- k[idx,] - ni <- n[idx,] - yi <- y[idx,] +estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4, + exclusionMat=NULL){ + if(is.null(exclusionMat)){ + exclusionMat <- matrix(FALSE, ncol=ncol(k), nrow=nrow(k)) + } + + ki <- k[idx,!exclusionMat[idx,]] + ni <- n[idx,!exclusionMat[idx,]] + yi <- y[idx,!exclusionMat[idx,]] # est <- optimize(f=nll, interval=rhoRange, yi=yi, ki=ki, ni=ni, # maximum=FALSE, tol=0.0000001) @@ -123,3 +128,32 @@ methodOfMomentsRho <- function(k, n, rhoRange=c(1e-5, 1 - 1e-5)){ return(rho) } + +#' +#' Update rho step for autoencoder (as second pass in two-pass mode excluding +#' outlier samples) +#' +#' @noRd +updateRhoTwoPass <- function(fds, exclusionMat, type, y, rhoRange, BPPARAM, verbose){ + + k <- K(fds) + n <- N(fds) + + # fitparameters <- bplapply(seq_len(nrow(k)), estRho, nll=truncNLL_rho, + # k=k, n=n, y=y, rhoRange=rhoRange, BPPARAM=BPPARAM) + fitparameters <- bplapply(seq_len(nrow(k)), estRho, + nll=fullNLLRho_penalized, + k=k, n=n, y=y, rhoRange=rhoRange, lambda=1e-4, + BPPARAM=BPPARAM, exclusionMat=exclusionMat) + + rho(fds) <- plogis(vapply(fitparameters, "[[", + double(1), "minimum")) + + if(isTRUE(verbose)){ + stxt <- capture.output(summary(rho(fds))) + message(date(), ": rho fit:\n\t", paste(stxt, collapse="\n\t")) + } + + validObject(fds) + return(fds) +}