From 92de3ce25510a908134064cedb6d050f239f0def Mon Sep 17 00:00:00 2001
From: Ines Scheller <scheller@in.tum.de>
Date: Thu, 5 Oct 2023 17:57:16 +0200
Subject: [PATCH] functionality to test twopass mode for rho fit

---
 R/pvalsNzscore.R | 81 +++++++++++++++++++++++++++++++++++++++++++++++-
 R/updateRho.R    | 42 ++++++++++++++++++++++---
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/R/pvalsNzscore.R b/R/pvalsNzscore.R
index be256009..4d0394e5 100644
--- a/R/pvalsNzscore.R
+++ b/R/pvalsNzscore.R
@@ -44,7 +44,10 @@ calculateZscore <- function(fds, type=currentType(fds), logit=TRUE){
 #' @export
 calculatePvalues <- function(fds, type=currentType(fds),
                     implementation="PCA", BPPARAM=bpparam(),
-                    distributions=c("betabinomial"), capN=5*1e5){
+                    distributions=c("betabinomial"), capN=5*1e5, 
+                    twoPassMode=FALSE, twoPassFDRcutoff=1e-3, 
+                    twoPassRhoRange=c(-30, 30)){
+    distributions <- tolower(distributions)
     distributions <- match.arg(distributions, several.ok=TRUE,
             choices=c("betabinomial", "binomial", "normal"))
     
@@ -109,6 +112,13 @@ calculatePvalues <- function(fds, type=currentType(fds),
         pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5)
         pVals(fds, dist="BetaBinomial", level="junction", 
                 withDimnames=FALSE) <- pvals
+        if(isTRUE(twoPassMode)){
+            fds <- calculateTwoPassPvalues(fds, type=type, 
+                                twoPassFDRcutoff = twoPassFDRcutoff,
+                                implementation=implementation, 
+                                BPPARAM=BPPARAM, distribution="BetaBinomial", 
+                                rhoRange=twoPassRhoRange)
+        }
     }
     
     if("binomial" %in% distributions){
@@ -120,6 +130,13 @@ calculatePvalues <- function(fds, type=currentType(fds),
         pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5)
         pVals(fds, dist="Binomial", level="junction", 
                 withDimnames=FALSE) <- pvals
+        if(isTRUE(twoPassMode)){
+            fds <- calculateTwoPassPvalues(fds, type=type, 
+                                twoPassFDRcutoff = twoPassFDRcutoff,
+                                implementation=implementation, 
+                                BPPARAM=BPPARAM, distribution="Binomial", 
+                                rhoRange=twoPassRhoRange)
+        }
     }
     
     if("normal" %in% distributions){
@@ -132,6 +149,14 @@ calculatePvalues <- function(fds, type=currentType(fds),
         pvals <- 2 * pmin(pval, 1 - pval, 0.5)
         pVals(fds, dist="Normal", level="junction", 
                 withDimnames=FALSE) <- pvals
+        if(isTRUE(twoPassMode)){
+            fds <- calculateTwoPassPvalues(fds, type=type, 
+                                twoPassFDRcutoff = twoPassFDRcutoff,
+                                implementation=implementation, 
+                                BPPARAM=BPPARAM, distribution="Normal", 
+                                rhoRange=twoPassRhoRange)
+        }
+        
     }
     
     fds
@@ -555,3 +580,57 @@ calculatePadjValuesOnSubset <- function(fds, genesToTest, subsetName,
     return(fds)
 }
 
+calculateTwoPassPvalues <- function(fds, type=currentType(fds), 
+                            twoPassFDRcutoff = 1e-3, implementation="PCA", 
+                            distribution="BetaBinomial", rhoRange=c(-30, 30),
+                            BPPARAM=bpparam()){
+    
+    distribution <- match.arg(distribution, several.ok=FALSE, 
+                                choices=c("BetaBinomial", "Binomial", "Normal"))
+    
+    # get FDR across all samples for each intron
+    p <- pVals(fds, type=type, level="junction", dist=distribution)
+    fdrAcrossSamples <- t(apply(p, 1, p.adjust, method="BY"))
+    
+    # get samples which should be excluded from rho fit
+    exMat <- fdrAcrossSamples <= twoPassFDRcutoff
+    
+    # get introns with more than 1 sample to exclude from rho fit
+    exSum <- rowSums(exMat)
+    idxToReestimate <- which(exSum > 0)
+    
+    if(length(idxToReestimate) == 0){
+        warning("There were no introns for which the two-pass mode could ", 
+                "be applied.")
+        return(fds)
+    }
+    
+    # re-estimate rho
+    fdsSub <- fds[idxToReestimate,]
+    dontWriteHDF5(fdsSub) <- TRUE
+    yAll <- predictY(fds, noiseAlpha=currentNoiseAlpha(fds)) # on the full fds
+    message(date(), ": Updating rho values in two-pass mode ...")
+    fdsSub <- updateRhoTwoPass(fdsSub, exclusionMat=exMat[idxToReestimate,], 
+                            y=yAll[idxToReestimate,], type=type, 
+                            rhoRange=rhoRange, verbose=TRUE, BPPARAM=BPPARAM)
+    message(date(), ": Finished two-pass mode update of rho values")
+    rhoTwoPass <- rho(fdsSub, type=type)
+    
+    # re-assign updated rho values to fds object
+    rho <- rho(fds, type=type)
+    rho[idxToReestimate] <- rhoTwoPass
+    rho(fds, type=type) <- rho
+    
+    # re-compute nominal junction-level pvalues and assign to fds object
+    message(date(), ": Re-calculating p-values after rho update ...")
+    fdsSub <- calculatePvalues(fds=fdsSub, type=type, 
+                                implementation=implementation, 
+                                distributions=distribution, BPPARAM=BPPARAM,
+                                twoPassMode=FALSE)
+    p[idxToReestimate,] <- pVals(fdsSub, type=type, level="junction", 
+                                    dist=distribution)
+    pVals(fds, type=type, level="junction", dist=distribution) <- p
+    
+    return(fds)
+}
+
diff --git a/R/updateRho.R b/R/updateRho.R
index 628b21fd..2b3d1a44 100644
--- a/R/updateRho.R
+++ b/R/updateRho.R
@@ -27,10 +27,15 @@ updateRho <- function(fds, type, rhoRange, BPPARAM, verbose){
     return(fds)
 }
 
-estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4){
-    ki <- k[idx,]
-    ni <- n[idx,]
-    yi <- y[idx,]
+estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4, 
+                   exclusionMat=NULL){
+    if(is.null(exclusionMat)){
+        exclusionMat <- matrix(FALSE, ncol=ncol(k), nrow=nrow(k))
+    }
+    
+    ki <- k[idx,!exclusionMat[idx,]]
+    ni <- n[idx,!exclusionMat[idx,]]
+    yi <- y[idx,!exclusionMat[idx,]]
     
     # est <- optimize(f=nll, interval=rhoRange, yi=yi, ki=ki, ni=ni, 
     #                 maximum=FALSE, tol=0.0000001)
@@ -123,3 +128,32 @@ methodOfMomentsRho <- function(k, n, rhoRange=c(1e-5, 1 - 1e-5)){
     
     return(rho)
 }
+
+#'
+#' Update rho step for autoencoder (as second pass in two-pass mode excluding
+#' outlier samples)
+#'
+#' @noRd
+updateRhoTwoPass <- function(fds, exclusionMat, type, y, rhoRange, BPPARAM, verbose){
+    
+    k <- K(fds)
+    n <- N(fds)
+    
+    # fitparameters <- bplapply(seq_len(nrow(k)), estRho, nll=truncNLL_rho,
+    #         k=k, n=n, y=y, rhoRange=rhoRange, BPPARAM=BPPARAM)
+    fitparameters <- bplapply(seq_len(nrow(k)), estRho, 
+                              nll=fullNLLRho_penalized,
+                              k=k, n=n, y=y, rhoRange=rhoRange, lambda=1e-4,
+                              BPPARAM=BPPARAM, exclusionMat=exclusionMat)
+    
+    rho(fds) <- plogis(vapply(fitparameters, "[[", 
+                              double(1), "minimum"))
+    
+    if(isTRUE(verbose)){
+        stxt <- capture.output(summary(rho(fds)))
+        message(date(), ": rho fit:\n\t", paste(stxt, collapse="\n\t"))
+    }
+    
+    validObject(fds)
+    return(fds)
+}