Skip to content

Commit

Permalink
functionality to test twopass mode for rho fit
Browse files Browse the repository at this point in the history
  • Loading branch information
ischeller committed Oct 5, 2023
1 parent deb065e commit 92de3ce
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 5 deletions.
81 changes: 80 additions & 1 deletion R/pvalsNzscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ calculateZscore <- function(fds, type=currentType(fds), logit=TRUE){
#' @export
calculatePvalues <- function(fds, type=currentType(fds),
implementation="PCA", BPPARAM=bpparam(),
distributions=c("betabinomial"), capN=5*1e5){
distributions=c("betabinomial"), capN=5*1e5,
twoPassMode=FALSE, twoPassFDRcutoff=1e-3,
twoPassRhoRange=c(-30, 30)){
distributions <- tolower(distributions)
distributions <- match.arg(distributions, several.ok=TRUE,
choices=c("betabinomial", "binomial", "normal"))

Expand Down Expand Up @@ -109,6 +112,13 @@ calculatePvalues <- function(fds, type=currentType(fds),
pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5)
pVals(fds, dist="BetaBinomial", level="junction",
withDimnames=FALSE) <- pvals
if(isTRUE(twoPassMode)){
fds <- calculateTwoPassPvalues(fds, type=type,
twoPassFDRcutoff = twoPassFDRcutoff,
implementation=implementation,
BPPARAM=BPPARAM, distribution="BetaBinomial",
rhoRange=twoPassRhoRange)
}
}

if("binomial" %in% distributions){
Expand All @@ -120,6 +130,13 @@ calculatePvalues <- function(fds, type=currentType(fds),
pvals <- 2 * pmin(pval, 1 - pval + dval, 0.5)
pVals(fds, dist="Binomial", level="junction",
withDimnames=FALSE) <- pvals
if(isTRUE(twoPassMode)){
fds <- calculateTwoPassPvalues(fds, type=type,
twoPassFDRcutoff = twoPassFDRcutoff,
implementation=implementation,
BPPARAM=BPPARAM, distribution="Binomial",
rhoRange=twoPassRhoRange)
}
}

if("normal" %in% distributions){
Expand All @@ -132,6 +149,14 @@ calculatePvalues <- function(fds, type=currentType(fds),
pvals <- 2 * pmin(pval, 1 - pval, 0.5)
pVals(fds, dist="Normal", level="junction",
withDimnames=FALSE) <- pvals
if(isTRUE(twoPassMode)){
fds <- calculateTwoPassPvalues(fds, type=type,
twoPassFDRcutoff = twoPassFDRcutoff,
implementation=implementation,
BPPARAM=BPPARAM, distribution="Normal",
rhoRange=twoPassRhoRange)
}

}

fds
Expand Down Expand Up @@ -555,3 +580,57 @@ calculatePadjValuesOnSubset <- function(fds, genesToTest, subsetName,
return(fds)
}

calculateTwoPassPvalues <- function(fds, type=currentType(fds),
twoPassFDRcutoff = 1e-3, implementation="PCA",
distribution="BetaBinomial", rhoRange=c(-30, 30),
BPPARAM=bpparam()){

distribution <- match.arg(distribution, several.ok=FALSE,
choices=c("BetaBinomial", "Binomial", "Normal"))

# get FDR across all samples for each intron
p <- pVals(fds, type=type, level="junction", dist=distribution)
fdrAcrossSamples <- t(apply(p, 1, p.adjust, method="BY"))

# get samples which should be excluded from rho fit
exMat <- fdrAcrossSamples <= twoPassFDRcutoff

# get introns with more than 1 sample to exclude from rho fit
exSum <- rowSums(exMat)
idxToReestimate <- which(exSum > 0)

if(length(idxToReestimate) == 0){
warning("There were no introns for which the two-pass mode could ",
"be applied.")
return(fds)
}

# re-estimate rho
fdsSub <- fds[idxToReestimate,]
dontWriteHDF5(fdsSub) <- TRUE
yAll <- predictY(fds, noiseAlpha=currentNoiseAlpha(fds)) # on the full fds
message(date(), ": Updating rho values in two-pass mode ...")
fdsSub <- updateRhoTwoPass(fdsSub, exclusionMat=exMat[idxToReestimate,],
y=yAll[idxToReestimate,], type=type,
rhoRange=rhoRange, verbose=TRUE, BPPARAM=BPPARAM)
message(date(), ": Finished two-pass mode update of rho values")
rhoTwoPass <- rho(fdsSub, type=type)

# re-assign updated rho values to fds object
rho <- rho(fds, type=type)
rho[idxToReestimate] <- rhoTwoPass
rho(fds, type=type) <- rho

# re-compute nominal junction-level pvalues and assign to fds object
message(date(), ": Re-calculating p-values after rho update ...")
fdsSub <- calculatePvalues(fds=fdsSub, type=type,
implementation=implementation,
distributions=distribution, BPPARAM=BPPARAM,
twoPassMode=FALSE)
p[idxToReestimate,] <- pVals(fdsSub, type=type, level="junction",
dist=distribution)
pVals(fds, type=type, level="junction", dist=distribution) <- p

return(fds)
}

42 changes: 38 additions & 4 deletions R/updateRho.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,15 @@ updateRho <- function(fds, type, rhoRange, BPPARAM, verbose){
return(fds)
}

estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4){
ki <- k[idx,]
ni <- n[idx,]
yi <- y[idx,]
estRho <- function(idx, k, n, y, rhoRange, nll, control=list(), lambda=1e-4,
exclusionMat=NULL){
if(is.null(exclusionMat)){
exclusionMat <- matrix(FALSE, ncol=ncol(k), nrow=nrow(k))
}

ki <- k[idx,!exclusionMat[idx,]]
ni <- n[idx,!exclusionMat[idx,]]
yi <- y[idx,!exclusionMat[idx,]]

# est <- optimize(f=nll, interval=rhoRange, yi=yi, ki=ki, ni=ni,
# maximum=FALSE, tol=0.0000001)
Expand Down Expand Up @@ -123,3 +128,32 @@ methodOfMomentsRho <- function(k, n, rhoRange=c(1e-5, 1 - 1e-5)){

return(rho)
}

#'
#' Update rho step for autoencoder (as second pass in two-pass mode excluding
#' outlier samples)
#'
#' @noRd
updateRhoTwoPass <- function(fds, exclusionMat, type, y, rhoRange, BPPARAM, verbose){

k <- K(fds)
n <- N(fds)

# fitparameters <- bplapply(seq_len(nrow(k)), estRho, nll=truncNLL_rho,
# k=k, n=n, y=y, rhoRange=rhoRange, BPPARAM=BPPARAM)
fitparameters <- bplapply(seq_len(nrow(k)), estRho,
nll=fullNLLRho_penalized,
k=k, n=n, y=y, rhoRange=rhoRange, lambda=1e-4,
BPPARAM=BPPARAM, exclusionMat=exclusionMat)

rho(fds) <- plogis(vapply(fitparameters, "[[",
double(1), "minimum"))

if(isTRUE(verbose)){
stxt <- capture.output(summary(rho(fds)))
message(date(), ": rho fit:\n\t", paste(stxt, collapse="\n\t"))
}

validObject(fds)
return(fds)
}

0 comments on commit 92de3ce

Please sign in to comment.