Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix weight.by.var for approx=FALSE #2330

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 64 additions & 37 deletions R/dimensional_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -799,44 +799,71 @@ RunPCA.default <- function(
approx = TRUE,
...
) {
if(!approx) {
pcs.name <- if(rev.pca) "`gene.loadings`" else "`cell.embeddings`"
# Stage 1:
Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling")
if (is.null(Seurat.RunPCA.use.correct.scaling)) {
warning("If used with `approx=FALSE`, RunPCA does not scale ", pcs.name, " correctly. This will change",
" in a future version.\nTo switch to the new, correct behavior now and get rid of this" , # to be changed to
" warning message, use `options(Seurat.RunPCA.use.correct.scaling = TRUE)` or use `approx=TRUE` (recommended).",
"\nTo maintain the current (wrong) behavior and get rid of this warning message,",
" set `options(Seurat.RunPCA.use.correct.scaling = FALSE)` (not recommended).",
"\nSee https://github.com/satijalab/seurat/issues/2237 for further information.")
Seurat.RunPCA.use.correct.scaling <- FALSE
}
# # Stage 2: # fill in current date for yyyyy in stage 4 warning
# Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE)
# # Stage 3: # fill in current date for xxxxx in stage 4 warnings
# Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE)
# if (!Seurat.RunPCA.use.correct.scaling) {
# warning("Using `options(Seurat.RunPCA.use.correct.scaling = FALSE)` is deprecated in will cause an error in future versions of Seurat.",
# "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.")
# }
# # Stage 4: remove if clause below
# Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE)
# if (!is.null(Seurat.RunPCA.use.correct.scaling)) {
# if (Seurat.RunPCA.use.correct.scaling) {
# warning("Using `options(Seurat.RunPCA.use.correct.scaling = FALSE)` is deprecated since xxxxx.",
# " Specifing `options(Seurat.RunPCA.use.correct.scaling = TRUE)` is no longer needed since yyyyy, please to not set it anymore.",
# "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.")
# } else {
# stop("`options(Seurat.RunPCA.use.correct.scaling = FALSE)` has been deprecated since xxxx and is no longer supported.",
# "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.")
# }
# }
# # Stage 5: remove this if clause
}

if (!is.null(x = seed.use)) {
set.seed(seed = seed.use)
}
if (rev.pca) {
npcs <- min(npcs, ncol(x = object) - 1)
pca.results <- irlba(A = object, nv = npcs, ...)
total.variance <- sum(RowVar(x = t(x = object)))
sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1))
if (weight.by.var) {
feature.loadings <- pca.results$u %*% diag(pca.results$d)
} else{
feature.loadings <- pca.results$u
}
cell.embeddings <- pca.results$v
}
else {
total.variance <- sum(RowVar(x = object))
if (approx) {
npcs <- min(npcs, nrow(x = object) - 1)
pca.results <- irlba(A = t(x = object), nv = npcs, ...)
feature.loadings <- pca.results$v
sdev <- pca.results$d/sqrt(max(1, ncol(object) - 1))
if (weight.by.var) {
cell.embeddings <- pca.results$u %*% diag(pca.results$d)
} else {
cell.embeddings <- pca.results$u
}
} else {
npcs <- min(npcs, nrow(x = object))
pca.results <- prcomp(x = t(object), rank. = npcs, ...)
feature.loadings <- pca.results$rotation
sdev <- pca.results$sdev
if (weight.by.var) {
cell.embeddings <- pca.results$x %*% diag(pca.results$sdev[1:npcs]^2)
} else {
cell.embeddings <- pca.results$x
}
}
t.object <- if(rev.pca) object else t(x = object)
nt.object <- if(rev.pca) t(x = object) else object
total.variance <- sum(RowVar(x = nt.object))
if (approx){
npcs <- min(npcs, ncol(x = t.object) - 1) # irlba does not allow computation of all PCs
pca.results <- irlba(A = t.object, nv = npcs, ...)
sdev <- pca.results$d/sqrt(nrow(t.object) - 1)
feature.loadings <- pca.results$v
cell.embeddings <- pca.results$u %*% diag(pca.results$d)
} else {
pca.results <- prcomp(x = t.object, rank. = npcs, ...)
sdev <- pca.results$sdev
feature.loadings <- pca.results$rotation
cell.embeddings <- pca.results$x
}
if (!approx && !Seurat.RunPCA.use.correct.scaling ) { # to be removed at stage 4
if (weight.by.var) { #
cell.embeddings <- cell.embeddings %*% diag(sdev[1:npcs]^2) #
} #
} else if (!weight.by.var) {
cell.embeddings <- cell.embeddings %*% diag(1/sdev[1:npcs]/sqrt(nrow(t.object) - 1))
}
if (rev.pca){
tmp <- feature.loadings
feature.loadings <- cell.embeddings
cell.embeddings <- tmp
}
rownames(x = feature.loadings) <- rownames(x = object)
colnames(x = feature.loadings) <- paste0(reduction.key, 1:npcs)
Expand All @@ -861,8 +888,8 @@ RunPCA.default <- function(
return(reduction.data)
}

#' @param features Features to compute PCA on. If features=NULL, PCA will be run
#' using the variable features for the Assay.
#' @param features Features to compute PCA on. If features=NULL, PCA will be run
#' using the variable features for the Assay.
#'
#' @rdname RunPCA
#' @export
Expand Down
Binary file added tests/testthat/pcalist.approx...FALSE..rds
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/testthat/pcalist.approx...TRUE..rds
Binary file not shown.
Binary file not shown.
Binary file not shown.
129 changes: 129 additions & 0 deletions tests/testthat/test_dimensional_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,132 @@ test_that("pca returns total variance (see #982)", {
sum(prcomp_result$sdev^2))

})

test_that("pca embedding weighting works", {
#test new behavior
Seurat.RunPCA.use.correct.scaling.bak <- options(Seurat.RunPCA.use.correct.scaling = TRUE)
# Generate dummy data exp matrix
set.seed(seed = 1)
npcs <- 50
dummyexpMat <- matrix(
data = stats::rexp(n = 2e4, rate = 1),
ncol = 200, nrow = 100
)
colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat)))
row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat)))

# Create Seurat object for testing
obj <- CreateSeuratObject(counts = dummyexpMat)

# Normalize
obj <- NormalizeData(object = obj, verbose = FALSE)
# Scale
obj <- ScaleData(object = obj, verbose = FALSE)

# un(weighted/scaled)
# compute PCA
obj <- suppressWarnings(expr = RunPCA(
object = obj,
features = rownames(x = obj),
verbose = FALSE,
reduction.name = "pca.prcomp.unscaled",
npcs = npcs,
weight.by.var = FALSE,
approx = FALSE
))

obj <- suppressWarnings(expr = RunPCA(
object = obj,
features = rownames(x = obj),
verbose = FALSE,
reduction.name = "pca.irlba.unscaled",
npcs = npcs,
weight.by.var = FALSE,
approx = TRUE
))

# Compare
expect_equivalent(
diag(x = cov(x = slot(object = obj[["pca.prcomp.unscaled"]], name = "cell.embeddings"))),
rep(x = 1/(ncol(x = obj)-1), times=npcs)
)
expect_equivalent(
diag(x = cov(x = slot(object = obj[["pca.irlba.unscaled"]], name = "cell.embeddings"))),
rep(x = 1/(ncol(x = obj)-1), times=npcs)
)

# weighted/scaled
# compute PCA
obj <- suppressWarnings(expr = RunPCA(
object = obj,
features = rownames(x = obj),
verbose = FALSE,
reduction.name = "pca.prcomp.var_scaled",
npcs = npcs,
weight.by.var = TRUE,
approx = FALSE
))

obj <- suppressWarnings(expr = RunPCA(
object = obj,
features = rownames(x = obj),
verbose = FALSE,
reduction.name = "pca.irlba.var_scaled",
npcs = npcs,
weight.by.var = TRUE,
approx = TRUE
))

# Compare
expect_equivalent(
diag(x = cov(x = slot(object = obj[["pca.prcomp.var_scaled"]], name = "cell.embeddings"))),
slot(object = obj[["pca.prcomp.var_scaled"]], name = "stdev")[1:npcs]^2
)
expect_equivalent(
diag(x = cov(x = slot(object = obj[["pca.irlba.var_scaled"]], name = "cell.embeddings"))),
slot(object = obj[["pca.prcomp.var_scaled"]], name = "stdev")[1:npcs]^2
)
options(Seurat.RunPCA.use.correct.scaling.bak)
})

test_that("pca reduction behaves as previously", {
# Generate dummy data exp matrix
set.seed(seed = 1)
npcs <- 3
dummyexpMat <- matrix(
data = stats::rexp(n = 2e4, rate = 1),
ncol = 200, nrow = 100
)
colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat)))
row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat)))

# Create Seurat object for testing
obj <- CreateSeuratObject(counts = dummyexpMat)

# Normalize
obj <- NormalizeData(object = obj, verbose = FALSE)
# Scale
obj <- ScaleData(object = obj, verbose = FALSE)

# compute PCA with different values of related parameters
for (approx in list(list(approx=TRUE), list(approx=FALSE))) {
for (weight.by.var in list(list(weight.by.var=TRUE), list(weight.by.var=FALSE), list())) {
pars <- c(approx, weight.by.var)
pars.str <- paste0(deparse(pars), collapse="")
expect_known_value(
object = suppressWarnings(
expr = do.call(
what = RunPCA,
args = c(
list(object = obj, features = rownames(x = obj), npcs = npcs),
pars
)
)
)[["pca"]],
file = paste0("pca", make.names(pars.str), ".rds"),
label = paste0("RunPCA with ", pars.str)
)
}
}
})