diff --git a/R/calc_scAEI.R b/R/calc_scAEI.R index 5af80c3..6126a76 100644 --- a/R/calc_scAEI.R +++ b/R/calc_scAEI.R @@ -86,8 +86,8 @@ calc_scAEI <- function(bamfiles, sites, cell_barcodes, param = FilterParam(), # if unstranded, only query w.r.t + strand is_unstranded <- !param@library_type %in% c(1, 2) - if(is_unstranded) { - is_minus <- strand(sites) == "-" + is_minus <- strand(sites) == "-" + if(is_unstranded && sum(is_minus) > 0) { sites[is_minus]$REF <- comp_bases(edit_from) sites[is_minus]$ALT <- comp_bases(edit_to) strand(sites[is_minus]) <- "+" @@ -102,23 +102,29 @@ calc_scAEI <- function(bamfiles, sites, cell_barcodes, param = FilterParam(), param = param, ... ) - if(is_unstranded) { + + if(nrow(aei_sce) == 0) { + cli::cli_abort(c("no sites returned from pileup_cells", + "check input sites and filterParams")) + } + + if(is_unstranded) { aei_sce <- resolve_aei_regions(aei_sce) } - n_alt <- Matrix::colSums(assay(aei_sce, "nAlt")) - n_ref <- Matrix::colSums(assay(aei_sce, "nRef")) - aei <- 100 * (n_alt / (n_alt + n_ref)) - res <- DataFrame(row.names = colnames(aei_sce), - AEI = aei, - n_alt = n_alt, - n_ref = n_ref) - - if(return_sce) { - colData(aei_sce) <- res - return(aei_sce) - } - res + n_alt <- Matrix::colSums(assay(aei_sce, "nAlt")) + n_ref <- Matrix::colSums(assay(aei_sce, "nRef")) + aei <- 100 * (n_alt / (n_alt + n_ref)) + res <- DataFrame(row.names = colnames(aei_sce), + AEI = aei, + n_alt = n_alt, + n_ref = n_ref) + + if(return_sce) { + colData(aei_sce) <- res + return(aei_sce) + } + res } #' @param fasta Path to a genome fasta file @@ -258,41 +264,47 @@ resolve_aei_regions <- function(sce) { vdat <- unlist(vdat) sites_to_keep <- subset_dat[subset_dat$v == vdat[as.character(subset_dat$id)], ] - res_sce <- ud_sce[sites_to_keep$rid, ] - - sce_to_avg <- ud_sce[rowData(ud_sce)$id %in% sites_to_average, ] - anr <- rowsum(assay(sce_to_avg, "nRef"), - paste0(rowData(sce_to_avg)$var, "_", rowData(sce_to_avg)$id), - reorder = FALSE) - - ids <- unlist(lapply(strsplit(rownames(anr), "_"), "[", 2)) - ns <- lengths(split(ids, ids)) - anr <- rowsum(anr, ids) - - stopifnot(length(ns) == nrow(anr)) - stopifnot(all(names(ns) == rownames(anr))) - - avg_nref <- anr / ns - avg_nalt <- avg_nref - avg_nalt[] <- 0 - - # figure out ranges and names for average coords - rr <- rowRanges(sce_to_avg) - stopifnot(all(as.character(rr$id) %in% rownames(avg_nref))) - avg_nref <- avg_nref[unique(as.character(rr$id)), , drop = FALSE] - avg_nalt <- avg_nalt[unique(as.character(rr$id)), , drop = FALSE] - srr <- split(rr, rr$id) - new_ranges <- GRanges(unlist(unique(seqnames(srr))), - IRanges(min(start(srr)), - max(end(srr))), - id = names(srr)) - names(new_ranges) <- paste0("range_mean_", seq_along(new_ranges)) - - avg_sce_res <- SingleCellExperiment(list(nRef = avg_nref, - nAlt = avg_nalt)) - rowRanges(avg_sce_res) <- new_ranges - - # put it all back together - do.call(rbind, list(d_sce, res_sce, avg_sce_res)) + + res <- d_sce + if(length(sites_to_keep > 0)) { + res_sce <- ud_sce[sites_to_keep$rid, ] + res <- rbind(res, res_sce) + } + + if(length(sites_to_average) > 0 ){ + sce_to_avg <- ud_sce[rowData(ud_sce)$id %in% sites_to_average, ] + anr <- rowsum(assay(sce_to_avg, "nRef"), + paste0(rowData(sce_to_avg)$var, "_", rowData(sce_to_avg)$id), + reorder = FALSE) + + ids <- unlist(lapply(strsplit(rownames(anr), "_"), "[", 2)) + ns <- lengths(split(ids, ids)) + anr <- rowsum(anr, ids) + + stopifnot(length(ns) == nrow(anr)) + stopifnot(all(names(ns) == rownames(anr))) + + avg_nref <- anr / ns + avg_nalt <- avg_nref + avg_nalt[] <- 0 + + # figure out ranges and names for average coords + rr <- rowRanges(sce_to_avg) + stopifnot(all(as.character(rr$id) %in% rownames(avg_nref))) + avg_nref <- avg_nref[unique(as.character(rr$id)), , drop = FALSE] + avg_nalt <- avg_nalt[unique(as.character(rr$id)), , drop = FALSE] + srr <- split(rr, rr$id) + new_ranges <- GRanges(unlist(unique(seqnames(srr))), + IRanges(min(start(srr)), + max(end(srr))), + id = names(srr)) + names(new_ranges) <- paste0("range_mean_", seq_along(new_ranges)) + + avg_sce_res <- SingleCellExperiment(list(nRef = avg_nref, + nAlt = avg_nalt)) + rowRanges(avg_sce_res) <- new_ranges + res <- rbind(res, avg_sce_res) + } + res } diff --git a/tests/testthat/test_scaei.R b/tests/testthat/test_scaei.R index cfb37fb..2bce289 100644 --- a/tests/testthat/test_scaei.R +++ b/tests/testthat/test_scaei.R @@ -40,3 +40,21 @@ test_that("calc_scaei basic functions work", { expect_true(is(res, "SingleCellExperiment")) }) +test_that("guard against no sites in output when processing unstranded", { + genes_gr <- GRanges(c( + "2:100-400:-", + "2:200-400:+", + "2:600-680:+" + )) + + sites <- get_scAEI_sites(fa_fn, genes_gr, alus_gr) + + fp <- FilterParam(library_type = "unstranded", + min_mapq = 255) + res <- calc_scAEI(bam_fn, sites, cbs, fp) + expect_equal(nrow(res), 3L) + + sites <- GRanges(seqnames = "2", IRanges(start = 1:10, end = 1:10), strand = "+", id = 1, gene_strand = "defined", REF = "A", ALT = "G") + expect_error(expect_warning(calc_scAEI(bam_fn, sites, cbs, fp))) +}) +