Skip to content

Commit

Permalink
standardize fasta naming, and expand fasta file path
Browse files Browse the repository at this point in the history
  • Loading branch information
kriemo committed Aug 17, 2023
1 parent 05c128a commit 98b06a3
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 45 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: raer
Type: Package
Title: RNA editing tools in R
Version: 0.99.7
Version: 0.99.8
Authors@R: c(
person("Kent", "Riemondy", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0750-1273")),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# raer 0.99.8

* Function arguments involving a fasta file have been renamed to all be `fasta`

# raer 0.99.7

* added a single cell specific AEI calculation (`calc_scAEI()`)
Expand Down
8 changes: 4 additions & 4 deletions R/calc_AEI.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#'
#' @param bamfiles character vector of paths to indexed bam files. If a named
#' character vector is supplied the names will be used in the output.
#' @param fasta_fn fasta filename
#' @param fasta fasta filename
#' @param alu_ranges [GRanges] with regions to query for
#' calculating the AEI, typically ALU repeats.
#' @param txdb A [TxDb] object, if supplied, will be used to subset the alu_ranges
Expand Down Expand Up @@ -64,7 +64,7 @@
#'
#' @export
calc_AEI <- function(bamfiles,
fasta_fn,
fasta,
alu_ranges = NULL,
txdb = NULL,
snp_db = NULL,
Expand Down Expand Up @@ -167,7 +167,7 @@ calc_AEI <- function(bamfiles,
res <- list()
for(i in seq_along(bamfiles)) {
bam_fn <- bamfiles[i]
aei <- .AEI_per_bam(bam_fn = bam_fn, fasta_fn = fasta_fn, chroms = chroms,
aei <- .AEI_per_bam(bam_fn = bam_fn, fasta_fn = fasta, chroms = chroms,
alu_ranges = alu_ranges, snps = snps, param = param,
genes_gr = genes_gr, verbose = verbose, BPPARAM = BPPARAM)
res[[path(bam_fn)]] <- aei
Expand Down Expand Up @@ -267,7 +267,7 @@ calc_AEI <- function(bamfiles,
param@only_keep_variants <- FALSE

plp <- pileup_sites(bam_fn,
fafile = fasta_fn,
fasta = fasta_fn,
sites = alu_sites,
chroms = chrom,
param = param
Expand Down
8 changes: 3 additions & 5 deletions R/calc_scAEI.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,7 @@ calc_scAEI <- function(bamfiles, sites, cell_barcodes, param = FilterParam(),
res
}

#' @param fasta_fn a path to a BAM file (for 10x libraries), or a vector of paths
#' to BAM files (smart-seq2). Can be supplied as a character vector, `BamFile`, or
#' `BamFileList`.
#' @param fasta Path to a genome fasta file
#' @param genes A `GRanges` object with gene coordinates.Alternatively a `TxDb` object,
#' which if supplied, will be used extract gene coordinates.
#' @param alus `GRanges` with repeat regions to query for calculating the AEI,
Expand All @@ -138,7 +136,7 @@ calc_scAEI <- function(bamfiles, sites, cell_barcodes, param = FilterParam(),
#'
#' @rdname calc_scAEI
#' @export
get_scAEI_sites <- function(fasta_fn,
get_scAEI_sites <- function(fasta,
genes,
alus,
edit_from = "A",
Expand All @@ -159,7 +157,7 @@ get_scAEI_sites <- function(fasta_fn,
alus$id <- seq_along(alus)
gn_alus <- prep_genic_alu_regions(genes, alus)
aei_sites <- get_aei_site_positions(gn_alus,
fasta_fn,
fasta,
edit_from)

aei_sites$ALT <- edit_to
Expand Down
15 changes: 8 additions & 7 deletions R/pileup.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' more BAM files to process. If named, the names will be included in the [colData]
#' of the [RangedSummarizedExperiment] as a `sample` column, otherwise the names will
#' be taken from the basename of the BAM file.
#' @param fafile path to genome fasta file used for read alignment. Can be in
#' @param fasta path to genome fasta file used for read alignment. Can be in
#' gzip or bgzip format.
#' @param sites a [GRanges] object containing regions or sites to process.
#' @param region samtools region query string (i.e. `chr1:100-1000`). Can be combined
Expand Down Expand Up @@ -107,7 +107,7 @@
#' @rdname pileup_sites
#' @export
pileup_sites <- function(bamfiles,
fafile,
fasta,
sites = NULL,
region = NULL,
chroms = NULL,
Expand Down Expand Up @@ -146,9 +146,10 @@ pileup_sites <- function(bamfiles,
cli::cli_abort("index file(s) not found for bam: {mi}")
}

if (!file.exists(fafile)) {
cli::cli_abort("fasta file not found: {fafile}")
if (!file.exists(fasta)) {
cli::cli_abort("fasta file not found: {fasta}")
}
fasta <- path.expand(fasta)

if (is.null(outfile_prefix)) {
if (!return_data) {
Expand Down Expand Up @@ -208,7 +209,7 @@ pileup_sites <- function(bamfiles,
setdiff(chroms_to_process, missing_chroms)
}

chroms_in_fa <- seqnames(Rsamtools::scanFaIndex(fafile))
chroms_in_fa <- seqnames(Rsamtools::scanFaIndex(fasta))
missing_chroms <-
chroms_to_process[!chroms_to_process %in% levels(chroms_in_fa)]

Expand Down Expand Up @@ -278,7 +279,7 @@ pileup_sites <- function(bamfiles,
bfs,
bidxs,
as.integer(n_files),
fafile,
fasta,
region,
sites,
fp[["int_args"]],
Expand Down Expand Up @@ -334,7 +335,7 @@ pileup_sites <- function(bamfiles,
bfs,
bidxs,
as.integer(n_files),
fafile,
fasta,
ctig,
sites,
fp[["int_args"]],
Expand Down
10 changes: 5 additions & 5 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ unlist_w_names <- function(x) {
#' are returned.
#'
#' @param bamfile path to bamfile
#' @param fafile path to fasta file
#' @param fasta path to fasta file
#' @param pos_5p distance 5' of mispriming site to define mispriming region
#' @param pos_3p distance 3' of mispriming site to define mispriming region
#' @param min_reads minimum required number of reads at a mispriming site
Expand All @@ -82,7 +82,7 @@ unlist_w_names <- function(x) {
#' find_mispriming_sites(bam_fn, fa_fn)
#'
#' @export
find_mispriming_sites <- function(bamfile, fafile, pos_5p = 5, pos_3p = 20,
find_mispriming_sites <- function(bamfile, fasta, pos_5p = 5, pos_3p = 20,
min_reads = 2, tag = "pa", tag_values = 3:300,
n_reads_per_chunk = 1e6, verbose = TRUE){

Expand Down Expand Up @@ -140,7 +140,7 @@ find_mispriming_sites <- function(bamfile, fafile, pos_5p = 5, pos_3p = 20,
drop = FALSE)
res$n_regions <- IRanges::grouplengths(res$grouping)
res$grouping <- NULL
res <- pa_seq_context(res, fafile)
res <- pa_seq_context(res, fasta)
res
}

Expand Down Expand Up @@ -175,8 +175,8 @@ merge_pa_peaks <- function(gr) {

#' @importFrom Rsamtools FaFile scanFa
#' @importFrom Biostrings letterFrequency reverseComplement
pa_seq_context <- function(gr, fafile){
fa <- Rsamtools::FaFile(fafile)
pa_seq_context <- function(gr, fasta){
fa <- Rsamtools::FaFile(fasta)
seqs <- Rsamtools::scanFa(fa, gr)
seqs[strand(gr) == "-"] <- Biostrings::reverseComplement(seqs[strand(gr) == "-"])
a_prop <- Biostrings::letterFrequency(seqs, "A") / width(gr)
Expand Down
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ sce
#> dim: 3 3
#> metadata(0):
#> assays(2): nRef nAlt
#> rownames(3): site_2_579_2 site_2_625_2 site_2_589_2
#> rownames(3): site_2_579_2_AG site_2_589_2_AG site_2_625_2_AG
#> rowData names(2): REF ALT
#> colnames(3): CACCAAACAACAACAA-1 TATTCCACACCCTCTA-1 GACCTTCAGTTGTAAG-1
#> colData names(0):
Expand All @@ -150,16 +150,16 @@ sce
``` r
assays(sce)$nRef
#> 3 x 3 sparse Matrix of class "dgCMatrix"
#> CACCAAACAACAACAA-1 TATTCCACACCCTCTA-1 GACCTTCAGTTGTAAG-1
#> site_2_579_2 0 0 1
#> site_2_625_2 0 0 0
#> site_2_589_2 1 1 2
#> CACCAAACAACAACAA-1 TATTCCACACCCTCTA-1 GACCTTCAGTTGTAAG-1
#> site_2_579_2_AG 0 0 1
#> site_2_589_2_AG 1 1 2
#> site_2_625_2_AG 0 0 0
assays(sce)$nAlt
#> 3 x 3 sparse Matrix of class "dgCMatrix"
#> CACCAAACAACAACAA-1 TATTCCACACCCTCTA-1 GACCTTCAGTTGTAAG-1
#> site_2_579_2 1 1 1
#> site_2_625_2 1 1 1
#> site_2_589_2 0 0 0
#> CACCAAACAACAACAA-1 TATTCCACACCCTCTA-1 GACCTTCAGTTGTAAG-1
#> site_2_579_2_AG 1 1 1
#> site_2_589_2_AG 0 0 0
#> site_2_625_2_AG 1 1 1
```

## Related work
Expand Down
4 changes: 2 additions & 2 deletions man/calc_AEI.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions man/calc_scAEI.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/find_mispriming_sites.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/pileup_sites.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_pileup_cells.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ bulkfp <- FilterParam(min_mapq = 255L,
bam_flags = scanBamFlag(isSecondaryAlignment = FALSE,
isSupplementaryAlignment = FALSE,
isNotPassingQualityControls = FALSE))
rse <- pileup_sites(bam_fn, fafile = fa_fn, chrom = "2", param = bulkfp)
rse <- pileup_sites(bam_fn, fa_fn, chrom = "2", param = bulkfp)
rowData(rse)$ALT <- assay(rse, "ALT")[, 1]
sites <- rowRanges(rse)

Expand Down
2 changes: 1 addition & 1 deletion vignettes/novel-sites.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ fp <- FilterParam(
bams <- c(rna_bam, wgs_bam)
names(bams) <- c("rna", "dna")
rse <- pileup_sites(bams,
fafile = fafn,
fasta = fafn,
chrom = "chr4",
param = fp
)
Expand Down
4 changes: 2 additions & 2 deletions vignettes/raer.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ fp <- FilterParam(
)
rse <- pileup_sites(bam_files,
fafile = fafn,
fasta = fafn,
sites = sites,
region = "chr18",
param = fp
Expand Down Expand Up @@ -232,7 +232,7 @@ alus[1:3, ]

```{r}
alu_index <- calc_AEI(bam_files,
fasta_fn = fafn,
fasta = fafn,
snp_db = alu_snps,
alu_ranges = alus,
param = fp)
Expand Down

0 comments on commit 98b06a3

Please sign in to comment.