Skip to content

Commit

Permalink
Merge pull request #1376 from deeptools/mbs_output
Browse files Browse the repository at this point in the history
Mbs output
  • Loading branch information
WardDeb authored Jan 29, 2025
2 parents b7ef67e + d415afe commit c8f38b5
Show file tree
Hide file tree
Showing 13 changed files with 1,620 additions and 1,341 deletions.
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ tokio = "*"
flate2 = "*"
tempfile = "*"
ndarray = "0.16.1"
npyz = "*"
zip = "*"
ndarray-npy = "*"
3 changes: 3 additions & 0 deletions deeptools4.0.0_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@

- unmapped reads to unfiltered_out

## multiBamSummary
- npz output has labels encoded as u8s, no longer strings.

# Todo

- AlignmentSieve: Shift, Bed, Optimization.
Expand Down
7 changes: 5 additions & 2 deletions pydeeptools/deeptools/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,11 @@ def load_matrix(self, matrix_file):
"and plotting\n".format(num_nam))

self.matrix = np.ma.compress_rows(np.ma.masked_invalid(self.matrix))

self.labels = list(map(toString, _ma['labels']))
# Since deeptools 4.0 the labels are encoded and would need to be decoded.
if _ma['labels'].dtype == np.uint8:
self.labels = [bytes(x).decode('utf-8').rstrip('\x00') for x in _ma['labels']]
else:
self.labels = list(map(toString, _ma['labels']))

assert len(self.labels) == self.matrix.shape[1], "ERROR, length of labels is not equal " \
"to length of matrix samples"
Expand Down
2 changes: 1 addition & 1 deletion pydeeptools/deeptools/multiBamSummary2.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def process_args(args=None):
args.labels = [os.path.basename(x) for x in args.bamfiles]

if not args.BED:
args.BED = "None"
args.BED = []
if not args.region:
args.region = []
if not args.blackListFileName:
Expand Down
2 changes: 0 additions & 2 deletions pydeeptools/deeptools/writeBedGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ def run(self, func_to_call, func_args, out_file_name, blackListFileName=None, fo
region=self.region,
blackListFileName=blackListFileName,
numberOfProcessors=self.numberOfProcessors)
print("Result after mapReduce")
print(res)
# Determine the sorted order of the temp files
chrom_order = dict()
for i, _ in enumerate(chrom_names_and_size):
Expand Down
15 changes: 8 additions & 7 deletions src/alignmentsieve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use rust_htslib::bam::{self, record, Header, IndexedReader, Read, Reader, Writer
use tempfile::{Builder, TempPath, NamedTempFile};
use std::fs::File;
use std::io::Write;
use crate::covcalc::{parse_regions, Alignmentfilters};
use crate::covcalc::{parse_regions, Alignmentfilters, Region};

#[pyfunction]
pub fn r_alignmentsieve(
Expand Down Expand Up @@ -47,7 +47,7 @@ pub fn r_alignmentsieve(
// shift is of length 0, 2, or 4.

// Define regions
let (regions, chromsizes) = parse_regions(&Vec::new(), bamifile);
let (regions, chromsizes) = parse_regions(&Vec::new(), vec![bamifile]);

let filters = Alignmentfilters{
minmappingquality: min_mapping_quality,
Expand All @@ -74,7 +74,7 @@ pub fn r_alignmentsieve(

// write output
let mut obam = Writer::from_path(ofile, &header, bam::Format::Bam).unwrap();
obam.set_threads(nproc);
let _ = obam.set_threads(nproc);
for sb in sieve.into_iter() {
if let Some(sb) = sb {
let mut bam = Reader::from_path(&sb).unwrap();
Expand All @@ -87,7 +87,7 @@ pub fn r_alignmentsieve(
// write filtered reads if necessary
if write_filters {
let mut ofilterbam = Writer::from_path(filtered_out_readsfile, &header, bam::Format::Bam).unwrap();
ofilterbam.set_threads(nproc);
let _ = ofilterbam.set_threads(nproc);
for sb in filtersieve.into_iter() {
if let Some(sb) = sb {
let mut bam = Reader::from_path(&sb).unwrap();
Expand All @@ -113,7 +113,8 @@ pub fn r_alignmentsieve(
}


fn sieve_bamregion(ibam: &str, region: &(String, u32, u32), alfilters: &Alignmentfilters, filter_rna_strand: &str, shift: &Vec<i32>, write_filters: bool, nproc: usize, verbose: bool) -> (Vec<Option<TempPath>>, Vec<Option<TempPath>>, u64, u64) {
fn sieve_bamregion(ibam: &str, regstruct: &Region, alfilters: &Alignmentfilters, filter_rna_strand: &str, shift: &Vec<i32>, write_filters: bool, nproc: usize, verbose: bool) -> (Vec<Option<TempPath>>, Vec<Option<TempPath>>, u64, u64) {
let region = (regstruct.chrom.clone(), regstruct.get_startu(), regstruct.get_endu());
let mut total_reads: u64 = 0;
let mut filtered_reads: u64 = 0;
let mut bam = IndexedReader::from_path(ibam).unwrap();
Expand Down Expand Up @@ -147,8 +148,8 @@ fn sieve_bamregion(ibam: &str, region: &(String, u32, u32), alfilters: &Alignmen
if nproc > 4 {
let readthreads = 2;
let writethreads = nproc - 2;
bam.set_threads(readthreads);
sievebamout.set_threads(writethreads);
let _ = bam.set_threads(readthreads);
let _ = sievebamout.set_threads(writethreads);
if verbose {
println!("Reading = {}, Writing = {}", readthreads, writethreads);
}
Expand Down
10 changes: 6 additions & 4 deletions src/bamcompare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::fs::File;
use itertools::Itertools;
use bigtools::{Value};
use crate::filehandler::{bam_ispaired, write_covfile};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::normalization::scale_factor_bamcompare;
use crate::calc::{median, calc_ratio};
use tempfile::{TempPath};
Expand Down Expand Up @@ -60,7 +60,9 @@ pub fn r_bamcompare(
};

// Parse regions & calculate coverage. Note that
let (regions, chromsizes) = parse_regions(&regions, bamifile1);
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile1, bamifile2]);
let regionblocks = region_divider(&regions);

let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();

// Set up the bam files in a Vec.
Expand All @@ -69,8 +71,8 @@ pub fn r_bamcompare(
let covcalcs: Vec<ParsedBamFile> = pool.install(|| {
bamfiles.par_iter()
.map(|(bamfile, ispe)| {
let (bg, mapped, unmapped, readlen, fraglen) = regions.par_iter()
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false))
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false))
.reduce(
|| (vec![], 0, 0, vec![], vec![]),
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {
Expand Down
9 changes: 5 additions & 4 deletions src/bamcoverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use bigtools::Value;
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters};
use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters, region_divider};
use crate::filehandler::{bam_ispaired, write_covfile};
use crate::normalization::scale_factor;
use crate::calc::median;
Expand Down Expand Up @@ -70,11 +70,12 @@ pub fn r_bamcoverage(
}

// Parse regions & calculate coverage
let (regions, chromsizes) = parse_regions(&regions, bamifile);
let (regions, chromsizes) = parse_regions(&regions, vec![bamifile]);
let regionblocks = region_divider(&regions);
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
let (bg, mapped, _unmapped, readlen, fraglen) = pool.install(|| {
regions.par_iter()
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, true))
regionblocks.par_iter()
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, true, false))
.reduce(
|| (vec![], 0, 0, vec![], vec![]),
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {
Expand Down
18 changes: 18 additions & 0 deletions src/calc.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use ndarray::{Array1, Array2, Axis};

pub fn median(mut nvec: Vec<u32>) -> f32 {
if nvec.is_empty() {
0.0
Expand Down Expand Up @@ -156,3 +158,19 @@ pub fn calc_ratio(
}
}
}

pub fn deseq_scalefactors(array2: &Array2<f32>) -> Array1<f32> {
let loggeomeans = array2.mapv(|v| v.ln()).sum_axis(Axis(1)) / array2.shape()[1] as f32;
let masked_array = array2.mapv(|x| if x <= 0.0 { f32::NAN } else { x });
let masked_loggeomeans = loggeomeans.mapv(|x| if x.is_infinite() { f32::NAN } else { x });
let adjusted_loga = masked_array.mapv(|x| x.ln()).t().to_owned() - &masked_loggeomeans;
let medians: Array1<f32> = adjusted_loga.t().axis_iter(Axis(1))
.map(|x| {
let vec: Vec<&f32> = x.iter()
.filter(|&&x| !x.is_nan())
.collect();
median_float(vec)
})
.collect();
medians.mapv(|x| 1.0 / x.exp())
}
Loading

0 comments on commit c8f38b5

Please sign in to comment.