From 4e7c162d495e5dcecf38208f186ed44e79a96aae Mon Sep 17 00:00:00 2001 From: wdecoster Date: Sat, 1 Apr 2023 23:16:37 +0200 Subject: [PATCH] logging number of reads to stderr as requested in https://github.com/wdecoster/chopper/issues/7 --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/main.rs | 52 +++++++++++++++++++++++++++++++--------------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5068bc..83e8eba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,7 +192,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chopper" -version = "0.3.0" +version = "0.4.0" dependencies = [ "approx", "bio", diff --git a/Cargo.toml b/Cargo.toml index fbdaec0..cd6983b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chopper" -version = "0.3.0" +version = "0.4.0" authors = ["wdecoster "] edition = "2018" diff --git a/src/main.rs b/src/main.rs index f131aa3..0fd0336 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ use minimap2::*; use rayon::prelude::*; use std::io::{self, Read}; use std::path::PathBuf; +use std::sync::Mutex; // The arguments end up in the Cli struct #[derive(Parser, Debug)] @@ -68,41 +69,46 @@ where { match args.contam { Some(ref fas) => { + let mut total_reads = 0; + let mut output_reads = 0; let aligner = setup_contamination_filter(fas); - fastq::Reader::new(input) - .records() - .into_iter() - .for_each(|record| { - let record = record.unwrap(); - if !record.is_empty() { - let read_len = record.seq().len(); - // If a read is shorter than what is to be cropped the read is dropped entirely (filtered out) - if args.headcrop + args.tailcrop < read_len { - let average_quality = ave_qual(record.qual()); - if average_quality >= args.minqual - && average_quality <= args.maxqual - && read_len >= args.minlength - && read_len <= args.maxlength - && !is_contamination(&record.seq(), &aligner) - { - write_record(record, &args, read_len); - } + fastq::Reader::new(input).records().for_each(|record| { + let record = record.unwrap(); + total_reads += 1; + if !record.is_empty() { + let read_len = record.seq().len(); + // If a read is shorter than what is to be cropped the read is dropped entirely (filtered out) + if args.headcrop + args.tailcrop < read_len { + let average_quality = ave_qual(record.qual()); + if average_quality >= args.minqual + && average_quality <= args.maxqual + && read_len >= args.minlength + && read_len <= args.maxlength + && !is_contamination(&record.seq(), &aligner) + { + write_record(record, &args, read_len); + output_reads += 1; } } - }); + } + }); + eprintln!("Kept {output_reads} reads out of {total_reads} reads"); } None => { + let total_reads_ = Mutex::new(0); + let output_reads_ = Mutex::new(0); rayon::ThreadPoolBuilder::new() .num_threads(args.threads) .build() .unwrap(); fastq::Reader::new(input) .records() - .into_iter() .par_bridge() .for_each(|record| { let record = record.unwrap(); + let mut total_reads = total_reads_.lock().unwrap(); + *total_reads += 1; if !record.is_empty() { let read_len = record.seq().len(); // If a read is shorter than what is to be cropped the read is dropped entirely (filtered out) @@ -116,10 +122,15 @@ where && read_len <= args.maxlength { write_record(record, &args, read_len); + let mut output_reads = output_reads_.lock().unwrap(); + *output_reads += 1; } } } }); + let output_reads = output_reads_.lock().unwrap(); + let total_reads = total_reads_.lock().unwrap(); + eprintln!("Kept {output_reads} reads out of {total_reads} reads"); } } } @@ -256,7 +267,6 @@ fn test_filter_with_contam() { fn test_record_qual_len() { fastq::Reader::new(std::fs::File::open("test-data/test.fastq").unwrap()) .records() - .into_iter() .for_each(|record| { let record = record.unwrap(); if !record.is_empty() {