Skip to content

Commit

Permalink
Adding threads for functions that read BAM input
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewdavidsmith committed Oct 19, 2024
1 parent 47cd233 commit 3245e62
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 65 deletions.
15 changes: 5 additions & 10 deletions src/bound_pop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ bound_pop_main(const int argc, const char *argv[]) {
#ifdef HAVE_HTSLIB
bool BAM_FORMAT_INPUT = false;
size_t MAX_SEGMENT_LENGTH = 5000;
uint32_t n_threads{1};
#endif

size_t max_num_points = 10;
Expand Down Expand Up @@ -112,6 +113,8 @@ of observed species in an initial sample.)";
"maximum segment length when merging "
"paired end bam reads",
false, MAX_SEGMENT_LENGTH);
opt_parse.add_opt("threads", 't', "number of threads for decompressing BAM",
false, n_threads);
#endif
opt_parse.add_opt("quick", 'Q',
"quick mode, estimate without bootstrapping", false,
Expand Down Expand Up @@ -159,20 +162,12 @@ of observed species in an initial sample.)";
else if (BAM_FORMAT_INPUT && PAIRED_END) {
if (VERBOSE)
cerr << "PAIRED_END_BAM_INPUT" << endl;
// size_t n_paired = 0;
// size_t n_mates = 0;
n_obs = load_counts_BAM_pe(input_file_name,
// n_paired, n_mates,
counts_hist);
// if (VERBOSE) {
// cerr << "MERGED PAIRED END READS = " << n_paired << endl;
// cerr << "MATES PROCESSED = " << n_mates << endl;
// }
n_obs = load_counts_BAM_pe(n_threads, input_file_name, counts_hist);
}
else if (BAM_FORMAT_INPUT) {
if (VERBOSE)
cerr << "BAM_INPUT" << endl;
n_obs = load_counts_BAM_se(input_file_name, counts_hist);
n_obs = load_counts_BAM_se(n_threads, input_file_name, counts_hist);
}
#endif
else if (PAIRED_END) {
Expand Down
15 changes: 5 additions & 10 deletions src/c_curve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ c_curve_main(const int argc, const char *argv[]) {
#ifdef HAVE_HTSLIB
bool BAM_FORMAT_INPUT = false;
size_t MAX_SEGMENT_LENGTH = 5000;
uint32_t n_threads{1};
#endif

const string description = R"(
Expand Down Expand Up @@ -124,6 +125,8 @@ but instead resamples from the given data.)";
"maximum segment length when merging "
"paired end bam reads",
false, MAX_SEGMENT_LENGTH);
opt_parse.add_opt("threads", 't', "number of threads for decompressing BAM",
false, n_threads);
#endif
opt_parse.add_opt("seed", 'r', "seed for random number generator", false,
seed);
Expand Down Expand Up @@ -172,20 +175,12 @@ but instead resamples from the given data.)";
else if (BAM_FORMAT_INPUT && PAIRED_END) {
if (VERBOSE)
cerr << "PAIRED_END_BAM_INPUT" << endl;
// const size_t MAX_READS_TO_HOLD = 5000000;
// size_t n_paired = 0;
// size_t n_mates = 0;
n_reads = load_counts_BAM_pe(input_file_name,
// n_paired, n_mates,
counts_hist);
// if (VERBOSE)
// cerr << "MERGED PAIRED END READS = " << n_paired << endl
// << "MATES PROCESSED = " << n_mates << endl;
n_reads = load_counts_BAM_pe(n_threads, input_file_name, counts_hist);
}
else if (BAM_FORMAT_INPUT) {
if (VERBOSE)
cerr << "BAM_INPUT" << endl;
n_reads = load_counts_BAM_se(input_file_name, counts_hist);
n_reads = load_counts_BAM_se(n_threads, input_file_name, counts_hist);
}
#endif
else if (PAIRED_END) {
Expand Down
7 changes: 5 additions & 2 deletions src/gc_extrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ gc_extrap_main(const int argc, const char *argv[]) {
double c_level = 0.95;
#ifdef HAVE_HTSLIB
bool BAM_FORMAT_INPUT = false;
uint32_t n_threads{1};
#endif

const string description = R"(
Expand Down Expand Up @@ -144,6 +145,8 @@ gc_extrap_main(const int argc, const char *argv[]) {
#ifdef HAVE_HTSLIB
opt_parse.add_opt("bam", '\0', "input is in BAM format", false,
BAM_FORMAT_INPUT);
opt_parse.add_opt("threads", 't', "number of threads for decompressing BAM",
false, n_threads);
#endif
opt_parse.add_opt("seed", 'r', "seed for random number generator", false,
seed);
Expand Down Expand Up @@ -185,8 +188,8 @@ gc_extrap_main(const int argc, const char *argv[]) {
else if (BAM_FORMAT_INPUT) {
if (VERBOSE)
cerr << "BAM_INPUT" << endl;
n_reads = load_coverage_counts_BAM(infile, seed, bin_size, max_width,
coverage_hist);
n_reads = load_coverage_counts_BAM(n_threads, infile, seed, bin_size,
max_width, coverage_hist);
}
#endif
else {
Expand Down
15 changes: 5 additions & 10 deletions src/lc_extrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ lc_extrap_main(const int argc, const char **argv) {
#ifdef HAVE_HTSLIB
bool BAM_FORMAT_INPUT = false;
size_t MAX_SEGMENT_LENGTH = 5000;
uint32_t n_threads{1};
#endif

string description =
Expand Down Expand Up @@ -112,6 +113,8 @@ method has been used for many different purposes since then.
"maximum segment length when merging "
"paired end bam reads",
false, MAX_SEGMENT_LENGTH);
opt_parse.add_opt("threads", 't', "number of threads for decompressing BAM",
false, n_threads);
#endif
opt_parse.add_opt("pe", 'P', "input is paired end read file", false,
PAIRED_END);
Expand Down Expand Up @@ -169,20 +172,12 @@ method has been used for many different purposes since then.
else if (BAM_FORMAT_INPUT && PAIRED_END) {
if (VERBOSE)
cerr << "PAIRED_END_BAM_INPUT" << endl;
// size_t n_paired = 0;
// size_t n_mates = 0;
n_reads = load_counts_BAM_pe(input_file_name,
// n_paired, n_mates,
counts_hist);
// if (VERBOSE) {
// cerr << "MERGED PAIRED END READS = " << n_paired << endl;
// cerr << "MATES PROCESSED = " << n_mates << endl;
// }
n_reads = load_counts_BAM_pe(n_threads, input_file_name, counts_hist);
}
else if (BAM_FORMAT_INPUT) {
if (VERBOSE)
cerr << "BAM_INPUT" << endl;
n_reads = load_counts_BAM_se(input_file_name, counts_hist);
n_reads = load_counts_BAM_se(n_threads, input_file_name, counts_hist);
}
#endif
else if (PAIRED_END) {
Expand Down
37 changes: 18 additions & 19 deletions src/load_data_for_complexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,23 +582,19 @@ update_duplicate_counts_hist_BAM(const T &curr, const T &prev,
++current_count;
}

// template<typename T>
// size_t
// load_counts_BAM_se(/*const size_t n_threads, */
// const string &inputfile, vector<double> &counts_hist) {

template <typename aln_pos_t = aln_pos>
template <typename aln_pos_t>
size_t
load_counts_BAM(/*const size_t n_threads, */
const string &inputfile, vector<double> &counts_hist) {
// bamxx::bam_tpool tp(n_threads);
load_counts_BAM(const uint32_t n_threads, const string &inputfile,
vector<double> &counts_hist) {
bamxx::bam_tpool tp(n_threads);

bamxx::bam_in hts(inputfile); // assume already checked
bamxx::bam_header hdr(hts);
if (!hdr)
throw runtime_error("failed to read header");

// if (n_threads > 1) tp.set_io(hts);
if (n_threads > 1)
tp.set_io(hts);

// find first mapped read to start
bamxx::bam_rec aln;
Expand Down Expand Up @@ -646,15 +642,15 @@ load_counts_BAM(/*const size_t n_threads, */
}

size_t
load_counts_BAM_se(/*const size_t n_threads, */
const string &inputfile, vector<double> &counts_hist) {
return load_counts_BAM<aln_pos>(inputfile, counts_hist);
load_counts_BAM_se(const uint32_t n_threads, const string &inputfile,
vector<double> &counts_hist) {
return load_counts_BAM<aln_pos>(n_threads, inputfile, counts_hist);
}

size_t
load_counts_BAM_pe(/*const size_t n_threads, */
const string &inputfile, vector<double> &counts_hist) {
return load_counts_BAM<aln_pos_pair>(inputfile, counts_hist);
load_counts_BAM_pe(const uint32_t n_threads, const string &inputfile,
vector<double> &counts_hist) {
return load_counts_BAM<aln_pos_pair>(n_threads, inputfile, counts_hist);
}

struct genomic_interval {
Expand Down Expand Up @@ -726,18 +722,21 @@ update_duplicate_coverage_hist(const T &curr, const T &prev,
// ADS: don't care if mapped reads are SE or PE, we only need the
// first mate for each mapped read
size_t
load_coverage_counts_BAM(const string &inputfile, const uint64_t seed,
const size_t bin_size, const size_t max_width,
load_coverage_counts_BAM(const uint32_t n_threads, const string &inputfile,
const uint64_t seed, const size_t bin_size,
const size_t max_width,
vector<double> &coverage_hist) {
srand(time(0) + getpid());
std::mt19937 generator(seed);

bamxx::bam_tpool tp(n_threads);
bamxx::bam_in hts(inputfile); // assume already checked
bamxx::bam_header hdr(hts);
if (!hdr)
throw runtime_error("failed to read header");

// if (n_threads > 1) tp.set_io(hts);
if (n_threads > 1)
tp.set_io(hts);

// find first mapped read to start
bamxx::bam_rec aln;
Expand Down
10 changes: 6 additions & 4 deletions src/load_data_for_complexity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,18 @@ load_counts_BED_se(const std::string &input_file_name,

#ifdef HAVE_HTSLIB
std::size_t
load_counts_BAM_pe(const std::string &input_file_name,
// std::size_t &n_paired, std::size_t &n_mates,
load_counts_BAM_pe(const std::uint32_t n_threads,
const std::string &input_file_name,
std::vector<double> &counts_hist);

std::size_t
load_counts_BAM_se(const std::string &input_file_name,
load_counts_BAM_se(const std::uint32_t n_threads,
const std::string &input_file_name,
std::vector<double> &counts_hist);

std::size_t
load_coverage_counts_BAM(const std::string &input_file_name,
load_coverage_counts_BAM(const std::uint32_t n_threads,
const std::string &input_file_name,
const std::uint64_t seed, const std::size_t bin_size,
const std::size_t max_width,
std::vector<double> &coverage_hist);
Expand Down
15 changes: 5 additions & 10 deletions src/pop_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ pop_size_main(const int argc, const char *argv[]) {
#ifdef HAVE_HTSLIB
bool BAM_FORMAT_INPUT = false;
size_t MAX_SEGMENT_LENGTH = 5000;
uint32_t n_threads{1};
#endif

const string description =
Expand Down Expand Up @@ -106,6 +107,8 @@ pop_size_main(const int argc, const char *argv[]) {
"maximum segment length when merging "
"paired end bam reads",
false, MAX_SEGMENT_LENGTH);
opt_parse.add_opt("threads", 't', "number of threads for decompressing BAM",
false, n_threads);
#endif
opt_parse.add_opt("pe", 'P', "input is paired end read file", false,
PAIRED_END);
Expand Down Expand Up @@ -163,20 +166,12 @@ pop_size_main(const int argc, const char *argv[]) {
else if (BAM_FORMAT_INPUT && PAIRED_END) {
if (VERBOSE)
cerr << "PAIRED_END_BAM_INPUT" << endl;
// size_t n_paired = 0;
// size_t n_mates = 0;
n_reads = load_counts_BAM_pe(input_file_name,
// n_paired, n_mates,
counts_hist);
// if (VERBOSE) {
// cerr << "MERGED PAIRED END READS = " << n_paired << endl;
// cerr << "MATES PROCESSED = " << n_mates << endl;
// }
n_reads = load_counts_BAM_pe(n_threads, input_file_name, counts_hist);
}
else if (BAM_FORMAT_INPUT) {
if (VERBOSE)
cerr << "BAM_INPUT" << endl;
n_reads = load_counts_BAM_se(input_file_name, counts_hist);
n_reads = load_counts_BAM_se(n_threads, input_file_name, counts_hist);
}
#endif
else if (PAIRED_END) {
Expand Down

0 comments on commit 3245e62

Please sign in to comment.