-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpaf2stable_main.cpp
130 lines (105 loc) · 3.6 KB
/
paf2stable_main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <unistd.h>
#include <getopt.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include "paf2stable.hpp"
#include "pafcoverage.hpp"
//#define debug
using namespace std;
void help(char** argv) {
cerr << "usage: " << argv[0] << " [options] <paf>" << endl
<< "Replace every target sequence with a query sequence (preserving all transitive mappings between queries)" << endl
<< endl;
}
int main(int argc, char** argv) {
int64_t min_length = 1;
int64_t padding = 100;
bool validate = false;
int c;
optind = 1;
while (true) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};
int option_index = 0;
c = getopt_long (argc, argv, "h",
long_options, &option_index);
// Detect the end of the options.
if (c == -1)
break;
switch (c)
{
case 'h':
case '?':
/* getopt_long already printed an error message. */
help(argv);
exit(1);
break;
default:
abort ();
}
}
if (argc <= 1) {
help(argv);
return 1;
}
// Parse the positional arguments
if (optind >= argc ) {
cerr << "[paf2stable] error: too few arguments" << endl;
help(argv);
return 1;
}
string in_paf_path = argv[optind++];
if (optind < argc - 1) {
cerr << "[paf2stable] error: too many arguments" << endl;
help(argv);
return 1;
}
ifstream paf_file(in_paf_path);
if (!paf_file) {
cerr << "[paf2stable] error: Unable to open input PAF file, \"" << in_paf_path << "\"" << endl;
return 1;
}
// first pass: build the mapping table from target to query intervals
unordered_map<string, int64_t> query_name_to_id;
vector<pair<string, int64_t>> query_id_to_info;
unordered_map<string, pair<int64_t, vector<StableInterval>>> target_to_intervals;
cerr << "[paf2stable]: Loading PAF interval mapping" << endl;
size_t paf_line_count = 0;
string buffer;
while (getline(paf_file, buffer)) {
// split into array of tokens
vector<string> toks;
split_delims(buffer, "\t\n", toks);
if (toks.size() < 12) {
throw runtime_error("too few tokens in PAF line: " + buffer);
}
update_stable_mapping_info(toks, query_name_to_id, query_id_to_info, target_to_intervals);
++paf_line_count;
}
size_t total_intervals = 0;
for (const auto& kv : target_to_intervals) {
total_intervals += kv.second.second.size();
}
cerr << "[paf2stable]: Scanned " << total_intervals << " intervals from " << paf_line_count << " PAF lines"
<< " for " << target_to_intervals.size() << " different target contigs" << endl;
cerr << "[paf2stable]: Converting PAF intervals" << endl;
// make the interval tree from the intervals
// (UPDATE: no longer making tree - rather just sort the intervals)
create_interval_trees(target_to_intervals);
// second pass: output the paf with all targets replaced by queries
paf_file.close();
paf_file.open(in_paf_path);
assert(paf_file);
size_t lines_written = 0;
while (getline(paf_file, buffer)) {
// split into array of tokens
vector<string> toks;
split_delims(buffer, "\t\n", toks);
lines_written += paf_to_stable(toks, query_id_to_info, target_to_intervals);
}
cerr << "[paf2stable]: Wrote " << lines_written << " PAF lines" << endl;
return 0;
}