forked from mothur/mothur
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blastalign.cpp
159 lines (121 loc) · 5.72 KB
/
blastalign.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
* blastalign.cpp
*
*
* Created by Pat Schloss on 12/16/08.
* Copyright 2008 Patrick D. Schloss. All rights reserved.
*
* This is a basic alignment method that gets the blast program to do the heavy lifting. In the future, we should
* probably incorporate NCBI's library so that we don't have to call on a user-supplied executable. This is a child
* of the Alignment class, which requires a constructor and align method.
*
*/
#include "alignment.hpp"
#include "blastalign.hpp"
//**************************************************************************************************/
BlastAlignment::BlastAlignment(float go, float ge, float ma, float mm) :
match(ma), // This is the score to award for two nucleotides matching (match >= 0)
mismatch(mm) // This is the penalty to assess for a mismatch (mismatch <= 0)
{
path = m->argv;
path = path.substr(0, (path.find_last_of('m')));
gapOpen = abs(go); // This is the penalty to assess for opening a gap (gapOpen >= 0)
gapExtend = abs(ge); // This is the penalty to assess for extending a gap (gapExtend >= 0)
int randNumber = rand();
candidateFileName = toString(randNumber) + ".candidate";
templateFileName = toString(randNumber) + ".template";
blastFileName = toString(randNumber) + ".pairwise";
}
//**************************************************************************************************/
BlastAlignment::~BlastAlignment(){ // The desctructor should clean up by removing the temporary
m->mothurRemove(candidateFileName); // files used to run bl2seq
m->mothurRemove(templateFileName);
m->mothurRemove(blastFileName);
}
//**************************************************************************************************/
void BlastAlignment::align(string seqA, string seqB){ //Use blastn to align the two sequences
ofstream candidateFile(candidateFileName.c_str()); // Write the sequence to be aligned to a temporary candidate seq file
candidateFile << ">candidate" << endl << seqA << endl;
candidateFile.close();
ofstream templateFile(templateFileName.c_str()); // Write the unaligned template sequence to a temporary candidate seq file
templateFile << ">template" << endl << seqB << endl;
templateFile.close();
// The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
// that we don't want to apply any kind of complexity filtering (-F F)
string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
system(blastCommand.c_str()); // Here we assume that "bl2seq" is in the users path or in the same folder as
// this executable
setPairwiseSeqs();
}
/**************************************************************************************************/
void BlastAlignment::setPairwiseSeqs(){ // This method call assigns the blast generated alignment
// to the pairwise entry in the Sequence class for the
// candidate and template Sequence objects
ifstream blastFile;
m->openInputFile(blastFileName, blastFile);
seqAaln = "";
seqBaln = "";
int candidateLength, templateLength;
char d;
string candidateName, templateName;
while((d=blastFile.get()) != '='){}
blastFile >> candidateName; // Get the candidate sequence name from flatfile
while((d=blastFile.get()) != '('){}
blastFile >> candidateLength; // Get the candidate sequence length from flatfile
while((d=blastFile.get())){
if(d == '>'){
blastFile >> templateName; // Get the template sequence name from flatfile
break;
}
else if(d == '*'){ // We go here if there is no significant match
seqAstart = 0;
seqBstart = 0;
seqAend = 0;
seqBend = 0;
pairwiseLength = 0;
// string dummy;
// while(dummy != "query:"){ m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy; }
// blastFile >> seqBend;
// m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine();
// for(int i=0;i<seqBend;i++){
// seqAaln += 'Z';
// seqBaln += 'X';
// }
// pairwiseLength = 0;
return;
}
}
while((d=blastFile.get()) != '='){}
blastFile >> templateLength; // Get the template sequence length from flatfile
while((d=blastFile.get()) != 'Q'){} // Suck up everything else until we get to the start of the alignment
int queryStart, sbjctStart, queryEnd, sbjctEnd;
string queryLabel, sbjctLabel, query, sbjct;
blastFile >> queryLabel; queryLabel = 'Q' + queryLabel;
while(queryLabel == "Query:"){
blastFile >> queryStart >> query >> queryEnd;
while((d=blastFile.get()) != 'S'){};
blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
if(seqAaln == ""){
seqAstart = queryStart;
seqBstart = sbjctStart;
}
seqAaln += query; // concatenate each line of the sequence to what we already have
seqBaln += sbjct; // for the query and template (subject) sequence
blastFile >> queryLabel;
}
seqAend = queryEnd;
seqBend = sbjctEnd;
pairwiseLength = seqAaln.length();
for(int i=1;i<seqBstart;i++){ // Since the alignments don't always start at (1, 1), we need to pad
seqAaln = 'Z' + seqAaln; // the sequences so that they start at the same point
seqBaln = 'X' + seqBaln;
}
for(int i=seqBend+1;i<=templateLength;i++){ // since the sequences don't necessarily end at the same point, we
seqAaln += 'Z'; // again need ot pad the sequences so that they extend to the length
seqBaln += 'X'; // of the template sequence
}
blastFile.close();
}
//**************************************************************************************************/