-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmaf2phy.awk
57 lines (57 loc) · 1.21 KB
/
maf2phy.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# maf2phy.awk
# Author: Bernhard Haubold, [email protected]
# Contributors: Fabian Klötzl, [email protected]
# Date: June 19, 2014
# Last Modified: February 5, 2015
# Convert a MAF alignment (as written by mugsy) to sequential/interleaved
# PHYLIP.
#
# Usage: awk -f maf2phy.awk -v n=<numberOfTaxa> file.maf > file.phy
#
# Only alignment blocks whose "a" line carries the field mult=<n> are used.
# For every "s" line inside such a block, the aligned text (field 7) is
# appended to the sequence collected for the source name (field 2).
# The output is a header line "<taxa> <length>" followed by blocks of
# 60 columns; the first block is prefixed with the name, padded or
# truncated to 10 characters.
BEGIN {
	if (!n) {
		print "maf2phy.awk: Convert mutation annotation format (maf) as generated by the program mugsy to PHYLIP";
		print "Usage: awk -f maf2phy.awk -v n=<numberOfTaxa> file.maf > file.phy";
		# exit in BEGIN still triggers the END rule, so remember that we
		# are bailing out and let END return immediately.
		usageShown = 1;
		exit(-1);
	}
	numNames = 0;
	multTag = "mult=" n;
}
{
	if (/^a/) {
		# Compare whole fields so that n=3 does not also accept
		# mult=30, mult=31, ...
		inBlock = 0;
		for (f = 1; f <= NF; f++) {
			if ($f == multTag) {
				inBlock = 1;
				break;
			}
		}
	}
	if (inBlock && /^s/) {
		# Membership test instead of truthiness: an empty or "0"
		# fragment must not register the same name twice.
		if (!($2 in s)) {
			names[numNames++] = $2;
			s[$2] = "";
		}
		s[$2] = s[$2] $7;
	}
}
END {
	# Usage was already printed in BEGIN; produce no further output.
	if (usageShown)
		exit(-1);

	if (numNames == 0) {
		print "maf2phy.awk: no alignment blocks with " multTag " found";
		exit(-1);
	}

	# check equal length of sequences
	len = length(s[names[0]]);
	for (i = 1; i < numNames; i++) {
		seqLen = length(s[names[i]]);
		if (seqLen != len) {
			print "sequence length should be " len " but is in fact " seqLen;
			exit(-1);
		}
	}

	print numNames, len;

	# First block: name column plus the first l alignment columns.
	start = 1;
	l = 60;
	for (i = 0; i < numNames; i++) {
		name = names[i];
		printf("%-10.10s", name);
		print(" " substr(s[name], start, l));
	}
	printf("\n");
	start += l;

	# Remaining blocks. The comparison is inclusive: with start < len the
	# final column was lost whenever len was one more than a multiple of l.
	while (start <= len) {
		for (i = 0; i < numNames; i++) {
			name = names[i];
			print(substr(s[name], start, l));
		}
		printf("\n");
		start += l;
	}
}