-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfq2fa
executable file
·124 lines (111 loc) · 2.79 KB
/
fq2fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/perl
use strict;
use Getopt::Std;
my $usage = q/Usage:
fq2fa [-q <out.qual>] [-o <outfile.fasta>] reads.fastq[.gz]
Takes FASTQ input reads and outputs a simple FASTA conversion,
optionally writing a .qual file (if -q option is given)
The input can be 'stdin' or '-' to specify streaming at stdin.
/;
umask 0002;
getopts('q:o:P:') || die($usage."\n");
my $outfile=$Getopt::Std::opt_o;
my $wqual=$Getopt::Std::opt_q;
$outfile='' if $outfile eq '-';
my $PHRED_BASE=$Getopt::Std::opt_P || 32;
if ($outfile) {
open(OUTF, '>'.$outfile) || die("Error creating output file $outfile\n");
select(OUTF);
}
die($usage."Error: no input fastq given ! (use - for stdin)\n") unless @ARGV;
foreach my $f (@ARGV) {
next if ($f eq '-' || lc($f) eq 'stdin');
die("Error: file $f not found!\n") unless -f $f;
}
if ($wqual) {
open(QUAL, '>'.$wqual) || die("Error creating output file $wqual\n");
}
foreach my $f (@ARGV) {
my ($fh, $fz);
if ($f eq '-' || lc($f) eq 'stdin') {
$fh = *STDIN;
$f='-';
}
else {
if ($f=~m/\.bzi?p?2$/i) {
$fz='bzip2';
}
elsif ($f=~m/.g?zi?p?$/i) {
$fz='gzip';
}
if ($fz) {
open($fh, $fz." -cd '".$f."'|") ||
die("Error creating decompression pipe: $fz -cd '$f' !\n");
}
else {
open($fh, $f) || die("Error opening file $f ! \n");
}
}
while (1) {
my ($rname, $rseq, $rquals)=getFastq($fh);
last if (!$rname);
#write FASTA record
print ">$rname\n";
my $slen=length($rseq);
if ($slen!=length($rquals)) {
print STDERR "WARNING: qv length differs from sequence length for $rname!\n";
}
my @lines=unpack("A70" x (int(($slen-1)/70)+1),$rseq);
print join("\n",@lines)."\n";
if ($wqual) {
my @qls=unpack("A70" x (int(($slen-1)/70)+1),$rquals);
print QUAL ">$rname\n";
foreach my $ql (@qls) {
my $qlen=length($ql);
foreach my $i (0..$qlen-1) {
my $qv=ord(substr($ql, $i, 1))-$PHRED_BASE;
$qv=' '.$qv if $i;
print QUAL $qv;
}
print QUAL "\n";
}
}
}
close($fh) unless $f eq '-';
} #for each input file
if ($outfile) {
select(STDOUT);
close(OUTF);
}
close(QUAL) if ($wqual);
#-----------------------
sub printFasta {
my $slen=length($_[0]);
my @lines=unpack("A70" x (int(($slen-1)/70)+1),$_[0]);
print join("\n",@lines)."\n";
}
sub getFastq {
my $fh=$_[0]; # file handle
#parses next FASTQ record
#returns ($readname, $readseq, $readquals)
my ($rname, $rseq, $rquals);
while (<$fh>) {
($rname)=(m/^@(\S+)/);
last if $rname;
}
if ($_) {
while (<$fh>) {
last if m/^\+/;
chomp;
$rseq.=$_;
}
if ($_) {
while (<$fh>) {
chomp;
$rquals.=$_;
last if (length($rseq)<=length($rquals));
}
}
}
return ($rname, $rseq, $rquals);
}