-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfasta_subregion.pl
executable file
·85 lines (70 loc) · 2 KB
/
fasta_subregion.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl
#fasta_subregion.pl by Megan Supple 03 April 2012
#script to extract a specified region from entries in an aligned fasta file
#usage fasta_subregion.pl input.fasta start_coord stop_coord
use lib $ENV{PERL5LIB};
use strict;
use warnings;
use Getopt::Long;
#read in command line arguments
my ($infasta, $start1, $stop)=@ARGV;
my $usage = "Usage: fasta_subregion.pl <infile.fasta> <start> <stop>
options:
<infile.fasta> an input aligned fasta file
<start> first position to extract
<stop> last position to extract
";
die "$usage" unless (@ARGV == 3);
#determine base file name
my $filename=(split /\//, $infasta)[-1];
#open input file and output file
open(IN, $infasta)||die "can't open input fasta file. $!\n";
open(OUT, ">$filename$start1-$stop.fasta");
#need to shift the frame
my $start=$start1-1;
#declare variables
my $line;
my $seq;
my $header;
my $width=50;
#read in each line of the file
while($line=<IN>)
{
chomp($line);
if ($line =~ m/^>.*/)
{
#if there is sequence
if ($seq)
{
#print header to fasta file
print OUT "$header\n";
#print sequence of interest in lines of $width
my $length=$width;
for (my $pos=$start; $pos<$stop;$pos+=$width)
{
if ($pos+$width>$stop){$length=$stop-$pos;}
print OUT substr($seq, $pos, $length), "\n";
}
}
#set header
$header=$line;
#clear sequence
$seq="";
}
else
{
$seq .=$line
}
}
#print final entry
#print header to fasta file
print OUT "$header\n";
#print sequence of interest in lines of $width
my $end=$width;
for (my $pos=$start; $pos<$stop;$pos+=$width)
{
if ($pos+$width>$stop){$end=$stop-$pos;}
print OUT substr($seq, $pos, $end), "\n";
}
close IN;
close OUT;