-
Notifications
You must be signed in to change notification settings - Fork 1
/
fq_translate
executable file
·128 lines (90 loc) · 3.24 KB
/
fq_translate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/bin/env python2.7
#
#
# Just a cute little python script to switch between different fastq formats (sanger, solexa, illumina 1.5 and 1.8)
# Slow, but does the job. Better than emboss (it actually works).
#
# Jordan Texier, Jan 2014
# DittmerLab, UNC
def stringOffset(string, n):
    """Return a copy of 'string' with every character's ASCII value shifted by 'n'.

    Args:
        string: The text to shift; each character is handled independently.
        n: Signed integer added to the ordinal of every character.

    Returns:
        A new string of the same length as 'string' ("" for empty input).

    Raises:
        ValueError: If a shifted ordinal falls outside the range accepted
            by chr().
    """
    # One pass: shift each ordinal and join the resulting characters.
    return "".join(chr(ord(char) + n) for char in string)
# ------------------------------ #
# ============ MAIN ============ #
# ------------------------------ #
if __name__ == "__main__":
    import sys
    import argparse

    # ASCII value that encodes quality score 0 for each accepted format name.
    # NOTE(review): 'solexa' is handled as a plain +64 encoding, exactly like
    # 'illumina-1.5'; only the ASCII base is shifted, scores are not rescaled.
    # Confirm this is acceptable for genuine Solexa data.
    ASCII_BASE = {
        '18': 33, 'illumina-1.8': 33, 'sanger': 33,
        '15': 64, 'illumina-1.5': 64, 'solexa': 64,
    }
    ACCEPTED_MSG = "Acceptable arguments:\n illumina-1.8,\n illumina-1.5,\n sanger, or\n solexa."

    # Create Parser, required and optional args. Then parse:
    parser = argparse.ArgumentParser(
        description='Changes the fastq format of a file',
        prog='fq_translate',
        usage='%(prog)s -i input_file -if input_format -of output_format [-o output_file]',
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=50),
        add_help=False)

    required = parser.add_argument_group('Required Arguments')
    required.add_argument('-i', '--input', default='', help='input file')
    required.add_argument('-if', '--in-format', default='', type=str, help='Input format ("sanger", "solexa", "illumina-1.5", "illumina-1.8")')
    required.add_argument('-of', '--out-format', default='', type=str, help='Output format (same as -if)')

    optional = parser.add_argument_group('Optional Arguments')
    optional.add_argument('-h', '--help', action='help')
    optional.add_argument('-o', '--output', default='', help='output file, if empty print to console')

    args = vars(parser.parse_args())

    # Verify required args:
    if args["input"] == '':
        parser.print_help()
        sys.exit()

    # Find ASCII Offset:
    in_format = args["in_format"]
    out_format = args["out_format"]
    if in_format == '' and out_format == '':
        # Default offset when no format is given: 64 - 33, i.e. a base-33
        # input converted to a base-64 output.
        offset = 31
    else:
        if in_format == '' or out_format == '':
            print("Both input and output format must be defined:")
            parser.print_help()
            sys.exit()

        if in_format not in ASCII_BASE:
            print("Input format not recognized:\n")
            print(in_format)
            print(ACCEPTED_MSG)
            parser.print_help()
            sys.exit()

        if out_format not in ASCII_BASE:
            print("Output format not recognized\n")
            print(ACCEPTED_MSG)
            parser.print_help()
            sys.exit()

        offset = ASCII_BASE[out_format] - ASCII_BASE[in_format]

    # Begin translation:
    with open(args['input']) as infile:
        to_stdout = args['output'] == ''
        # The output file is opened in append mode, so an existing file is
        # extended rather than overwritten.
        out = sys.stdout if to_stdout else open(args['output'], 'a')
        try:
            # A FASTQ record is four lines long; the fourth one (index 3
            # modulo 4) carries the quality string, the only line rewritten.
            for i, line in enumerate(infile):
                if i % 4 == 3:
                    quality = line.rstrip('\r\n')
                    # Keep the original line ending; a final line lacking one
                    # still gets '\n' so the output is newline-terminated.
                    ending = line[len(quality):] or '\n'
                    out.write(stringOffset(quality, offset) + ending)
                else:
                    out.write(line)
        finally:
            # Never close the console stream, only a file we opened ourselves.
            if not to_stdout:
                out.close()