-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastaq.py
69 lines (64 loc) · 1.89 KB
/
fastaq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def open_for(fname):
    '''Select the callable used to open `fname`, based on its extension

    Parameters
    ----------
    fname : str, os.PathLike or file-like object
        Filename (or already opened file).
        If it has a ``read`` attribute, it is treated as an open file
        If it ends with .gz, gzip format is assumed
        If .bz2, then bzip2 format is assumed
        If .xz, then lzma format is assumed
        If .zst or .zstd, then zstandard format is assumed (this needs the
        third-party ``zstandard`` package, imported only in that case)
        Anything else is opened with the builtin ``open``

    Returns
    -------
    opener : callable
        To be called as ``opener(fname, mode)``. For file-like input, the
        returned callable ignores both arguments and returns `fname` itself.
    '''
    if hasattr(fname, 'read'):
        # Already an open file: return a stand-in opener so that callers can
        # use the same ``op(fname, mode)`` call for every kind of input.
        return lambda _, __: fname

    # Path objects have no ``endswith``; convert them to their string form
    # for the suffix checks. Other input types are left untouched so that
    # they behave exactly as before.
    import os
    name = os.fspath(fname) if isinstance(fname, os.PathLike) else fname

    # Compression modules are imported lazily so that only the one that is
    # actually needed has to be available.
    if name.endswith('.gz'):
        import gzip
        return gzip.open
    elif name.endswith('.bz2'):
        import bz2
        return bz2.open
    elif name.endswith('.xz'):
        import lzma
        return lzma.open
    elif name.endswith(('.zst', '.zstd')):
        import zstandard as zstd
        return zstd.open
    else:
        return open
def fasta_iter(fname, full_header=False):
    '''Iterate over the records of a (possibly compressed) FASTA file

    Parameters
    ----------
    fname : str or file-like object
        Filename (or already opened file), passed to `open_for` to select
        how it is opened:
        .gz is read as gzip
        .bz2 is read as bzip2
        .xz is read as lzma
        .zst/.zstd is read as zstandard
        An object with a ``read`` attribute is used directly (as it is used
        in a ``with`` statement, it is exited once iteration finishes)
    full_header : boolean (optional)
        If True, yields the whole header line (without the leading '>' and
        surrounding whitespace). Otherwise (the default), only its first
        whitespace-separated word

    Yields
    ------
    (h,seq): tuple of (str, str)
        Header and sequence; the sequence lines of a record are stripped and
        concatenated.
    '''
    opener = open_for(fname)
    current = None
    pieces = []
    with opener(fname, 'rt') as handle:
        for raw in handle:
            if raw[0] != '>':
                # Sequence line: collect it for the current record
                pieces.append(raw.strip())
                continue

            # A new header closes the previous record (if there was one)
            if current is not None:
                yield current, ''.join(pieces)

            text = raw[1:].strip()
            if not text:
                # Bare '>' line: the record has an empty header
                current = ''
            elif full_header:
                current = text
            else:
                current = text.split()[0]
            pieces = []

    # The last record is only complete once the input is exhausted
    if current is not None:
        yield current, ''.join(pieces)
def fastq_iter(ifile):
    '''Iterate over the records of a (possibly compressed) FASTQ file

    Records are read as consecutive groups of four lines.

    Parameters
    ----------
    ifile : str or file-like object
        Filename (or already opened file), passed to `open_for` to select
        how it is opened.

    Yields
    ------
    (name, seq, qual): tuple of (str, str, str)
        First, second and fourth line of each record, stripped of
        surrounding whitespace (the name line is otherwise returned as is).
        The third line of each record is read and discarded.

    Iteration stops at the end of the input or at the first name line that
    is empty after stripping.
    '''
    opener = open_for(ifile)
    with opener(ifile, 'rt') as handle:
        name = handle.readline().strip()
        while name:
            seq = handle.readline().strip()
            # Third line of the record is not used
            handle.readline()
            qual = handle.readline().strip()
            yield name, seq, qual
            name = handle.readline().strip()