-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvcf2callformat.py
61 lines (51 loc) · 1.51 KB
/
vcf2callformat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 11:02:12 2018
@author: YudongCai
@Email: [email protected]
"""
import click
import pysam
def toIUPAC(nuclears):
"""
nuclears: list
['A', 'C'] etc.
https://en.wikipedia.org/wiki/Nucleic_acid_notation
"""
nuc2code = {'A': 1,
'C': 2,
'G': 4,
'T': 8,
'R': 5,
'Y': 10,
'W': 9,
'S': 6,
'M': 3,
'K': 12,
'B': 14,
'H': 11,
'D': 13,
'V': 7,
'N': 0,
None: 0}
code2nuc = {v: k for k,v in nuc2code.items()}
code2nuc[15] = 'N'
code2nuc[0] = 'N'
return code2nuc[sum([nuc2code[x] for x in set(nuclears)])]
@click.command()
@click.option('--vcffile', help='.vcf/.vcf.gz/.bcf')
@click.option('--outfile', help='outfile name')
def main(vcffile, outfile):
var = pysam.VariantFile(vcffile)
samples = list(var.header.samples)
newheader = ['scaffold', 'position'] + samples
with open(outfile, 'w') as f:
f.write('\t'.join(newheader) + '\n')
for record in var:
gts = [x.alleles for x in record.samples.values()]
nucs = []
for gt in gts:
nucs.append(toIUPAC(gt))
f.write(f'{record.chrom}\t{record.pos}\t' + '\t'.join(nucs) + '\n')
if __name__ == '__main__':
main()