-
Notifications
You must be signed in to change notification settings - Fork 3
/
TaxID.py
35 lines (26 loc) · 896 Bytes
/
TaxID.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#################################################################### ############
##Accession to TaxId##
"""Takes a list of accessions and returnes the TaxId and name"""
#################################################################### ############
import sys
import time

from Bio import Entrez
# Contact address that Biopython sends along with every Entrez request.
# NOTE(review): the value below looks like a redacted placeholder -- replace it
# with a real address before running.
Entrez.email = '[email protected]'

# Path of the accession list.  Honour an ``in_file`` variable that has already
# been defined by whoever runs this code (e.g. an interactive session);
# otherwise fall back to the first command-line argument.
try:
    in_file
except NameError:
    if len(sys.argv) < 2:
        sys.exit('usage: TaxID.py ACCESSION_LIST_FILE')
    in_file = sys.argv[1]

# One accession per line.  Surrounding whitespace is stripped and blank lines
# are dropped so that no request is issued with an empty id.
with open(in_file) as file:
    id_list = [word.strip() for word in file]
id_list = [word for word in id_list if word]

# For every accession print one comma-separated row:
#   accession,title,TaxId,lineage
for i in id_list:
    # Summary of the nucleotide record: supplies the title and the taxon id.
    handle = Entrez.esummary(db="nucleotide", id=i)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()
    a = str(record[0]['Title'])
    b = str(record[0]['TaxId'])
    # Commas inside the title would break the comma-separated output.
    a = a.replace(',', '')

    # Taxonomy record for that taxon id: supplies the lineage string.
    handle = Entrez.efetch(db="taxonomy", id=b)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()
    c = record[0]['Lineage']

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(','.join([i, a, b, c]))

    # Pause between accessions -- presumably to stay within NCBI's request
    # rate limit (two requests are made per accession).
    time.sleep(1)