Skip to content

Commit

Permalink
Support MetaPhlAn2
Browse files Browse the repository at this point in the history
  • Loading branch information
ivartb committed Feb 17, 2024
1 parent 04169fe commit 446aad7
Showing 1 changed file with 70 additions and 0 deletions.
70 changes: 70 additions & 0 deletions bin/metafx-scripts/metaphlan_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# Utility to transform a MetaPhlAn class report into a CSV file for Bandage visualization
# -*- coding: UTF-8 -*-

import sys
import getopt
from ete3 import NCBITaxa

HELP_STRING = 'Please add all mandatory parameters: --class-file and --res-file'

# 1-based taxonomy column (counted after the "Node name" column) for every
# NCBI rank that gets its own column in the output CSV.
RANK_COLUMNS = {
    'superkingdom': 1,
    'phylum': 2,
    'class': 3,
    'order': 4,
    'family': 5,
    'genus': 6,
    'species': 7,
    'serotype': 8,
    'strain': 9,
}

CSV_HEADER = "Node name,Superkingdom,Phylum,Class,Order,Family,Genus,Species,Serotype,Strains\n"


def strip_quotes(value):
    """Return *value* without one leading and one trailing quote character.

    Both ``'`` and ``"`` are recognised, and each end is handled
    independently (the quotes do not have to match). Empty strings are
    returned unchanged instead of raising ``IndexError``.
    """
    quotes = ("'", '"')
    if value[:1] in quotes:
        value = value[1:]
    if value[-1:] in quotes:
        value = value[:-1]
    return value


def parse_options(argv):
    """Parse command-line options and return ``(input_file, res_file)``.

    Recognised options are ``-h``, ``--class-file=PATH`` and
    ``--res-file=PATH``. Prints the help string and exits with status 2 on
    an unknown option or when a mandatory path is missing; ``-h`` prints
    the help string and exits with status 0.
    """
    try:
        opts, _ = getopt.getopt(argv, "h", ["class-file=", "res-file="])
    except getopt.GetoptError:
        print(HELP_STRING)
        sys.exit(2)

    input_file = ''
    res_file = ''
    for opt, arg in opts:
        if opt == "-h":
            print(HELP_STRING)
            sys.exit()
        elif opt == "--class-file":
            input_file = strip_quotes(arg)
        elif opt == "--res-file":
            res_file = strip_quotes(arg)

    # Both paths are mandatory; fail early with the usage hint rather than
    # with an obscure error from open('').
    if not input_file or not res_file:
        print(HELP_STRING)
        sys.exit(2)
    return input_file, res_file


def parse_tax_ids(lines):
    """Extract ``(node, tax_id)`` pairs from the lines of a class file.

    Each data line is tab-separated: the first field is the node name and
    the second field starts with the tax id, optionally followed by
    ``__`` and further text that is discarded. Lines starting with ``#``
    and blank lines are skipped.

    Raises:
        ValueError: if a data line has fewer than two tab-separated fields.
    """
    pairs = []
    for line_no, raw in enumerate(lines, start=1):
        line = raw.rstrip("\r\n")
        if not line or line.startswith("#"):
            continue
        fields = line.split('\t')
        if len(fields) < 2:
            raise ValueError(
                "line %d: expected at least two tab-separated fields, got %r"
                % (line_no, line)
            )
        pairs.append((fields[0], fields[1].split('__')[0]))
    return pairs


def format_row(node, lineage, names, rank_by_taxid):
    """Build one CSV row (including the trailing newline) for *node*.

    Args:
        node: node name written into the first column.
        lineage: tax ids ordered from the root towards the node's taxon.
        names: mapping of tax id to scientific name.
        rank_by_taxid: mapping of tax id to rank name.

    Only ranks listed in ``RANK_COLUMNS`` are written; columns for ranks
    absent from the lineage are left empty so names stay aligned with the
    header.
    """
    parts = [node + ","]
    prev_col = 0
    for taxid in lineage:
        col = RANK_COLUMNS.get(rank_by_taxid.get(taxid))
        if col is None:
            continue
        # Pad the columns skipped between the previous rank and this one.
        parts.append("," * (col - prev_col - 1))
        parts.append(names[taxid] + ",")
        prev_col = col
    parts.append("\n")
    return "".join(parts)


def main(argv):
    """Convert the class file named in *argv* into a taxonomy CSV file."""
    input_file, res_file = parse_options(argv)

    # Parse the whole input before touching the output so that a bad input
    # file does not leave a truncated result behind.
    with open(input_file, 'r') as class_file:
        tax_ids = parse_tax_ids(class_file)

    ncbi = NCBITaxa()
    with open(res_file, 'w') as out:
        out.write(CSV_HEADER)
        for node, tax in tax_ids:
            lineage = ncbi.get_lineage(tax)
            if not lineage:
                # NOTE(review): get_lineage appears to return a falsy value
                # for an empty tax id; emit the node with blank taxonomy.
                out.write(format_row(node, [], {}, {}))
                continue
            names = ncbi.get_taxid_translator(lineage)
            # One rank lookup for the whole lineage instead of two per taxid.
            rank_by_taxid = ncbi.get_rank(lineage)
            out.write(format_row(node, lineage, names, rank_by_taxid))


if __name__ == "__main__":
    main(sys.argv[1:])

0 comments on commit 446aad7

Please sign in to comment.