Skip to content

Commit

Permalink
Update to support TB-Profiler 6
Browse files Browse the repository at this point in the history
  • Loading branch information
pvanheus committed May 31, 2024
1 parent 0d39942 commit e7cf406
Show file tree
Hide file tree
Showing 10 changed files with 294,192 additions and 77 deletions.
294,051 changes: 294,050 additions & 1 deletion galaxy/test-data/rif_resistant.results.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="tbvcfreport",
version="0.1.8",
version="0.2.0",
url="https://github.com/COMBAT-TB/tbvcfreport",
author="SANBI",
author_email="[email protected]",
Expand All @@ -19,7 +19,7 @@
package_data={
"tbvcfreport": ["templates/*.html"],
},
python_requires=">=3.6",
python_requires=">=3.10",
install_requires=[
"click",
"neo4j",
Expand Down
2 changes: 1 addition & 1 deletion tbvcfreport/dbconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,5 @@ def _get_gene_table_tx(tx, genes=None):

def query_by_gene_list(self, gene_list):
with self.driver.session() as session:
values = session.read_transaction(self._get_gene_table_tx, gene_list)
values = session.execute_read(self._get_gene_table_tx, gene_list)
return values
30 changes: 15 additions & 15 deletions tbvcfreport/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,17 @@ def generate_txt_report(file_name, data):
"""
v_txt_output = "{file_name}_variants_report.txt".format(file_name=file_name)

lineage_header = ["Lineage", "Family", "Spoligotype", "RD", "Agreement"]
lineage_data = []
for lineage in data["lineage"]:
lineage_header = []
if "lineage" in data and "lineage" in data["lineage"]:
lineage_header = ["Lineage", "Support", "Family", "RD"]
lineage_data = []
lineage_data.append(
[
lineage["lin"],
lineage["family"],
lineage["spoligotype"],
lineage["rd"],
str(lineage["frac"]),
]
[ str(el) for el in data["lineage"]["lineage"] ]
)
if "sub_lineage" in data["lineage"]:
lineage_data.append(
[ str(el) for el in data["lineage"]["sub_lineage"] ]
)

variants_header = [
"CHR",
Expand Down Expand Up @@ -112,9 +111,10 @@ def generate_txt_report(file_name, data):
with open(v_txt_output, "w") as _variants_report:
_variants_report.write("#{} Report\n".format(file_name.capitalize()))

_variants_report.write("#{}\n".format("\t".join(lineage_header)))
for l_data in lineage_data:
_variants_report.write("{}\n".format("\t".join(l_data)))
if lineage_header:
_variants_report.write("#{}\n".format("\t".join(lineage_header)))
for l_data in lineage_data:
_variants_report.write("{}\n".format("\t".join(l_data)))

_variants_report.write("\n#{}\n".format("\t".join(variants_header)))
for variant in variants:
Expand All @@ -131,7 +131,7 @@ def generate_txt_report(file_name, data):
for l_data in lineage_data:
dr_report.write("{}\n".format("\t".join(l_data)))

dr_report.write("\n#TBProfiler Drug Resistance Report\n")
dr_report.write("\n#TB-Profiler Drug Resistance Report\n")
dr_report.write(
"\n#{}\n".format(
"\t".join(["Drug", "Resistance", "Supporting Mutations"])
Expand All @@ -152,5 +152,5 @@ def generate_txt_report(file_name, data):
"{}\n".format("\t".join([drug, resistant, ",".join(mutations)]))
)
dr_report.write(
"\n#Drug resistance predictions are for research purposes only and are produced by the TBProfiler software.\n"
"\n#Drug resistance predictions are for research purposes only and are produced by the TB-Profiler software.\n"
)
77 changes: 60 additions & 17 deletions tbvcfreport/tbprofiler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Interface to TBProfiler report."""
from typing import TextIO

import json

Expand All @@ -11,17 +12,20 @@
"pyrazinamide",
"streptomycin",
"ethionamide",
"fluoroquinolones",
"fluoroquinolones", # this is present in older versions of tbdb
"amikacin",
"capreomycin",
"kanamycin",
"para-aminosalicylic_acid",
"cycloserine",
"delaminid",
"delamanid",
"linezolid",
"clofazimine",
"bedaquiline",
"aminoglycosides"
"aminoglycosides",  # this is present in newer versions of tbdb (comma required: without it Python concatenates this literal with the next one)
"levofloxacin",
"moxifloxacin",
"pretomanid"
]

drug_names = {
Expand All @@ -31,29 +35,32 @@
"pyrazinamide": "Pyrazinamide",
"streptomycin": "Streptomycin",
"ethionamide": "Ethionamide",
"fluoroquinolones": "Fluoroquinolones",
"fluoroquinolones": "Fluoroquinolones", # this is present in older versions of tbdb
"amikacin": "Amikacin",
"capreomycin": "Capreomycin",
"kanamycin": "Kanamycin",
"para-aminosalicylic_acid": "Para-aminosalicylic acid",
"linezolid": "Linezolid",
"cycloserine": "Cycloserine",
"delaminid": "Delaminid",
"delamanid": "Delamanid",
"clofazimine": "Clofazimine",
"bedaquiline": "Bedaquiline",
"aminoglycosides": "Aminoglycosides"
"aminoglycosides": "Aminoglycosides", # this is present in newer versions of tbdb
"levofloxacin": "Levofloxacin",
"moxifloxacin": "Moxifloxacin",
"pretomanid": "Pretomanid"
}


class TBProfilerReport:
"""Process tbprofiler_report."""

def __init__(self, json_file, _variants):
def __init__(self, json_file: TextIO, _variants: list[list]):
"""initializer."""
self.json_file = json_file
self._variants = _variants

def get_data(self):
def get_data(self) -> tuple[list[str], dict, int, str, dict[str, str]]:
"""Get TBProfiler data."""
drug_resistance_list, call_positions = [], set()
rrs_start, rrs_end = 1471846, 1473382
Expand All @@ -64,10 +71,44 @@ def get_data(self):
rrs_variant_count += 1
call_positions.add(POS)
tbprofiler_data = json.load(self.json_file)
tbprofiler_version = tbprofiler_data["tbprofiler_version"]
tbprofiler_major_version = int(tbprofiler_version.split(".")[0])
if "tbprofiler_version" in tbprofiler_data:
tbprofiler_version = tbprofiler_data["tbprofiler_version"]
else:
tbprofiler_version = tbprofiler_data["pipeline"]["software_version"]
tbprofiler_major_version= int(tbprofiler_version.split(".")[0])

if tbprofiler_major_version < 3:
tbdb_version = {}
for key in ("name", "commit", "author", "date"):
tbdb_version[key] = "Unknown"
elif tbprofiler_major_version < 4:
tbdb_version = tbprofiler_data["db_version"]
else:
tbdb_version = tbprofiler_data["pipeline"]["db_version"] # dict with keys: name, commit, author, date
dr_data = tbprofiler_data["dr_variants"]
lineage = tbprofiler_data["lineage"]

if tbprofiler_major_version < 4:
main_lineage_key = "main_lin"
sublineage_key = "sublin"
lineage_key = "lin"
fraction_key = "frac"
else:
main_lineage_key = "main_lineage"
sublineage_key = "sub_lineage"
lineage_key = "lineage"
fraction_key = "fraction"
lineage_details = {}
for lineage in tbprofiler_data["lineage"]:
lineage_details[lineage[lineage_key]] = (lineage[lineage_key],
lineage[fraction_key],
lineage["family"],
"" if lineage["rd"] is None else lineage["rd"])
lineage_info = {
"lineage": lineage_details[tbprofiler_data[main_lineage_key]]
}
if sublineage_key in tbprofiler_data:
lineage_info["sub_lineage"] = lineage_details[tbprofiler_data[sublineage_key]]

_drug_resistance = self._drug_resistance(
dr_data, call_positions, tbprofiler_major_version
)
Expand All @@ -82,15 +123,17 @@ def get_data(self):
"resistant": False,
}
)
return (drug_resistance_list, lineage, rrs_variant_count)
return (drug_resistance_list, lineage_info, rrs_variant_count, tbprofiler_version, tbdb_version)

@staticmethod
def _drug_resistance(_dr_data, call_positions, tbprofiler_version):
def _drug_resistance(_dr_data: dict, call_positions: list[int], tbprofiler_version: int) -> dict:
dr_calls_seen, drug_resistance = set(), {}
position_key = "genome_pos" if tbprofiler_version < 4 else "pos"
gene_name_key = "gene" if tbprofiler_version < 4 else "gene_name"
for record in _dr_data:
if record["genome_pos"] in dr_calls_seen:
if record[position_key] in dr_calls_seen:
continue
dr_calls_seen.add(record["genome_pos"])
dr_calls_seen.add(record[position_key])
if tbprofiler_version < 3:
variant_drug_names = [record["drug"]]
else:
Expand All @@ -109,10 +152,10 @@ def _drug_resistance(_dr_data, call_positions, tbprofiler_version):
"variants": [],
},
)
if record["genome_pos"] not in call_positions:
if record[position_key] not in call_positions:
dr_record["snippy_agreement"] = False
dr_record["variants"].append(
(record["gene"], record["change"], round(record["freq"], 2))
(record[gene_name_key], record["change"], round(record["freq"], 2)) # round to 2 digits
)
drug_resistance[drug_name] = dr_record
return drug_resistance
14 changes: 12 additions & 2 deletions tbvcfreport/tbvcfreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@ def parse_and_generate_report(vcf_file, filter_udi, db_url, json_report=None):
lineage = {}
vcf_base_name = os.path.basename(vcf_file)
(vcf_file_name, ext) = os.path.splitext(vcf_base_name)
click.secho(f"Processing {vcf_base_name}...\n", fg="green")
if json_report is not None:
dr_reporting = "with DR reporting"
else:
dr_reporting = ""

click.secho(f"Processing {vcf_base_name} {dr_reporting}... \n", fg="green")

vcf_proc = VCFProc(vcf_file=vcf_file, db_url=db_url, filter_udi=filter_udi)
variants = vcf_proc.parse()
Expand All @@ -45,12 +50,17 @@ def parse_and_generate_report(vcf_file, filter_udi, db_url, json_report=None):
raise TypeError(f"Expected a json file. Found {json_base_name}!")

tbprofiler = TBProfilerReport(json_report, variants)
(drug_resistance_list, lineage, rrs_variant_count) = tbprofiler.get_data()
(drug_resistance_list, lineage, rrs_variant_count, tbprofiler_version, tbdb_version) = tbprofiler.get_data()
else:
tbprofiler_version = "N/A"
tbdb_version = "N/A"
data = {
"variants": variants,
"lineage": lineage,
"dr_data": drug_resistance_list,
"mixed_infection": rrs_variant_count > 1,
"tbprofiler_version": tbprofiler_version,
"tbdb_version": tbdb_version,
}

generate_report(file_name=vcf_file_name, data=data)
Expand Down
28 changes: 14 additions & 14 deletions tbvcfreport/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,29 @@ <h2 class="mt-5">
</div>
{% if 'lineage' in data and data['lineage']['lineage'] %}
<div class="row">
<table class="table table-striped table-hover">
<table class="table table-striped table-hover table-responsive-md">
<caption>Lineage Info</caption>
<thead class="thead-light">
<tr>
<th scope="col">Species</th>
<th scope="col">Lineage</th>
<th scope="col">Sub-lineage</th>
<th scope="col">Agreement with SNPs from this lineage</th>
<th scope="col">Lineage support fraction</th>
<th scope="col">Family</th>
<th scope="col">RD</th>
</tr>
</thead>
<tbody>
<tr>
<td>{{ data['lineage']['species'] }}</td>
<td>{{ data['lineage']['lineage'] }}</td>
<td>
{% if data['lineage']['sublineage'] %}
{{ data['lineage']['sublineage'] }}
{% else %}
Not Available
{% endif %}
</td>
<td>{{ data['lineage']['percent_agreement'] }}%</td>
{% for element in data['lineage']['lineage'] %}
<td>{{ element }}</td>
{% endfor %}
</tr>
{% if 'sub_lineage' in data['lineage'] and data['lineage']['sub_lineage'] %}
<tr>
{% for element in data['lineage']['sub_lineage'] %}
<td>{{ element }}</td>
{% endfor %}
</tr>
{% endif %}
</tbody>
</table>
</div>
Expand Down
57 changes: 32 additions & 25 deletions tbvcfreport/templates/drug_resistance_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,44 @@
<div class="row">
<h3 class="mb-2">
<a class="text-body" href="https://github.com/jodyphelan/TBProfiler" target="_blank"
rel="noreferrer noopener">TBProfiler</a>
rel="noreferrer noopener">TB-Profiler</a>
Drug Resistance Report
</h3>
</div>
<div class="row">
<table id="drug_resistance" class="table table-hover table-bordered table-striped table-sm" cellspacing="0">
<caption class="mb-3 text-info">
<strong>Disclaimer</strong>:
Drug resistance predictions are for <strong>Research Purposes Only</strong> and are produced by
<a class="text-info" href="https://github.com/jodyphelan/TBProfiler" target="_blank"
rel="noreferrer noopener">TBProfiler</a>.
</caption>
<thead class="thead-light">
<div class="col-md-12">
    {# Versions of TB-Profiler and of its resistance database; tbdb_version is a dict with keys name, commit, author, date #}
    <p>TB-Profiler version {{ data["tbprofiler_version"] }} with
    database {{ data["tbdb_version"]["name"] }}:{{ data["tbdb_version"]["commit"] }} ({{ data["tbdb_version"]["date"] }})</p>
</div>
</div>{# closes the version-info row so the results table gets its own sibling row #}
<div class="row">
<div class="col-md-12">
<table id="drug_resistance" class="table table-hover table-bordered table-striped table-responsive-md" cellspacing="0">
<caption class="mb-3 text-info">
<strong>Disclaimer</strong>:
Drug resistance predictions are for <strong>Research Purposes Only</strong> and are produced by
<a class="text-info" href="https://github.com/jodyphelan/TBProfiler" target="_blank"
rel="noreferrer noopener">TB-Profiler</a>.
</caption>
<thead class="thead-light">
<tr>
<th scope="col">Drug</th>
<th scope="col">Resistance</th>
<th scope="col">Supporting Mutations</th>
</tr>
</thead>
{% for entry in data['dr_data'] %}
<tr>
<th scope="col">Drug</th>
<th scope="col">Resistance</th>
<th scope="col">Supporting Mutations</th>
<td>{{ entry['drug_human_name'] }}</td>
<td>{% if entry['resistant'] %}R{% else %}S{% endif %}</td>
<td>{% for mutation in entry['variants'] %}
<a target="_blank" href="https://explorer.sanbi.ac.za/gene?q={{ mutation[0] }}"
rel="noreferrer noopener">{{ mutation[0] }}</a>
({{ mutation[2] }} {{ mutation[1] }})
{% endfor %}</td>
</tr>
</thead>
{% for entry in data['dr_data'] %}
<tr>
<td>{{ entry['drug_human_name'] }}</td>
<td>{% if entry['resistant'] %}R{% else %}S{% endif %}</td>
<td>{% for mutation in entry['variants'] %}
<a target="_blank" href="https://explorer.sanbi.ac.za/gene?q={{ mutation[0] }}"
rel="noreferrer noopener">{{ mutation[0] }}</a>
({{ mutation[2] }} {{ mutation[1] }})
{% endfor %}</td>
</tr>
{% endfor %}
</table>
{% endfor %}
</table>
</div>
</div>
{% endif %}
{% endblock %}
5 changes: 5 additions & 0 deletions tbvcfreport/vcfproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ def __init__(self, vcf_file, db_url, filter_udi=None):
self.db = CombatTbDb(db_url, "", "")
self.filter_udi = filter_udi


def __del__(self):
    """Close the database connection when this object is finalized.

    ``__del__`` also runs for instances whose ``__init__`` raised before
    ``self.db`` was assigned, so the attribute is looked up defensively
    instead of assuming it exists.
    """
    db = getattr(self, "db", None)
    if db is not None:
        db.close()


def parse(self):
"""Parse VCF."""
variants, rv_tags = [], []
Expand Down
Loading

0 comments on commit e7cf406

Please sign in to comment.