Merge pull request #43 from broadinstitute/dp-ksummary
docker build fix and filter_taxids_to_focal_hits fix
dpark01 authored Feb 16, 2024
2 parents bb675f3 + 30bc8bd commit 0f02bb7
Showing 4 changed files with 14 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -7,7 +7,7 @@ ENV VIRAL_CLASSIFY_PATH=$INSTALL_PATH/viral-classify \
 
 COPY requirements-conda.txt requirements-conda-env2.txt $VIRAL_CLASSIFY_PATH/
 # install most dependencies to the main environment
-RUN $VIRAL_NGS_PATH/docker/install-conda-dependencies.sh $VIRAL_CLASSIFY_PATH/requirements-conda.txt
+RUN $VIRAL_NGS_PATH/docker/install-conda-dependencies.sh $VIRAL_CLASSIFY_PATH/requirements-conda.txt $VIRAL_NGS_PATH/requirements-conda.txt
 
 # install packages with dependency incompatibilities to the second environment
 RUN CONDA_PREFIX="$MINICONDA_PATH/envs/env2"; \
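
Note: the Docker build fix passes the upstream viral-ngs requirements-conda.txt to install-conda-dependencies.sh alongside viral-classify's own requirements, so both dependency sets are resolved and installed into the main conda environment in a single invocation; this appears to be the "docker build fix" named in the commit message.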
10 changes: 4 additions & 6 deletions metagenomics.py
@@ -728,18 +728,16 @@ def filter_taxids_to_focal_hits(taxids_tsv, focal_report_tsv, taxdb_dir, min_rea
     with util.file.open_or_gzopen(focal_report_tsv, "rt") as inf:
         for row in csv.DictReader(inf, delimiter='\t'):
             if int(row['reads_excl_children']) >= min_read_count:
-                hits.add(row['taxon_id'])
+                hits.add(int(row['taxon_id']))
 
     # filter taxids_tsv -> output_tsv
     with util.file.open_or_gzopen(taxids_tsv, "rt") as inf:
         with util.file.open_or_gzopen(output_tsv, "wt") as outf:
             for line in inf:
-                taxid = line.rstrip('\r\n').split('\t')[0]
+                taxid = int(line.rstrip('\r\n').split('\t')[0])
                 ancestors = taxdb.get_ordered_ancestors(taxid)
-                for node in [taxid] + ancestors:
-                    if taxid in hits:
-                        outf.write(line)
-                        break
+                if any(node in hits for node in [taxid] + ancestors):
+                    outf.write(line)
 
 __commands__.append(('filter_taxids_to_focal_hits', parser_filter_taxids_to_focal_hits))
 
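The filter_taxids_to_focal_hits fix addresses two bugs: taxon IDs from the focal report were stored as strings while the ancestor walk compares integers, so set membership could never match; and the inner loop tested `taxid` rather than the loop variable `node`, so a line could only match on its own taxid, never through an ancestor. A minimal sketch of both failure modes, using hypothetical taxon IDs:

```python
# Hypothetical IDs illustrating the two bugs fixed above.
hits = {9606}                  # post-fix: hits are stored as ints
taxid_str = '9606'             # the raw TSV field is a string
assert taxid_str not in hits   # bug 1: '9606' never equals 9606

taxid = int(taxid_str)
ancestors = [9605, 9604]       # stand-in for taxdb.get_ordered_ancestors(taxid)

# Bug 2: the old loop never consulted its own loop variable `node`,
# so ancestors were effectively ignored.
matched_old = False
for node in [taxid] + ancestors:
    if taxid in hits:          # should have tested `node`
        matched_old = True
        break

# The fixed logic from the new code checks every node, self and ancestors:
matched_new = any(node in hits for node in [taxid] + ancestors)
```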
10 changes: 5 additions & 5 deletions requirements-conda.txt
@@ -1,7 +1,7 @@
-blast=2.9.0
+blast>=2.9.0
 bmtagger>=3.101
-diamond>=2.1.6
-kmc>=3.1.1rc1
-kraken2>=2.1.2
+diamond>=2.1.9
+kmc>=3.2.1
+kraken2>=2.1.3
 krona>=2.8.1
-last>=876
+last>=1541
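
The dependency changes relax blast from an exact pin (=2.9.0) to a minimum version and raise the version floors for diamond, kmc, kraken2, and last, presumably to give the conda solver room to satisfy the combined requirements files now installed by the Dockerfile.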
9 changes: 4 additions & 5 deletions test/unit/test_taxon_filter.py
@@ -4,6 +4,7 @@
     + "[email protected]"
 
 import unittest
+import glob
 import os, os.path
 import tempfile
 import shutil
@@ -364,13 +365,11 @@ def setUp(self):
 
         # tar one db, but not the other
         tar_db_tgz = util.file.mkstempfname('-humanChr9Subset.blastn.db.tar.gz')
-        cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz]
-        for ext in ('nhr', 'nin', 'nsq'):
-            cmd.append('humanChr9Subset.'+ext)
+        cmd = ['tar', '-C', self.tempDir, '-cvzf', tar_db_tgz] + list(os.path.basename(f) for f in glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*")))
         subprocess.check_call(cmd)
         self.blastdbs_multi[1] = tar_db_tgz
-        for ext in ('nhr', 'nin', 'nsq'):
-            os.unlink(os.path.join(self.tempDir, 'humanChr9Subset.'+ext))
+        for idx in glob.glob(os.path.join(self.tempDir, "humanChr9Subset.n*")):
+            os.unlink(idx)
 
     def test_deplete_blastn_bam(self):
         tempDir = tempfile.mkdtemp()
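The setUp change stops hardcoding the classic BLAST index extensions (.nhr, .nin, .nsq) and instead globs for humanChr9Subset.n*. Newer makeblastdb releases (BLASTDB version 5) write additional index files such as .ndb, .not, and .nto, so a fixed extension tuple can miss files when tarring and cleaning up the database. A hedged sketch of the pattern, with a hypothetical directory:

```python
# Collect every index file the local makeblastdb version produced,
# whether v4 (.nhr/.nin/.nsq) or v5 (adds .ndb, .not, .nto, ...).
import glob
import os

db_dir = '/tmp/blastdb'  # hypothetical; the test uses self.tempDir
members = [os.path.basename(f)
           for f in glob.glob(os.path.join(db_dir, 'humanChr9Subset.n*'))]
# basenames are passed to `tar -C db_dir`, so archive paths stay relative
```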
