Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Removed AMRFinderPlusAnnotation type #95

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions q2_amr/amrfinderplus/sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def annotate_sample_data_amrfinderplus(
GenesDirectoryFormat,
pd.DataFrame,
):
annotations = AMRFinderPlusAnnotationsDirFmt()
mutations = AMRFinderPlusAnnotationsDirFmt()
genes = GenesDirectoryFormat()
amr_annotations = AMRFinderPlusAnnotationsDirFmt()
amr_all_mutations = AMRFinderPlusAnnotationsDirFmt()
amr_genes = GenesDirectoryFormat()
frequency_list = []

# Create list of paths to all mags or contigs
Expand Down Expand Up @@ -86,20 +86,22 @@ def annotate_sample_data_amrfinderplus(

# Move mutations file. If it is not created, create an empty mutations file
des_path_mutations = os.path.join(
str(mutations),
str(amr_all_mutations),
sample_id,
f"{mag_id + '_' if mag_id else ''}amr_mutations.tsv",
f"{mag_id + '_' if mag_id else ''}amr_all_mutations.tsv",
)
os.makedirs(os.path.dirname(des_path_mutations), exist_ok=True)
if organism:
shutil.move(os.path.join(tmp, "amr_mutations.tsv"), des_path_mutations)
shutil.move(
os.path.join(tmp, "amr_all_mutations.tsv"), des_path_mutations
)
else:
with open(des_path_mutations, "w"):
pass

# Move annotations file
des_path_annotations = os.path.join(
str(annotations),
str(amr_annotations),
sample_id,
f"{mag_id + '_' if mag_id else ''}amr_annotations.tsv",
)
Expand All @@ -110,14 +112,14 @@ def annotate_sample_data_amrfinderplus(
shutil.move(
os.path.join(tmp, "amr_genes.fasta"),
os.path.join(
str(genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta"
str(amr_genes), f"{mag_id if mag_id else sample_id}_amr_genes.fasta"
),
)

feature_table = create_count_table(df_list=frequency_list)
return (
annotations,
mutations,
genes,
amr_annotations,
amr_all_mutations,
amr_genes,
feature_table,
)
6 changes: 3 additions & 3 deletions q2_amr/amrfinderplus/tests/test_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@ def mock_run_amrfinderplus_n(
with open(os.path.join(working_dir, "amr_annotations.tsv"), "w"):
pass
if organism:
with open(os.path.join(working_dir, "amr_mutations.tsv"), "w"):
with open(os.path.join(working_dir, "amr_all_mutations.tsv"), "w"):
pass
if dna_sequences:
with open(os.path.join(working_dir, "amr_genes.fasta"), "w"):
pass

files_contigs = [
"amr_annotations.tsv",
"amr_mutations.tsv",
"amr_all_mutations.tsv",
"sample1_amr_genes.fasta",
]

files_mags = [
"mag1_amr_annotations.tsv",
"mag1_amr_mutations.tsv",
"mag1_amr_all_mutations.tsv",
"mag1_amr_genes.fasta",
]

Expand Down
2 changes: 1 addition & 1 deletion q2_amr/amrfinderplus/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_run_amrfinderplus_n(self, mock_run_command):
"--organism",
"Escherichia",
"--mutation_all",
"path_dir/amr_mutations.tsv",
"path_dir/amr_all_mutations.tsv",
"--plus",
"--report_all_equal",
"--ident_min",
Expand Down
2 changes: 0 additions & 2 deletions q2_amr/amrfinderplus/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
Expand All @@ -18,7 +17,6 @@
"AMRFinderPlusDatabaseDirFmt",
"AMRFinderPlusAnnotationFormat",
"AMRFinderPlusAnnotationsDirFmt",
"AMRFinderPlusAnnotationDirFmt",
"TextFormat",
"BinaryFormat",
]
23 changes: 8 additions & 15 deletions q2_amr/amrfinderplus/types/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat
from q2_types.per_sample_sequences._format import MultiDirValidationMixin
from qiime2.core.exceptions import ValidationError
from qiime2.plugin import model

Expand Down Expand Up @@ -109,19 +110,11 @@ def _validate_(self, level):
self._validate()


class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat):
annotation = model.FileCollection(
r".*amr_(annotations|mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat
class AMRFinderPlusAnnotationsDirFmt(model.DirectoryFormat):
annotations = model.FileCollection(
r".*amr_(annotations|all_mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat
)

@annotation.set_path_maker
def annotation_path_maker(self, sample_id, mag_id):
prefix = f"{sample_id}/{mag_id}_" if mag_id else f"{sample_id}/"
return f"{prefix}amr_annotations.tsv"


AMRFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat(
"AMRFinderPlusAnnotationDirFmt",
r"amr_(annotations|mutations)\.tsv$",
AMRFinderPlusAnnotationFormat,
)
@annotations.set_path_maker
def annotations_path_maker(self, name, id, dir_name=""):
    """Build the relative path of one file in the ``annotations`` collection.

    The file name is ``<id>_amr_<name>.tsv``; it is joined onto ``dir_name``,
    which defaults to an empty string (no sub-directory component).
    """
    file_name = f"{id}_amr_{name}.tsv"
    return os.path.join(dir_name, file_name)
6 changes: 2 additions & 4 deletions q2_amr/amrfinderplus/types/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@

AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase")
AMRFinderPlusAnnotations = SemanticType(
"AMRFinderPlusAnnotations", variant_of=SampleData.field["type"]
)
AMRFinderPlusAnnotation = SemanticType(
"AMRFinderPlusAnnotation", variant_of=FeatureData.field["type"]
"AMRFinderPlusAnnotations",
variant_of=[SampleData.field["type"], FeatureData.field["type"]],
)
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from qiime2.plugin.testing import TestPluginBase

from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
Expand Down Expand Up @@ -92,14 +91,28 @@ def test_amrfinderplus_annotation_format_validation_error(self):

self.assertEqual(str(context.exception), expected_message)

def test_amrfinderplus_annotation_directory_format(self):
def test_amrfinderplus_annotations_dir_fmt_feature(self):
dirpath = self.get_data_path(
"annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d"
)
annotations = AMRFinderPlusAnnotationDirFmt(dirpath, mode="r")
assert isinstance(annotations, AMRFinderPlusAnnotationDirFmt)
annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r")
assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt)

def test_amrfinderplus_annotations_directory_format(self):
def test_amrfinderplus_annotations_dir_fmt_sample(self):
dirpath = self.get_data_path("annotation")
annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r")
assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt)

def test_amrfinderplus_annotations_dir_fmt_path_maker_dir_name(self):
    """Path maker places the file under ``dir_name`` when one is passed."""
    dir_fmt = AMRFinderPlusAnnotationsDirFmt()
    expected = os.path.join(str(dir_fmt), "dir_name/id_amr_annotations.tsv")

    observed = dir_fmt.annotations_path_maker(
        name="annotations", id="id", dir_name="dir_name"
    )

    self.assertEqual(str(observed), expected)

def test_amrfinderplus_annotations_dir_fmt_path_maker(self):
    """Without ``dir_name`` the file sits directly in the format directory."""
    dir_fmt = AMRFinderPlusAnnotationsDirFmt()
    expected = os.path.join(str(dir_fmt), "id_amr_annotations.tsv")

    observed = dir_fmt.annotations_path_maker(name="annotations", id="id")

    self.assertEqual(str(observed), expected)
2 changes: 1 addition & 1 deletion q2_amr/amrfinderplus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def run_amrfinderplus_n(
"--organism",
organism,
"--mutation_all",
f"{working_dir}/amr_mutations.tsv",
f"{working_dir}/amr_all_mutations.tsv",
]
)
if plus:
Expand Down
22 changes: 9 additions & 13 deletions q2_amr/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,13 @@
from q2_amr.amrfinderplus.database import fetch_amrfinderplus_db
from q2_amr.amrfinderplus.sample_data import annotate_sample_data_amrfinderplus
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusAnnotationDirFmt,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusDatabaseDirFmt,
BinaryFormat,
TextFormat,
)
from q2_amr.amrfinderplus.types._type import (
AMRFinderPlusAnnotation,
AMRFinderPlusAnnotations,
AMRFinderPlusDatabase,
)
Expand Down Expand Up @@ -1175,9 +1173,9 @@
"threads": Int % Range(0, None, inclusive_start=False),
},
outputs=[
("annotations", SampleData[AMRFinderPlusAnnotations]),
("mutations", SampleData[AMRFinderPlusAnnotations]),
("genes", GenomeData[Genes]),
("amr_annotations", SampleData[AMRFinderPlusAnnotations]),
("amr_all_mutations", SampleData[AMRFinderPlusAnnotations]),
("amr_genes", GenomeData[Genes]),
("feature_table", FeatureTable[Frequency]),
],
input_descriptions={
Expand Down Expand Up @@ -1212,8 +1210,8 @@
"fail. Using more than 4 threads may speed up searches.",
},
output_descriptions={
"annotations": "Annotated AMR genes and mutations.",
"mutations": "Report of genotypes at all locations screened for point "
"amr_annotations": "Annotated AMR genes and mutations.",
"amr_all_mutations": "Report of genotypes at all locations screened for point "
"mutations. These files allow you to distinguish between called "
"point mutations that were the sensitive variant and the point "
"mutations that could not be called because the sequence was not "
Expand All @@ -1226,8 +1224,8 @@
"'Gene symbols' from known point-mutation sites have gene symbols "
"that match the Pathogen Detection Reference Gene Catalog "
"standardized nomenclature for point mutations.",
"genes": "Sequences that were identified by AMRFinderPlus as AMR genes. This "
"will include the entire region that aligns to the references for "
"amr_genes": "Sequences that were identified by AMRFinderPlus as AMR genes. "
"This will include the entire region that aligns to the references for "
"point mutations.",
"feature_table": "Presence/Absence table of ARGs in all samples.",
},
Expand All @@ -1249,7 +1247,6 @@
CARDMAGsKmerAnalysis,
AMRFinderPlusDatabase,
AMRFinderPlusAnnotations,
AMRFinderPlusAnnotation,
)

plugin.register_semantic_type_to_format(
Expand Down Expand Up @@ -1289,8 +1286,8 @@
artifact_format=AMRFinderPlusAnnotationsDirFmt,
)
plugin.register_semantic_type_to_format(
FeatureData[AMRFinderPlusAnnotation],
artifact_format=AMRFinderPlusAnnotationDirFmt,
FeatureData[AMRFinderPlusAnnotations],
artifact_format=AMRFinderPlusAnnotationsDirFmt,
)
plugin.register_formats(
CARDKmerDatabaseDirectoryFormat,
Expand Down Expand Up @@ -1321,7 +1318,6 @@
BinaryFormat,
AMRFinderPlusAnnotationFormat,
AMRFinderPlusAnnotationsDirFmt,
AMRFinderPlusAnnotationDirFmt,
)

importlib.import_module("q2_amr.card.types._transformer")
Loading