Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Added mzML formats and type #2

Merged
merged 9 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ repos:
rev: 4.0.1
hooks:
- id: flake8
args:
- --max-line-length=88
additional_dependencies:
- pycodestyle==2.8.0
- pyflakes==2.4.0
Expand Down
1 change: 1 addition & 0 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ requirements:
- versioningit

run:
- pymzml
- qiime2 {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
- q2templates {{ qiime2_epoch }}.*
Expand Down
4 changes: 3 additions & 1 deletion q2_ms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# flake8: noqa
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import importlib

try:
from ._version import __version__
except ModuleNotFoundError:
__version__ = "0.0.0+notfound"

# Import the q2_ms.types subpackage whenever q2_ms itself is imported.
# NOTE(review): presumably done so the type/format definitions are loaded
# alongside the package; confirm against plugin discovery.
importlib.import_module("q2_ms.types")
10 changes: 10 additions & 0 deletions q2_ms/citations.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@article{kosters2018pymzml,
title={pymzML v2.0: introducing a highly compressed and seekable gzip format},
author={K{\"o}sters, M and Leufken, Johannes and Schulze, Stefan and Sugimoto, K and Klein, Joshua and Zahedi, RP and Hippler, Michael and Leidel, SA and Fufezan, Christian},
journal={Bioinformatics},
volume={34},
number={14},
pages={2513--2514},
year={2018},
publisher={Oxford University Press}
}
16 changes: 14 additions & 2 deletions q2_ms/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from q2_types.sample_data import SampleData
from qiime2.plugin import Citations, Plugin

from q2_ms import __version__
from q2_ms.types import mzML, mzMLDirFmt, mzMLFormat

citations = Citations.load("citations.bib", package="q2_ms")

Expand All @@ -19,5 +20,16 @@
package="q2_ms",
description="A QIIME 2 plugin for MS data processing.",
short_description="A QIIME 2 plugin for MS data processing.",
citations=[],
)

# Registrations
# Make the mzML semantic type known to the plugin.
plugin.register_semantic_types(
mzML,
)

# Associate SampleData[mzML] artifacts with the mzMLDirFmt directory format.
plugin.register_semantic_type_to_format(SampleData[mzML], artifact_format=mzMLDirFmt)

# Register both the single-file format and the directory format.
plugin.register_formats(
mzMLFormat,
mzMLDirFmt,
)
31 changes: 0 additions & 31 deletions q2_ms/tests/data/table-1.biom

This file was deleted.

5 changes: 4 additions & 1 deletion q2_ms/tests/__init__.py → q2_ms/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# flake8: noqa
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_ms.types._format import mzMLDirFmt, mzMLFormat
from q2_ms.types._type import mzML

__all__ = ["mzMLFormat", "mzMLDirFmt", "mzML"]
37 changes: 37 additions & 0 deletions q2_ms/types/_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import sys

import pymzml
from qiime2.core.exceptions import ValidationError
from qiime2.plugin import model


class mzMLFormat(model.TextFileFormat):
    """File format for a single mzML file.

    A file is considered valid when ``pymzml.run.Reader`` can open it without
    raising an exception.
    """

    def _validate(self, n_records=None):
        """Try to open the file with pymzml.

        Parameters
        ----------
        n_records
            Accepted for interface compatibility; not used by this check.

        Raises
        ------
        ValidationError
            If opening the file with ``pymzml.run.Reader`` raises any
            exception.
        """
        try:
            # Suppressing warning print "Not index found and
            # build_index_from_scratch is False". This could also be solved
            # with setting build_index_from_scratch to True but this builds
            # the index and slows down validation.
            with open(os.devnull, "w") as devnull:
                # Remember whatever stream is currently active (it may already
                # be redirected, e.g. by a test runner or a notebook) so it is
                # put back exactly, instead of being reset to sys.__stdout__.
                saved_stdout = sys.stdout
                sys.stdout = devnull
                try:
                    pymzml.run.Reader(str(self))
                finally:
                    # Restore stdout even when the reader raises; otherwise
                    # all later output of the process would be discarded.
                    sys.stdout = saved_stdout
        except Exception as e:
            raise ValidationError(e) from e

    def _validate_(self, level):
        """Validation hook; runs the same check regardless of ``level``."""
        self._validate()


class mzMLDirFmt(model.DirectoryFormat):
    """Directory format holding a collection of ``*.mzML`` files.

    Every member file must match ``.*\\.mzML$`` and is handled as
    ``mzMLFormat``.
    """

    mzml = model.FileCollection(r".*\.mzML$", format=mzMLFormat)

    @mzml.set_path_maker
    def mzml_path_maker(self, sample_id):
        """Return the file name used for ``sample_id`` in the collection."""
        file_name = "{}.mzML".format(sample_id)
        return file_name
10 changes: 3 additions & 7 deletions q2_ms/tests/test_methods.py → q2_ms/types/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,7 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from qiime2.plugin.testing import TestPluginBase
from q2_types.sample_data import SampleData
from qiime2.core.type import SemanticType


class Test(TestPluginBase):
package = "q2_ms.tests"

def test(self):
pass
# Semantic type named "mzML", declared as a variant of SampleData's "type"
# field so it can be used as SampleData[mzML].
mzML = SemanticType("mzML", variant_of=SampleData.field["type"])
File renamed without changes.
15 changes: 15 additions & 0 deletions q2_ms/types/tests/data/mzML_invalid/invalid.mzML
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd">
<mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="urn:lsid:psidev.info:mzML.instanceDocuments.tiny.pwiz" version="1.1.0">
<cvList count="2">
<cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="2.26.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>
<cv id="UO" fullName="Unit Ontology" version="14:07:2009" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>
</cvList>
<fileDescription>
<fileContent>
<cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>
<cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
</fileContent>
<sourceFileList count="3">
<sourceFile id="tiny1.yep" name="tiny1.yep" location="file://F:/data/Exp01">
<cvParam cvRef="MS" accession="MS:1000567" name="Bruker/Agilent YEP file" value=""/>
Loading
Loading