Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new module openms/filefilter #7299

Merged
merged 13 commits into from
Jan 21, 2025
5 changes: 5 additions & 0 deletions modules/nf-core/openms/filefilter/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the openms/filefilter module.
# Channels are listed in priority order: conda-forge first, then bioconda.
channels:
- conda-forge
- bioconda
dependencies:
# Same OpenMS release (3.2.0) as the container images referenced in main.nf.
- "bioconda::openms=3.2.0"
55 changes: 55 additions & 0 deletions modules/nf-core/openms/filefilter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Runs the OpenMS `FileFilter` tool on a single input file and writes the result as
// `<prefix>.<extension of the input file>`. Filter criteria are passed through
// `task.ext.args`; the output base name defaults to `meta.id` and can be overridden
// with `task.ext.prefix`.
process OPENMS_FILEFILTER {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/openms:3.2.0--haddbca4_4' :
        'biocontainers/openms:3.2.0--haddbca4_4' }"

    input:
    tuple val(meta), path(file)

    output:
    // The output file keeps the extension of the input file, so every data channel is
    // optional and normally only the one matching that extension emits.
    tuple val(meta), path("*.mzML"),         emit: mzml,         optional: true
    tuple val(meta), path("*.featureXML"),   emit: featurexml,   optional: true
    tuple val(meta), path("*.consensusXML"), emit: consensusxml, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def suffix = "${file.getExtension()}"
    // Output name is built once and reused for both the guard and the command line.
    def outfile = "${prefix}.${suffix}"
    // Fail early instead of letting the tool write its output onto the staged input.
    if ("$file" == "${outfile}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    """
    FileFilter \\
        -in $file \\
        -out ${outfile} \\
        -threads $task.cpus \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // `task.ext.args` is intentionally not read here: the stub never invokes FileFilter.
    def prefix = task.ext.prefix ?: "${meta.id}"
    def suffix = "${file.getExtension()}"
    def outfile = "${prefix}.${suffix}"
    // Same name-collision guard as in the script section.
    if ("$file" == "${outfile}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    """
    touch ${outfile}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
    END_VERSIONS
    """
}
70 changes: 70 additions & 0 deletions modules/nf-core/openms/filefilter/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
name: "openms_filefilter"
description: Extracts or manipulates portions of data from peak, feature or consensus-feature files.
keywords:
- filter
- mzML
- openms
- proteomics
tools:
- "openms":
description: "OpenMS is an open-source software C++ library for LC-MS data management
and analyses"
homepage: "https://openms.de"
documentation: "https://openms.readthedocs.io/en/latest/index.html"
tool_dev_url: "https://github.com/OpenMS/OpenMS"
doi: "10.1038/s41592-024-02197-7"
licence: ["BSD"]
identifier: ""

input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- file:
type: file
description: Peak, feature or consensus-feature file to be filtered.
pattern: "*.{mzML,featureXML,consensusXML}"

output:
- mzml:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- "*.mzML":
type: file
description: Filtered mzML file.
pattern: "*.mzML"
- featurexml:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- "*.featureXML":
type: file
description: Filtered featureXML file.
pattern: "*.featureXML"
- consensusxml:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- "*.consensusXML":
type: file
description: Filtered consensusXML file.
pattern: "*.consensusXML"
- versions:
- versions.yml:
type: file
description: File containing software version
pattern: "versions.yml"

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
69 changes: 69 additions & 0 deletions modules/nf-core/openms/filefilter/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// nf-test specification for the OPENMS_FILEFILTER process.
// Both tests feed the process with the `spectra` output of THERMORAWFILEPARSER,
// which is run once in the setup block on a raw test file.
nextflow_process {

name "Test Process OPENMS_FILEFILTER"
script "../main.nf"
process "OPENMS_FILEFILTER"
// Test-specific process settings are loaded from the config next to this file.
config "./nextflow.config"

tag "modules"
tag "modules_nfcore"
tag "openms"
tag "openms/filefilter"
// Tagged with the setup dependency as well, since this test also runs that module.
tag "thermorawfileparser"

// Produce the input for the tests below by running THERMORAWFILEPARSER on the raw test file.
setup {
run("THERMORAWFILEPARSER") {
script "../../../thermorawfileparser/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test'],
file(params.modules_testdata_base_path + 'proteomics/msspectra/PXD012083_e005640_II.raw', checkIfExists: true)
])
"""
}
}
}

// Real run: checks success and snapshots only the first 15 lines of the mzML output.
test("filter - mzml") {

when {
process {
"""
input[0] = THERMORAWFILEPARSER.out.spectra
"""
}
}

then {
assertAll(
{ assert process.success },
// work dir is written into xml file. Check only the first few lines
{ assert snapshot(path(process.out.mzml[0][1]).readLines().take(15)).match() }
)
}

}

// Stub run: checks success and snapshots all output channels of the process.
test("filter - mzml - stub") {

options "-stub"

when {
process {
"""
input[0] = THERMORAWFILEPARSER.out.spectra
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
73 changes: 73 additions & 0 deletions modules/nf-core/openms/filefilter/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
{
"filter - mzml": {
"content": [
[
"<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>",
"<indexedmzML xmlns=\"http://psi.hupo.org/ms/mzml\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd\">",
"<mzML xmlns=\"http://psi.hupo.org/ms/mzml\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd\" accession=\"\" version=\"1.1.0\">",
"\t<cvList count=\"5\">",
"\t\t<cv id=\"MS\" fullName=\"Proteomics Standards Initiative Mass Spectrometry Ontology\" URI=\"http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo\"/>",
"\t\t<cv id=\"UO\" fullName=\"Unit Ontology\" URI=\"http://obo.cvs.sourceforge.net/obo/obo/ontology/phenotype/unit.obo\"/>",
"\t\t<cv id=\"BTO\" fullName=\"BrendaTissue545\" version=\"unknown\" URI=\"http://www.brenda-enzymes.info/ontology/tissue/tree/update/update_files/BrendaTissueOBO\"/>",
"\t\t<cv id=\"GO\" fullName=\"Gene Ontology - Slim Versions\" version=\"unknown\" URI=\"http://www.geneontology.org/GO_slims/goslim_goa.obo\"/>",
"\t\t<cv id=\"PATO\" fullName=\"Quality ontology\" version=\"unknown\" URI=\"http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/quality.obo\"/>",
"\t</cvList>",
"\t<fileDescription>",
"\t\t<fileContent>",
"\t\t\t<cvParam cvRef=\"MS\" accession=\"MS:1000579\" name=\"MS1 spectrum\" />",
"\t\t\t<cvParam cvRef=\"MS\" accession=\"MS:1000580\" name=\"MSn spectrum\" />",
"\t\t</fileContent>"
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2025-01-21T13:07:14.535106314"
},
"filter - mzml - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test_filtered.mzML:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [

],
"2": [

],
"3": [
"versions.yml:md5,a8ec8f78f4a0da0328482bf90dc2df00"
],
"consensusxml": [

],
"featurexml": [

],
"mzml": [
[
{
"id": "test"
},
"test_filtered.mzML:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,a8ec8f78f4a0da0328482bf90dc2df00"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2025-01-21T13:09:28.723787747"
}
}
6 changes: 6 additions & 0 deletions modules/nf-core/openms/filefilter/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Test-only settings for the OPENMS_FILEFILTER process.
process {
    withName: 'OPENMS_FILEFILTER' {
        // Filter argument handed to the tool through `task.ext.args`.
        ext.args   = '-mz 1000:2000'
        // Closure, so `meta` is resolved per task; the suffix keeps the output name
        // different from the input name.
        ext.prefix = { "${meta.id}_filtered" }
    }
}
Loading