From a547b876299b690f3f448fc8cb97ce3aca318dfe Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 08:34:05 +0000 Subject: [PATCH 01/11] version bump --- .../nf-core/diamond/blastp/environment.yml | 2 +- modules/nf-core/diamond/blastp/main.nf | 4 ++-- .../diamond/blastp/tests/main.nf.test.snap | 24 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/nf-core/diamond/blastp/environment.yml b/modules/nf-core/diamond/blastp/environment.yml index 950c3c5c55e..f19483fe91b 100644 --- a/modules/nf-core/diamond/blastp/environment.yml +++ b/modules/nf-core/diamond/blastp/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::diamond=2.1.8 + - bioconda::diamond=2.1.11 diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index dc01cdcc08c..a43e714ba52 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -4,8 +4,8 @@ process DIAMOND_BLASTP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/diamond:2.1.8--h43eeafb_0' : - 'biocontainers/diamond:2.1.8--h43eeafb_0' }" + 'https://depot.galaxyproject.org/singularity/diamond:2.1.11--h5ca1c30_0' : + 'biocontainers/diamond:2.1.11--h5ca1c30_0' }" input: tuple val(meta) , path(fasta) diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap index e323c8b89c1..47c4758f66d 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap @@ -10,14 +10,14 @@ ] ], [ - "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-07-29T14:40:23.906848" + "timestamp": "2025-01-28T08:32:39.553875424" }, "gz_txt": { "content": [ @@ -30,25 +30,25 @@ ] ], [ - "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-07-29T14:40:29.865487" + "timestamp": "2025-01-28T08:32:49.763438642" }, "Should search for protein hits against a DIAMOND db and return a daa format file of hits": { "content": [ [ - "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-07-29T14:40:35.362027" + "timestamp": "2025-01-28T08:32:58.984888127" } } \ No newline at end of file From 06d1bfa64f3f4800e44222d47f8b2e794d945c91 Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 08:39:46 +0000 Subject: [PATCH 02/11] remove unnecessary unzipping of gz fasta files --- modules/nf-core/diamond/blastp/main.nf | 8 
+------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index a43e714ba52..2fa412a9369 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -29,8 +29,6 @@ process DIAMOND_BLASTP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def is_compressed = fasta.getExtension() == "gz" ? true : false - def fasta_name = is_compressed ? fasta.getBaseName() : fasta def columns = blast_columns ? "${blast_columns}" : '' switch ( out_ext ) { case "blast": outfmt = 0; break @@ -47,17 +45,13 @@ process DIAMOND_BLASTP { break } """ - if [ "${is_compressed}" == "true" ]; then - gzip -c -d ${fasta} > ${fasta_name} - fi - DB=`find -L ./ -name "*.dmnd" | sed 's/\\.dmnd\$//'` diamond \\ blastp \\ --threads ${task.cpus} \\ --db \$DB \\ - --query ${fasta_name} \\ + --query ${fasta} \\ --outfmt ${outfmt} ${columns} \\ ${args} \\ --out ${prefix}.${out_ext} From 7e3563b085faab8f9031494220dda7412cd7048c Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 08:41:19 +0000 Subject: [PATCH 03/11] label updated to process_high --- modules/nf-core/diamond/blastp/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index 2fa412a9369..e68602afcc0 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -1,6 +1,6 @@ process DIAMOND_BLASTP { tag "$meta.id" - label 'process_medium' + label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From d7751f61637cfcd67d3839f2a8323484de0859bb Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 08:50:19 +0000 Subject: [PATCH 04/11] db from arg and switch changed to if else --- modules/nf-core/diamond/blastp/main.nf | 35 ++++++++++++++------------ 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index e68602afcc0..5677f6f34b4 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -30,27 +30,30 @@ process DIAMOND_BLASTP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def columns = blast_columns ? "${blast_columns}" : '' - switch ( out_ext ) { - case "blast": outfmt = 0; break - case "xml": outfmt = 5; break - case "txt": outfmt = 6; break - case "daa": outfmt = 100; break - case "sam": outfmt = 101; break - case "tsv": outfmt = 102; break - case "paf": outfmt = 103; break - default: - outfmt = '6'; - out_ext = 'txt'; - log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)"); - break + if (out_ext == "blast") { + outfmt = 0 + } else if (out_ext == "xml") { + outfmt = 5 + } else if (out_ext == "txt") { + outfmt = 6 + } else if (out_ext == "daa") { + outfmt = 100 + } else if (out_ext == "sam") { + outfmt = 101 + } else if (out_ext == "tsv") { + outfmt = 102 + } else if (out_ext == "paf") { + outfmt = 103 + } else { + outfmt = '6' + out_ext = 'txt' + log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } """ - DB=`find -L ./ -name "*.dmnd" | sed 's/\\.dmnd\$//'` - diamond \\ blastp \\ --threads ${task.cpus} \\ - --db \$DB \\ + --db ${db} \\ --query ${fasta} \\ --outfmt ${outfmt} ${columns} \\ ${args} \\ From bfbd61a4e62ea925cf0b1e282782edb129f057a5 Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 
2025 09:18:22 +0000 Subject: [PATCH 05/11] stub updated with if else --- modules/nf-core/diamond/blastp/main.nf | 32 +++++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index 5677f6f34b4..df418308e6e 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -66,21 +66,25 @@ process DIAMOND_BLASTP { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - switch ( out_ext ) { - case "blast": outfmt = 0; break - case "xml": outfmt = 5; break - case "txt": outfmt = 6; break - case "daa": outfmt = 100; break - case "sam": outfmt = 101; break - case "tsv": outfmt = 102; break - case "paf": outfmt = 103; break - default: - outfmt = '6'; - out_ext = 'txt'; - log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)"); - break + if (out_ext == "blast") { + outfmt = 0 + } else if (out_ext == "xml") { + outfmt = 5 + } else if (out_ext == "txt") { + outfmt = 6 + } else if (out_ext == "daa") { + outfmt = 100 + } else if (out_ext == "sam") { + outfmt = 101 + } else if (out_ext == "tsv") { + outfmt = 102 + } else if (out_ext == "paf") { + outfmt = 103 + } else { + outfmt = '6' + out_ext = 'txt' + log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } """ From 62fc882feb693ac92dd159927b192952d772e67b Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 10:28:48 +0000 Subject: [PATCH 06/11] allowing gz outputs, test names updated and added stub test --- modules/nf-core/diamond/blastp/main.nf | 18 +- .../nf-core/diamond/blastp/tests/main.nf.test | 47 ++-- .../diamond/blastp/tests/main.nf.test.snap | 217 ++++++++++++++++-- 3 files changed, 232 insertions(+), 50 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf 
b/modules/nf-core/diamond/blastp/main.nf index df418308e6e..a1480358b86 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -14,14 +14,14 @@ process DIAMOND_BLASTP { val blast_columns output: - tuple val(meta), path('*.blast'), optional: true, emit: blast - tuple val(meta), path('*.xml') , optional: true, emit: xml - tuple val(meta), path('*.txt') , optional: true, emit: txt - tuple val(meta), path('*.daa') , optional: true, emit: daa - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.tsv') , optional: true, emit: tsv - tuple val(meta), path('*.paf') , optional: true, emit: paf - path "versions.yml" , emit: versions + tuple val(meta), path('*.{blast,blast.gz}'), optional: true, emit: blast + tuple val(meta), path('*.{xml,xml.gz}') , optional: true, emit: xml + tuple val(meta), path('*.{txt,txt.gz}') , optional: true, emit: txt + tuple val(meta), path('*.{daa,daa.gz}') , optional: true, emit: daa + tuple val(meta), path('*.{sam,sam.gz}') , optional: true, emit: sam + tuple val(meta), path('*.{tsv,tsv.gz}') , optional: true, emit: tsv + tuple val(meta), path('*.{paf,paf.gz}') , optional: true, emit: paf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -49,6 +49,8 @@ process DIAMOND_BLASTP { out_ext = 'txt' log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } + if ( args =~ /--compress\s+1/ ) out_ext += '.gz' + """ diamond \\ blastp \\ diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test index f21e926de9d..c7c1deba330 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test @@ -23,7 +23,7 @@ nextflow_process { } } - test("Should search for protein hits against a DIAMOND db and return a tab separated output file of hits") { + test("sarscov2 - proteome - txt") { 
when { params { @@ -42,17 +42,13 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.txt, - process.out.versions - ).match() - } + { assert snapshot(process.out).match() } ) } } - test("Should search for zipped protein hits against a DIAMOND db and return a tab separated output file of hits") { + test("sarscov2 - proteome - gz - txt") { when { params { @@ -71,22 +67,15 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.txt, - process.out.versions - ).match("gz_txt") - } + { assert snapshot(process.out).match("gz_txt")} ) } } - test("Should search for protein hits against a DIAMOND db and return a daa format file of hits") { + test("sarscov2 - proteome - daa") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] @@ -101,7 +90,31 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.daa }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(process.out.versions).match("daa") } + ) + } + + } + + test("sarscov2 - proteome - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = DIAMOND_MAKEDB.out.db + input[2] = 'txt' + input[3] = 'qseqid qlen' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } ) } diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap index 47c4758f66d..b34ef399022 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap @@ -1,45 +1,143 @@ { - "Should search for protein hits against a DIAMOND db and return a tab 
separated output file of hits": { + "sarscov2 - proteome - txt": { "content": [ - [ - [ - { - "id": "test" - }, - "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "blast": [ + + ], + "daa": [ + + ], + "paf": [ + + ], + "sam": [ + + ], + "tsv": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "versions": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "xml": [ + ] - ], - [ - "versions.yml:md5,5f638327037bee3c00e17521c04a652f" - ] + } ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-28T08:32:39.553875424" + "timestamp": "2025-01-28T10:25:13.48912978" }, "gz_txt": { "content": [ - [ - [ - { - "id": "test" - }, - "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "blast": [ + + ], + "daa": [ + + ], + "paf": [ + + ], + "sam": [ + + ], + "tsv": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "versions": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "xml": [ + ] - ], - [ - "versions.yml:md5,5f638327037bee3c00e17521c04a652f" - ] + } ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-28T08:32:49.763438642" + "timestamp": "2025-01-28T10:25:20.993203497" }, - "Should search for protein hits against a DIAMOND db and return a daa format file of hits": { + "daa": { "content": [ [ "versions.yml:md5,5f638327037bee3c00e17521c04a652f" @@ -49,6 +147,75 
@@ "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-28T08:32:58.984888127" + "timestamp": "2025-01-28T10:25:28.126992812" + }, + "stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "blast": [ + + ], + "daa": [ + + ], + "paf": [ + + ], + "sam": [ + + ], + "tsv": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "xml": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-28T10:25:34.911633513" } } \ No newline at end of file From d756499357918cf86f75f825813fba2b6002b05f Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 10:37:36 +0000 Subject: [PATCH 07/11] test for --compress flag added --- .../nf-core/diamond/blastp/tests/main.nf.test | 30 ++++++-- .../diamond/blastp/tests/main.nf.test.snap | 69 +++++++++++++++++++ .../diamond/blastp/tests/nextflow.config | 7 ++ 3 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 modules/nf-core/diamond/blastp/tests/nextflow.config diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test index c7c1deba330..7934334cc55 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test @@ -26,9 +26,6 @@ nextflow_process { test("sarscov2 - proteome - txt") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] @@ -51,9 +48,6 @@ nextflow_process { test("sarscov2 - proteome - gz - txt") { when { - 
params { - outdir = "$outputDir" - } process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] @@ -96,6 +90,30 @@ nextflow_process { } + test("sarscov2 - proteome - txt - gz") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = DIAMOND_MAKEDB.out.db + input[2] = 'txt' + input[3] = 'qseqid qlen' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("txt_gz") } + ) + } + + } + test("sarscov2 - proteome - stub") { options "-stub" diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap index b34ef399022..44d504337b1 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap @@ -68,6 +68,75 @@ }, "timestamp": "2025-01-28T10:25:13.48912978" }, + "txt_gz": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "blast": [ + + ], + "daa": [ + + ], + "paf": [ + + ], + "sam": [ + + ], + "tsv": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,8131b1afd717f3d5f2f2417c5b562e6e" + ] + ], + "versions": [ + "versions.yml:md5,5f638327037bee3c00e17521c04a652f" + ], + "xml": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-28T10:36:04.361504205" + }, "gz_txt": { "content": [ { diff --git a/modules/nf-core/diamond/blastp/tests/nextflow.config b/modules/nf-core/diamond/blastp/tests/nextflow.config new file mode 100644 index 00000000000..bd28cb1d433 --- 
/dev/null +++ b/modules/nf-core/diamond/blastp/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: DIAMOND_BLASTP { + ext.args = '--compress 1' + } + +} From 01bc4b0d15c8050bb0cc7684244354400b203b3c Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Tue, 28 Jan 2025 10:50:27 +0000 Subject: [PATCH 08/11] meta.yml updated to include new gz outputs --- Note: output file entries keep `type: file`; the gz-aware glob is set on the file entry only, not on the `meta` map. modules/nf-core/diamond/blastp/meta.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/nf-core/diamond/blastp/meta.yml b/modules/nf-core/diamond/blastp/meta.yml index fbddfbd00f1..295dbf30fdc 100644 --- a/modules/nf-core/diamond/blastp/meta.yml +++ b/modules/nf-core/diamond/blastp/meta.yml @@ -54,70 +54,70 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.blast": + - "*.{blast,blast.gz}": type: file description: File containing blastp hits - pattern: "*.{blast}" + pattern: "*.{blast,blast.gz}" - xml: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.xml": + - "*.{xml,xml.gz}": type: file description: File containing blastp hits - pattern: "*.{xml}" + pattern: "*.{xml,xml.gz}" - txt: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.txt": + - "*.{txt,txt.gz}": type: file description: File containing hits in tabular BLAST format. - pattern: "*.{txt}" + pattern: "*.{txt,txt.gz}" - daa: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.daa": + - "*.{daa,daa.gz}": type: file description: File containing hits DAA format - pattern: "*.{daa}" + pattern: "*.{daa,daa.gz}" - sam: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.sam": + - "*.{sam,sam.gz}": type: file description: File containing aligned reads in SAM format - pattern: "*.{sam}" + pattern: "*.{sam,sam.gz}" - tsv: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.tsv": + - "*.{tsv,tsv.gz}": type: file description: Tab separated file containing taxonomic classification of hits - pattern: "*.{tsv}" + pattern: "*.{tsv,tsv.gz}" - paf: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.paf": + - "*.{paf,paf.gz}": type: file description: File containing aligned reads in pairwise mapping format format - pattern: "*.{paf}" + pattern: "*.{paf,paf.gz}" - versions: - versions.yml: type: file From 06af3739b7c72e73f2e46f941ec86a825768b28b Mon Sep 17 00:00:00 2001 From: vagkaratzas <vagelaros.gee@gmail.com> Date: Thu, 30 Jan 2025 13:18:50 +0000 Subject: [PATCH 09/11] outfmt instead of out_ext input arg, meta updated --- modules/nf-core/diamond/blastp/main.nf | 67 +++++++++++-------- modules/nf-core/diamond/blastp/meta.yml | 21 +++--- .../nf-core/diamond/blastp/tests/main.nf.test | 10 +-- 3 files changed, 56 insertions(+), 42 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index a1480358b86..dc712b0bed4 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -10,7 +10,7 @@ process DIAMOND_BLASTP { input: tuple val(meta) , path(fasta) tuple val(meta2), 
path(db) - val out_ext + val outfmt val blast_columns output: @@ -29,26 +29,30 @@ process DIAMOND_BLASTP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def columns = blast_columns ? "${blast_columns}" : '' - if (out_ext == "blast") { - outfmt = 0 - } else if (out_ext == "xml") { - outfmt = 5 - } else if (out_ext == "txt") { - outfmt = 6 - } else if (out_ext == "daa") { - outfmt = 100 - } else if (out_ext == "sam") { - outfmt = 101 - } else if (out_ext == "tsv") { - outfmt = 102 - } else if (out_ext == "paf") { - outfmt = 103 + def out_ext = "" + + if (outfmt == 0) { + out_ext = "blast" + } else if (outfmt == 5) { + out_ext = "xml" + } else if (outfmt == 6) { + out_ext = "txt" + } else if (outfmt == 100) { + out_ext = "daa" + } else if (outfmt == 101) { + out_ext = "sam" + } else if (outfmt == 102) { + out_ext = "tsv" + } else if (outfmt == 103) { + out_ext = "paf" } else { outfmt = '6' out_ext = 'txt' log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } + if ( args =~ /--compress\s+1/ ) out_ext += '.gz' """ @@ -69,26 +73,31 @@ process DIAMOND_BLASTP { stub: def prefix = task.ext.prefix ?: "${meta.id}" - if (out_ext == "blast") { - outfmt = 0 - } else if (out_ext == "xml") { - outfmt = 5 - } else if (out_ext == "txt") { - outfmt = 6 - } else if (out_ext == "daa") { - outfmt = 100 - } else if (out_ext == "sam") { - outfmt = 101 - } else if (out_ext == "tsv") { - outfmt = 102 - } else if (out_ext == "paf") { - outfmt = 103 + + def out_ext = "" + + if (outfmt == 0) { + out_ext = "blast" + } else if (outfmt == 5) { + out_ext = "xml" + } else if (outfmt == 6) { + out_ext = "txt" + } else if (outfmt == 100) { + out_ext = "daa" + } else if (outfmt == 101) { + out_ext = "sam" + } else if (outfmt == 102) { + out_ext = "tsv" + } else if (outfmt == 103) { + out_ext = "paf" } else { outfmt = '6' out_ext = 'txt' log.warn("Unknown output file format provided (${out_ext}): 
selecting DIAMOND default of tabular BLAST output (txt)") } + if ( args =~ /--compress\s+1/ ) out_ext += '.gz' + """ touch ${prefix}.${out_ext} diff --git a/modules/nf-core/diamond/blastp/meta.yml b/modules/nf-core/diamond/blastp/meta.yml index 295dbf30fdc..6518aa9aedd 100644 --- a/modules/nf-core/diamond/blastp/meta.yml +++ b/modules/nf-core/diamond/blastp/meta.yml @@ -33,19 +33,24 @@ input: type: file description: File of the indexed DIAMOND database pattern: "*.dmnd" - - - out_ext: - type: string + - - outfmt: + type: integer description: | - Specify the type of output file to be generated. `blast` corresponds to - BLAST pairwise format. `xml` corresponds to BLAST xml format. - `txt` corresponds to to BLAST tabular format. `tsv` corresponds to - taxonomic classification format. - pattern: "blast|xml|txt|daa|sam|tsv|paf" + Specify the type of output file to be generated. + 0, .blast, BLAST pairwise format. + 5, .xml, BLAST XML format. + 6, .txt, BLAST tabular format (default). This format can be customized, the 6 may be followed by a space-separated list of the blast_columns keywords, each specifying a field of the output. + 100, .daa, DIAMOND alignment archive (DAA). The DAA format is a proprietary binary format that can subsequently be used to generate other output formats using the view command. It is also supported by MEGAN and allows a quick import of results. + 101, .sam, SAM format. + 102, .tsv, Taxonomic classification. This format will not print alignments but only a taxonomic classification for each query using the LCA algorithm. + 103, .paf, PAF format. The custom fields in the format are AS (bit score), ZR (raw score) and ZE (e-value). + pattern: "0|5|6|100|101|102|103" - - blast_columns: type: string description: | Optional space separated list of DIAMOND tabular BLAST output keywords - used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: + used in conjunction with the --outfmt 6 option (txt). 
+ Options: qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore output: - blast: diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test index 7934334cc55..9211915173c 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test @@ -30,7 +30,7 @@ nextflow_process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db - input[2] = 'txt' + input[2] = 6 input[3] = 'qseqid qlen' """ } @@ -52,7 +52,7 @@ nextflow_process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db - input[2] = 'txt' + input[2] = 6 input[3] = 'qseqid qlen' """ } @@ -74,7 +74,7 @@ nextflow_process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db - input[2] = 'daa' + input[2] = 100 input[3] = [] """ } @@ -99,7 +99,7 @@ nextflow_process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db - input[2] = 'txt' + input[2] = 6 input[3] = 'qseqid qlen' """ } @@ -123,7 +123,7 @@ nextflow_process { """ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db - input[2] = 'txt' + input[2] = 6 input[3] = 'qseqid qlen' """ } From 68d5789d17289c1bf77edaea38a74813917c7bb9 Mon Sep 17 00:00:00 2001 From: Daniel Lundin <erik.rikard.daniel@gmail.com> Date: Thu, 30 Jan 2025 14:41:55 +0100 Subject: [PATCH 10/11] Update modules/nf-core/diamond/blastp/main.nf --- 
modules/nf-core/diamond/blastp/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index dc712b0bed4..1342ee23554 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -48,9 +48,9 @@ process DIAMOND_BLASTP { } else if (outfmt == 103) { out_ext = "paf" } else { - outfmt = '6' + log.warn("Unknown output file format provided (${outfmt}): selecting DIAMOND default of tabular BLAST output (txt)") + outfmt = 6 out_ext = 'txt' - log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } if ( args =~ /--compress\s+1/ ) out_ext += '.gz' From 3c6bd345b942806d1c74773a756a194efefd7df6 Mon Sep 17 00:00:00 2001 From: Daniel Lundin <erik.rikard.daniel@gmail.com> Date: Thu, 30 Jan 2025 14:42:04 +0100 Subject: [PATCH 11/11] Update modules/nf-core/diamond/blastp/main.nf --- Note: the stub block defines no local `args` (its `def args` was dropped in patch 05), so the --compress check reads task.ext.args directly. modules/nf-core/diamond/blastp/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index 1342ee23554..6dd8d3925ad 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -91,9 +91,9 @@ process DIAMOND_BLASTP { } else if (outfmt == 103) { out_ext = "paf" } else { - outfmt = '6' + log.warn("Unknown output file format provided (${outfmt}): selecting DIAMOND default of tabular BLAST output (txt)") + outfmt = 6 out_ext = 'txt' - log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") } - if ( args =~ /--compress\s+1/ ) out_ext += '.gz' + if ( (task.ext.args ?: '') =~ /--compress\s+1/ ) out_ext += '.gz'