nf-core · vagkaratzas · Jan 30, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/modules/nf-core/diamond/blastp/environment.yml b/modules/nf-core/diamond/blastp/environment.yml
@@ -2,4 +2,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - bioconda::diamond=2.1.8
+  - bioconda::diamond=2.1.11
diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf
@@ -1,11 +1,11 @@
 process DIAMOND_BLASTP {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_high'
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/diamond:2.1.8--h43eeafb_0' :
-        'biocontainers/diamond:2.1.8--h43eeafb_0' }"
+        'https://depot.galaxyproject.org/singularity/diamond:2.1.11--h5ca1c30_0' :
+        'biocontainers/diamond:2.1.11--h5ca1c30_0' }"
 
     input:
     tuple val(meta) , path(fasta)
@@ -14,50 +14,49 @@ process DIAMOND_BLASTP {
     val blast_columns
 
     output:
-    tuple val(meta), path('*.blast'), optional: true, emit: blast
-    tuple val(meta), path('*.xml')  , optional: true, emit: xml
-    tuple val(meta), path('*.txt')  , optional: true, emit: txt
-    tuple val(meta), path('*.daa')  , optional: true, emit: daa
-    tuple val(meta), path('*.sam')  , optional: true, emit: sam
-    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
-    tuple val(meta), path('*.paf')  , optional: true, emit: paf
-    path "versions.yml"             , emit: versions
+    tuple val(meta), path('*.{blast,blast.gz}'), optional: true, emit: blast
+    tuple val(meta), path('*.{xml,xml.gz}')    , optional: true, emit: xml
+    tuple val(meta), path('*.{txt,txt.gz}')    , optional: true, emit: txt
+    tuple val(meta), path('*.{daa,daa.gz}')    , optional: true, emit: daa
+    tuple val(meta), path('*.{sam,sam.gz}')    , optional: true, emit: sam
+    tuple val(meta), path('*.{tsv,tsv.gz}')    , optional: true, emit: tsv
+    tuple val(meta), path('*.{paf,paf.gz}')    , optional: true, emit: paf
+    path "versions.yml"                        , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def is_compressed = fasta.getExtension() == "gz" ? true : false
-    def fasta_name = is_compressed ? fasta.getBaseName() : fasta
     def columns = blast_columns ? "${blast_columns}" : ''
-    switch ( out_ext ) {
-        case "blast": outfmt = 0; break
-        case "xml": outfmt = 5; break
-        case "txt": outfmt = 6; break
-        case "daa": outfmt = 100; break
-        case "sam": outfmt = 101; break
-        case "tsv": outfmt = 102; break
-        case "paf": outfmt = 103; break
-        default:
-            outfmt = '6';
-            out_ext = 'txt';
-            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
-            break
+    if (out_ext == "blast") { // translate the requested extension into the numeric code passed to --outfmt
+        outfmt = 0
+    } else if (out_ext == "xml") {
+        outfmt = 5
+    } else if (out_ext == "txt") {
+        outfmt = 6
+    } else if (out_ext == "daa") {
+        outfmt = 100
+    } else if (out_ext == "sam") {
+        outfmt = 101
+    } else if (out_ext == "tsv") {
+        outfmt = 102
+    } else if (out_ext == "paf") {
+        outfmt = 103
+    } else { // unrecognised extension: warn, then fall back to tabular txt output
+        log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") // must run before out_ext is reset so the message names the rejected value
+        outfmt = '6'
+        out_ext = 'txt'
     }
-    """
-    if [ "${is_compressed}" == "true" ]; then
-        gzip -c -d ${fasta} > ${fasta_name}
-    fi
-
-    DB=`find -L ./ -name "*.dmnd" | sed 's/\\.dmnd\$//'`
+    if ( args =~ /--compress\s+1/ ) out_ext += '.gz' // when ext.args contains "--compress 1", suffix the extension with .gz so the --out name falls under the *.gz output globs
 
+    """
     diamond \\
         blastp \\
         --threads ${task.cpus} \\
-        --db \$DB \\
-        --query ${fasta_name} \\
+        --db ${db} \\
+        --query ${fasta} \\
         --outfmt ${outfmt} ${columns} \\
         ${args} \\
         --out ${prefix}.${out_ext}
@@ -69,21 +68,25 @@ process DIAMOND_BLASTP {
     """
 
     stub:
-    def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( out_ext ) {
-        case "blast": outfmt = 0; break
-        case "xml": outfmt = 5; break
-        case "txt": outfmt = 6; break
-        case "daa": outfmt = 100; break
-        case "sam": outfmt = 101; break
-        case "tsv": outfmt = 102; break
-        case "paf": outfmt = 103; break
-        default:
-            outfmt = '6';
-            out_ext = 'txt';
-            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
-            break
+    if (out_ext == "blast") { // same extension-to-format-code mapping as the script section
+        outfmt = 0
+    } else if (out_ext == "xml") {
+        outfmt = 5
+    } else if (out_ext == "txt") {
+        outfmt = 6
+    } else if (out_ext == "daa") {
+        outfmt = 100
+    } else if (out_ext == "sam") {
+        outfmt = 101
+    } else if (out_ext == "tsv") {
+        outfmt = 102
+    } else if (out_ext == "paf") {
+        outfmt = 103
+    } else { // unrecognised extension: warn, then fall back to tabular txt output
+        log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)") // must run before out_ext is reset so the message names the rejected value
+        outfmt = '6'
+        out_ext = 'txt'
     }
 
     """

diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test
@@ -23,12 +23,9 @@ nextflow_process {
         }
     }
 
-    test("Should search for protein hits against a DIAMOND db and return a tab separated output file of hits") {
+    test("sarscov2 - proteome - txt") {
 
         when {
-            params {
-                outdir = "$outputDir"
-            }
             process {
                 """
                 input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
@@ -42,22 +39,15 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(
-                    process.out.txt,
-                    process.out.versions
-                    ).match()
-                }
+                { assert snapshot(process.out).match() }
             )
         }
 
     }
 
-    test("Should search for zipped protein hits against a DIAMOND db and return a tab separated output file of hits") {
+    test("sarscov2 - proteome - gz - txt") {
 
         when {
-            params {
-                outdir = "$outputDir"
-            }
             process {
                 """
                 input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ]
@@ -71,22 +61,15 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(
-                    process.out.txt,
-                    process.out.versions
-                    ).match("gz_txt")
-                }
+                { assert snapshot(process.out).match("gz_txt")}
             )
         }
 
     }
 
-    test("Should search for protein hits against a DIAMOND db and return a daa format file of hits") {
+    test("sarscov2 - proteome - daa") {
 
         when {
-            params {
-                outdir = "$outputDir"
-            }
             process {
                 """
                 input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
@@ -101,7 +84,55 @@ nextflow_process {
             assertAll(
                 { assert process.success },
                 { assert process.out.daa },
-                { assert snapshot(process.out.versions).match() }
+                { assert snapshot(process.out.versions).match("daa") }
+            )
+        }
+
+    }
+
+    test("sarscov2 - proteome - txt - gz") { // txt format requested, run with the module options from ./nextflow.config
+
+        config "./nextflow.config" // NOTE(review): presumably sets ext.args to include "--compress 1" so the output is gzipped — confirm against tests/nextflow.config
+
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = DIAMOND_MAKEDB.out.db
+                input[2] = 'txt'
+                input[3] = 'qseqid qlen'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the process must finish without error
+                { assert snapshot(process.out).match("txt_gz") } // snapshots all output channels under the "txt_gz" tag
+            )
+        }
+
+    }
+
+    test("sarscov2 - proteome - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = DIAMOND_MAKEDB.out.db
+                input[2] = 'txt'
+                input[3] = 'qseqid qlen'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match("stub") }
             )
         }