nf-core · charles-plessy · Feb 1, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
diff --git a/modules/nf-core/last/mafconvert/main.nf b/modules/nf-core/last/mafconvert/main.nf
@@ -4,24 +4,29 @@ process LAST_MAFCONVERT {
 
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
-        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
-        : 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
+        : 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"
 
     input:
     tuple val(meta), path(maf)
     val(format)
+    path(fasta)
 
     output:
-    tuple val(meta), path("*.axt.gz"),      optional:true, emit: axt_gz
-    tuple val(meta), path("*.blast.gz"),    optional:true, emit: blast_gz
-    tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
-    tuple val(meta), path("*.chain.gz"),    optional:true, emit: chain_gz
-    tuple val(meta), path("*.gff.gz"),      optional:true, emit: gff_gz
-    tuple val(meta), path("*.html.gz"),     optional:true, emit: html_gz
-    tuple val(meta), path("*.psl.gz"),      optional:true, emit: psl_gz
-    tuple val(meta), path("*.sam.gz"),      optional:true, emit: sam_gz
-    tuple val(meta), path("*.tab.gz"),      optional:true, emit: tab_gz
-    path "versions.yml"                                  , emit: versions
+    tuple val(meta), path("*.axt.gz"),             optional:true, emit: axt_gz
+    tuple val(meta), path("*.bam"),                optional:true, emit: bam
+    tuple val(meta), path("*.blast.gz"),           optional:true, emit: blast_gz
+    tuple val(meta), path("*.blasttab.gz"),        optional:true, emit: blasttab_gz
+    tuple val(meta), path("*.chain.gz"),           optional:true, emit: chain_gz
+    tuple val(meta), path("*.cram"), path(fasta),  optional:true, emit: cram
+    path("*.fai"),                                 optional:true, emit: fai
+    tuple val(meta), path("*.gff.gz"),             optional:true, emit: gff_gz
+    path("*.gzi"),                                 optional:true, emit: gzi
+    tuple val(meta), path("*.html.gz"),            optional:true, emit: html_gz
+    tuple val(meta), path("*.psl.gz"),             optional:true, emit: psl_gz
+    tuple val(meta), path("*.sam.gz"),             optional:true, emit: sam_gz
+    tuple val(meta), path("*.tab.gz"),             optional:true, emit: tab_gz
+    path "versions.yml"                                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -31,7 +36,20 @@ process LAST_MAFCONVERT {
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     set -o pipefail
-    maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
+
+    case $format in
+        bam)
+            maf-convert $args -d sam  $maf | samtools view -b -o ${prefix}.${format}
+            ;;
+        cram)
+            # CRAM output is only supported if the genome is uncompressed or compressed with bgzip (BGZF)
+            samtools faidx $fasta
+            maf-convert $args -d sam  $maf | samtools view -Ct $fasta -o ${prefix}.${format}
+            ;;
+        *)
+            maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
+            ;;
+    esac
 
     # maf-convert has no --version option but lastdb (part of the same package) has.
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/nf-core/last/mafconvert/meta.yml b/modules/nf-core/last/mafconvert/meta.yml
@@ -29,6 +29,11 @@ input:
         type: string
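-        description: Output format (one of axt, blast, blasttab, chain, gff, html, psl,
-          sam, or tab)
+        description: Output format (one of axt, bam, blast, blasttab, chain, cram, gff,
+          html, psl, sam, or tab)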
+  - - fasta:
+        type: file
+        description: Genome file in FASTA format for CRAM conversion. If compressed, it
+          must be in BGZF format (e.g. as produced by the bgzip tool).
+        pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
 output:
   - axt_gz:
       - meta:
@@ -40,6 +45,16 @@ output:
           type: file
           description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
           pattern: "*.axt.gz"
+  - bam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.bam":
+          type: file
+          description: Pairwise alignment in BAM format (optional)
+          pattern: "*.bam"
   - blast_gz:
       - meta:
           type: map
@@ -70,6 +85,25 @@ output:
           type: file
           description: Gzipped pairwise alignment in UCSC chain format (optional)
           pattern: "*.chain.gz"
+  - cram:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.cram":
+          type: file
+          description: Pairwise alignment in CRAM format (optional)
+          pattern: "*.cram"
+      - fasta:
+          type: file
+          description: Genome file to recover sequences from the CRAM file (optional)
+          pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
+  - fai:
+      - "*.fai":
+          type: file
+          description: FASTA index of the genome file generated by samtools faidx during CRAM conversion (optional)
+          pattern: "*.fai"
   - gff_gz:
       - meta:
           type: map
@@ -80,6 +114,11 @@ output:
           type: file
           description: Gzipped pairwise alignment in GFF format (optional)
           pattern: "*.gff.gz"
+  - gzi:
+      - "*.gzi":
+          type: file
+          description: BGZF index of the bgzip-compressed genome file generated by samtools faidx during CRAM conversion (optional)
+          pattern: "*.gzi"
   - html_gz:
       - meta:
           type: map

diff --git a/modules/nf-core/last/mafconvert/tests/main.nf.test b/modules/nf-core/last/mafconvert/tests/main.nf.test
@@ -9,7 +9,7 @@ nextflow_process {
     tag "last"
     tag "last/mafconvert"
 
-    test("sarscov2 - bam") {
+    test("sarscov2 - psl") {
 
         when {
             process {
@@ -19,6 +19,7 @@ nextflow_process {
                     file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
                 ]
                 input[1] = 'psl'
+                input[2] = []
                 """
             }
         }
@@ -32,7 +33,63 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - bam - stub") {
+    test("sarscov2 - bam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                ]
+                input[1] = 'bam'
+                input[2] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.bam.collect { bam(it[1]).getSamLines() },
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - cram") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'contigs.genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                ]
+                input[1] = 'cram'
+                input[2] = [
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - psl - stub") {
 
         options "-stub"
         when {
@@ -43,6 +100,7 @@ nextflow_process {
                     file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
                 ]
                 input[1] = 'psl'
+                input[2] = []
                 """
             }
         }