Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CRAM and BAM support to last/mafconvert #7391

Merged
merged 9 commits into from
Feb 1, 2025
1 change: 1 addition & 0 deletions modules/nf-core/last/mafconvert/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:

dependencies:
- bioconda::last=1608
- bioconda::samtools=1.21
43 changes: 30 additions & 13 deletions modules/nf-core/last/mafconvert/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,29 @@ process LAST_MAFCONVERT {

conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
: 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
: 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"

input:
tuple val(meta), path(maf)
val(format)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)

output:
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.bam"), optional:true, emit: bam
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.cram"), path(fasta), optional:true, emit: cram
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -31,7 +36,19 @@ process LAST_MAFCONVERT {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
set -o pipefail
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz

case $format in
bam)
maf-convert $args -d sam $maf | samtools view -b -o ${prefix}.${format}
;;
cram)
# CRAM output is not supported if the genome is compressed with anything other than bgzip
maf-convert $args -d sam $maf | samtools view -Ct $fasta -o ${prefix}.${format}
;;
*)
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
;;
esac

# maf-convert has no --version option but lastdb (part of the same package) has.
cat <<-END_VERSIONS > versions.yml
Expand Down
53 changes: 53 additions & 0 deletions modules/nf-core/last/mafconvert/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,35 @@ input:
type: string
description: Output format (one of axt, bam, blast, blasttab, chain, cram, gff,
html, psl, sam, or tab)
- - meta2:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- fasta:
type: file
description: Genome file in FASTA format for CRAM conversion. If compressed, it
must be in BGZF format (for example, as produced by the bgzip tool).
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- - meta3:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- fai:
type: file
description: Genome index file needed for CRAM conversion.
pattern: "*.fai"
- - meta4:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- gzi:
type: file
description: Genome index file needed for CRAM conversion when the genome file
was compressed with the BGZF algorithm.
pattern: "*.gzi"
output:
- axt_gz:
- meta:
Expand All @@ -40,6 +69,16 @@ output:
type: file
description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
pattern: "*.axt.gz"
- bam:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.bam":
type: file
description: Pairwise alignment in BAM format (optional)
pattern: "*.bam"
- blast_gz:
- meta:
type: map
Expand Down Expand Up @@ -70,6 +109,20 @@ output:
type: file
description: Gzipped pairwise alignment in UCSC chain format (optional)
pattern: "*.chain.gz"
- cram:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.cram":
type: file
description: Pairwise alignment in CRAM format (optional)
pattern: "*.cram"
- fasta:
type: file
description: Genome file to recover sequences from the CRAM file (optional)
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- gff_gz:
- meta:
type: map
Expand Down
74 changes: 72 additions & 2 deletions modules/nf-core/last/mafconvert/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ nextflow_process {
tag "last"
tag "last/mafconvert"

test("sarscov2 - bam") {
test("sarscov2 - psl") {

when {
process {
Expand All @@ -19,6 +19,9 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}
Expand All @@ -32,7 +35,71 @@ nextflow_process {

}

test("sarscov2 - bam - stub") {
// Exercises the 'bam' output format: the MAF alignment is converted and the
// resulting BAM is checked through its decoded SAM lines.
test("sarscov2 - bam") {

when {
process {
// input[1] selects the 'bam' format; input[2..4] (fasta, fai, gzi) are passed
// as empty [meta, file] placeholders.
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'bam'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
// Snapshot the SAM lines read from each emitted BAM (it[1] is the file in
// the [meta, bam] tuple) together with the versions channel.
// NOTE(review): bam(...) is presumably provided by an nf-test plugin such as nft-bam — confirm it is configured.
{ assert snapshot(
process.out.bam.collect { bam(it[1]).getSamLines() },
process.out.versions
).match() }
)
}

}

// Exercises the 'cram' output format: the MAF alignment is converted with the
// genome FASTA and its .fai index supplied, and the CRAM is checked through
// its decoded SAM lines.
test("sarscov2 - cram") {

when {
process {
// input[2] is the genome FASTA and input[3] its .fai index; input[4] (gzi)
// is an empty placeholder because the FASTA used here is uncompressed.
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'cram'
input[2] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[3] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[4] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
// The cram channel emits [meta, cram, fasta]; it[1] is the CRAM file and
// it[2] the FASTA passed alongside it to the reader helper.
// NOTE(review): cram(...) is presumably provided by an nf-test plugin such as nft-bam — confirm it is configured.
{ assert snapshot(
process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
process.out.versions
).match() }
)
}

}

test("sarscov2 - psl - stub") {

options "-stub"
when {
Expand All @@ -43,6 +110,9 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = [[],[]]
"""
}
}
Expand Down
Loading
Loading