Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CRAM and BAM support to last/mafconvert #7391

Merged
merged 9 commits into from
Feb 1, 2025
1 change: 1 addition & 0 deletions modules/nf-core/last/mafconvert/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ channels:

dependencies:
- bioconda::last=1608
- bioconda::samtools=1.21
41 changes: 28 additions & 13 deletions modules/nf-core/last/mafconvert/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,27 @@ process LAST_MAFCONVERT {

conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
: 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
: 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"

input:
tuple val(meta), path(maf)
val(format)
tuple path(fasta), path(fai), path(gzi)
charles-plessy marked this conversation as resolved.
Show resolved Hide resolved

output:
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.bam"), optional:true, emit: bam
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.cram"), path(fasta), optional:true, emit: cram
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -31,7 +34,19 @@ process LAST_MAFCONVERT {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
set -o pipefail
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz

# Pick the conversion pipeline from the requested output format.
case $format in
bam)
# Convert to SAM, then let samtools view write a BAM file (-b) named after the prefix.
maf-convert $args -d sam $maf | samtools view -b -o ${prefix}.${format}
;;
cram)
# Convert to SAM, then let samtools view write a CRAM file (-C) using the supplied genome file.
# CRAM output is not supported if the genome is compressed with anything other than bgzip
maf-convert $args -d sam $maf | samtools view -Ct $fasta -o ${prefix}.${format}
;;
*)
# Any other format: maf-convert writes it directly and the result is gzip-compressed.
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
;;
esac

# maf-convert has no --version option but lastdb (part of the same package) has.
cat <<-END_VERSIONS > versions.yml
Expand Down
38 changes: 38 additions & 0 deletions modules/nf-core/last/mafconvert/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,20 @@ input:
type: string
description: Output format (one of axt, bam, blast, blasttab, chain, cram, gff, html,
psl, sam, or tab)
- - fasta:
type: file
description: Genome file in FASTA format for CRAM conversion. If compressed, it
must be in BGZF format (for example, as produced by the bgzip tool).
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- fai:
type: file
description: Genome index file needed for CRAM conversion.
pattern: "*.fai"
- gzi:
type: file
description: Genome index file needed for CRAM conversion when the genome file
was compressed with the BGZF algorithm.
pattern: "*.gzi"
output:
- axt_gz:
- meta:
Expand All @@ -40,6 +54,16 @@ output:
type: file
description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
pattern: "*.axt.gz"
- bam:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.bam":
type: file
description: Pairwise alignment in BAM format (optional)
pattern: "*.bam"
- blast_gz:
- meta:
type: map
Expand Down Expand Up @@ -70,6 +94,20 @@ output:
type: file
description: Gzipped pairwise alignment in UCSC chain format (optional)
pattern: "*.chain.gz"
- cram:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.cram":
type: file
description: Pairwise alignment in CRAM format (optional)
pattern: "*.cram"
- fasta:
type: file
description: Genome file to recover sequences from the CRAM file (optional)
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- gff_gz:
- meta:
type: map
Expand Down
64 changes: 62 additions & 2 deletions modules/nf-core/last/mafconvert/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ nextflow_process {
tag "last"
tag "last/mafconvert"

test("sarscov2 - bam") {
test("sarscov2 - psl") {

when {
process {
Expand All @@ -19,6 +19,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[],[]]
"""
}
}
Expand All @@ -32,7 +33,65 @@ nextflow_process {

}

test("sarscov2 - bam - stub") {
// Runs the process on the SARS-CoV-2 test alignment with the 'bam' output format.
// The third input (the fasta / fai / gzi tuple) is given as three empty lists.
test("sarscov2 - bam") {

when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'bam'
input[2] = [[],[],[]]
"""
}
}

// Expect the process to succeed, then snapshot the SAM lines read back from each
// emitted BAM file (via the bam() helper) together with the versions output.
then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.bam.collect { bam(it[1]).getSamLines() },
process.out.versions
).match() }
)
}

}

// Runs the process on the SARS-CoV-2 test alignment with the 'cram' output format.
// The third input supplies the genome FASTA and its .fai index; the gzi slot is an
// empty list.
test("sarscov2 - cram") {

when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'cram'
input[2] = [
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true),
[]
]
"""
}
}

// Expect the process to succeed, then snapshot the SAM lines read back from each
// emitted CRAM file together with the versions output. The cram() helper is given
// both the CRAM file (it[1]) and the genome file emitted alongside it (it[2]).
then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
process.out.versions
).match() }
)
}

}

test("sarscov2 - psl - stub") {

options "-stub"
when {
Expand All @@ -43,6 +102,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = [[],[],[]]
"""
}
}
Expand Down
Loading
Loading