Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CRAM and BAM support to last/mafconvert #7391

Merged
merged 9 commits into from
Feb 1, 2025
44 changes: 31 additions & 13 deletions modules/nf-core/last/mafconvert/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,29 @@ process LAST_MAFCONVERT {

conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/db/db0b5de918238f07ec1ca668be942397da85e26aa582f8927ac37c70896303cf/data'
: 'community.wave.seqera.io/library/last:1608--f41c047f7dc37e30'}"
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/37/379183a78f725c3a8f2c4dda2f73ad452e57cc895239938fc97281d7bd74ffbf/data'
: 'community.wave.seqera.io/library/last_samtools:e2b51d2d9a1ce9fa'}"

input:
tuple val(meta), path(maf)
val(format)
path(fasta)

output:
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions
tuple val(meta), path("*.axt.gz"), optional:true, emit: axt_gz
tuple val(meta), path("*.bam"), optional:true, emit: bam
tuple val(meta), path("*.blast.gz"), optional:true, emit: blast_gz
tuple val(meta), path("*.blasttab.gz"), optional:true, emit: blasttab_gz
tuple val(meta), path("*.chain.gz"), optional:true, emit: chain_gz
tuple val(meta), path("*.cram"), path(fasta), optional:true, emit: cram
path("*.fai"), optional:true, emit: fai
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
tuple val(meta), path("*.cram"), path(fasta), optional:true, emit: cram
path("*.fai"), optional:true, emit: fai
tuple val(meta), path("*.cram"), optional:true, emit: cram

fasta is an input file so shouldn't be in the output section,
fai should also be an input file :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment I need path(fasta) in the cram output channel because I did not find another way to access that file during the tests. Can you help me to solve that problem?

tuple val(meta), path("*.gff.gz"), optional:true, emit: gff_gz
path("*.gzi"), optional:true, emit: gzi
tuple val(meta), path("*.html.gz"), optional:true, emit: html_gz
tuple val(meta), path("*.psl.gz"), optional:true, emit: psl_gz
tuple val(meta), path("*.sam.gz"), optional:true, emit: sam_gz
tuple val(meta), path("*.tab.gz"), optional:true, emit: tab_gz
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -31,7 +36,20 @@ process LAST_MAFCONVERT {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
set -o pipefail
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz

case $format in
bam)
maf-convert $args -d sam $maf | samtools view -b -o ${prefix}.${format}
;;
cram)
# CRAM output is not supported if the genome is compressed with anything other than bgzip
samtools faidx $fasta
charles-plessy marked this conversation as resolved.
Show resolved Hide resolved
maf-convert $args -d sam $maf | samtools view -Ct $fasta -o ${prefix}.${format}
;;
*)
maf-convert $args $format $maf | gzip --no-name > ${prefix}.${format}.gz
;;
esac

# maf-convert has no --version option but lastdb (part of the same package) has.
cat <<-END_VERSIONS > versions.yml
Expand Down
39 changes: 39 additions & 0 deletions modules/nf-core/last/mafconvert/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ input:
type: string
description: Output format (one of axt, bam, blast, blasttab, chain, cram, gff, html,
psl, sam, or tab)
- - fasta:
type: file
description: Genome file in FASTA format for CRAM conversion. If compressed, it
must be in BGZF format (for example, produced by the bgzip tool).
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
output:
- axt_gz:
- meta:
Expand All @@ -40,6 +45,16 @@ output:
type: file
description: Gzipped pairwise alignment in Axt (Blastz) format (optional)
pattern: "*.axt.gz"
- bam:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.bam":
type: file
description: Pairwise alignment in BAM format (optional)
pattern: "*.bam"
- blast_gz:
- meta:
type: map
Expand Down Expand Up @@ -70,6 +85,25 @@ output:
type: file
description: Gzipped pairwise alignment in UCSC chain format (optional)
pattern: "*.chain.gz"
- cram:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*.cram":
type: file
description: Pairwise alignment in CRAM format (optional)
pattern: "*.cram"
- fasta:
type: file
description: Genome file to recover sequences from the CRAM file (optional)
pattern: "*.{fasta,fasta.gz,fasta.bgz,fasta.bgzf}"
- fai:
- "*.fai":
type: file
description: FASTA index (.fai) of the genome file, generated by samtools faidx during CRAM conversion (optional)
pattern: "*.fai"
- gff_gz:
- meta:
type: map
Expand All @@ -80,6 +114,11 @@ output:
type: file
description: Gzipped pairwise alignment in GFF format (optional)
pattern: "*.gff.gz"
- gzi:
- "*.gzi":
type: file
description: BGZF block index (.gzi) of the compressed genome file, generated during CRAM conversion (optional)
pattern: "*.gzi"
- html_gz:
- meta:
type: map
Expand Down
62 changes: 60 additions & 2 deletions modules/nf-core/last/mafconvert/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ nextflow_process {
tag "last"
tag "last/mafconvert"

test("sarscov2 - bam") {
test("sarscov2 - psl") {

when {
process {
Expand All @@ -19,6 +19,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = []
"""
}
}
Expand All @@ -32,7 +33,63 @@ nextflow_process {

}

test("sarscov2 - bam - stub") {
// Runs the process with format 'bam' on the contigs.genome.maf.gz test alignment
// and snapshots the resulting alignment records.
test("sarscov2 - bam") {

// Inputs: [meta, maf] tuple, the output format string, and an empty third
// input (no genome file is supplied for this format).
when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'bam'
input[2] = []
"""
}
}

then {
// The snapshot holds the SAM lines read back from each emitted BAM file
// (it[1] is the file element of the bam output tuple) plus the versions output.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.bam.collect { bam(it[1]).getSamLines() },
process.out.versions
).match() }
)
}

}

// Runs the process with format 'cram' on the contigs.genome.maf.gz test alignment,
// supplying a genome FASTA as the third input, and snapshots the alignment records.
test("sarscov2 - cram") {

// Inputs: [meta, maf] tuple, the output format string, and the gzipped
// genome FASTA file.
when {
process {
"""
input[0] = [
[ id:'contigs.genome' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'cram'
input[2] = [
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
]
"""
}
}

then {
// Each cram output element is read with cram(it[1], it[2]): it[1] is the
// CRAM file and it[2] is passed as the second argument.
// NOTE(review): this assumes the process emits the fasta as the third element
// of the cram output tuple — confirm against main.nf if that tuple changes.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.cram.collect { cram(it[1], it[2]).getSamLines() },
process.out.versions
).match() }
)
}

}

test("sarscov2 - psl - stub") {

options "-stub"
when {
Expand All @@ -43,6 +100,7 @@ nextflow_process {
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
input[1] = 'psl'
input[2] = []
"""
}
}
Expand Down
Loading
Loading