Skip to content

Commit

Permalink
Implement nf-test for rank variants (#435)
Browse files Browse the repository at this point in the history
* wip - unstable genmod output

* Add changelog

* review suggestions

* allow empty vep_plugin_files
  • Loading branch information
fellen31 authored Oct 24, 2024
1 parent 49fa8e7 commit 2caa215
Show file tree
Hide file tree
Showing 12 changed files with 460 additions and 84 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
- "SNV_ANNOTATION"
- "CALL_SVS"
- "ANNOTATE_SVS"
- "RANK_VARIANTS"
profile:
- "docker"

Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#419](https://github.com/genomic-medicine-sweden/nallo/pull/419) - Added support for SV filtering using input BED file ([#348](https://github.com/genomic-medicine-sweden/nallo/issues/348))
- [#430](https://github.com/genomic-medicine-sweden/nallo/pull/430) - Added a GitHub action to build and publish docs to GitHub Pages
- [#431](https://github.com/genomic-medicine-sweden/nallo/pull/431) - Added files needed to automatically build and publish docs to GitHub Pages
- [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Added nf-test for rank variants

### `Changed`

Expand Down Expand Up @@ -58,6 +59,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#431](https://github.com/genomic-medicine-sweden/nallo/pull/431) - Changed `CITATIONS.md` to `docs/CITATIONS.md`
- [#433](https://github.com/genomic-medicine-sweden/nallo/pull/433) - Updated docs and README.
- [#434](https://github.com/genomic-medicine-sweden/nallo/pull/434) - Updated the SVDB merge module to fix unstable CALL_SVS tests
- [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Updated and refactored processes and workflows related to variant ranking

### `Removed`

Expand Down
4 changes: 0 additions & 4 deletions conf/modules/rank_variants.config
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,4 @@ process {
ext.args = "--temp_dir ./"
}

withName: '.*:RANK_VARIANTS_SNV:BCFTOOLS_SORT' {
ext.when = false
}

}
15 changes: 10 additions & 5 deletions modules/local/add_most_severe_consequence.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process ADD_MOST_SEVERE_CSQ {
tag "$meta.id"
label 'process_low'
label 'process_single'

conda "conda-forge::python=3.8.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -9,20 +9,25 @@ process ADD_MOST_SEVERE_CSQ {

input:
tuple val(meta), path(vcf)
path (variant_consequences)
tuple val(meta2), path (variant_consequences)

output:
tuple val(meta), path("*.vcf") , emit: vcf
path "versions.yml" , emit: versions
tuple val(meta), path("*.vcf"), emit: vcf
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if ("$vcf" == "${prefix}.vcf" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!"

"""
add_most_severe_consequence.py --file_in ${vcf} --file_out ${prefix}.vcf --variant_csq ${variant_consequences}
add_most_severe_consequence.py \\
--file_in ${vcf} \\
--file_out ${prefix}.vcf \\
--variant_csq ${variant_consequences}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
12 changes: 8 additions & 4 deletions modules/local/add_most_severe_pli.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process ADD_MOST_SEVERE_PLI {
tag "$meta.id"
label 'process_low'
label 'process_single'

conda "conda-forge::python=3.8.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -11,17 +11,21 @@ process ADD_MOST_SEVERE_PLI {
tuple val(meta), path(vcf)

output:
tuple val(meta), path("*.vcf") , emit: vcf
path "versions.yml" , emit: versions
tuple val(meta), path("*.vcf"), emit: vcf
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
if ("$vcf" == "${prefix}.vcf" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!"

"""
add_most_severe_pli.py --file_in ${vcf} --file_out ${prefix}.vcf
add_most_severe_pli.py \\
--file_in ${vcf} \\
--file_out ${prefix}.vcf
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 4 additions & 4 deletions subworkflows/local/annotate_consequence_pli.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/mai
workflow ANNOTATE_CSQ_PLI {
take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_variant_consequences // channel: [mandatory] [ path(consequences) ]
ch_variant_consequences // channel: [mandatory] [ val(meta), path(consequences) ]

main:
ch_versions = Channel.empty()
Expand All @@ -24,7 +24,7 @@ workflow ANNOTATE_CSQ_PLI {
ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions)

emit:
vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] } // channel: [ val(meta), path(vcf) ]
tbi_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, tbi ] } // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
vcf = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, vcf ] } // channel: [ val(meta), path(vcf) ]
tbi = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, tbi ] } // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
47 changes: 34 additions & 13 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modules/nf-core/untar/main'
workflow PREPARE_GENOME {

take:
fasta_in // channel: [mandatory] [ val(meta), path(fasta) ]
ch_vep_cache // channel: [optional] [ path(cache) ]
fasta_in // channel: [mandatory] [ val(meta), path(fasta) ]
gunzip_fasta // bool: should we gunzip fasta
ch_vep_cache // channel: [optional] [ val(meta), path(cache) ]
split_vep_files // bool: are there vep extra files
ch_vep_extra_files_unsplit // channel: [optional] [ val(meta), path(csv) ]

main:
ch_versions = Channel.empty()
Expand All @@ -16,16 +19,15 @@ workflow PREPARE_GENOME {
fasta_file = fasta_in.map{meta, file -> file}

// Will not catch cases where fasta is bgzipped
if ( params.fasta.endsWith('.gz') ) {
GUNZIP_FASTA(fasta_in)
if ( gunzip_fasta ) {
GUNZIP_FASTA ( fasta_in )
.gunzip
.collect()
.set{ch_fasta}

.set { ch_fasta }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first())
} else {
fasta_in
.set{ch_fasta}
.set { ch_fasta }
}

SAMTOOLS_FAIDX ( ch_fasta, [[],[]] )
Expand All @@ -38,14 +40,33 @@ workflow PREPARE_GENOME {
ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)

UNTAR_VEP_CACHE.out.untar
.map { meta, files -> [files] }
.map { meta, files -> [ files ] }
.collect()
.set { untarred_vep }

// Read and store paths in the vep_plugin_files file
if ( split_vep_files ) {
ch_vep_extra_files_unsplit
.splitCsv ( header:true )
.map { row ->
path = file(row.vep_files[0])
if(path.exists()) {
return [path]
} else {
error("\nVep database file ${path} does not exist.")
}
}
.collect()
.set { ch_vep_extra_files }
} else {
ch_vep_extra_files = Channel.value([])
}

emit:
mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ]
fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ]
fasta = ch_fasta // channel: [ val(meta), path(fasta) ]
vep_resources = untarred_vep // channel: [ path(cache) ]
versions = ch_versions // channel: [ versions.yml ]
mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ]
fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ]
fasta = ch_fasta // channel: [ val(meta), path(fasta) ]
vep_resources = untarred_vep // channel: [ path(cache) ]
vep_extra_files = ch_vep_extra_files // channel: [ path(files) ]
versions = ch_versions // channel: [ versions.yml ]
}
57 changes: 27 additions & 30 deletions subworkflows/local/rank_variants/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,44 @@ include { GENMOD_ANNOTATE } from '../../../modules/nf-core/genmod/annotate/main
include { GENMOD_MODELS } from '../../../modules/nf-core/genmod/models/main'
include { GENMOD_SCORE } from '../../../modules/nf-core/genmod/score/main'
include { GENMOD_COMPOUND } from '../../../modules/nf-core/genmod/compound/main'
include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main'
include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main'
include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main'
include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main'

workflow RANK_VARIANTS {

take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_pedfile // channel: [mandatory] [ path(ped) ]
ch_reduced_penetrance // channel: [mandatory] [ path(penetrance) ]
ch_score_config // channel: [mandatory] [ path(ini) ]
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_pedfile // channel: [mandatory] [ val(meta), path(ped) ]
ch_reduced_penetrance // channel: [mandatory] [ val(meta), path(penetrance) ]
ch_score_config // channel: [mandatory] [ val(meta), path(ini) ]

main:
ch_versions = Channel.empty()
ch_versions = Channel.empty()

GENMOD_ANNOTATE(ch_vcf)
GENMOD_ANNOTATE ( ch_vcf )
ch_versions = ch_versions.mix(GENMOD_ANNOTATE.out.versions)

GENMOD_MODELS(GENMOD_ANNOTATE.out.vcf, ch_pedfile, ch_reduced_penetrance)
GENMOD_MODELS (
GENMOD_ANNOTATE.out.vcf,
ch_pedfile.map { meta, ped -> ped },
ch_reduced_penetrance.map { meta, file -> file }
)
ch_versions = ch_versions.mix(GENMOD_MODELS.out.versions)

GENMOD_SCORE(GENMOD_MODELS.out.vcf, ch_pedfile, ch_score_config)
GENMOD_SCORE (
GENMOD_MODELS.out.vcf,
ch_pedfile.map { meta, ped -> ped },
ch_score_config.map { meta, file -> file }
)
ch_versions = ch_versions.mix(GENMOD_SCORE.out.versions)

GENMOD_COMPOUND(GENMOD_SCORE.out.vcf)
GENMOD_COMPOUND ( GENMOD_SCORE.out.vcf )
ch_versions = ch_versions.mix(GENMOD_COMPOUND.out.versions)

BCFTOOLS_SORT(GENMOD_COMPOUND.out.vcf) // SV file needs to be sorted before indexing

TABIX_BGZIP(GENMOD_COMPOUND.out.vcf) //run only for SNVs

ch_vcf = TABIX_BGZIP.out.output.mix(BCFTOOLS_SORT.out.vcf)

TABIX_TABIX (ch_vcf)

ch_versions = ch_versions.mix(GENMOD_ANNOTATE.out.versions)
ch_versions = ch_versions.mix(GENMOD_MODELS.out.versions)
ch_versions = ch_versions.mix(GENMOD_SCORE.out.versions)
ch_versions = ch_versions.mix(GENMOD_COMPOUND.out.versions)
ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions)
ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
TABIX_BGZIPTABIX ( GENMOD_COMPOUND.out.vcf )
ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions)

emit:
vcf = ch_vcf // channel: [ val(meta), path(vcf) ]
tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
vcf = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, vcf ] } // channel: [ val(meta), path(vcf) ]
tbi = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, tbi ] } // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
Loading

0 comments on commit 2caa215

Please sign in to comment.