From 9000326c61f0b100c77419fec2b87beabfa9e810 Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:16:04 +0100 Subject: [PATCH] Add found_in tag to call repeat expansions (#445) --- .github/workflows/ci.yml | 1 + CHANGELOG.md | 1 + modules/local/trgt/main.nf | 15 +- .../local/call_repeat_expansions/main.nf | 48 +++++-- .../call_repeat_expansions/tests/main.nf.test | 99 ++++++++++++++ .../tests/main.nf.test.snap | 128 ++++++++++++++++++ .../tests/nextflow.config | 11 ++ tests/samplesheet.nf.test.snap | 10 +- .../samplesheet_multisample_bam.nf.test.snap | 14 +- workflows/nallo.nf | 2 +- 10 files changed, 302 insertions(+), 27 deletions(-) create mode 100644 subworkflows/local/call_repeat_expansions/tests/main.nf.test create mode 100644 subworkflows/local/call_repeat_expansions/tests/main.nf.test.snap create mode 100644 subworkflows/local/call_repeat_expansions/tests/nextflow.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 346f8279..2c7daaeb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "CALL_SVS" - "ANNOTATE_SVS" - "RANK_VARIANTS" + - "CALL_REPEAT_EXPANSIONS" profile: - "docker" diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a5c78b7..53949c87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#430](https://github.com/genomic-medicine-sweden/nallo/pull/430) - Added a GitHub action to build and publish docs to GitHub Pages - [#431](https://github.com/genomic-medicine-sweden/nallo/pull/431) - Added files needed to automatically build and publish docs to GitHub Pages - [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Added nf-test to rank variants +- [#445](https://github.com/genomic-medicine-sweden/nallo/pull/445) - Added FOUND_IN tag and nf-test to call repeat expansions ### `Changed` diff --git a/modules/local/trgt/main.nf 
b/modules/local/trgt/main.nf index 3d2a34db..e4ce8278 100644 --- a/modules/local/trgt/main.nf +++ b/modules/local/trgt/main.nf @@ -9,7 +9,7 @@ process TRGT { tuple val(meta), path(bam), path(bai), val(sex) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) - path(repeats) + tuple val(meta4), path(repeats) output: tuple val(meta), path("${meta.id}.spanning.bam"), emit: bam @@ -40,6 +40,19 @@ process TRGT { --output-prefix ${meta.id} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trgt: \$(echo \$(trgt -V) | sed 's/trgt //' ) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.spanning.bam + cat <<-END_VERSIONS > versions.yml "${task.process}": trgt: \$(echo \$(trgt -V) | sed 's/trgt //' ) diff --git a/subworkflows/local/call_repeat_expansions/main.nf b/subworkflows/local/call_repeat_expansions/main.nf index 36d55256..ed34b7f3 100644 --- a/subworkflows/local/call_repeat_expansions/main.nf +++ b/subworkflows/local/call_repeat_expansions/main.nf @@ -1,3 +1,4 @@ +include { ADD_FOUND_IN_TAG } from '../../../modules/local/add_found_in_tag/main' include { TRGT } from '../../../modules/local/trgt' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../../modules/nf-core/samtools/sort/main' @@ -21,14 +22,33 @@ workflow CALL_REPEAT_EXPANSIONS { .set { ch_trgt_input } // Run TGRT - TRGT ( ch_trgt_input, ch_fasta, ch_fai, ch_trgt_bed.map { it[1] } ) + TRGT ( + ch_trgt_input, + ch_fasta, + ch_fai, + ch_trgt_bed + ) + ch_versions = ch_versions.mix(TRGT.out.versions) // Sort and index bam - SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] ) - SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam) + SAMTOOLS_SORT_TRGT ( + TRGT.out.bam, + [[],[]] + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions) + + SAMTOOLS_INDEX_TRGT ( SAMTOOLS_SORT_TRGT.out.bam ) + ch_versions = 
ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions) + + // Add FOUND_IN=TRGT tag + ADD_FOUND_IN_TAG ( + TRGT.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] }, + "TRGT" + ) // Sort and index bcf - BCFTOOLS_SORT_TRGT(TRGT.out.vcf) + BCFTOOLS_SORT_TRGT ( ADD_FOUND_IN_TAG.out.vcf ) + ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions) BCFTOOLS_SORT_TRGT.out.vcf .join( BCFTOOLS_SORT_TRGT.out.tbi ) @@ -36,17 +56,19 @@ workflow CALL_REPEAT_EXPANSIONS { .groupTuple() .set{ ch_bcftools_merge_in } - BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [[],[]] ) - - - ch_versions = ch_versions.mix(TRGT.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions) - ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions) + BCFTOOLS_MERGE ( + ch_bcftools_merge_in, + ch_fasta, + ch_fai, + [[],[]] + ) ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) emit: - vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ] - versions = ch_versions // channel: [ versions.yml ] + sample_vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ] + project_vcf = BCFTOOLS_MERGE.out.vcf // channel: [ val(meta), path(vcf) ] + sample_bam = SAMTOOLS_SORT_TRGT.out.bam // channel: [ val(meta), path(bam) ] + sample_bai = SAMTOOLS_INDEX_TRGT.out.bai // channel: [ val(meta), path(bai) ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/call_repeat_expansions/tests/main.nf.test b/subworkflows/local/call_repeat_expansions/tests/main.nf.test new file mode 100644 index 00000000..84349c1b --- /dev/null +++ b/subworkflows/local/call_repeat_expansions/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_workflow { + + name "Test Workflow CALL_REPEAT_EXPANSIONS" + script "../main.nf" + config "./nextflow.config" + workflow "CALL_REPEAT_EXPANSIONS" + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip/main.nf" + process { + """ 
+ input[0] = [ + [ id:'test' ], + file(params.pipelines_testdata_base_path + 'reference/hg38.test.fa.gz', checkIfExists: true) + ] + """ + } + + } + + run("SAMTOOLS_FAIDX") { + script "../../../../modules/nf-core/samtools/faidx/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [[],[]] + """ + } + + } + } + + test("[bam, bai], fasta, fai, bed") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, project: 'project', sex: 1 ], // meta map + file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam', checkIfExists: true), + file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam.bai', checkIfExists: true) + ]) + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + input[3] = Channel.of([ + [ id: 'pathogenic_repeats' ], + file(params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed') + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.sample_bai.get(0).get(1).endsWith(".bai") }, + { assert snapshot( + path(workflow.out.sample_vcf.get(0).get(1)).vcf.variantsMD5, + path(workflow.out.project_vcf.get(0).get(1)).vcf.variantsMD5, + bam(workflow.out.sample_bam.get(0).get(1), stringency: 'silent').getReadsMD5(), + workflow.out.versions, + ).match() } + ) + } + + } + + test("[bam, bai], fasta, fai, bed - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false, project: 'project', sex: 1 ], // meta map + file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam', checkIfExists: true), + file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam.bai', checkIfExists: true) + ]) + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + input[3] = Channel.of([ + [ id: 'pathogenic_repeats' ], + file(params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed') + ]) + """ + } + } + + then 
{ + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } +} diff --git a/subworkflows/local/call_repeat_expansions/tests/main.nf.test.snap b/subworkflows/local/call_repeat_expansions/tests/main.nf.test.snap new file mode 100644 index 00000000..768fb131 --- /dev/null +++ b/subworkflows/local/call_repeat_expansions/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "[bam, bai], fasta, fai, bed": { + "content": [ + "502b7befd528ce70b8269a8e4b9281df", + "1de84bd070a82b37e6fac25d19ae604e", + "65999ab8f2bc7841de8172468bf23ab6", + [ + "versions.yml:md5,52272b464e62cb9e5d41622ea76cd070", + "versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e", + "versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41", + "versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab", + "versions.yml:md5,b9424dde80b33e84164cc956a14aa459" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-29T10:47:08.425030144" + }, + "[bam, bai], fasta, fai, bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "project" + }, + "project.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,52272b464e62cb9e5d41622ea76cd070", + "versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e", + "versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41", + "versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab", + "versions.yml:md5,b9424dde80b33e84164cc956a14aa459" + ], + "project_vcf": [ + [ + { + "id": "project" + }, + 
"project.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "sample_bai": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sample_bam": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sample_vcf": [ + [ + { + "id": "test", + "single_end": false, + "project": "project", + "sex": 1 + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,52272b464e62cb9e5d41622ea76cd070", + "versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e", + "versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41", + "versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab", + "versions.yml:md5,b9424dde80b33e84164cc956a14aa459" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T11:00:04.845039812" + } +} \ No newline at end of file diff --git a/subworkflows/local/call_repeat_expansions/tests/nextflow.config b/subworkflows/local/call_repeat_expansions/tests/nextflow.config new file mode 100644 index 00000000..d056805e --- /dev/null +++ b/subworkflows/local/call_repeat_expansions/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: 'CALL_REPEAT_EXPANSIONS:BCFTOOLS_MERGE' { + ext.args = '--output-type z --force-single --no-version' + } + withName: 'CALL_REPEAT_EXPANSIONS:BCFTOOLS_SORT_TRGT' { + ext.args = '--output-type z --write-index=tbi' + } + withName: 'CALL_REPEAT_EXPANSIONS:TRGT' { + ext.args = { "--sample-name ${meta.id}" } + } +} diff --git a/tests/samplesheet.nf.test.snap b/tests/samplesheet.nf.test.snap index 08c88aaf..838d5318 100644 --- a/tests/samplesheet.nf.test.snap +++ b/tests/samplesheet.nf.test.snap @@ -1,7 +1,7 @@ { "test profile": { "content": [ - 103, + 104, { "ADD_FOUND_IN_TAG": { "bcftools": 1.2, @@ -530,15 +530,15 @@ ], [ "HG002_Revio_repeat_expansion_stranger.vcf.gz", - 
"492a5d1a0f1656e1f7fc1b97c922cab4" + "73c6c1d07ff96ef44569b5534aa90174" ], [ "test.vcf.gz", - "4ab22ba6012b9f579f1831248bb95da" + "1de84bd070a82b37e6fac25d19ae604e" ], [ "HG002_Revio_sorted.vcf.gz", - "43b909ba9628b92f5062be6c413e560d" + "502b7befd528ce70b8269a8e4b9281df" ] ] ], @@ -546,6 +546,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-30T08:33:56.258093996" + "timestamp": "2024-10-30T10:27:37.120618269" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_bam.nf.test.snap b/tests/samplesheet_multisample_bam.nf.test.snap index 4ca6e6b0..75143c8d 100644 --- a/tests/samplesheet_multisample_bam.nf.test.snap +++ b/tests/samplesheet_multisample_bam.nf.test.snap @@ -1,7 +1,7 @@ { "samplesheet_multisample_bam | --phaser hiphase": { "content": [ - 148, + 150, { "ADD_FOUND_IN_TAG": { "bcftools": 1.2, @@ -722,23 +722,23 @@ ], [ "HG002_Revio_A_repeat_expansion_stranger.vcf.gz", - "492a5d1a0f1656e1f7fc1b97c922cab4" + "73c6c1d07ff96ef44569b5534aa90174" ], [ "HG002_Revio_B_repeat_expansion_stranger.vcf.gz", - "492a5d1a0f1656e1f7fc1b97c922cab4" + "73c6c1d07ff96ef44569b5534aa90174" ], [ "test.vcf.gz", - "7fcabf1cb86be8e3e5ad35d91e8897f9" + "9f54e9a4fed49a36e1ca3d4cbac049fe" ], [ "HG002_Revio_A_sorted.vcf.gz", - "43b909ba9628b92f5062be6c413e560d" + "502b7befd528ce70b8269a8e4b9281df" ], [ "HG002_Revio_B_sorted.vcf.gz", - "43b909ba9628b92f5062be6c413e560d" + "502b7befd528ce70b8269a8e4b9281df" ] ] ], @@ -746,6 +746,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-30T08:35:33.241655745" + "timestamp": "2024-10-30T10:29:12.353783346" } } \ No newline at end of file diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 345859d3..45587e62 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -483,7 +483,7 @@ workflow NALLO { // Annotate repeat expansions with stranger // if(!params.skip_repeat_annotation) { - ANNOTATE_REPEAT_EXPANSIONS ( ch_variant_catalog, CALL_REPEAT_EXPANSIONS.out.vcf ) + 
ANNOTATE_REPEAT_EXPANSIONS ( ch_variant_catalog, CALL_REPEAT_EXPANSIONS.out.sample_vcf ) ch_versions = ch_versions.mix(ANNOTATE_REPEAT_EXPANSIONS.out.versions) } }