From 99cbaae823e5a92e4e3b6e6b8b557ef780dcc561 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 11:21:44 +0200 Subject: [PATCH 01/26] move bam conversion to cram_prepare_samtools_bedtools --- .github/workflows/ci.yml | 2 +- conf/modules.config | 12 +- lib/GlobalVariables.groovy | 6 +- modules/nf-core/elprep/filter/environment.yml | 5 + modules/nf-core/elprep/filter/main.nf | 123 ++++++++++ modules/nf-core/elprep/filter/meta.yml | 212 ++++++++++++++++++ .../nf-core/elprep/filter/tests/main.nf.test | 120 ++++++++++ .../elprep/filter/tests/main.nf.test.snap | 122 ++++++++++ .../elprep/filter/tests/nextflow.config | 5 + nf-test.config | 3 +- .../main.nf | 42 +--- .../cram_prepare_samtools_bedtools/main.nf | 19 ++ .../local/input_split_bedtools/main.nf | 14 +- tests/nextflow.config | 8 + .../main.nf.test | 111 ++++----- .../main.nf.test.snap | 14 +- .../main.nf.test | 33 ++- .../main.nf.test.snap | 48 +++- workflows/germline.nf | 33 ++- 19 files changed, 788 insertions(+), 144 deletions(-) create mode 100644 modules/nf-core/elprep/filter/environment.yml create mode 100644 modules/nf-core/elprep/filter/main.nf create mode 100644 modules/nf-core/elprep/filter/meta.yml create mode 100644 modules/nf-core/elprep/filter/tests/main.nf.test create mode 100644 modules/nf-core/elprep/filter/tests/main.nf.test.snap create mode 100644 modules/nf-core/elprep/filter/tests/nextflow.config rename subworkflows/local/{cram_call_vardictjava => bam_call_vardictjava}/main.nf (72%) rename tests/subworkflows/local/{cram_call_vardictjava => bam_call_vardictjava}/main.nf.test (58%) rename tests/subworkflows/local/{cram_call_vardictjava => bam_call_vardictjava}/main.nf.test.snap (79%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e2be3da..2ed581fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,7 @@ jobs: - "pipeline_variations2" - "pipeline_gvcfs" - "cram_call_genotype_gatk4" - - "cram_call_vardictjava" + - 
"bam_call_vardictjava" - "cram_prepare_samtools_bedtools" - "input_split_bedtools" - "vcf_annotation" diff --git a/conf/modules.config b/conf/modules.config index e8406ec1..65276d4d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -274,7 +274,7 @@ process { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*CRAM_CALL_VARDICTJAVA:VARDICTJAVA\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VARDICTJAVA\$" { time = { 16.h * task.attempt } ext.prefix = {"${meta.id}"} ext.args = { @@ -291,7 +291,7 @@ process { } } - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { ext.args = '--allow-overlaps --output-type z' ext.prefix = enableOutput("original") ? final_prefix : {"${meta.id}.concat"} publishDir = [ @@ -303,7 +303,7 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" { ext.prefix = enableOutput("original") ? final_prefix : {"${meta.id}.vcfanno"} publishDir = [ overwrite: true, @@ -314,12 +314,12 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_1\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_1\$" { ext.prefix = { "${meta.id}.filtered1" } ext.args = "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z" } - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_2\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_2\$" { ext.args = "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" ext.prefix = enableOutput("filter") ? 
final_prefix : {"${meta.id}.filtered"} publishDir = [ @@ -331,7 +331,7 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { publishDir = [ overwrite: true, enabled: enableOutput("filter") || enableOutput("original"), diff --git a/lib/GlobalVariables.groovy b/lib/GlobalVariables.groovy index 72d5754b..9d868d23 100644 --- a/lib/GlobalVariables.groovy +++ b/lib/GlobalVariables.groovy @@ -4,9 +4,11 @@ import java.nio.file.Path class GlobalVariables { // The available callers - public static List availableCallers = ["haplotypecaller", "vardict"] + public static List availableCallers = ["haplotypecaller", "vardict", "elprep"] - public static List gvcfCallers = ["haplotypecaller"] + public static List gvcfCallers = ["haplotypecaller", "elprep"] + + public static List bamCallers = ["elprep", "vardict"] public static Map pedFiles = [:] diff --git a/modules/nf-core/elprep/filter/environment.yml b/modules/nf-core/elprep/filter/environment.yml new file mode 100644 index 00000000..38dd4f47 --- /dev/null +++ b/modules/nf-core/elprep/filter/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::elprep=5.1.3 diff --git a/modules/nf-core/elprep/filter/main.nf b/modules/nf-core/elprep/filter/main.nf new file mode 100644 index 00000000..6727106a --- /dev/null +++ b/modules/nf-core/elprep/filter/main.nf @@ -0,0 +1,123 @@ +process ELPREP_FILTER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/elprep:5.1.3--he881be0_1': + 'biocontainers/elprep:5.1.3--he881be0_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(target_regions_bed), path(filter_regions_bed), path(intermediate_bqsr_tables), path(recall_file) + tuple val(meta2), path(reference_sequences) + tuple val(meta3), path(reference_elfasta) + tuple val(meta4), path(known_sites_elsites) + val(run_haplotypecaller) + val(run_bqsr) + val(bqsr_tables_only) + val(get_activity_profile) + val(get_assembly_regions) + + + output: + tuple val(meta), path("*.{bam,sam}") , emit: bam + tuple val(meta), path("*.log") , emit: logs + tuple val(meta), path("*.metrics.txt") , optional: true, emit: metrics + tuple val(meta), path("*.recall") , optional: true, emit: recall + tuple val(meta), path("*.vcf.gz") , optional: true, emit: gvcf + tuple val(meta), path("*.table") , optional: true, emit: table + tuple val(meta), path("*.activity_profile.igv") , optional: true, emit: activity_profile + tuple val(meta), path("*.assembly_regions.igv") , optional: true, emit: assembly_regions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--output-type sam") ? "sam" : "bam" + + // filter args + def reference_sequences_cmd = reference_sequences ? "--replace-reference-sequences ${reference_sequences}" : "" + def filter_regions_cmd = filter_regions_bed ? "--filter-non-overlapping-reads ${filter_regions_bed}" : "" + + // markdup args + def markdup_cmd = args.contains("--mark-duplicates") ? "--mark-optical-duplicates ${prefix}.metrics.txt": "" + + // variant calling args + def haplotyper_cmd = run_haplotypecaller ? "--haplotypecaller ${prefix}.g.vcf.gz": "" + + def fasta_cmd = reference_elfasta ? "--reference ${reference_elfasta}": "" + def known_sites_cmd = known_sites_elsites ? 
"--known-sites ${known_sites_elsites}": "" + def target_regions_cmd = target_regions_bed ? "--target-regions ${target_regions_bed}": "" + + // bqsr args + def bqsr_cmd = run_bqsr ? "--bqsr ${prefix}.recall": "" + def bqsr_tables_only_cmd = bqsr_tables_only ? "--bqsr-tables-only ${prefix}.table": "" + + def intermediate_bqsr_cmd = intermediate_bqsr_tables ? "--bqsr-apply .": "" + def input_recall_cmd = recall_file ? "--recal-file $recall_file" : "" + // misc + def activity_profile_cmd = get_activity_profile ? "--activity-profile ${prefix}.activity_profile.igv": "" + def assembly_regions_cmd = get_assembly_regions ? "--assembly-regions ${prefix}.assembly_regions.igv": "" + + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + elprep filter ${bam} ${prefix}.${suffix} \\ + ${reference_sequences_cmd} \\ + ${filter_regions_cmd} \\ + ${markdup_cmd} \\ + ${haplotyper_cmd} \\ + ${fasta_cmd} \\ + ${known_sites_cmd} \\ + ${target_regions_cmd} \\ + ${bqsr_cmd} \\ + ${bqsr_tables_only_cmd} \\ + ${intermediate_bqsr_cmd} \\ + ${input_recall_cmd} \\ + ${activity_profile_cmd} \\ + ${assembly_regions_cmd} \\ + --nr-of-threads ${task.cpus} \\ + --log-path ./ \\ + $args + + mv logs/elprep/*.log . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--output-type sam") ? "sam" : "bam" + def timestamp = "${java.time.OffsetDateTime.now().format(java.time.format.DateTimeFormatter.ISO_DATE_TIME)}" + def markdup_cmd = args.contains("--mark-duplicates") ? "touch ${prefix}.metrics.txt": "" + def bqsr_cmd = run_bqsr ? "touch ${prefix}.recall": "" + def haplotyper_cmd = run_haplotypecaller ? 
"echo | gzip > ${prefix}.g.vcf.gz": "" + def bqsr_tables_only_cmd = bqsr_tables_only ? "echo | gzip > ${prefix}.table": "" + def activity_profile_cmd = get_activity_profile ? "touch ${prefix}.activity_profile.igv": "" + def assembly_regions_cmd = get_assembly_regions ? "touch ${prefix}.assembly_regions.igv": "" + + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + touch ${prefix}.${suffix} + touch elprep-${timestamp}.log + ${markdup_cmd} + ${bqsr_cmd} + ${haplotyper_cmd} + ${bqsr_tables_only_cmd} + ${activity_profile_cmd} + ${assembly_regions_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/elprep/filter/meta.yml b/modules/nf-core/elprep/filter/meta.yml new file mode 100644 index 00000000..2af3b8b1 --- /dev/null +++ b/modules/nf-core/elprep/filter/meta.yml @@ -0,0 +1,212 @@ +name: "elprep_filter" +description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant + calling." +keywords: + - sort + - bam + - sam + - filter + - variant calling +tools: + - "elprep": + description: "elPrep is a high-performance tool for preparing .sam/.bam files + for variant calling in sequencing pipelines. It can be used as a drop-in replacement + for SAMtools/Picard/GATK4." + homepage: "https://github.com/ExaScience/elprep" + documentation: "https://github.com/ExaScience/elprep" + tool_dev_url: "https://github.com/ExaScience/elprep" + doi: "10.1371/journal.pone.0244471" + licence: ["AGPL v3"] + identifier: biotools:elprep +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Input SAM/BAM file + pattern: "*.{bam,sam}" + - bai: + type: file + description: Input BAM file index + pattern: "*.bai" + - target_regions_bed: + type: file + description: Optional BED file containing target regions for BQSR and variant + calling. + pattern: "*.bed" + - filter_regions_bed: + type: file + description: Optional BED file containing regions to filter. + pattern: "*.bed" + - intermediate_bqsr_tables: + type: file + description: Optional list of BQSR tables, used when parsing files created by + `elprep split` + pattern: "*.table" + - recall_file: + type: file + description: Recall file with intermediate results for bqsr + pattern: "*.recall" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference_sequences: + type: file + description: Optional SAM header to replace existing header. + pattern: "*.sam" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference_elfasta: + type: file + description: Elfasta file, required for BQSR and variant calling. + pattern: "*.elfasta" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - known_sites_elsites: + type: file + description: Optional elsites file containing known SNPs for BQSR. + pattern: "*.elsites" + - - run_haplotypecaller: + type: boolean + description: Run variant calling on the input files. Needed to generate gvcf + output. + - - run_bqsr: + type: boolean + description: Run BQSR on the input files. Needed to generate recall metrics. + - - bqsr_tables_only: + type: boolean + description: Write intermediate BQSR tables, used when parsing files created + by `elprep split`. 
+ - - get_activity_profile: + type: boolean + description: Get the activity profile calculated by the haplotypecaller to the + given file in IGV format. + - - get_assembly_regions: + type: boolean + description: Get the assembly regions calculated by haplotypecaller to the specified + file in IGV format. +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{bam,sam}" + - "*.{bam,sam}": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{bam,sam}" + - logs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "elprep-*.log" + - "*.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "elprep-*.log" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{metrics.txt}" + - "*.metrics.txt": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{metrics.txt}" + - recall: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{recall}" + - "*.recall": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{recall}" + - gvcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - "*.vcf.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.{table}" + - "*.table": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{table}" + - activity_profile: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{activity_profile.igv}" + - "*.activity_profile.igv": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{activity_profile.igv}" + - assembly_regions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{assembly_regions.igv}" + - "*.assembly_regions.igv": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{assembly_regions.igv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" +maintainers: + - "@matthdsm" diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test b/modules/nf-core/elprep/filter/tests/main.nf.test new file mode 100644 index 00000000..84f6e16c --- /dev/null +++ b/modules/nf-core/elprep/filter/tests/main.nf.test @@ -0,0 +1,120 @@ + +nextflow_process { + + name "Test Process ELPREP_FILTER" + script "../main.nf" + process "ELPREP_FILTER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "elprep" + tag "elprep/filter" + + test("test-elprep-filter") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + [], + [], + [] + ]) // meta, bam, bai, target_regions, filter_regions, bqsr_tables, recall + input[1] = [[],[]] // reference sequences + input[2] = [ + [ id:'elfasta' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.elfasta', checkIfExists: true) + ] // meta3, reference_elfasta + input[3] = [ + [ id: 'sites' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites', checkIfExists: true) + ] // elsites + input[4] = true // haplotypecaller + input[5] = true // bqsr + input[6] = false // bqsr_tables_only + input[7] = true // get_activity_profile + input[8] = true // get_assembly_regions + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.logs }, // name is unstable + { assert snapshot( + process.out.bam.collect { [it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}"] }, + process.out.metrics.collect { [it[0], file(it[1]).readLines()[10..20]] }, + process.out.recall, + process.out.gvcf.collect { [ it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}" ] }, + process.out.table, + process.out.activity_profile, + process.out.assembly_regions, + process.out.versions + ).match() + } + ) + } + } + + test("test-elprep-filter-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'ref_seq'], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] // reference sequences + input[2] = [ + [ id:'elfasta' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.elfasta', checkIfExists: true) + ] // meta3, reference_elfasta + input[3] = [ + [ id: 'sites' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites', checkIfExists: true) + ] // elsites + input[4] = true // haplotypecaller + input[5] = false // bqsr + input[6] = false // bqsr_tables_only + input[7] = true // get_activity_profile + input[8] = true // get_assembly_regions + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.logs }, // name is unstable + { assert snapshot( + process.out.bam, + process.out.metrics, + process.out.recall, + process.out.gvcf, + process.out.table, + process.out.activity_profile, + process.out.assembly_regions, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test.snap b/modules/nf-core/elprep/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..9112fe30 --- /dev/null +++ b/modules/nf-core/elprep/filter/tests/main.nf.test.snap @@ -0,0 +1,122 @@ +{ + "test-elprep-filter": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam,readsMD5:463ac3b905fbf4ddf113a94dbfa8d69f" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.recall:md5,9a7921cc49a7a3f6c20e0278eaf3f235" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz,variantsMD5:b74f219f1f3ca2e59d6edfabf503a6a9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.activity_profile.igv:md5,c4b77c1bebcffd7822cafb8b90f70cde" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly_regions.igv:md5,7ec2070b4d4af26532cffbc1c465ba93" + ] + ], + [ + 
"versions.yml:md5,8193703d0cedd662b76ea48940dac55d" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-22T11:05:45.927224502" + }, + "test-elprep-filter-stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.activity_profile.igv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly_regions.igv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,8193703d0cedd662b76ea48940dac55d" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-22T10:45:09.343805413" + } +} \ No newline at end of file diff --git a/modules/nf-core/elprep/filter/tests/nextflow.config b/modules/nf-core/elprep/filter/tests/nextflow.config new file mode 100644 index 00000000..bcb2dae0 --- /dev/null +++ b/modules/nf-core/elprep/filter/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: ELPREP_FILTER { + ext.args = "--reference-confidence GVCF" + } +} diff --git a/nf-test.config b/nf-test.config index 6d58c41d..5c6eea4f 100644 --- a/nf-test.config +++ b/nf-test.config @@ -6,7 +6,8 @@ config { profile "nf_test,docker" plugins { - load "nft-bam@0.1.1" + load "nft-bam@0.4.0" + load "nft-vcf@1.0.7" } } diff --git a/subworkflows/local/cram_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf similarity index 72% rename from subworkflows/local/cram_call_vardictjava/main.nf rename to subworkflows/local/bam_call_vardictjava/main.nf index 91ab4fef..4ba412ce 100644 --- a/subworkflows/local/cram_call_vardictjava/main.nf +++ b/subworkflows/local/bam_call_vardictjava/main.nf @@ -1,4 +1,3 @@ -include { SAMTOOLS_CONVERT } from 
'../../../modules/nf-core/samtools/convert/main' include { VARDICTJAVA } from '../../../modules/nf-core/vardictjava/main' include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' include { BCFTOOLS_REHEADER } from '../../../modules/nf-core/bcftools/reheader/main' @@ -9,10 +8,9 @@ include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcf include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' -workflow CRAM_CALL_VARDICTJAVA { +workflow BAM_CALL_VARDICTJAVA { take: - ch_crams // channel: [mandatory] [ val(meta), path(cram), path(crai) ] => sample CRAM files and their indexes - ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai), path(bed) ] => sample BAM files and their indexes ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index ch_dbsnp // channel: [optional] [ path(vcf) ] => the dbnsp vcf file @@ -22,39 +20,11 @@ workflow CRAM_CALL_VARDICTJAVA { main: def ch_versions = Channel.empty() - def ch_cram_bam = ch_crams - .map { meta, cram, crai -> - def new_meta = meta + [caller:"vardict"] - [ new_meta, cram, crai ] - } - .branch { _meta, cram, _crai -> - bam: cram.extension == "bam" - cram: cram.extension == "cram" - } - - SAMTOOLS_CONVERT( - ch_cram_bam.cram, - ch_fasta, - ch_fai - ) - ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) - - def ch_vardict_crams = ch_input - .map { meta, cram, crai, bed -> - def new_meta = meta - meta.subMap("split_count") + [caller:"vardict", id:meta.sample] - [ new_meta, cram, crai, bed, meta.split_count ] - } - - def ch_vardict_input = ch_cram_bam.bam - .mix(SAMTOOLS_CONVERT.out.bam.join(SAMTOOLS_CONVERT.out.bai, failOnMismatch:true, failOnDuplicate:true)) - 
.combine(ch_vardict_crams, by:0) - .map { meta, bam, bai, _cram, _crai, bed, split_count -> - def new_meta = meta + [id:bed.baseName, split_count:split_count] - [ new_meta, bam, bai, bed ] - } - VARDICTJAVA( - ch_vardict_input, + ch_input.map { meta, bam, bai, bed -> + def new_meta = meta + [caller:'vardict'] + [ new_meta, bam, bai, bed ] + }, ch_fasta, ch_fai ) diff --git a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf index dbce8201..320008f4 100644 --- a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf +++ b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf @@ -8,6 +8,7 @@ include { FILTER_BEDS } from '../../../modules/local/filte include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main' include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' include { TABIX_BGZIP as UNZIP_ROI } from '../../../modules/nf-core/tabix/bgzip/main' include { BEDTOOLS_INTERSECT } from '../../../modules/nf-core/bedtools/intersect/main' @@ -20,6 +21,7 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { ch_fasta // channel: [mandatory] [ path(fasta) ] => fasta reference ch_fai // channel: [mandatory] [ path(fai) ] => fasta reference index ch_default_roi // channel: [optional] [ path(roi) ] => bed containing regions of interest to be used as default + output_bam // boolean: Also output BAM files main: @@ -71,6 +73,22 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { .join(SAMTOOLS_INDEX.out.crai, failOnDuplicate: true, failOnMismatch: true) .mix(ch_merged_crams.indexed) + // + // Optionally convert the CRAM files to BAM + // + + def ch_ready_bams = Channel.empty() + if(output_bam) { + SAMTOOLS_CONVERT( + ch_ready_crams, + ch_fasta, + ch_fai + ) + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + 
+ ch_ready_bams = SAMTOOLS_CONVERT.out.bam.join(SAMTOOLS_CONVERT.out.bai, failOnDuplicate:true, failOnMismatch:true) + } + // // Preprocess the ROI BED files => sort and merge overlapping regions // @@ -168,6 +186,7 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { emit: ready_crams = ch_ready_crams // [ val(meta), path(cram), path(crai) ] + ready_bams = ch_ready_bams // [ val(meta), path(bam), path(bai) ] ready_beds = ch_ready_beds // [ val(meta), path(bed) ] versions = ch_versions // [ path(versions) ] reports = ch_reports // [ path(reports) ] diff --git a/subworkflows/local/input_split_bedtools/main.nf b/subworkflows/local/input_split_bedtools/main.nf index 20977cb0..dd332ab2 100644 --- a/subworkflows/local/input_split_bedtools/main.nf +++ b/subworkflows/local/input_split_bedtools/main.nf @@ -20,17 +20,21 @@ workflow INPUT_SPLIT_BEDTOOLS { def ch_split_output = ch_inputs .join(BEDTOOLS_SPLIT.out.beds, failOnDuplicate: true, failOnMismatch: true) - .map { meta, input, input_index, beds -> + .map { row -> + def meta = row[0] + def beds = row[-1] // Determine the amount of BED files per sample def bed_is_list = beds instanceof ArrayList def new_meta = meta + [split_count: bed_is_list ? beds.size() : 1] - [ new_meta, input, input_index, bed_is_list ? beds : [beds] ] + def bed_output = bed_is_list ? 
[beds] : [[beds]] + return [new_meta] + row[1..-2] + bed_output } .transpose(by:3) // Create one channel entry for each BED file per sample - .map { meta, input, input_index, bed -> + .map { row -> // Set the base name of the BED file as the ID (this will look like sample_id.xxxx, where xxxx are numbers) - def new_meta = meta + [id:bed.baseName] - [ new_meta, input, input_index, bed ] + def new_row = row + new_row[0] = row[0] + [id:row[-1].baseName] + return new_row } emit: diff --git a/tests/nextflow.config b/tests/nextflow.config index 59ffd5da..1643f647 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -11,6 +11,7 @@ params { // References for test data fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" + elfasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.elfasta" fai = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta.fai" dict = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.dict" sdf = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" @@ -31,6 +32,13 @@ params { cram3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram" crai3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram.crai" + bam1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24143.bam" + bai1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24143.bam.bai" + bam2 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24149.bam" + bai2 = 
"https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24149.bam.bai" + bam3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24385.bam" + bai3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24385.bam.bai" + vcf1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz" tbi1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi" vcf2 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz" diff --git a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test similarity index 58% rename from tests/subworkflows/local/cram_call_vardictjava/main.nf.test rename to tests/subworkflows/local/bam_call_vardictjava/main.nf.test index 05798304..6615e3ab 100644 --- a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test @@ -1,16 +1,16 @@ nextflow_workflow { - name "Test Workflow CRAM_CALL_VARDICTJAVA" - script "subworkflows/local/cram_call_vardictjava/main.nf" - workflow "CRAM_CALL_VARDICTJAVA" + name "Test Workflow BAM_CALL_VARDICTJAVA" + script "subworkflows/local/bam_call_vardictjava/main.nf" + workflow "BAM_CALL_VARDICTJAVA" tag "subworkflows" tag "subworkflows_local" - tag "cram_call_vardictjava" + tag "bam_call_vardictjava" tag "vcf_concat_bcftools" tag "vcf_filter_bcftools" - test("cram_call_vardictjava - default") { + test("bam_call_vardictjava - default") { when { @@ -20,37 +20,32 @@ nextflow_workflow { workflow { """ input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ]) - input[1] 
= Channel.of([ [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split1, checkIfExists:true) ],[ [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split2, checkIfExists:true) ],[ [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split3, checkIfExists:true) ]) - input[2] = Channel.value([ + input[1] = Channel.value([ [id:"fasta"], file(params.fasta, checkIfExists:true) ]) - input[3] = Channel.value([ + input[2] = Channel.value([ [id:"fai"], file(params.fai, checkIfExists:true) ]) + input[3] = [[],[]] input[4] = [[],[]] - input[5] = [[],[]] - input[6] = false + input[5] = false """ } } @@ -59,14 +54,14 @@ nextflow_workflow { assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } ).match("default") } ) } } - test("cram_call_vardictjava - filter") { + test("bam_call_vardictjava - filter") { when { @@ -77,37 +72,32 @@ nextflow_workflow { workflow { """ input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ]) - input[1] = Channel.of([ [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split1, checkIfExists:true) ],[ [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split2, checkIfExists:true) ],[ [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split3, checkIfExists:true) ]) - input[2] = Channel.value([ + input[1] = Channel.value([ [id:"fasta"], file(params.fasta, checkIfExists:true) ]) - input[3] = Channel.value([ + input[2] = Channel.value([ [id:"fai"], file(params.fai, checkIfExists:true) ]) + input[3] = [[],[]] input[4] = [[],[]] - input[5] = [[],[]] - input[6] = true + input[5] = true """ } } @@ -116,14 +106,14 @@ nextflow_workflow { assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } ).match("filter") } ) } } - test("cram_call_vardictjava - family") { + test("bam_call_vardictjava - family") { // The family should not be merged here when { @@ -133,56 +123,47 @@ nextflow_workflow { workflow { """ input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149"], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true) - ]) - input[1] = Channel.of([ [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split1, checkIfExists:true) ],[ [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split2, checkIfExists:true) ],[ [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), file(params.split3, checkIfExists:true) ],[ [id:"NA24149.00001", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true), + file(params.bam2, checkIfExists:true), + file(params.bai2, 
checkIfExists:true), file(params.split1, checkIfExists:true) ],[ [id:"NA24149.00002", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true), + file(params.bam2, checkIfExists:true), + file(params.bai2, checkIfExists:true), file(params.split2, checkIfExists:true) ],[ [id:"NA24149.00003", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true), + file(params.bam2, checkIfExists:true), + file(params.bai2, checkIfExists:true), file(params.split3, checkIfExists:true) ]) - input[2] = Channel.value([ + input[1] = Channel.value([ [id:"fasta"], file(params.fasta, checkIfExists:true) ]) - input[3] = Channel.value([ + input[2] = Channel.value([ [id:"fai"], file(params.fai, checkIfExists:true) ]) + input[3] = [[],[]] input[4] = [[],[]] - input[5] = [[],[]] - input[6] = false + input[5] = false """ } } @@ -191,7 +172,7 @@ nextflow_workflow { assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } ).match("family") } ) } diff --git a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap similarity index 79% rename from tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap rename to tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap index a476cb3c..8ae2d467 100644 --- a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap @@ -10,7 +10,7 @@ "family_samples": "NA24143", "caller": "vardict" }, - "NA24143.vardict.vcf.gz", + "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", "NA24143.vardict.vcf.gz.tbi" ] ] @@ -19,7 +19,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-05T17:38:01.461442987" + "timestamp": "2024-10-23T11:17:08.827619348" }, "default": { "content": [ @@ -32,7 +32,7 @@ "family_samples": "NA24143", "caller": "vardict" }, - "NA24143.vardict.vcf.gz", + "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", "NA24143.vardict.vcf.gz.tbi" ] ] @@ -41,7 +41,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-05T17:37:40.178107639" + "timestamp": "2024-10-23T11:16:29.818590197" }, "family": { "content": [ @@ -54,7 +54,7 @@ "family_samples": "NA24143", "caller": "vardict" }, - "NA24143.vardict.vcf.gz", + "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", "NA24143.vardict.vcf.gz.tbi" ], [ @@ -65,7 +65,7 @@ "family_samples": "NA24149", "caller": "vardict" }, - "NA24149.vardict.vcf.gz", + "NA24149.vardict.vcf.gz,variantsMD5:37c24a3165a79df0b9744e80a9255f83", "NA24149.vardict.vcf.gz.tbi" ] ] @@ -74,6 +74,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-08T16:58:11.678281371" + "timestamp": 
"2024-10-23T11:17:59.686967502" } } \ No newline at end of file diff --git a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test index 0a69a066..5ca5ffb2 100644 --- a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test +++ b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test @@ -31,15 +31,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = false """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WGS") } @@ -71,15 +78,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = Channel.fromPath(params.bed, checkIfExists:true) + input[5] = true """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES common ROI") } @@ -111,15 +125,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = false """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES") } @@ -158,6 +179,7 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = true """ } } @@ -168,7 +190,10 @@ nextflow_workflow { { assert workflow.success }, { assert snapshot( workflow.out.ready_crams.collect { - [ it[0], cram(it[1], fasta).reads.size(), file(it[2]).name ] + [ it[0], "${file(it[1]).name},readsMD5:${cram(it[1], fasta).getReadsMD5()}", file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] }, workflow.out.ready_beds, workflow.out.reports diff --git a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap index cbd7535e..8fabe768 100644 --- a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap +++ b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap @@ -10,10 +10,23 @@ 
"family_samples": "NA24143", "duplicate_count": 2 }, - 798258, + "NA24143.cram,readsMD5:be28f434d6f7bcfa398488a6611d89c1", "NA24143.cram.crai" ] ], + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "duplicate_count": 2 + }, + "NA24143.bam,readsMD5:be28f434d6f7bcfa398488a6611d89c1", + "NA24143.bam.bai" + ] + ], [ [ { @@ -34,7 +47,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-23T11:46:20.577603661" + "timestamp": "2024-10-23T10:19:20.53195135" }, "default - WGS": { "content": [ @@ -47,9 +60,12 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] + ], + [ + ], [ [ @@ -71,7 +87,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-23T14:56:11.141634788" + "timestamp": "2024-10-23T10:17:48.433466279" }, "default - WES": { "content": [ @@ -84,9 +100,12 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] + ], + [ + ], [ [ @@ -108,7 +127,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-06T14:33:15.988619426" + "timestamp": "2024-10-23T10:18:36.993186258" }, "default - WES common ROI": { "content": [ @@ -121,10 +140,23 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] ], + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "duplicate_count": 1 + }, + "NA24143.bam,readsMD5:77afffb023e537869c5c6ebf31187ded", + "NA24143.bam.bai" + ] + ], [ [ { @@ -145,6 +177,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-06T14:32:49.756585296" + "timestamp": 
"2024-10-23T10:18:13.865281894" } } \ No newline at end of file diff --git a/workflows/germline.nf b/workflows/germline.nf index f7c91c18..8b56079a 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -18,7 +18,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_ include { CRAM_PREPARE_SAMTOOLS_BEDTOOLS } from '../subworkflows/local/cram_prepare_samtools_bedtools/main' include { INPUT_SPLIT_BEDTOOLS } from '../subworkflows/local/input_split_bedtools/main' include { CRAM_CALL_GENOTYPE_GATK4 } from '../subworkflows/local/cram_call_genotype_gatk4/main' -include { CRAM_CALL_VARDICTJAVA } from '../subworkflows/local/cram_call_vardictjava/main' +include { BAM_CALL_VARDICTJAVA } from '../subworkflows/local/bam_call_vardictjava/main' include { VCF_EXTRACT_RELATE_SOMALIER } from '../subworkflows/local/vcf_extract_relate_somalier/main' include { VCF_PED_RTGTOOLS } from '../subworkflows/local/vcf_ped_rtgtools/main' include { VCF_ANNOTATION } from '../subworkflows/local/vcf_annotation/main' @@ -358,7 +358,8 @@ workflow GERMLINE { // // Run sample preparation // - + + def create_bam_files = callers.intersect(GlobalVariables.bamCallers).size() > 0 // Only create BAM files when needed CRAM_PREPARE_SAMTOOLS_BEDTOOLS( ch_input.cram.filter { meta, _cram, _crai -> // Filter out files that already have a called GVCF when only GVCF callers are used @@ -370,7 +371,8 @@ workflow GERMLINE { }, ch_fasta_ready, ch_fai_ready, - ch_default_roi + ch_default_roi, + create_bam_files ) ch_versions = ch_versions.mix(CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.versions) @@ -378,6 +380,14 @@ workflow GERMLINE { // Split the BED files // + def ch_split_cram_bam = Channel.empty() + if(create_bam_files) { + ch_split_cram_bam = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + .join(CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_bams, failOnDuplicate:true, failOnMismatch:true) + } else { + ch_split_cram_bam = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + } + INPUT_SPLIT_BEDTOOLS( 
CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_beds.map { meta, bed -> [meta, bed, scatter_count] @@ -386,6 +396,12 @@ workflow GERMLINE { ) ch_versions = ch_versions.mix(INPUT_SPLIT_BEDTOOLS.out.versions) + def ch_caller_inputs = INPUT_SPLIT_BEDTOOLS.out.split + .multiMap { meta, cram, crai, bam=[], bai=[], bed -> + cram: [meta, cram, crai, bed] + bam: [meta, bam, bai, bed] + } + def ch_calls = Channel.empty() if("haplotypecaller" in callers) { // @@ -393,7 +409,7 @@ workflow GERMLINE { // CRAM_CALL_GENOTYPE_GATK4( - INPUT_SPLIT_BEDTOOLS.out.split.filter { meta, _cram, _crai, _bed -> + ch_caller_inputs.cram.filter { meta, _cram, _crai, _bed -> // Filter out the entries that already have a GVCF meta.type == "cram" }, @@ -422,18 +438,17 @@ workflow GERMLINE { // Call variants with VarDict // - CRAM_CALL_VARDICTJAVA( - CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams, - INPUT_SPLIT_BEDTOOLS.out.split, + BAM_CALL_VARDICTJAVA( + ch_caller_inputs.bam, ch_fasta_ready, ch_fai_ready, ch_dbsnp_ready, ch_dbsnp_tbi_ready, filter ) - ch_versions = ch_versions.mix(CRAM_CALL_VARDICTJAVA.out.versions) + ch_versions = ch_versions.mix(BAM_CALL_VARDICTJAVA.out.versions) - ch_calls = ch_calls.mix(CRAM_CALL_VARDICTJAVA.out.vcfs) + ch_calls = ch_calls.mix(BAM_CALL_VARDICTJAVA.out.vcfs) } def ch_called_variants = ch_calls From ebb363095ada4fdcc4bc766fc7a50ed5614c13b9 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 12:01:58 +0200 Subject: [PATCH 02/26] separate dbnsp annotation from vardictjava subwf --- .../local/bam_call_vardictjava/main.nf | 35 +++----------- subworkflows/local/vcf_dbsnp_vcfanno/main.nf | 47 +++++++++++++++++++ 2 files changed, 53 insertions(+), 29 deletions(-) create mode 100644 subworkflows/local/vcf_dbsnp_vcfanno/main.nf diff --git a/subworkflows/local/bam_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf index 4ba412ce..1308cf03 100644 --- a/subworkflows/local/bam_call_vardictjava/main.nf +++ 
b/subworkflows/local/bam_call_vardictjava/main.nf @@ -7,6 +7,7 @@ include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcf include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' +include { VCF_DBSNP_VCFANNO } from '../vcf_dbsnp_vcfanno/main' workflow BAM_CALL_VARDICTJAVA { take: @@ -38,28 +39,12 @@ workflow BAM_CALL_VARDICTJAVA { def ch_annotated = Channel.empty() if(!(ch_dbsnp instanceof List)) { - ch_dbsnp.map { _meta, dbsnp -> [ get_vcfanno_config(dbsnp) ] } - .collect() - .set { ch_vcfanno_toml } // Set needs to be used here due to some Nextflow bug - - ch_dbsnp.map { _meta, dbsnp -> dbsnp } - .combine(ch_dbsnp_tbi.map { _meta, tbi -> tbi }) - .collect() - .set { ch_vcfanno_resources } // Set needs to be used here due to some Nextflow bug - - VCFANNO( - VCF_CONCAT_BCFTOOLS.out.vcfs.map { meta, vcf -> [ meta, vcf, [], [] ] }, - ch_vcfanno_toml, - [], - ch_vcfanno_resources - ) - ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - - TABIX_BGZIP( - VCFANNO.out.vcf + VCF_DBNSP_VCFANNO( + VCF_CONCAT_BCFTOOLS.out.vcfs, + ch_dbsnp, + ch_dbsnp_tbi ) - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) - + ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) ch_annotated = TABIX_BGZIP.out.output } else { ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs @@ -95,11 +80,3 @@ workflow BAM_CALL_VARDICTJAVA { versions = ch_versions // channel: [ path(versions.yml) ] } - -def get_vcfanno_config(vcf) { - def old_toml = file("${projectDir}/assets/dbsnp.toml", checkIfExists: true) - old_toml.copyTo("${workDir}/vcfanno/dbsnp.toml") - def new_toml = file("${workDir}/vcfanno/dbsnp.toml") - new_toml.text = old_toml.text.replace("DBSNP_FILE", vcf.getName()) - return new_toml -} diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf new file mode 100644 index 00000000..409ac005 --- /dev/null +++ 
b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf @@ -0,0 +1,47 @@ +include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' + +workflow VCF_DBSNP_VCFANNO { + take: + ch_input // channel: [mandatory] [ val(meta), path(vcf), path(tbi), ] => VCF files to be annotated + ch_dbsnp // channel: [optional] [ path(vcf) ] => the dbnsp vcf file + ch_dbsnp_tbi // channel: [optional] [ path(tbi) ] => the dbsnp vcf index file + + main: + def ch_versions = Channel.empty() + + def ch_vcfanno_toml = ch_dbsnp.map { _meta, dbsnp -> [ get_vcfanno_config(dbsnp) ] } + .collect() + + def ch_vcfanno_resources = ch_dbsnp.map { _meta, dbsnp -> dbsnp } + .combine(ch_dbsnp_tbi.map { _meta, tbi -> tbi }) + .collect() + + VCFANNO( + ch_input.map { meta, vcf -> [ meta, vcf, [], [] ] }, + ch_vcfanno_toml, + [], + ch_vcfanno_resources + ) + ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) + + TABIX_BGZIP( + VCFANNO.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + + def ch_vcfs = TABIX_BGZIP.out.output + + emit: + vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] + +} + +def get_vcfanno_config(vcf) { + def old_toml = file("${projectDir}/assets/dbsnp.toml", checkIfExists: true) + old_toml.copyTo("${workDir}/vcfanno/dbsnp.toml") + def new_toml = file("${workDir}/vcfanno/dbsnp.toml") + new_toml.text = old_toml.text.replace("DBSNP_FILE", vcf.getName()) + return new_toml +} From 6a35da1e21997ee5fc7d35ea19aaad89eac8649a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 14:19:40 +0200 Subject: [PATCH 03/26] move filtering to the main wf --- conf/modules.config | 62 ++++++++++--------- .../local/bam_call_vardictjava/main.nf | 22 +------ .../local/cram_call_genotype_gatk4/main.nf | 23 +------ workflows/germline.nf | 17 ++++- 4 files changed, 54 insertions(+), 70 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 65276d4d..51749247 100644 
--- a/conf/modules.config +++ b/conf/modules.config @@ -251,23 +251,6 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_1\$" { - ext.prefix = { "${meta.id}_filtered_snps" } - ext.args = {"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'"} - } - - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} - ext.args = {'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\''} - publishDir = [ - enabled: enableOutput("filter"), - overwrite: true, - path: final_output, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] // SAVE - } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VARDICTJAVA @@ -314,27 +297,50 @@ process { ] // SAVE } - withName: "^.*BAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_1\$" { - ext.prefix = { "${meta.id}.filtered1" } - ext.args = "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z" - } - - withName: "^.*BAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.args = "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" - ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} + withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { publishDir = [ overwrite: true, - enabled: enableOutput("filter"), + enabled: enableOutput("filter") || enableOutput("original"), mode: params.publish_dir_mode, path: final_output, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] // SAVE } - withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FILTER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" { + ext.prefix = { "${meta.id}.filtered1" } + ext.args = { + meta.caller == "vardict" ? + "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z": + meta.caller == "haplotypecaller" ? + "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": + meta.caller == "elprep" ? + "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": + "" + } + } + + withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" { + ext.args = { + meta.caller == "vardict" ? + "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" : + meta.caller == "haplotypecaller" ? + '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : + meta.caller == "elprep" ? + '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : + "" + + } + ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} publishDir = [ overwrite: true, - enabled: enableOutput("filter") || enableOutput("original"), + enabled: enableOutput("filter"), mode: params.publish_dir_mode, path: final_output, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } diff --git a/subworkflows/local/bam_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf index 1308cf03..83c4d78a 100644 --- a/subworkflows/local/bam_call_vardictjava/main.nf +++ b/subworkflows/local/bam_call_vardictjava/main.nf @@ -33,7 +33,7 @@ workflow BAM_CALL_VARDICTJAVA { VCF_CONCAT_BCFTOOLS( VARDICTJAVA.out.vcf, - false + true ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) @@ -50,25 +50,7 @@ workflow BAM_CALL_VARDICTJAVA { ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs } - def ch_filter_output = Channel.empty() - if(filter) { - VCF_FILTER_BCFTOOLS( - ch_annotated, - false - ) - ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) - ch_filter_output = VCF_FILTER_BCFTOOLS.out.vcfs - } else { - ch_filter_output = ch_annotated - } - - TABIX_TABIX( - ch_filter_output - ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - def ch_vcfs = ch_filter_output - .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + def ch_vcfs = ch_annotated .map { meta, vcf, tbi -> def new_meta = meta + [family_samples: meta.sample] [ new_meta, vcf, tbi ] diff --git a/subworkflows/local/cram_call_genotype_gatk4/main.nf b/subworkflows/local/cram_call_genotype_gatk4/main.nf index 910baed6..46062b9e 100644 --- a/subworkflows/local/cram_call_genotype_gatk4/main.nf +++ b/subworkflows/local/cram_call_genotype_gatk4/main.nf @@ -4,7 +4,6 @@ include { CRAM_CALL_GATK4 } from '../cram_call_gatk4/main' include { GVCF_JOINT_GENOTYPE_GATK4 } from '../gvcf_joint_genotype_gatk4/main' -include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' workflow CRAM_CALL_GENOTYPE_GATK4 { take: @@ -64,26 +63,10 @@ workflow CRAM_CALL_GENOTYPE_GATK4 { } - if(!only_call && !only_merge) { - - if(filter) { - VCF_FILTER_BCFTOOLS( - GVCF_JOINT_GENOTYPE_GATK4.out.vcfs, - true - ) - ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) - - ch_vcfs = VCF_FILTER_BCFTOOLS.out.vcfs - } else { - ch_vcfs = 
GVCF_JOINT_GENOTYPE_GATK4.out.vcfs - } - - } - emit: - vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] + vcfs = GVCF_JOINT_GENOTYPE_GATK4.out.vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - reports = ch_reports // channel: [ path(reports) ] - versions = ch_versions // channel: [ versions.yml ] + reports = ch_reports // channel: [ path(reports) ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/germline.nf b/workflows/germline.nf index 8b56079a..d5fb56b7 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -25,6 +25,7 @@ include { VCF_ANNOTATION } from '../subworkflows/local/vcf_an include { VCF_VALIDATE_SMALL_VARIANTS } from '../subworkflows/local/vcf_validate_small_variants/main' include { VCF_UPD_UPDIO } from '../subworkflows/local/vcf_upd_updio/main' include { VCF_ROH_AUTOMAP } from '../subworkflows/local/vcf_roh_automap/main' +include { VCF_FILTER_BCFTOOLS } from '../subworkflows/local/vcf_filter_bcftools/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -468,10 +469,22 @@ workflow GERMLINE { ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) ch_reports = ch_reports.mix(BCFTOOLS_STATS.out.stats.collect { _meta, report -> report }) + def ch_filtered_variants = Channel.empty() + if(filter) { + VCF_FILTER_BCFTOOLS( + ch_called_variants, + true + ) + ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) + ch_filtered_variants = VCF_FILTER_BCFTOOLS.out.vcfs + } else { + ch_filtered_variants = ch_called_variants + } + def ch_normalized_variants = Channel.empty() if(normalize) { BCFTOOLS_NORM( - ch_called_variants, + ch_filtered_variants, ch_fasta_ready, ) ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) @@ -484,7 +497,7 @@ workflow GERMLINE { ch_normalized_variants = BCFTOOLS_NORM.out.vcf .join(TABIX_NORMALIZE.out.tbi, failOnDuplicate:true, failOnMismatch:true) } else { - ch_normalized_variants = 
ch_called_variants + ch_normalized_variants = ch_filtered_variants } if(!only_merge && !only_call) { From 9350160024f22da4b19544fcbc4b18d990723f6a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 14:56:19 +0200 Subject: [PATCH 04/26] move genotyping out of the gatk4 subwf --- .../local/cram_call_genotype_gatk4/main.nf | 72 ------------------- workflows/germline.nf | 38 ++++++---- 2 files changed, 26 insertions(+), 84 deletions(-) delete mode 100644 subworkflows/local/cram_call_genotype_gatk4/main.nf diff --git a/subworkflows/local/cram_call_genotype_gatk4/main.nf b/subworkflows/local/cram_call_genotype_gatk4/main.nf deleted file mode 100644 index 46062b9e..00000000 --- a/subworkflows/local/cram_call_genotype_gatk4/main.nf +++ /dev/null @@ -1,72 +0,0 @@ -// -// Call and genotype variants with GATK4 tooling -// - -include { CRAM_CALL_GATK4 } from '../cram_call_gatk4/main' -include { GVCF_JOINT_GENOTYPE_GATK4 } from '../gvcf_joint_genotype_gatk4/main' - -workflow CRAM_CALL_GENOTYPE_GATK4 { - take: - ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files - ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary - ch_strtablefile // channel: [optional] [ path(strtablefile) ] => STR table file - ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants - ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF - dragstr // boolean: create a DragSTR model and run haplotypecaller with it - only_call // boolean: only run the variant calling - only_merge // boolean: run until the family merging - 
filter // boolean: filter the VCFs - scatter_count // integer: the amount of times the VCFs should be scattered - - main: - - def ch_versions = Channel.empty() - def ch_vcfs = Channel.empty() - def ch_reports = Channel.empty() - - CRAM_CALL_GATK4( - ch_input, - ch_fasta, - ch_fai, - ch_dict, - ch_strtablefile, - ch_dbsnp, - ch_dbsnp_tbi, - dragstr - ) - ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions) - ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports) - - def ch_gvcfs_ready = ch_gvcfs - .map { meta, gvcf, tbi -> - def new_meta = meta + [caller:"haplotypecaller"] - [ new_meta, gvcf, tbi ] - } - .mix(CRAM_CALL_GATK4.out.gvcfs) - - if(!only_call) { - - GVCF_JOINT_GENOTYPE_GATK4( - ch_gvcfs_ready, - ch_fasta, - ch_fai, - ch_dict, - ch_dbsnp, - ch_dbsnp_tbi, - only_merge, - scatter_count - ) - ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions) - - } - - emit: - vcfs = GVCF_JOINT_GENOTYPE_GATK4.out.vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - - reports = ch_reports // channel: [ path(reports) ] - versions = ch_versions // channel: [ versions.yml ] - -} diff --git a/workflows/germline.nf b/workflows/germline.nf index d5fb56b7..a7a38c7a 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -17,7 +17,8 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_ include { CRAM_PREPARE_SAMTOOLS_BEDTOOLS } from '../subworkflows/local/cram_prepare_samtools_bedtools/main' include { INPUT_SPLIT_BEDTOOLS } from '../subworkflows/local/input_split_bedtools/main' -include { CRAM_CALL_GENOTYPE_GATK4 } from '../subworkflows/local/cram_call_genotype_gatk4/main' +include { CRAM_CALL_GATK4 } from '../subworkflows/local/cram_call_gatk4/main' +include { GVCF_JOINT_GENOTYPE_GATK4 } from '../subworkflows/local/gvcf_joint_genotype_gatk4/main' include { BAM_CALL_VARDICTJAVA } from '../subworkflows/local/bam_call_vardictjava/main' include { VCF_EXTRACT_RELATE_SOMALIER } from 
'../subworkflows/local/vcf_extract_relate_somalier/main' include { VCF_PED_RTGTOOLS } from '../subworkflows/local/vcf_ped_rtgtools/main' @@ -355,6 +356,10 @@ workflow GERMLINE { def ch_gvcfs_ready = ch_gvcf_branch.no_tbi .join(TABIX_GVCF.out.tbi, failOnDuplicate:true, failOnMismatch:true) .mix(ch_gvcf_branch.tbi) + .map { meta, gvcf, tbi -> + [ meta, gvcf, tbi, callers.intersect(GlobalVariables.gvcfCallers) ] + } + .transpose(by:3) // // Run sample preparation @@ -409,28 +414,22 @@ workflow GERMLINE { // Call variants with GATK4 HaplotypeCaller // - CRAM_CALL_GENOTYPE_GATK4( + CRAM_CALL_GATK4( ch_caller_inputs.cram.filter { meta, _cram, _crai, _bed -> // Filter out the entries that already have a GVCF meta.type == "cram" }, - ch_gvcfs_ready, ch_fasta_ready, ch_fai_ready, ch_dict_ready, ch_strtablefile_ready, ch_dbsnp_ready, ch_dbsnp_tbi_ready, - dragstr, - only_call, - only_merge, - filter, - scatter_count + dragstr ) - ch_versions = ch_versions.mix(CRAM_CALL_GENOTYPE_GATK4.out.versions) - ch_reports = ch_reports.mix(CRAM_CALL_GENOTYPE_GATK4.out.reports) - - ch_calls = ch_calls.mix(CRAM_CALL_GENOTYPE_GATK4.out.vcfs) + ch_gvcfs_ready = ch_gvcfs_ready.mix(CRAM_CALL_GATK4.out.gvcfs) + ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions) + ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports) } @@ -452,6 +451,21 @@ workflow GERMLINE { ch_calls = ch_calls.mix(BAM_CALL_VARDICTJAVA.out.vcfs) } + // TODO reimplement --only_call and --only_merge + + GVCF_JOINT_GENOTYPE_GATK4( + ch_gvcfs_ready, + ch_fasta_ready, + ch_fai_ready, + ch_dict_ready, + ch_dbsnp_ready, + ch_dbsnp_tbi_ready, + only_merge, + scatter_count + ) + ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions) + ch_calls = ch_calls.mix(GVCF_JOINT_GENOTYPE_GATK4.out.vcfs) + def ch_called_variants = ch_calls .map { meta, vcf, tbi -> def new_meta = meta - meta.subMap(["type", "vardict_min_af"]) From 10fa33c35415f1b66ce1f2fd9f74bb5c9305d154 Mon Sep 17 00:00:00 2001 From: Nicolas 
Vannieuwkerke Date: Wed, 23 Oct 2024 16:20:38 +0200 Subject: [PATCH 05/26] add elprep --- conf/modules.config | 67 ++++++++++++++--- main.nf | 3 + modules.json | 12 ++- .../elprep/fastatoelfasta/environment.yml | 7 ++ modules/nf-core/elprep/fastatoelfasta/main.nf | 50 +++++++++++++ .../nf-core/elprep/fastatoelfasta/meta.yml | 55 ++++++++++++++ .../elprep/fastatoelfasta/tests/main.nf.test | 66 ++++++++++++++++ .../fastatoelfasta/tests/main.nf.test.snap | 62 +++++++++++++++ nextflow.config | 1 + subworkflows/local/bam_call_elprep/main.nf | 75 +++++++++++++++++++ subworkflows/local/cram_call_gatk4/main.nf | 8 +- .../local/input_split_bedtools/main.nf | 10 +-- workflows/germline.nf | 48 +++++++++++- 13 files changed, 441 insertions(+), 23 deletions(-) create mode 100644 modules/nf-core/elprep/fastatoelfasta/environment.yml create mode 100644 modules/nf-core/elprep/fastatoelfasta/main.nf create mode 100644 modules/nf-core/elprep/fastatoelfasta/meta.yml create mode 100644 modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test create mode 100644 modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap create mode 100644 subworkflows/local/bam_call_elprep/main.nf diff --git a/conf/modules.config b/conf/modules.config index 51749247..732e1ec5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -124,15 +124,15 @@ process { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GATK4_HAPLOTYPCECALLER + GATK4 HAPLOTYPCECALLER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:CRAM_CALL_GATK4:GATK4_CALIBRATEDRAGSTRMODEL\$" { + withName: "^.*CRAM_CALL_GATK4:GATK4_CALIBRATEDRAGSTRMODEL\$" { ext.args = "--parallel" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:CRAM_CALL_GATK4:GATK4_HAPLOTYPECALLER\$" { + withName: "^.*CRAM_CALL_GATK4:GATK4_HAPLOTYPECALLER\$" { time = { 16.h * task.attempt } ext.prefix = {"${meta.id}.g"} ext.args = { @@ -169,7 
+169,7 @@ process { ext.args = '' } - withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS_SINGLE\$" { + withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS\$" { publishDir = [ overwrite: true, enabled: true, @@ -180,12 +180,61 @@ process { ext.prefix = final_prefix } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:BCFTOOLS_QUERY\$" { + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ELPREP + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*BAM_CALL_ELPREP:ELPREP_FILTER\$" { + ext.args = "--reference-confidence GVCF" + } + + withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { + publishDir = [ + overwrite: true, + enabled: true, + mode: params.publish_dir_mode, + path: individual_output, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] // SAVE + ext.prefix = { "${meta.id}.${meta.caller}.g" } + ext.args = '--allow-overlaps --output-type z' + } + + withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:TABIX_TABIX\$" { + publishDir = [ + overwrite: true, + enabled: true, + path: individual_output, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] // SAVE + } + + withName: "^.*BAM_CALL_ELPREP:BCFTOOLS_STATS\$" { + publishDir = [ + overwrite: true, + enabled: true, + path: individual_reports, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] // SAVE + ext.prefix = final_prefix + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GVCF JOINT GENOTYPING + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:BCFTOOLS_QUERY\$" { ext.args = "--exclude 'QUAL=\".\"' --format '%CHROM\t%POS0\t%END\\n'" ext.suffix = "bed" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:MERGE_BEDS\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:MERGE_BEDS\$" { ext.args = "-d ${params.merge_distance}" publishDir = [ enabled: true, @@ -196,12 +245,12 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GAWK\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GAWK\$" { ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' ext.suffix = "bed" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOMICSDBIMPORT\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOMICSDBIMPORT\$" { label = { meta.family_samples.tokenize(",").size() <= 10 ? 
"process_medium" : "process_high" } time = { 16.h * task.attempt } // Lots of parameters are fetched from https://gatk.broadinstitute.org/hc/en-us/articles/360056138571-GenomicsDBImport-usage-and-performance-guidelines @@ -228,7 +277,7 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOTYPEGVCFS\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOTYPEGVCFS\$" { time = { 16.h * task.attempt } ext.args = { [ diff --git a/main.nf b/main.nf index 2e82e0c1..c164385f 100644 --- a/main.nf +++ b/main.nf @@ -19,6 +19,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_cmgg_germline_pi params.fasta = getGenomeAttribute('fasta', params.genomes, params.genome) params.fai = getGenomeAttribute('fai', params.genomes, params.genome) params.dict = getGenomeAttribute('dict', params.genomes, params.genome) +params.elfasta = getGenomeAttribute('elfasta', params.genomes, params.genome) params.strtablefile = getGenomeAttribute('strtablefile', params.genomes, params.genome) params.sdf = getGenomeAttribute('sdf', params.genomes, params.genome) params.dbsnp = getGenomeAttribute('dbsnp', params.genomes, params.genome) @@ -73,6 +74,7 @@ workflow NFCMGG_GERMLINE { pipeline_params.fasta, pipeline_params.fai, pipeline_params.dict, + pipeline_params.elfasta, pipeline_params.strtablefile, pipeline_params.sdf, pipeline_params.dbsnp, @@ -103,6 +105,7 @@ workflow NFCMGG_GERMLINE { pipeline_params.automap_panel, pipeline_params.outdir, GlobalVariables.pedFiles, + pipeline_params.elsites, // Boolean inputs pipeline_params.dragstr, diff --git a/modules.json b/modules.json index fd1f2450..8ff9f050 100644 --- a/modules.json +++ b/modules.json @@ -66,6 +66,16 @@ "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", "installed_by": ["modules"] }, + "elprep/fastatoelfasta": { + "branch": "master", + "git_sha": "74ac5351a11a184171489dee73652e8b69ba9d22", + "installed_by": ["modules"] + }, + "elprep/filter": { + "branch": "master", + 
"git_sha": "909c4dcdbb1e751214e2bb155e8c0a59633ed12a", + "installed_by": ["modules"] + }, "ensemblvep/download": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -183,7 +193,7 @@ "tabix/bgzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "tabix/bgziptabix": { "branch": "master", diff --git a/modules/nf-core/elprep/fastatoelfasta/environment.yml b/modules/nf-core/elprep/fastatoelfasta/environment.yml new file mode 100644 index 00000000..6ab3f8fc --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::elprep=5.1.3" diff --git a/modules/nf-core/elprep/fastatoelfasta/main.nf b/modules/nf-core/elprep/fastatoelfasta/main.nf new file mode 100644 index 00000000..861350bf --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/main.nf @@ -0,0 +1,50 @@ +process ELPREP_FASTATOELFASTA { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/elprep:5.1.3--he881be0_1': + 'biocontainers/elprep:5.1.3--he881be0_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.elfasta") , emit: elfasta + tuple val(meta), path("logs/elprep/elprep*"), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + elprep fasta-to-elfasta \\ + $fasta \\ + ${prefix}.elfasta \\ + --log-path ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def timestamp = "${java.time.OffsetDateTime.now().format(java.time.format.DateTimeFormatter.ISO_DATE_TIME)}" + + """ + mkdir -p logs/elprep + + touch ${prefix}.elfasta + touch logs/elprep/elprep-${timestamp}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/elprep/fastatoelfasta/meta.yml b/modules/nf-core/elprep/fastatoelfasta/meta.yml new file mode 100644 index 00000000..41a8be31 --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/meta.yml @@ -0,0 +1,55 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "elprep_fastatoelfasta" +description: Convert a file in FASTA format to the ELFASTA format +keywords: + - fasta + - elfasta + - elprep +tools: + - "elprep": + description: "elPrep is a high-performance tool for preparing .sam/.bam files + for variant calling in sequencing pipelines. It can be used as a drop-in replacement + for SAMtools/Picard/GATK4." 
+ homepage: "https://github.com/ExaScience/elprep" + documentation: "https://github.com/ExaScience/elprep" + tool_dev_url: "https://github.com/ExaScience/elprep" + doi: "10.1371/journal.pone.0244471" + licence: ["AGPL v3"] + identifier: biotools:elprep + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: FASTA file + pattern: "*.{fasta,fa,fna}" +output: + - elfasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + pattern: "*.elfasta" + - "*.elfasta": + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + pattern: "*.elfasta" + - log: + - meta: {} + - logs/elprep/elprep*: {} + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test new file mode 100644 index 00000000..d22f6d9d --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process ELPREP_FASTATOELFASTA" + script "../main.nf" + process "ELPREP_FASTATOELFASTA" + + tag "modules" + tag "modules_nfcore" + tag "elprep" + tag "elprep/fastatoelfasta" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.elfasta, + process.out.log.collect { [it[0], file(it[1]).exists()] }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + 
options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.elfasta, + process.out.log.collect { [it[0], file(it[1]).exists()] }, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap new file mode 100644 index 00000000..799bb0fb --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap @@ -0,0 +1,62 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.elfasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + true + ] + ], + [ + "versions.yml:md5,bf313ed1289a8969464c5593b0ff67be" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T14:29:50.861439255" + }, + "sarscov2 - fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.elfasta:md5,09a6f76bed84ee211ef0d962e26c77f1" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + true + ] + ], + [ + "versions.yml:md5,bf313ed1289a8969464c5593b0ff67be" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T14:25:24.238816922" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index fe527315..c0a32e09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,6 +78,7 @@ params { multiqc_methods_description = null // References + elsites = null cmgg_config_base = "/conf/" igenomes_base = null //'s3://ngi-igenomes/igenomes' igenomes_ignore = true diff --git a/subworkflows/local/bam_call_elprep/main.nf 
b/subworkflows/local/bam_call_elprep/main.nf new file mode 100644 index 00000000..bd45f542 --- /dev/null +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -0,0 +1,75 @@ +// +// Call the variants using Elprep +// + +include { ELPREP_FILTER } from '../../../modules/nf-core/elprep/filter/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' + +include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' +include { VCF_DBSNP_VCFANNO } from '../vcf_dbsnp_vcfanno/main' + +workflow BAM_CALL_ELPREP { + take: + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai), path(bed) ] => sample BAM files and their indexes with the split bed files + ch_elfasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference + ch_elsites // channel: [optional] [ val(meta), path(elsites) ] + ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants + ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF + + main: + + def ch_versions = Channel.empty() + + ELPREP_FILTER( + ch_input.map { meta, bam, bai, bed -> + def new_meta = meta + [caller:'elprep'] + [ new_meta, bam, bai, bed, [], [], [] ] + }, + [[],[]], + ch_elfasta, + ch_elsites, + true, // haplotypecaller + false, + false, + false, + false + ) + ch_versions = ch_versions.mix(ELPREP_FILTER.out.versions.first()) + + VCF_CONCAT_BCFTOOLS( + ELPREP_FILTER.out.gvcf, + true + ) + ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) + + def ch_annotated = Channel.empty() + if(!(ch_dbsnp instanceof List)) { + VCF_DBNSP_VCFANNO( + VCF_CONCAT_BCFTOOLS.out.vcfs, + ch_dbsnp, + ch_dbsnp_tbi + ) + ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) + ch_annotated = TABIX_BGZIP.out.output + } else { + ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs + } + + BCFTOOLS_STATS( + ch_annotated, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + [[],[]] + ) + ch_versions = 
ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) + + def ch_reports = BCFTOOLS_STATS.out.stats.collect{ _meta, report -> report} + + emit: + gvcfs = ch_annotated // channel: [ val(meta), path(vcf), path(tbi) ] + reports = ch_reports // channel: [ path(stats) ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/local/cram_call_gatk4/main.nf b/subworkflows/local/cram_call_gatk4/main.nf index d0e14ebd..05c1543a 100644 --- a/subworkflows/local/cram_call_gatk4/main.nf +++ b/subworkflows/local/cram_call_gatk4/main.nf @@ -4,7 +4,7 @@ include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../modules/nf-core/gatk4/calibratedragstrmodel/main' include { GATK4_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/haplotypecaller/main' -include { BCFTOOLS_STATS as BCFTOOLS_STATS_SINGLE } from '../../../modules/nf-core/bcftools/stats/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' @@ -87,7 +87,7 @@ workflow CRAM_CALL_GATK4 { ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) - BCFTOOLS_STATS_SINGLE( + BCFTOOLS_STATS( VCF_CONCAT_BCFTOOLS.out.vcfs, [[],[]], [[],[]], @@ -95,9 +95,9 @@ workflow CRAM_CALL_GATK4 { [[],[]], [[],[]] ) - ch_versions = ch_versions.mix(BCFTOOLS_STATS_SINGLE.out.versions.first()) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) - def ch_reports = BCFTOOLS_STATS_SINGLE.out.stats.collect{ _meta, report -> report} + def ch_reports = BCFTOOLS_STATS.out.stats.collect{ _meta, report -> report} emit: gvcfs = VCF_CONCAT_BCFTOOLS.out.vcfs // channel: [ val(meta), path(vcf), path(tbi) ] diff --git a/subworkflows/local/input_split_bedtools/main.nf b/subworkflows/local/input_split_bedtools/main.nf index dd332ab2..19c3db95 100644 --- a/subworkflows/local/input_split_bedtools/main.nf +++ b/subworkflows/local/input_split_bedtools/main.nf @@ -27,14 +27,14 @@ workflow INPUT_SPLIT_BEDTOOLS { def 
bed_is_list = beds instanceof ArrayList def new_meta = meta + [split_count: bed_is_list ? beds.size() : 1] def bed_output = bed_is_list ? [beds] : [[beds]] - return [new_meta] + row[1..-2] + bed_output + return [new_meta] + bed_output + row[1..-2] } - .transpose(by:3) // Create one channel entry for each BED file per sample + .transpose(by:1) // Create one channel entry for each BED file per sample + .view() .map { row -> // Set the base name of the BED file as the ID (this will look like sample_id.xxxx, where xxxx are numbers) - def new_row = row - new_row[0] = row[0] + [id:row[-1].baseName] - return new_row + def new_meta = row[0] + [id:row[1].baseName] + return [ new_meta, row[1] ] + row[3..-1] } emit: diff --git a/workflows/germline.nf b/workflows/germline.nf index a7a38c7a..10166be9 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -19,6 +19,7 @@ include { CRAM_PREPARE_SAMTOOLS_BEDTOOLS } from '../subworkflows/local/cram_p include { INPUT_SPLIT_BEDTOOLS } from '../subworkflows/local/input_split_bedtools/main' include { CRAM_CALL_GATK4 } from '../subworkflows/local/cram_call_gatk4/main' include { GVCF_JOINT_GENOTYPE_GATK4 } from '../subworkflows/local/gvcf_joint_genotype_gatk4/main' +include { BAM_CALL_ELPREP } from '../subworkflows/local/bam_call_elprep/main' include { BAM_CALL_VARDICTJAVA } from '../subworkflows/local/bam_call_vardictjava/main' include { VCF_EXTRACT_RELATE_SOMALIER } from '../subworkflows/local/vcf_extract_relate_somalier/main' include { VCF_PED_RTGTOOLS } from '../subworkflows/local/vcf_ped_rtgtools/main' @@ -36,6 +37,7 @@ include { VCF_FILTER_BCFTOOLS } from '../subworkflows/local/vcf_fi include { SAMTOOLS_FAIDX as FAIDX } from '../modules/nf-core/samtools/faidx/main' include { GATK4_CREATESEQUENCEDICTIONARY as CREATESEQUENCEDICTIONARY } from '../modules/nf-core/gatk4/createsequencedictionary/main' +include { ELPREP_FASTATOELFASTA } from '../modules/nf-core/elprep/fastatoelfasta/main' include { GATK4_COMPOSESTRTABLEFILE as 
COMPOSESTRTABLEFILE } from '../modules/nf-core/gatk4/composestrtablefile/main' include { RTGTOOLS_FORMAT } from '../modules/nf-core/rtgtools/format/main' include { UNTAR } from '../modules/nf-core/untar/main' @@ -69,6 +71,7 @@ workflow GERMLINE { fasta // string: path to the reference fasta fai // string: path to the index of the reference fasta dict // string: path to the sequence dictionary file + elfasta // string: path to the elfasta reference file strtablefile // string: path to the strtable file sdf // string: path to the SDF directory dbsnp // string: path to the DBSNP VCF file @@ -99,6 +102,7 @@ workflow GERMLINE { automap_panel // string: path to the Automap panel file outdir // string: path to the output directory pedFiles // map: a map that has the family ID as key and a PED file as value + elsites // string: path to the elsites file for elprep // Boolean inputs dragstr // boolean: create a dragstr model and use it for haplotypecaller @@ -140,6 +144,7 @@ workflow GERMLINE { def ch_fasta_ready = Channel.fromPath(fasta).map{ fasta_file -> [[id:"reference"], fasta_file] }.collect() def ch_fai = fai ? Channel.fromPath(fai).map{ fai_file -> [[id:"reference"], fai_file] }.collect() : null def ch_dict = dict ? Channel.fromPath(dict).map{ dict_file -> [[id:"reference"], dict_file] }.collect() : null + def ch_elfasta = elfasta ? Channel.fromPath(elfasta).map { elfasta_file -> [[id:"reference"], elfasta_file]}.collect() : null def ch_strtablefile = strtablefile ? Channel.fromPath(strtablefile).map{ str_file -> [[id:"reference"], str_file] }.collect() : null def ch_sdf = sdf ? Channel.fromPath(sdf).map { sdf_file -> [[id:'reference'], sdf_file] }.collect() : null @@ -161,6 +166,8 @@ workflow GERMLINE { def ch_automap_repeats = automap_repeats ? Channel.fromPath(automap_repeats).map{ repeats -> [[id:"repeats"], repeats] }.collect() : [] def ch_automap_panel = automap_panel ? 
Channel.fromPath(automap_panel).map{ panel -> [[id:"automap_panel"], panel] }.collect() : [[],[]] + def ch_elsites = elsites ? Channel.fromPath(elsites).map{ elsites_file -> [[id:'elsites'], elsites_file] }.collect() : [[],[]] + // // Check for the presence of EnsemblVEP plugins that use extra files // @@ -262,6 +269,18 @@ workflow GERMLINE { ch_dict_ready = ch_dict } + def ch_elfasta_ready = Channel.empty() + def elprep_used = callers.contains("elprep") + if (!ch_elfasta && elprep_used) { + ELPREP_FASTATOELFASTA( + ch_fasta_ready + ) + ch_versions = ch_versions.mix(ELPREP_FASTATOELFASTA.out.versions) + ch_elfasta_ready = ELPREP_FASTATOELFASTA.out.elfasta + } else { + ch_elfasta_ready = ch_elfasta + } + // Reference STR table file def ch_strtablefile_ready = Channel.empty() if (dragstr && !ch_strtablefile) { @@ -356,10 +375,11 @@ workflow GERMLINE { def ch_gvcfs_ready = ch_gvcf_branch.no_tbi .join(TABIX_GVCF.out.tbi, failOnDuplicate:true, failOnMismatch:true) .mix(ch_gvcf_branch.tbi) - .map { meta, gvcf, tbi -> - [ meta, gvcf, tbi, callers.intersect(GlobalVariables.gvcfCallers) ] + .combine(callers.intersect(GlobalVariables.gvcfCallers)) + .map { meta, gvcf, tbi, caller -> + def new_meta = meta + [caller:caller] + [ new_meta, gvcf, tbi ] } - .transpose(by:3) // // Run sample preparation @@ -398,7 +418,7 @@ workflow GERMLINE { CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_beds.map { meta, bed -> [meta, bed, scatter_count] }, - CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + ch_split_cram_bam ) ch_versions = ch_versions.mix(INPUT_SPLIT_BEDTOOLS.out.versions) @@ -430,6 +450,26 @@ workflow GERMLINE { ch_gvcfs_ready = ch_gvcfs_ready.mix(CRAM_CALL_GATK4.out.gvcfs) ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions) ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports) + } + + if("elprep" in callers) { + // + // Call variants with Elprep + // + + BAM_CALL_ELPREP( + ch_caller_inputs.bam.filter { meta, _bam, _bai, _bed -> + // Filter out the entries that already have 
a GVCF + meta.type == "cram" + }, + ch_elfasta_ready, + ch_elsites, + ch_dbsnp_ready, + ch_dbsnp_tbi_ready + ) + ch_gvcfs_ready = ch_gvcfs_ready.mix(BAM_CALL_ELPREP.out.gvcfs) + ch_versions = ch_versions.mix(BAM_CALL_ELPREP.out.versions) + ch_reports = ch_reports.mix(BAM_CALL_ELPREP.out.reports) } From 4e970d532c3cb15ad29006f2e6e8ed586ebf65d7 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 16:30:26 +0200 Subject: [PATCH 06/26] fix typo --- subworkflows/local/bam_call_elprep/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_call_elprep/main.nf b/subworkflows/local/bam_call_elprep/main.nf index bd45f542..0b0e6250 100644 --- a/subworkflows/local/bam_call_elprep/main.nf +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -44,7 +44,7 @@ workflow BAM_CALL_ELPREP { def ch_annotated = Channel.empty() if(!(ch_dbsnp instanceof List)) { - VCF_DBNSP_VCFANNO( + VCF_DBSNP_VCFANNO( VCF_CONCAT_BCFTOOLS.out.vcfs, ch_dbsnp, ch_dbsnp_tbi From 59623f184d7124b14b661e6784494d04d420d54e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 23 Oct 2024 16:36:33 +0200 Subject: [PATCH 07/26] fix some small issues --- conf/test.config | 1 + modules.json | 3 ++- .../nf-core/elprep/filter/elprep-filter.diff | 20 +++++++++++++++++++ modules/nf-core/elprep/filter/main.nf | 2 +- .../local/input_split_bedtools/main.nf | 1 - 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/elprep/filter/elprep-filter.diff diff --git a/conf/test.config b/conf/test.config index 12dee471..c259bd53 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,6 +34,7 @@ params { fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" fai = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta.fai" dict = 
"https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.dict" + elfasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.elfasta" sdf = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" strtablefile = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.strtable.zip" diff --git a/modules.json b/modules.json index 8ff9f050..92877e49 100644 --- a/modules.json +++ b/modules.json @@ -74,7 +74,8 @@ "elprep/filter": { "branch": "master", "git_sha": "909c4dcdbb1e751214e2bb155e8c0a59633ed12a", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/elprep/filter/elprep-filter.diff" }, "ensemblvep/download": { "branch": "master", diff --git a/modules/nf-core/elprep/filter/elprep-filter.diff b/modules/nf-core/elprep/filter/elprep-filter.diff new file mode 100644 index 00000000..17b545b2 --- /dev/null +++ b/modules/nf-core/elprep/filter/elprep-filter.diff @@ -0,0 +1,20 @@ +Changes in module 'nf-core/elprep/filter' +Changes in 'elprep/filter/main.nf': +--- modules/nf-core/elprep/filter/main.nf ++++ modules/nf-core/elprep/filter/main.nf +@@ -65,7 +65,7 @@ + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ + """ +- elprep filter ${bam} ${prefix}.${suffix} \\ ++ elprep filter ${bam} /dev/null \\ + ${reference_sequences_cmd} \\ + ${filter_regions_cmd} \\ + ${markdup_cmd} \\ + +'modules/nf-core/elprep/filter/environment.yml' is unchanged +'modules/nf-core/elprep/filter/meta.yml' is unchanged +'modules/nf-core/elprep/filter/tests/main.nf.test' is unchanged +'modules/nf-core/elprep/filter/tests/main.nf.test.snap' is unchanged +'modules/nf-core/elprep/filter/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/elprep/filter/main.nf b/modules/nf-core/elprep/filter/main.nf index 6727106a..c533d253 100644 --- a/modules/nf-core/elprep/filter/main.nf +++ b/modules/nf-core/elprep/filter/main.nf @@ -65,7 +65,7 @@ process ELPREP_FILTER { if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ - elprep filter ${bam} ${prefix}.${suffix} \\ + elprep filter ${bam} /dev/null \\ ${reference_sequences_cmd} \\ ${filter_regions_cmd} \\ ${markdup_cmd} \\ diff --git a/subworkflows/local/input_split_bedtools/main.nf b/subworkflows/local/input_split_bedtools/main.nf index 19c3db95..dc3604f4 100644 --- a/subworkflows/local/input_split_bedtools/main.nf +++ b/subworkflows/local/input_split_bedtools/main.nf @@ -30,7 +30,6 @@ workflow INPUT_SPLIT_BEDTOOLS { return [new_meta] + bed_output + row[1..-2] } .transpose(by:1) // Create one channel entry for each BED file per sample - .view() .map { row -> // Set the base name of the BED file as the ID (this will look like sample_id.xxxx, where xxxx are numbers) def new_meta = row[0] + [id:row[1].baseName] From fc26fe48c1285d069fbd811311e5de397f2cd13d Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 24 Oct 2024 11:05:06 +0200 Subject: [PATCH 08/26] correct channels --- modules.json | 191 +++++++++++++----- .../nf-core/elprep/filter/elprep-filter.diff | 10 +- 
modules/nf-core/elprep/filter/main.nf | 1 - .../local/input_split_bedtools/main.nf | 2 +- 4 files changed, 153 insertions(+), 51 deletions(-) diff --git a/modules.json b/modules.json index 92877e49..70af501e 100644 --- a/modules.json +++ b/modules.json @@ -8,223 +8,310 @@ "bcftools/annotate": { "branch": "master", "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "d1e0ec7670fa77905a378627232566ce54c3c26d", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ] }, "bcftools/filter": { "branch": "master", "git_sha": "f85dbddd7a335fc0f5ac331e8d22ca94123b654b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/pluginscatter": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ] }, "bcftools/query": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/reheader": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ] }, "bcftools/stats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/intersect": { 
"branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/split": { "branch": "master", "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "elprep/fastatoelfasta": { "branch": "master", "git_sha": "74ac5351a11a184171489dee73652e8b69ba9d22", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "elprep/filter": { "branch": "master", "git_sha": "909c4dcdbb1e751214e2bb155e8c0a59633ed12a", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/elprep/filter/elprep-filter.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/ensemblvep/download/ensemblvep-download.diff" }, "ensemblvep/vep": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"], + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, "gatk4/calibratedragstrmodel": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/composestrtablefile": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": 
"666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genotypegvcfs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/haplotypecaller": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gawk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rtgtools/format": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rtgtools/pedfilter": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/rtgtools/pedfilter/rtgtools-pedfilter.diff" }, "rtgtools/rocplot": { "branch": "master", "git_sha": "83e2df1e4ec594beb8a575b4db0b4197900f4ebd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rtgtools/vcfeval": { "branch": "master", "git_sha": "83e2df1e4ec594beb8a575b4db0b4197900f4ebd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/convert": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "snpeff/snpeff": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ] }, "somalier/extract": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "somalier/relate": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/somalier/relate/somalier-relate.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "vcf_annotate_ensemblvep_snpeff" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules", "vcf_annotate_ensemblvep_snpeff"] + "installed_by": [ + "modules", + "vcf_annotate_ensemblvep_snpeff" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vardictjava": { "branch": "master", "git_sha": "f85452fcbebab5dfd77c0752236f6f86e9a03b32", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcf2db": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + 
"modules" + ] }, "vcfanno": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -233,25 +320,33 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_annotate_ensemblvep_snpeff": { "branch": "master", "git_sha": "1b2fdf082b2ea7976b112e149a474d816094724c", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/elprep/filter/elprep-filter.diff b/modules/nf-core/elprep/filter/elprep-filter.diff index 17b545b2..47bb8da9 100644 --- a/modules/nf-core/elprep/filter/elprep-filter.diff +++ b/modules/nf-core/elprep/filter/elprep-filter.diff @@ -2,7 +2,15 @@ Changes in module 'nf-core/elprep/filter' Changes in 'elprep/filter/main.nf': --- modules/nf-core/elprep/filter/main.nf +++ modules/nf-core/elprep/filter/main.nf -@@ -65,7 +65,7 @@ +@@ -20,7 +20,6 @@ + + + output: +- tuple val(meta), path("*.{bam,sam}") , emit: bam + tuple val(meta), path("*.log") , emit: logs + tuple val(meta), path("*.metrics.txt") , optional: true, emit: metrics + tuple val(meta), path("*.recall") , optional: true, emit: recall +@@ -65,7 +64,7 @@ if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
""" diff --git a/modules/nf-core/elprep/filter/main.nf b/modules/nf-core/elprep/filter/main.nf index c533d253..231223ec 100644 --- a/modules/nf-core/elprep/filter/main.nf +++ b/modules/nf-core/elprep/filter/main.nf @@ -20,7 +20,6 @@ process ELPREP_FILTER { output: - tuple val(meta), path("*.{bam,sam}") , emit: bam tuple val(meta), path("*.log") , emit: logs tuple val(meta), path("*.metrics.txt") , optional: true, emit: metrics tuple val(meta), path("*.recall") , optional: true, emit: recall diff --git a/subworkflows/local/input_split_bedtools/main.nf b/subworkflows/local/input_split_bedtools/main.nf index dc3604f4..4aab4987 100644 --- a/subworkflows/local/input_split_bedtools/main.nf +++ b/subworkflows/local/input_split_bedtools/main.nf @@ -33,7 +33,7 @@ workflow INPUT_SPLIT_BEDTOOLS { .map { row -> // Set the base name of the BED file as the ID (this will look like sample_id.xxxx, where xxxx are numbers) def new_meta = row[0] + [id:row[1].baseName] - return [ new_meta, row[1] ] + row[3..-1] + return [ new_meta ] + row[2..-1] + [ row[1] ] } emit: From 660a8095f3c868f141cd5a2fa9d66ddcec9e4b58 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 24 Oct 2024 11:08:08 +0200 Subject: [PATCH 09/26] fix dbsnp subwf --- subworkflows/local/vcf_dbsnp_vcfanno/main.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf index 409ac005..0292dee6 100644 --- a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf +++ b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf @@ -1,4 +1,5 @@ -include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' +include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow VCF_DBSNP_VCFANNO { take: @@ -24,12 +25,12 @@ workflow VCF_DBSNP_VCFANNO { ) ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - TABIX_BGZIP( + 
TABIX_BGZIPTABIX( VCFANNO.out.vcf ) - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) - def ch_vcfs = TABIX_BGZIP.out.output + def ch_vcfs = TABIX_BGZIPTABIX.out.gz_tbi emit: vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] From db31f349dcdf487410bcf1b18cf50c8324e20184 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 09:34:36 +0100 Subject: [PATCH 10/26] fix dbsnp flow issue --- subworkflows/local/bam_call_elprep/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_call_elprep/main.nf b/subworkflows/local/bam_call_elprep/main.nf index 0b0e6250..22f4939d 100644 --- a/subworkflows/local/bam_call_elprep/main.nf +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -50,7 +50,7 @@ workflow BAM_CALL_ELPREP { ch_dbsnp_tbi ) ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) - ch_annotated = TABIX_BGZIP.out.output + ch_annotated = VCF_DBSNP_VCFANNO.out.vcfs } else { ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs } From 8b72777259fcb8af014c4bf795dcf03d3a77c5ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 11:47:02 +0100 Subject: [PATCH 11/26] fix wrong map + resources --- conf/modules.config | 2 ++ subworkflows/local/vcf_dbsnp_vcfanno/main.nf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 732e1ec5..d4642735 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -187,6 +187,8 @@ process { */ withName: "^.*BAM_CALL_ELPREP:ELPREP_FILTER\$" { + cpus = { 25 * task.attempt } + memory = { 260.GB * task.attempt } ext.args = "--reference-confidence GVCF" } diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf index 0292dee6..298dc0af 100644 --- a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf +++ b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf @@ -18,7 +18,7 
@@ workflow VCF_DBSNP_VCFANNO { .collect() VCFANNO( - ch_input.map { meta, vcf -> [ meta, vcf, [], [] ] }, + ch_input.map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] }, ch_vcfanno_toml, [], ch_vcfanno_resources From de1f4492e34b489184fa180f3492376155d88052 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 11:52:56 +0100 Subject: [PATCH 12/26] update vardict flow --- subworkflows/local/bam_call_vardictjava/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf index 83c4d78a..0081bf51 100644 --- a/subworkflows/local/bam_call_vardictjava/main.nf +++ b/subworkflows/local/bam_call_vardictjava/main.nf @@ -39,13 +39,13 @@ workflow BAM_CALL_VARDICTJAVA { def ch_annotated = Channel.empty() if(!(ch_dbsnp instanceof List)) { - VCF_DBNSP_VCFANNO( + VCF_DBSNP_VCFANNO( VCF_CONCAT_BCFTOOLS.out.vcfs, ch_dbsnp, ch_dbsnp_tbi ) ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) - ch_annotated = TABIX_BGZIP.out.output + ch_annotated = VCF_DBSNP_VCFANNO.out.vcfs } else { ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs } From 3ae299d9066d6943d708afda88fb7e7a842fa7ac Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 12:02:36 +0100 Subject: [PATCH 13/26] small memory fix --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d4642735..2faaa75e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -188,7 +188,7 @@ process { withName: "^.*BAM_CALL_ELPREP:ELPREP_FILTER\$" { cpus = { 25 * task.attempt } - memory = { 260.GB * task.attempt } + memory = { 250.GB * task.attempt } ext.args = "--reference-confidence GVCF" } From 23729f7537a311dd2fbf30cb99edc5c7816de7e7 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:21:05 +0100 Subject: [PATCH 14/26] update nf-test ci command --- .github/workflows/ci.yml 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ed581fc..250fc3e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,7 +67,7 @@ jobs: - name: Run pipeline with test data run: | - $CONDA/bin/nf-test test --tag ${{ matrix.test }} --junitxml=default.xml + $CONDA/bin/nf-test test --tag ${{ matrix.test }} --ci --junitxml=default.xml - name: Publish Test Report uses: mikepenz/action-junit-report@v3 From ef5a811e0c52b54c5bf3af4fc9e8644817d4ddbb Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:24:55 +0100 Subject: [PATCH 15/26] nf-test ci updates --- .github/workflows/ci.yml | 2 +- tests/nextflow.config | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 250fc3e4..118a4a21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,7 +67,7 @@ jobs: - name: Run pipeline with test data run: | - $CONDA/bin/nf-test test --tag ${{ matrix.test }} --ci --junitxml=default.xml + $CONDA/bin/nf-test test --tag ${{ matrix.test }} --ci --only-changed --junitxml=default.xml - name: Publish Test Report uses: mikepenz/action-junit-report@v3 diff --git a/tests/nextflow.config b/tests/nextflow.config index 1643f647..80312509 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -66,7 +66,8 @@ params { igenomes_ignore = true genomes_ignore = true - validationSchemaIgnoreParams = 'genomes,igenomes_base,test_data,cram1,cram2,cram3,crai1,crai2,crai3,vcf1,vcf2,vcf3,tbi1,tbi2,tbi3,gvcf1,gvcf2,gvcf3,gtbi1,gtbi2,gtbi3,famvcf,famtbi,ped,bed,split1,split2,split3' + validationSchemaIgnoreParams = 'genomes,igenomes_base,test_data,cram1,cram2,cram3,crai1,crai2,crai3,vcf1,vcf2,vcf3,tbi1,tbi2,tbi3,gvcf1,gvcf2,gvcf3,gtbi1,gtbi2,gtbi3,famvcf,famtbi,ped,bed,split1,split2,split3,modules_testdata_base_path' + modules_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' } process { From 5afc3f30e555adc0c02a45a19368debd45e44582 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:47:53 +0100 Subject: [PATCH 16/26] use sharding and filters on nf-test --- .github/workflows/ci.yml | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 118a4a21..06e17afd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,7 @@ on: env: NXF_ANSI_LOG: false + NFT_MAX_SHARDS: 5 concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -27,21 +28,11 @@ jobs: NXF_VER: - "24.04.2" - "latest-everything" - test: - - "pipeline_default" - - "pipeline_callers" - - "pipeline_variations" - - "pipeline_variations2" - - "pipeline_gvcfs" - - "cram_call_genotype_gatk4" - - "bam_call_vardictjava" - - "cram_prepare_samtools_bedtools" - - "input_split_bedtools" - - "vcf_annotation" - - "vcf_extract_relate_somalier" - - "vcf_ped_rtgtools" - - "vcf_upd_updio" - - "vcf_validate_small_variants" + filter: + - "module" + - "workflow" + - "pipeline" + shard: [1,2,3,4,5] steps: - name: Free some space run: | @@ -65,9 +56,14 @@ jobs: run: | conda install -c bioconda nf-test - - name: Run pipeline with test data + - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }}" run: | - $CONDA/bin/nf-test test --tag ${{ matrix.test }} --ci --only-changed --junitxml=default.xml + $CONDA/bin/nf-test test \ + --ci \ + --changed-since HEAD^ \ + --shard ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }} \ + --filter ${{ matrix.filter }} \ + --junitxml=default.xml - name: Publish Test Report uses: mikepenz/action-junit-report@v3 From 7d28f4d22e659d0a059522c1b2c93202a85f0892 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:50:05 +0100 Subject: [PATCH 17/26] update test name 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06e17afd..ee77c4cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ concurrency: jobs: test_all: - name: Run nf-test with ${{ matrix.test }}-${{ matrix.NXF_VER }} + name: Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }} (${{ matrix.NXF_VER }}) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-cmgg/germline') }}" runs-on: ubuntu-latest From 9e736b0a34fb35684097908fc8988585d17e4b58 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:53:14 +0100 Subject: [PATCH 18/26] fix env issue in ci --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee77c4cc..408facd0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ concurrency: jobs: test_all: - name: Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }} (${{ matrix.NXF_VER }}) + name: Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ NFT_MAX_SHARDS }} (${{ matrix.NXF_VER }}) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-cmgg/germline') }}" runs-on: ubuntu-latest @@ -56,7 +56,7 @@ jobs: run: | conda install -c bioconda nf-test - - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }}" + - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ NFT_MAX_SHARDS }}" run: | $CONDA/bin/nf-test test \ --ci \ From 2fd1b38d4ba56c0c740fe9c5fcc9fdc1d6301524 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 13:55:54 +0100 Subject: 
[PATCH 19/26] fix env issue in ci --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 408facd0..da432d40 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ concurrency: jobs: test_all: - name: Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ NFT_MAX_SHARDS }} (${{ matrix.NXF_VER }}) + name: Run ${{ matrix.filter }} tests | shard ${{ matrix.shard }} (${{ matrix.NXF_VER }}) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-cmgg/germline') }}" runs-on: ubuntu-latest @@ -56,7 +56,7 @@ jobs: run: | conda install -c bioconda nf-test - - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ NFT_MAX_SHARDS }}" + - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }}" run: | $CONDA/bin/nf-test test \ --ci \ From f4654f05051b83d0e4cdf5a29b215d2cdc751709 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 14:00:02 +0100 Subject: [PATCH 20/26] module -> process in ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da432d40..f71c380a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: - "24.04.2" - "latest-everything" filter: - - "module" + - "process" - "workflow" - "pipeline" shard: [1,2,3,4,5] From aa015fcc97a8cecf45bac61b55d8f209c1159a05 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 14:06:14 +0100 Subject: [PATCH 21/26] update fetch depth in ci --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f71c380a..53b13318 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 
+43,8 @@ jobs: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + fetch-depth: 0 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 From c444fa50042406683193528e9fee69b331321939 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 4 Nov 2024 15:59:48 +0100 Subject: [PATCH 22/26] add test for bam_call_elprep --- .../local/bam_call_elprep/main.nf.test | 107 ++++++++++++++++++ .../local/bam_call_elprep/main.nf.test.snap | 56 +++++++++ 2 files changed, 163 insertions(+) create mode 100644 tests/subworkflows/local/bam_call_elprep/main.nf.test create mode 100644 tests/subworkflows/local/bam_call_elprep/main.nf.test.snap diff --git a/tests/subworkflows/local/bam_call_elprep/main.nf.test b/tests/subworkflows/local/bam_call_elprep/main.nf.test new file mode 100644 index 00000000..2f639e35 --- /dev/null +++ b/tests/subworkflows/local/bam_call_elprep/main.nf.test @@ -0,0 +1,107 @@ +nextflow_workflow { + + name "Test Workflow BAM_CALL_ELPREP" + script "subworkflows/local/bam_call_elprep/main.nf" + workflow "BAM_CALL_ELPREP" + + tag "subworkflows" + tag "subworkflows_local" + tag "bam_call_elprep" + tag "vcf_dbsnp_vcfanno" + + test("bam_call_elprep - default") { + + + when { + params { + callers = "elprep" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + 
file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.elfasta, checkIfExists:true) + ]) + input[2] = [[],[]] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.gvcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } + + test("bam_call_elprep - dbsnp") { + + + when { + params { + callers = "elprep" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.elfasta, checkIfExists:true) + ]) + input[2] = [[],[]] + input[3] = [[id:'dbsnp'], file(params.vcf1, checkIfExists:true)] + input[4] = [[id:'dbsnp'], file(params.tbi1, checkIfExists:true)] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.gvcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } +} diff --git a/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap b/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap new file mode 100644 index 00000000..ae4dbbbd 
--- /dev/null +++ b/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "bam_call_elprep - dbsnp": { + "content": [ + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "elprep" + }, + "NA24143.elprep.g.vcf.gz,variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", + "NA24143.elprep.g.vcf.gz.tbi" + ] + ], + [ + [ + "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-04T15:56:30.054330853" + }, + "bam_call_elprep - default": { + "content": [ + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "elprep" + }, + "NA24143.elprep.g.vcf.gz,variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", + "NA24143.elprep.g.vcf.gz.tbi" + ] + ], + [ + [ + "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-04T15:55:56.561058236" + } +} \ No newline at end of file From acf18e86b3a8ad24006b44f2e5b75e52d74100f1 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 5 Nov 2024 09:54:43 +0100 Subject: [PATCH 23/26] fix some more tests --- .../local/bam_call_vardictjava/main.nf.test | 122 +---------------- .../bam_call_vardictjava/main.nf.test.snap | 63 +-------- .../local/cram_call_gatk4/main.nf.test | 129 ++++++++++++++++++ .../local/cram_call_gatk4/main.nf.test.snap | 36 +++++ .../{main.nf.test => main.nf.test.disabled} | 1 - 5 files changed, 170 insertions(+), 181 deletions(-) create mode 100644 tests/subworkflows/local/cram_call_gatk4/main.nf.test create mode 100644 tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap rename tests/subworkflows/local/cram_call_genotype_gatk4/{main.nf.test => main.nf.test.disabled} (99%) diff --git 
a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test index 6615e3ab..1b933dda 100644 --- a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test @@ -8,65 +8,11 @@ nextflow_workflow { tag "subworkflows_local" tag "bam_call_vardictjava" tag "vcf_concat_bcftools" - tag "vcf_filter_bcftools" test("bam_call_vardictjava - default") { - - when { - params { - callers = "vardict" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[3] = [[],[]] - input[4] = [[],[]] - input[5] = false - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } - ).match("default") } - ) - } - - } - - test("bam_call_vardictjava - filter") { - - when { params { - filter = true callers = "vardict" } workflow { @@ -97,72 +43,6 @@ nextflow_workflow { ]) input[3] = [[],[]] input[4] = [[],[]] - input[5] = true - """ - } - } - - then { - 
assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } - ).match("filter") } - ) - } - - } - - test("bam_call_vardictjava - family") { - // The family should not be merged here - - when { - params { - callers = "vardict" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam1, checkIfExists:true), - file(params.bai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ],[ - [id:"NA24149.00001", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam2, checkIfExists:true), - file(params.bai2, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24149.00002", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam2, checkIfExists:true), - file(params.bai2, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24149.00003", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.bam2, checkIfExists:true), - file(params.bai2, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fai"], - 
file(params.fai, checkIfExists:true) - ]) - input[3] = [[],[]] - input[4] = [[],[]] input[5] = false """ } @@ -173,7 +53,7 @@ nextflow_workflow { { assert workflow.success }, { assert snapshot( workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } - ).match("family") } + ).match() } ) } diff --git a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap index 8ae2d467..c7011326 100644 --- a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap @@ -1,5 +1,5 @@ { - "filter": { + "bam_call_vardictjava - default": { "content": [ [ [ @@ -16,64 +16,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-10-23T11:17:08.827619348" - }, - "default": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "vardict" - }, - "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", - "NA24143.vardict.vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-23T11:16:29.818590197" - }, - "family": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "vardict" - }, - "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", - "NA24143.vardict.vcf.gz.tbi" - ], - [ - { - "id": "NA24149", - "sample": "NA24149", - "family": "Ashkenazim", - "family_samples": "NA24149", - "caller": "vardict" - }, - "NA24149.vardict.vcf.gz,variantsMD5:37c24a3165a79df0b9744e80a9255f83", - "NA24149.vardict.vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-23T11:17:59.686967502" + "timestamp": 
"2024-11-05T09:44:33.098049827" } } \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_gatk4/main.nf.test b/tests/subworkflows/local/cram_call_gatk4/main.nf.test new file mode 100644 index 00000000..fa45e6a6 --- /dev/null +++ b/tests/subworkflows/local/cram_call_gatk4/main.nf.test @@ -0,0 +1,129 @@ +nextflow_workflow { + + name "Test Workflow CRAM_CALL_GATK4" + script "subworkflows/local/cram_call_gatk4/main.nf" + workflow "CRAM_CALL_GATK4" + + tag "subworkflows" + tag "subworkflows_local" + tag "cram_call_gatk4" + tag "vcf_concat_bcftools" + + test("cram_call_gatk4 - default") { + + + when { + params { + callers = "haplotypecaller" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = [[],[]] + input[7] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + 
workflow.out.reports + ).match() } + ) + } + + } + + test("cram_call_gatk4 - dragstr") { + + + when { + params { + callers = "haplotypecaller" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = Channel.value([ + [id:"str"], + file(params.strtablefile, checkIfExists:true) + ]) + input[5] = [[],[]] + input[6] = [[],[]] + input[7] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } + +} diff --git a/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap b/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap new file mode 100644 index 00000000..57de17e4 --- /dev/null +++ b/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "cram_call_gatk4 - default": { + "content": [ + [ + + ], + [ + [ + 
"NA24143.haplotypecaller.bcftools_stats.txt:md5,09b4e7674e0f5b98b1e548df3002250e" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T09:53:12.737680049" + }, + "cram_call_gatk4 - dragstr": { + "content": [ + [ + + ], + [ + [ + "NA24143.haplotypecaller.bcftools_stats.txt:md5,c4dad5b8e05871dda66df42b1f6c89ff" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T09:54:07.696125828" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled similarity index 99% rename from tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test rename to tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled index 37eca413..f8c447d9 100644 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test +++ b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled @@ -36,7 +36,6 @@ nextflow_workflow { file(params.crai1, checkIfExists:true), file(params.split3, checkIfExists:true) ]) - input[1] = Channel.empty() input[2] = Channel.value([ [id:"fasta"], file(params.fasta, checkIfExists:true) From 3faa5404a18c961a637c1b1dfa97a4fe32140efc Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 5 Nov 2024 14:48:29 +0100 Subject: [PATCH 24/26] more test fixes --- .github/workflows/ci.yml | 2 +- modules.json | 191 ++----- .../nf-core/elprep/filter/elprep-filter.diff | 47 +- modules/nf-core/elprep/filter/main.nf | 1 - .../elprep/filter/tests/main.nf.test.snap | 20 +- .../local/gvcf_joint_genotype_gatk4/main.nf | 4 +- subworkflows/local/vcf_dbsnp_vcfanno/main.nf | 8 +- .../main.nf.test.disabled | 494 ------------------ .../main.nf.test.snap | 164 ------ .../gvcf_joint_genotype_gatk4/main.nf.test | 190 +++++++ .../main.nf.test.snap | 87 +++ .../local/vcf_dbsnp_vcfanno/main.nf.test | 47 ++ 
.../local/vcf_dbsnp_vcfanno/main.nf.test.snap | 23 + 13 files changed, 449 insertions(+), 829 deletions(-) delete mode 100644 tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled delete mode 100644 tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap create mode 100644 tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test create mode 100644 tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap create mode 100644 tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test create mode 100644 tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 53b13318..d985e187 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - "process" - "workflow" - "pipeline" - shard: [1,2,3,4,5] + shard: [1, 2, 3, 4, 5] steps: - name: Free some space run: | diff --git a/modules.json b/modules.json index 70af501e..92877e49 100644 --- a/modules.json +++ b/modules.json @@ -8,310 +8,223 @@ "bcftools/annotate": { "branch": "master", "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "d1e0ec7670fa77905a378627232566ce54c3c26d", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bcftools/filter": { "branch": "master", "git_sha": "f85dbddd7a335fc0f5ac331e8d22ca94123b654b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/pluginscatter": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + 
"installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bcftools/query": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/reheader": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bcftools/stats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/split": { "branch": "master", "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "elprep/fastatoelfasta": { "branch": "master", "git_sha": "74ac5351a11a184171489dee73652e8b69ba9d22", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "elprep/filter": { "branch": "master", "git_sha": "909c4dcdbb1e751214e2bb155e8c0a59633ed12a", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/elprep/filter/elprep-filter.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/ensemblvep/download/ensemblvep-download.diff" }, "ensemblvep/vep": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - 
"installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ], + "installed_by": ["vcf_annotate_ensemblvep_snpeff"], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, "gatk4/calibratedragstrmodel": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/composestrtablefile": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/genotypegvcfs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/haplotypecaller": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gawk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mosdepth": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rtgtools/format": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rtgtools/pedfilter": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + 
"installed_by": ["modules"], "patch": "modules/nf-core/rtgtools/pedfilter/rtgtools-pedfilter.diff" }, "rtgtools/rocplot": { "branch": "master", "git_sha": "83e2df1e4ec594beb8a575b4db0b4197900f4ebd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rtgtools/vcfeval": { "branch": "master", "git_sha": "83e2df1e4ec594beb8a575b4db0b4197900f4ebd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/convert": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "snpeff/snpeff": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "somalier/extract": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "somalier/relate": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/somalier/relate/somalier-relate.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "tabix/bgziptabix": { "branch": "master", "git_sha": 
"f448e846bdadd80fc8be31fbbc78d9f5b5131a45", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules", - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["modules", "vcf_annotate_ensemblvep_snpeff"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "vardictjava": { "branch": "master", "git_sha": "f85452fcbebab5dfd77c0752236f6f86e9a03b32", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "vcf2db": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "vcfanno": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -320,33 +233,25 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_annotate_ensemblvep_snpeff": { "branch": "master", "git_sha": "1b2fdf082b2ea7976b112e149a474d816094724c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/elprep/filter/elprep-filter.diff b/modules/nf-core/elprep/filter/elprep-filter.diff index 47bb8da9..3a357774 100644 --- a/modules/nf-core/elprep/filter/elprep-filter.diff +++ 
b/modules/nf-core/elprep/filter/elprep-filter.diff @@ -19,10 +19,55 @@ Changes in 'elprep/filter/main.nf': ${reference_sequences_cmd} \\ ${filter_regions_cmd} \\ ${markdup_cmd} \\ +@@ -106,7 +105,6 @@ + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ +- touch ${prefix}.${suffix} + touch elprep-${timestamp}.log + ${markdup_cmd} + ${bqsr_cmd} 'modules/nf-core/elprep/filter/environment.yml' is unchanged 'modules/nf-core/elprep/filter/meta.yml' is unchanged 'modules/nf-core/elprep/filter/tests/main.nf.test' is unchanged -'modules/nf-core/elprep/filter/tests/main.nf.test.snap' is unchanged +Changes in 'elprep/filter/tests/main.nf.test.snap': +--- modules/nf-core/elprep/filter/tests/main.nf.test.snap ++++ modules/nf-core/elprep/filter/tests/main.nf.test.snap +@@ -1,15 +1,7 @@ + { + "test-elprep-filter": { + "content": [ +- [ +- [ +- { +- "id": "test", +- "single_end": false +- }, +- "test.bam,readsMD5:463ac3b905fbf4ddf113a94dbfa8d69f" +- ] +- ], ++ null, + [ + + ], +@@ -64,15 +56,7 @@ + }, + "test-elprep-filter-stub": { + "content": [ +- [ +- [ +- { +- "id": "test", +- "single_end": false +- }, +- "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" +- ] +- ], ++ null, + [ + + ], + 'modules/nf-core/elprep/filter/tests/nextflow.config' is unchanged ************************************************************ diff --git a/modules/nf-core/elprep/filter/main.nf b/modules/nf-core/elprep/filter/main.nf index 231223ec..df445339 100644 --- a/modules/nf-core/elprep/filter/main.nf +++ b/modules/nf-core/elprep/filter/main.nf @@ -105,7 +105,6 @@ process ELPREP_FILTER { if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
""" - touch ${prefix}.${suffix} touch elprep-${timestamp}.log ${markdup_cmd} ${bqsr_cmd} diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test.snap b/modules/nf-core/elprep/filter/tests/main.nf.test.snap index 9112fe30..a95cae7f 100644 --- a/modules/nf-core/elprep/filter/tests/main.nf.test.snap +++ b/modules/nf-core/elprep/filter/tests/main.nf.test.snap @@ -1,15 +1,7 @@ { "test-elprep-filter": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam,readsMD5:463ac3b905fbf4ddf113a94dbfa8d69f" - ] - ], + null, [ ], @@ -64,15 +56,7 @@ }, "test-elprep-filter-stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], + null, [ ], diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index b3f761f4..4a7ff91f 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -16,7 +16,7 @@ include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' workflow GVCF_JOINT_GENOTYPE_GATK4 { take: - ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => The GVCFs called with HaplotypeCaller + ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => The GVCFs ch_fasta // channel: [mandatory] [ path(fasta) ] => fasta reference ch_fai // channel: [mandatory] [ path(fai) ] => fasta reference index ch_dict // channel: [mandatory] [ path(dict) ] => sequence dictionary @@ -138,7 +138,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { } emit: - vcfs = ch_vcfs // [ val(meta), path(vcf) ] + vcfs = ch_vcfs // [ val(meta), path(vcf), path(tbi) ] versions = ch_versions // [ path(versions) ] } diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf index 298dc0af..718b3067 100644 --- a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf +++ b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf 
@@ -4,8 +4,8 @@ include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/m workflow VCF_DBSNP_VCFANNO { take: ch_input // channel: [mandatory] [ val(meta), path(vcf), path(tbi), ] => VCF files to be annotated - ch_dbsnp // channel: [optional] [ path(vcf) ] => the dbnsp vcf file - ch_dbsnp_tbi // channel: [optional] [ path(tbi) ] => the dbsnp vcf index file + ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ] => the dbnsp vcf file + ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] => the dbsnp vcf index file main: def ch_versions = Channel.empty() @@ -30,10 +30,8 @@ workflow VCF_DBSNP_VCFANNO { ) ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) - def ch_vcfs = TABIX_BGZIPTABIX.out.gz_tbi - emit: - vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] + vcfs = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(vcf), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled deleted file mode 100644 index f8c447d9..00000000 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.disabled +++ /dev/null @@ -1,494 +0,0 @@ -nextflow_workflow { - - name "Test Workflow CRAM_CALL_GENOTYPE_GATK4" - script "subworkflows/local/cram_call_genotype_gatk4/main.nf" - workflow "CRAM_CALL_GENOTYPE_GATK4" - - tag "subworkflows" - tag "subworkflows_local" - tag "cram_call_genotype_gatk4" - tag "cram_call_gatk4" // This is also tested here - tag "gvcf_joint_genotype_gatk4" // This is also tested here - tag "vcf_filter_bcftools" // This is also tested here - tag "vcf_concat_bcftools" // This is also tested here - - test("cram_call_genotype_gatk4 - default - crams") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", 
family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - workflow.out.reports - ).match("default - crams") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - gvcfs") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.empty() - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - workflow.out.reports - ).match("default - gvcfs") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - family") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert 
snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("default - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - filter - family") { - - when { - params { - callers = "haplotypecaller" - filter = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = true - input[12] 
= 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("filter - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - only_call - family") { - - when { - params { - callers = "haplotypecaller" - only_call = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] 
- input[8] = false - input[9] = true - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_call - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - only_merge - family") { - - when { - params { - callers = "haplotypecaller" - only_merge = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - 
file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = true - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_merge - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - sample + family") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"NA24149", family_samples:"NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = 
Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_merge - sample + family") } - ) - } - - } - -} diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap deleted file mode 100644 index a0aec425..00000000 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap +++ /dev/null @@ -1,164 +0,0 @@ -{ - "only_merge - family": { - "content": [ - [ - - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T11:20:45.005084818" - }, - "default - family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149,NA24385", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:35:02.44674969" - }, - "filter - family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149,NA24385", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - 
}, - "timestamp": "2024-09-05T17:35:46.768542501" - }, - "default - gvcfs": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:34:17.846266913" - }, - "default - crams": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24143.haplotypecaller.bcftools_stats.txt:md5,09b4e7674e0f5b98b1e548df3002250e" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:33:52.045772718" - }, - "only_call - family": { - "content": [ - [ - - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T11:20:04.714403906" - }, - "only_merge - sample + family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-06T14:26:29.249708339" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test new file mode 100644 index 00000000..36de5cf1 --- /dev/null +++ b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test @@ -0,0 +1,190 @@ 
+nextflow_workflow { + + name "Test Workflow GVCF_JOINT_GENOTYPE_GATK4" + script "subworkflows/local/gvcf_joint_genotype_gatk4/main.nf" + workflow "GVCF_JOINT_GENOTYPE_GATK4" + + tag "subworkflows" + tag "subworkflows_local" + tag "gvcf_joint_genotype_gatk4" + tag "vcf_concat_bcftools" + tag "input_split_bedtools" + + test("gvcf_joint_genotype_gatk4 - single_sample") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - family") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ],[ + [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf2, checkIfExists:true), + file(params.gtbi2, checkIfExists:true) + ] + ) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + 
input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - only_merge") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = true + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - single_sample + family") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ],[ + [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf2, checkIfExists:true), + file(params.gtbi2, checkIfExists:true) + ],[ + [id:"NA24385", sample:"NA24385", family:"NA24385", family_samples:"NA24385", caller:"haplotypecaller"], + file(params.gvcf3, checkIfExists:true), + file(params.gtbi3, checkIfExists:true) + ] + ) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, 
checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } +} diff --git a/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap new file mode 100644 index 00000000..b02a7a06 --- /dev/null +++ b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap @@ -0,0 +1,87 @@ +{ + "gvcf_joint_genotype_gatk4 - single_sample + family": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + "Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4dea305eb71decb122709e75af9c833f", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ], + [ + { + "family": "NA24385", + "family_samples": "NA24385", + "caller": "haplotypecaller", + "id": "NA24385" + }, + "NA24385.haplotypecaller.vcf.gz,variantsMD5:4ffd515511f59e3561e3fb1b046d7675", + "NA24385.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:05:41.216399607" + }, + "gvcf_joint_genotype_gatk4 - single_sample": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + "Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4c6db9171912bcbbaefeec2a24968a", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:03:11.507733028" + }, + "gvcf_joint_genotype_gatk4 - only_merge": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.1", + 
"nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T11:43:25.386070284" + }, + "gvcf_joint_genotype_gatk4 - family": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + "Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4dea305eb71decb122709e75af9c833f", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:03:57.301900285" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test new file mode 100644 index 00000000..75443075 --- /dev/null +++ b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test @@ -0,0 +1,47 @@ +nextflow_workflow { + + name "Test Workflow VCF_DBSNP_VCFANNO" + script "subworkflows/local/vcf_dbsnp_vcfanno/main.nf" + workflow "VCF_DBSNP_VCFANNO" + + tag "subworkflows" + tag "subworkflows_local" + tag "vcf_dbsnp_vcfanno" + + test("vcf_dbsnp_vcfanno - default") { + + when { + params { + annotate = true + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", family:"NA24143", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.vcf1, checkIfExists:true), + file(params.tbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"dbnsp"], + file(params.vcf2, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"dbnsp"], + file(params.tbi2, checkIfExists:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [ it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name ] } + ).match() } + ) + } + + } + +} diff --git a/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap new file mode 100644 index 00000000..43ea8318 --- /dev/null +++ 
b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "vcf_dbsnp_vcfanno - default": { + "content": [ + [ + [ + { + "id": "NA24143", + "family": "NA24143", + "family_samples": "NA24143", + "caller": "haplotypecaller" + }, + "NA24143.vcf.gz,variantsMD5:b4f76bc67ba0e159489393d4788349b3", + "NA24143.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T14:44:22.532988474" + } +} \ No newline at end of file From 76a86a2a293067eb84138c7e2f61b46e527cc7b2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 5 Nov 2024 15:17:48 +0100 Subject: [PATCH 25/26] fix elprep/filter test --- .../nf-core/elprep/filter/elprep-filter.diff | 22 +++++++++++++------ .../elprep/filter/tests/main.nf.test.snap | 10 +++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/modules/nf-core/elprep/filter/elprep-filter.diff b/modules/nf-core/elprep/filter/elprep-filter.diff index 3a357774..c877a871 100644 --- a/modules/nf-core/elprep/filter/elprep-filter.diff +++ b/modules/nf-core/elprep/filter/elprep-filter.diff @@ -34,11 +34,10 @@ Changes in 'elprep/filter/main.nf': Changes in 'elprep/filter/tests/main.nf.test.snap': --- modules/nf-core/elprep/filter/tests/main.nf.test.snap +++ modules/nf-core/elprep/filter/tests/main.nf.test.snap -@@ -1,15 +1,7 @@ - { +@@ -2,13 +2,7 @@ "test-elprep-filter": { "content": [ -- [ + [ - [ - { - "id": "test", @@ -46,12 +45,21 @@ Changes in 'elprep/filter/tests/main.nf.test.snap': - }, - "test.bam,readsMD5:463ac3b905fbf4ddf113a94dbfa8d69f" - ] -- ], -+ null, ++ + ], [ - ], -@@ -64,15 +56,7 @@ +@@ -57,22 +51,14 @@ + ] + ], + "meta": { +- "nf-test": "0.9.0", +- "nextflow": "24.04.4" ++ "nf-test": "0.9.1", ++ "nextflow": "24.10.0" + }, +- "timestamp": "2024-10-22T11:05:45.927224502" ++ "timestamp": "2024-11-05T15:16:40.979143203" }, "test-elprep-filter-stub": { "content": [ diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test.snap 
b/modules/nf-core/elprep/filter/tests/main.nf.test.snap index a95cae7f..d4848abc 100644 --- a/modules/nf-core/elprep/filter/tests/main.nf.test.snap +++ b/modules/nf-core/elprep/filter/tests/main.nf.test.snap @@ -1,7 +1,9 @@ { "test-elprep-filter": { "content": [ - null, + [ + + ], [ ], @@ -49,10 +51,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-10-22T11:05:45.927224502" + "timestamp": "2024-11-05T15:16:40.979143203" }, "test-elprep-filter-stub": { "content": [ From 6eaf981c6e2dc8345bcddc76fbfa3334edbe3419 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 5 Nov 2024 15:41:31 +0100 Subject: [PATCH 26/26] fix linting + reimplement only_call and only_merge --- conf/modules.config | 4 ++-- nextflow_schema.json | 18 ++++++++++++++++++ subworkflows/local/bam_call_elprep/main.nf | 2 +- .../main.nf.test | 6 +++--- workflows/germline.nf | 12 ++++++++---- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2faaa75e..cb5fe2ab 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -367,7 +367,7 @@ process { withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" { ext.prefix = { "${meta.id}.filtered1" } ext.args = { - meta.caller == "vardict" ? + meta.caller == "vardict" ? "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z": meta.caller == "haplotypecaller" ? "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": @@ -378,7 +378,7 @@ process { } withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.args = { + ext.args = { meta.caller == "vardict" ? "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" : meta.caller == "haplotypecaller" ? 
diff --git a/nextflow_schema.json b/nextflow_schema.json index 53129997..79d18c26 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,6 +104,24 @@ "format": "path", "fa_icon": "fas fa-folder" }, + "elfasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.elfasta$", + "description": "Path to the ELFASTA genome file. This is used when `elprep` is part of the callers and will be automatically generated when missing.", + "fa_icon": "far fa-file-code" + }, + "elsites": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.elsites$", + "description": "Path to the elsites file. This is used when `elprep` is part of the callers.", + "fa_icon": "far fa-file-code" + }, "genomes_base": { "type": "string", "default": "/references/", diff --git a/subworkflows/local/bam_call_elprep/main.nf b/subworkflows/local/bam_call_elprep/main.nf index 22f4939d..6a010326 100644 --- a/subworkflows/local/bam_call_elprep/main.nf +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -35,7 +35,7 @@ workflow BAM_CALL_ELPREP { false ) ch_versions = ch_versions.mix(ELPREP_FILTER.out.versions.first()) - + VCF_CONCAT_BCFTOOLS( ELPREP_FILTER.out.gvcf, true diff --git a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test index 5ca5ffb2..511ca4d3 100644 --- a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test +++ b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test @@ -46,7 +46,7 @@ nextflow_workflow { }, workflow.out.ready_bams.collect { [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] - }, + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WGS") } @@ -93,7 +93,7 @@ nextflow_workflow { }, workflow.out.ready_bams.collect { [ it[0], 
"${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] - }, + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES common ROI") } @@ -140,7 +140,7 @@ nextflow_workflow { }, workflow.out.ready_bams.collect { [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] - }, + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES") } diff --git a/workflows/germline.nf b/workflows/germline.nf index 10166be9..1946490a 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -384,7 +384,7 @@ workflow GERMLINE { // // Run sample preparation // - + def create_bam_files = callers.intersect(GlobalVariables.bamCallers).size() > 0 // Only create BAM files when needed CRAM_PREPARE_SAMTOOLS_BEDTOOLS( ch_input.cram.filter { meta, _cram, _crai -> @@ -491,10 +491,11 @@ workflow GERMLINE { ch_calls = ch_calls.mix(BAM_CALL_VARDICTJAVA.out.vcfs) } - // TODO reimplement --only_call and --only_merge + // Stop pipeline execution when only calls should happen + def ch_gvcfs_final = ch_gvcfs_ready.filter { !only_call } GVCF_JOINT_GENOTYPE_GATK4( - ch_gvcfs_ready, + ch_gvcfs_final, ch_fasta_ready, ch_fai_ready, ch_dict_ready, @@ -506,7 +507,10 @@ workflow GERMLINE { ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions) ch_calls = ch_calls.mix(GVCF_JOINT_GENOTYPE_GATK4.out.vcfs) - def ch_called_variants = ch_calls + // Stop pipeline execution when only the merge should happen + def ch_calls_final = ch_calls.filter { !only_merge } + + def ch_called_variants = ch_calls_final .map { meta, vcf, tbi -> def new_meta = meta - meta.subMap(["type", "vardict_min_af"]) [ new_meta, vcf, tbi ]