diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e2be3da..d985e187 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,7 @@ on: env: NXF_ANSI_LOG: false + NFT_MAX_SHARDS: 5 concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -18,7 +19,7 @@ concurrency: jobs: test_all: - name: Run nf-test with ${{ matrix.test }}-${{ matrix.NXF_VER }} + name: Run ${{ matrix.filter }} tests | shard ${{ matrix.shard }} (${{ matrix.NXF_VER }}) # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-cmgg/germline') }}" runs-on: ubuntu-latest @@ -27,21 +28,11 @@ jobs: NXF_VER: - "24.04.2" - "latest-everything" - test: - - "pipeline_default" - - "pipeline_callers" - - "pipeline_variations" - - "pipeline_variations2" - - "pipeline_gvcfs" - - "cram_call_genotype_gatk4" - - "cram_call_vardictjava" - - "cram_prepare_samtools_bedtools" - - "input_split_bedtools" - - "vcf_annotation" - - "vcf_extract_relate_somalier" - - "vcf_ped_rtgtools" - - "vcf_upd_updio" - - "vcf_validate_small_variants" + filter: + - "process" + - "workflow" + - "pipeline" + shard: [1, 2, 3, 4, 5] steps: - name: Free some space run: | @@ -52,6 +43,8 @@ jobs: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + fetch-depth: 0 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -65,9 +58,14 @@ jobs: run: | conda install -c bioconda nf-test - - name: Run pipeline with test data + - name: "Run ${{ matrix.filter }} tests | ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }}" run: | - $CONDA/bin/nf-test test --tag ${{ matrix.test }} --junitxml=default.xml + $CONDA/bin/nf-test test \ + --ci \ + --changed-since HEAD^ \ + --shard ${{ matrix.shard }}/${{ env.NFT_MAX_SHARDS }} \ + --filter ${{ matrix.filter }} \ + --junitxml=default.xml - name: Publish Test Report 
uses: mikepenz/action-junit-report@v3 diff --git a/conf/modules.config b/conf/modules.config index e8406ec1..cb5fe2ab 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -124,15 +124,15 @@ process { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GATK4_HAPLOTYPCECALLER + GATK4 HAPLOTYPCECALLER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:CRAM_CALL_GATK4:GATK4_CALIBRATEDRAGSTRMODEL\$" { + withName: "^.*CRAM_CALL_GATK4:GATK4_CALIBRATEDRAGSTRMODEL\$" { ext.args = "--parallel" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:CRAM_CALL_GATK4:GATK4_HAPLOTYPECALLER\$" { + withName: "^.*CRAM_CALL_GATK4:GATK4_HAPLOTYPECALLER\$" { time = { 16.h * task.attempt } ext.prefix = {"${meta.id}.g"} ext.args = { @@ -169,7 +169,7 @@ process { ext.args = '' } - withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS_SINGLE\$" { + withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS\$" { publishDir = [ overwrite: true, enabled: true, @@ -180,12 +180,63 @@ process { ext.prefix = final_prefix } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:BCFTOOLS_QUERY\$" { + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ELPREP + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*BAM_CALL_ELPREP:ELPREP_FILTER\$" { + cpus = { 25 * task.attempt } + memory = { 250.GB * task.attempt } + ext.args = "--reference-confidence GVCF" + } + + withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { + publishDir = [ + overwrite: true, + enabled: true, + mode: params.publish_dir_mode, + path: individual_output, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] // SAVE + ext.prefix = { "${meta.id}.${meta.caller}.g" } + ext.args = '--allow-overlaps --output-type z' + } + + withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:TABIX_TABIX\$" { + publishDir = [ + overwrite: true, + enabled: true, + path: individual_output, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] // SAVE + } + + withName: "^.*BAM_CALL_ELPREP:BCFTOOLS_STATS\$" { + publishDir = [ + overwrite: true, + enabled: true, + path: individual_reports, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] // SAVE + ext.prefix = final_prefix + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GVCF JOINT GENOTYPING + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:BCFTOOLS_QUERY\$" { ext.args = "--exclude 'QUAL=\".\"' --format '%CHROM\t%POS0\t%END\\n'" ext.suffix = "bed" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:MERGE_BEDS\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:MERGE_BEDS\$" { ext.args = "-d ${params.merge_distance}" publishDir = [ enabled: true, @@ -196,12 +247,12 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GAWK\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GAWK\$" { ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' ext.suffix = "bed" } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOMICSDBIMPORT\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOMICSDBIMPORT\$" { label = { meta.family_samples.tokenize(",").size() <= 10 ? 
"process_medium" : "process_high" } time = { 16.h * task.attempt } // Lots of parameters are fetched from https://gatk.broadinstitute.org/hc/en-us/articles/360056138571-GenomicsDBImport-usage-and-performance-guidelines @@ -228,7 +279,7 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOTYPEGVCFS\$" { + withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:GATK4_GENOTYPEGVCFS\$" { time = { 16.h * task.attempt } ext.args = { [ @@ -251,30 +302,13 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_1\$" { - ext.prefix = { "${meta.id}_filtered_snps" } - ext.args = {"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'"} - } - - withName: "^.*CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} - ext.args = {'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\''} - publishDir = [ - enabled: enableOutput("filter"), - overwrite: true, - path: final_output, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] // SAVE - } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VARDICTJAVA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*CRAM_CALL_VARDICTJAVA:VARDICTJAVA\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VARDICTJAVA\$" { time = { 16.h * task.attempt } ext.prefix = {"${meta.id}"} ext.args = { @@ -291,7 +325,7 @@ process { } } - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { ext.args = '--allow-overlaps --output-type z' ext.prefix = enableOutput("original") ? final_prefix : {"${meta.id}.concat"} publishDir = [ @@ -303,7 +337,7 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" { + withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" { ext.prefix = enableOutput("original") ? final_prefix : {"${meta.id}.vcfanno"} publishDir = [ overwrite: true, @@ -314,27 +348,50 @@ process { ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_1\$" { - ext.prefix = { "${meta.id}.filtered1" } - ext.args = "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z" - } - - withName: "^.*CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.args = "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" - ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} + withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { publishDir = [ overwrite: true, - enabled: enableOutput("filter"), + enabled: enableOutput("filter") || enableOutput("original"), mode: params.publish_dir_mode, path: final_output, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] // SAVE } - withName: "^.*CRAM_CALL_VARDICTJAVA:TABIX_TABIX\$" { + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FILTER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" { + ext.prefix = { "${meta.id}.filtered1" } + ext.args = { + meta.caller == "vardict" ? + "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z": + meta.caller == "haplotypecaller" ? + "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": + meta.caller == "elprep" ? + "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": + "" + } + } + + withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" { + ext.args = { + meta.caller == "vardict" ? + "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" : + meta.caller == "haplotypecaller" ? + '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : + meta.caller == "elprep" ? + '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : + "" + + } + ext.prefix = enableOutput("filter") ? final_prefix : {"${meta.id}.filtered"} publishDir = [ overwrite: true, - enabled: enableOutput("filter") || enableOutput("original"), + enabled: enableOutput("filter"), mode: params.publish_dir_mode, path: final_output, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } diff --git a/conf/test.config b/conf/test.config index 12dee471..c259bd53 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,6 +34,7 @@ params { fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" fai = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta.fai" dict = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.dict" + elfasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.elfasta" sdf = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" strtablefile = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.strtable.zip" diff --git a/lib/GlobalVariables.groovy b/lib/GlobalVariables.groovy index 72d5754b..9d868d23 100644 --- a/lib/GlobalVariables.groovy +++ b/lib/GlobalVariables.groovy @@ -4,9 +4,11 @@ import java.nio.file.Path class GlobalVariables { // The available callers - public static List availableCallers = ["haplotypecaller", "vardict"] + public static List availableCallers = ["haplotypecaller", "vardict", "elprep"] - public static List gvcfCallers = ["haplotypecaller"] + public static List gvcfCallers = ["haplotypecaller", "elprep"] + + public static List bamCallers = ["elprep", "vardict"] public static Map pedFiles = [:] diff --git a/main.nf b/main.nf index 2e82e0c1..c164385f 100644 --- a/main.nf +++ b/main.nf @@ -19,6 +19,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_cmgg_germline_pi params.fasta = getGenomeAttribute('fasta', params.genomes, params.genome) params.fai = getGenomeAttribute('fai', params.genomes, params.genome) params.dict = getGenomeAttribute('dict', 
params.genomes, params.genome) +params.elfasta = getGenomeAttribute('elfasta', params.genomes, params.genome) params.strtablefile = getGenomeAttribute('strtablefile', params.genomes, params.genome) params.sdf = getGenomeAttribute('sdf', params.genomes, params.genome) params.dbsnp = getGenomeAttribute('dbsnp', params.genomes, params.genome) @@ -73,6 +74,7 @@ workflow NFCMGG_GERMLINE { pipeline_params.fasta, pipeline_params.fai, pipeline_params.dict, + pipeline_params.elfasta, pipeline_params.strtablefile, pipeline_params.sdf, pipeline_params.dbsnp, @@ -103,6 +105,7 @@ workflow NFCMGG_GERMLINE { pipeline_params.automap_panel, pipeline_params.outdir, GlobalVariables.pedFiles, + pipeline_params.elsites, // Boolean inputs pipeline_params.dragstr, diff --git a/modules.json b/modules.json index fd1f2450..92877e49 100644 --- a/modules.json +++ b/modules.json @@ -66,6 +66,17 @@ "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", "installed_by": ["modules"] }, + "elprep/fastatoelfasta": { + "branch": "master", + "git_sha": "74ac5351a11a184171489dee73652e8b69ba9d22", + "installed_by": ["modules"] + }, + "elprep/filter": { + "branch": "master", + "git_sha": "909c4dcdbb1e751214e2bb155e8c0a59633ed12a", + "installed_by": ["modules"], + "patch": "modules/nf-core/elprep/filter/elprep-filter.diff" + }, "ensemblvep/download": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -183,7 +194,7 @@ "tabix/bgzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "tabix/bgziptabix": { "branch": "master", diff --git a/modules/nf-core/elprep/fastatoelfasta/environment.yml b/modules/nf-core/elprep/fastatoelfasta/environment.yml new file mode 100644 index 00000000..6ab3f8fc --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::elprep=5.1.3" diff --git a/modules/nf-core/elprep/fastatoelfasta/main.nf b/modules/nf-core/elprep/fastatoelfasta/main.nf new file mode 100644 index 00000000..861350bf --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/main.nf @@ -0,0 +1,50 @@ +process ELPREP_FASTATOELFASTA { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/elprep:5.1.3--he881be0_1': + 'biocontainers/elprep:5.1.3--he881be0_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.elfasta") , emit: elfasta + tuple val(meta), path("logs/elprep/elprep*"), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + elprep fasta-to-elfasta \\ + $fasta \\ + ${prefix}.elfasta \\ + --log-path ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def timestamp = "${java.time.OffsetDateTime.now().format(java.time.format.DateTimeFormatter.ISO_DATE_TIME)}" + + """ + mkdir -p logs/elprep + + touch ${prefix}.elfasta + touch logs/elprep/elprep-${timestamp}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/elprep/fastatoelfasta/meta.yml b/modules/nf-core/elprep/fastatoelfasta/meta.yml new file mode 100644 index 00000000..41a8be31 --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/meta.yml @@ 
-0,0 +1,55 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "elprep_fastatoelfasta" +description: Convert a file in FASTA format to the ELFASTA format +keywords: + - fasta + - elfasta + - elprep +tools: + - "elprep": + description: "elPrep is a high-performance tool for preparing .sam/.bam files + for variant calling in sequencing pipelines. It can be used as a drop-in replacement + for SAMtools/Picard/GATK4." + homepage: "https://github.com/ExaScience/elprep" + documentation: "https://github.com/ExaScience/elprep" + tool_dev_url: "https://github.com/ExaScience/elprep" + doi: "10.1371/journal.pone.0244471" + licence: ["AGPL v3"] + identifier: biotools:elprep + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: FASTA file + pattern: "*.{fasta,fa,fna}" +output: + - elfasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + pattern: "*.elfasta" + - "*.elfasta": + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + pattern: "*.elfasta" + - log: + - meta: {} + - logs/elprep/elprep*: {} + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test new file mode 100644 index 00000000..d22f6d9d --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process ELPREP_FASTATOELFASTA" + script "../main.nf" + process "ELPREP_FASTATOELFASTA" + + tag "modules" + tag "modules_nfcore" + tag "elprep" + tag "elprep/fastatoelfasta" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.elfasta, + process.out.log.collect { [it[0], file(it[1]).exists()] }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.elfasta, + process.out.log.collect { [it[0], file(it[1]).exists()] }, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap new file mode 100644 index 00000000..799bb0fb --- /dev/null +++ b/modules/nf-core/elprep/fastatoelfasta/tests/main.nf.test.snap @@ -0,0 +1,62 @@ +{ + 
"sarscov2 - fasta - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.elfasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + true + ] + ], + [ + "versions.yml:md5,bf313ed1289a8969464c5593b0ff67be" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T14:29:50.861439255" + }, + "sarscov2 - fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.elfasta:md5,09a6f76bed84ee211ef0d962e26c77f1" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + true + ] + ], + [ + "versions.yml:md5,bf313ed1289a8969464c5593b0ff67be" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T14:25:24.238816922" + } +} \ No newline at end of file diff --git a/modules/nf-core/elprep/filter/elprep-filter.diff b/modules/nf-core/elprep/filter/elprep-filter.diff new file mode 100644 index 00000000..c877a871 --- /dev/null +++ b/modules/nf-core/elprep/filter/elprep-filter.diff @@ -0,0 +1,81 @@ +Changes in module 'nf-core/elprep/filter' +Changes in 'elprep/filter/main.nf': +--- modules/nf-core/elprep/filter/main.nf ++++ modules/nf-core/elprep/filter/main.nf +@@ -20,7 +20,6 @@ + + + output: +- tuple val(meta), path("*.{bam,sam}") , emit: bam + tuple val(meta), path("*.log") , emit: logs + tuple val(meta), path("*.metrics.txt") , optional: true, emit: metrics + tuple val(meta), path("*.recall") , optional: true, emit: recall +@@ -65,7 +64,7 @@ + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ + """ +- elprep filter ${bam} ${prefix}.${suffix} \\ ++ elprep filter ${bam} /dev/null \\ + ${reference_sequences_cmd} \\ + ${filter_regions_cmd} \\ + ${markdup_cmd} \\ +@@ -106,7 +105,6 @@ + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ +- touch ${prefix}.${suffix} + touch elprep-${timestamp}.log + ${markdup_cmd} + ${bqsr_cmd} + +'modules/nf-core/elprep/filter/environment.yml' is unchanged +'modules/nf-core/elprep/filter/meta.yml' is unchanged +'modules/nf-core/elprep/filter/tests/main.nf.test' is unchanged +Changes in 'elprep/filter/tests/main.nf.test.snap': +--- modules/nf-core/elprep/filter/tests/main.nf.test.snap ++++ modules/nf-core/elprep/filter/tests/main.nf.test.snap +@@ -2,13 +2,7 @@ + "test-elprep-filter": { + "content": [ + [ +- [ +- { +- "id": "test", +- "single_end": false +- }, +- "test.bam,readsMD5:463ac3b905fbf4ddf113a94dbfa8d69f" +- ] ++ + ], + [ + +@@ -57,22 +51,14 @@ + ] + ], + "meta": { +- "nf-test": "0.9.0", +- "nextflow": "24.04.4" ++ "nf-test": "0.9.1", ++ "nextflow": "24.10.0" + }, +- "timestamp": "2024-10-22T11:05:45.927224502" ++ "timestamp": "2024-11-05T15:16:40.979143203" + }, + "test-elprep-filter-stub": { + "content": [ +- [ +- [ +- { +- "id": "test", +- "single_end": false +- }, +- "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" +- ] +- ], ++ null, + [ + + ], + +'modules/nf-core/elprep/filter/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/elprep/filter/environment.yml b/modules/nf-core/elprep/filter/environment.yml new file mode 100644 index 00000000..38dd4f47 --- /dev/null +++ b/modules/nf-core/elprep/filter/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::elprep=5.1.3 diff --git a/modules/nf-core/elprep/filter/main.nf b/modules/nf-core/elprep/filter/main.nf new file mode 100644 index 00000000..df445339 --- 
/dev/null +++ b/modules/nf-core/elprep/filter/main.nf @@ -0,0 +1,121 @@ +process ELPREP_FILTER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/elprep:5.1.3--he881be0_1': + 'biocontainers/elprep:5.1.3--he881be0_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(target_regions_bed), path(filter_regions_bed), path(intermediate_bqsr_tables), path(recall_file) + tuple val(meta2), path(reference_sequences) + tuple val(meta3), path(reference_elfasta) + tuple val(meta4), path(known_sites_elsites) + val(run_haplotypecaller) + val(run_bqsr) + val(bqsr_tables_only) + val(get_activity_profile) + val(get_assembly_regions) + + + output: + tuple val(meta), path("*.log") , emit: logs + tuple val(meta), path("*.metrics.txt") , optional: true, emit: metrics + tuple val(meta), path("*.recall") , optional: true, emit: recall + tuple val(meta), path("*.vcf.gz") , optional: true, emit: gvcf + tuple val(meta), path("*.table") , optional: true, emit: table + tuple val(meta), path("*.activity_profile.igv") , optional: true, emit: activity_profile + tuple val(meta), path("*.assembly_regions.igv") , optional: true, emit: assembly_regions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--output-type sam") ? "sam" : "bam" + + // filter args + def reference_sequences_cmd = reference_sequences ? "--replace-reference-sequences ${reference_sequences}" : "" + def filter_regions_cmd = filter_regions_bed ? "--filter-non-overlapping-reads ${filter_regions_bed}" : "" + + // markdup args + def markdup_cmd = args.contains("--mark-duplicates") ? 
"--mark-optical-duplicates ${prefix}.metrics.txt": "" + + // variant calling args + def haplotyper_cmd = run_haplotypecaller ? "--haplotypecaller ${prefix}.g.vcf.gz": "" + + def fasta_cmd = reference_elfasta ? "--reference ${reference_elfasta}": "" + def known_sites_cmd = known_sites_elsites ? "--known-sites ${known_sites_elsites}": "" + def target_regions_cmd = target_regions_bed ? "--target-regions ${target_regions_bed}": "" + + // bqsr args + def bqsr_cmd = run_bqsr ? "--bqsr ${prefix}.recall": "" + def bqsr_tables_only_cmd = bqsr_tables_only ? "--bqsr-tables-only ${prefix}.table": "" + + def intermediate_bqsr_cmd = intermediate_bqsr_tables ? "--bqsr-apply .": "" + def input_recall_cmd = recall_file ? "--recal-file $recall_file" : "" + // misc + def activity_profile_cmd = get_activity_profile ? "--activity-profile ${prefix}.activity_profile.igv": "" + def assembly_regions_cmd = get_assembly_regions ? "--assembly-regions ${prefix}.assembly_regions.igv": "" + + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + elprep filter ${bam} /dev/null \\ + ${reference_sequences_cmd} \\ + ${filter_regions_cmd} \\ + ${markdup_cmd} \\ + ${haplotyper_cmd} \\ + ${fasta_cmd} \\ + ${known_sites_cmd} \\ + ${target_regions_cmd} \\ + ${bqsr_cmd} \\ + ${bqsr_tables_only_cmd} \\ + ${intermediate_bqsr_cmd} \\ + ${input_recall_cmd} \\ + ${activity_profile_cmd} \\ + ${assembly_regions_cmd} \\ + --nr-of-threads ${task.cpus} \\ + --log-path ./ \\ + $args + + mv logs/elprep/*.log . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--output-type sam") ? 
"sam" : "bam" + def timestamp = "${java.time.OffsetDateTime.now().format(java.time.format.DateTimeFormatter.ISO_DATE_TIME)}" + def markdup_cmd = args.contains("--mark-duplicates") ? "touch ${prefix}.metrics.txt": "" + def bqsr_cmd = run_bqsr ? "touch ${prefix}.recall": "" + def haplotyper_cmd = run_haplotypecaller ? "echo | gzip > ${prefix}.g.vcf.gz": "" + def bqsr_tables_only_cmd = bqsr_tables_only ? "echo | gzip > ${prefix}.table": "" + def activity_profile_cmd = get_activity_profile ? "touch ${prefix}.activity_profile.igv": "" + def assembly_regions_cmd = get_assembly_regions ? "touch ${prefix}.assembly_regions.igv": "" + + if ("$bam" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + touch elprep-${timestamp}.log + ${markdup_cmd} + ${bqsr_cmd} + ${haplotyper_cmd} + ${bqsr_tables_only_cmd} + ${activity_profile_cmd} + ${assembly_regions_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/elprep/filter/meta.yml b/modules/nf-core/elprep/filter/meta.yml new file mode 100644 index 00000000..2af3b8b1 --- /dev/null +++ b/modules/nf-core/elprep/filter/meta.yml @@ -0,0 +1,212 @@ +name: "elprep_filter" +description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant + calling." +keywords: + - sort + - bam + - sam + - filter + - variant calling +tools: + - "elprep": + description: "elPrep is a high-performance tool for preparing .sam/.bam files + for variant calling in sequencing pipelines. It can be used as a drop-in replacement + for SAMtools/Picard/GATK4." 
+ homepage: "https://github.com/ExaScience/elprep" + documentation: "https://github.com/ExaScience/elprep" + tool_dev_url: "https://github.com/ExaScience/elprep" + doi: "10.1371/journal.pone.0244471" + licence: ["AGPL v3"] + identifier: biotools:elprep +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input SAM/BAM file + pattern: "*.{bam,sam}" + - bai: + type: file + description: Input BAM file index + pattern: "*.bai" + - target_regions_bed: + type: file + description: Optional BED file containing target regions for BQSR and variant + calling. + pattern: "*.bed" + - filter_regions_bed: + type: file + description: Optional BED file containing regions to filter. + pattern: "*.bed" + - intermediate_bqsr_tables: + type: file + description: Optional list of BQSR tables, used when parsing files created by + `elprep split` + pattern: "*.table" + - recall_file: + type: file + description: Recall file with intermediate results for bqsr + pattern: "*.recall" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference_sequences: + type: file + description: Optional SAM header to replace existing header. + pattern: "*.sam" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference_elfasta: + type: file + description: Elfasta file, required for BQSR and variant calling. + pattern: "*.elfasta" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - known_sites_elsites: + type: file + description: Optional elsites file containing known SNPs for BQSR. + pattern: "*.elsites" + - - run_haplotypecaller: + type: boolean + description: Run variant calling on the input files. Needed to generate gvcf + output. 
+ - - run_bqsr: + type: boolean + description: Run BQSR on the input files. Needed to generate recall metrics. + - - bqsr_tables_only: + type: boolean + description: Write intermediate BQSR tables, used when parsing files created + by `elprep split`. + - - get_activity_profile: + type: boolean + description: Get the activity profile calculated by the haplotypecaller to the + given file in IGV format. + - - get_assembly_regions: + type: boolean + description: Get the assembly regions calculated by haplotypecaller to the speficied + file in IGV format. +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{bam,sam}" + - "*.{bam,sam}": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{bam,sam}" + - logs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "elprep-*.log" + - "*.log": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "elprep-*.log" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{metrics.txt}" + - "*.metrics.txt": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{metrics.txt}" + - recall: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{recall}" + - "*.recall": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{recall}" + - gvcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - "*.vcf.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{table}" + - "*.table": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{table}" + - activity_profile: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{activity_profile.igv}" + - "*.activity_profile.igv": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{activity_profile.igv}" + - assembly_regions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{assembly_regions.igv}" + - "*.assembly_regions.igv": + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.{assembly_regions.igv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" +maintainers: + - "@matthdsm" diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test b/modules/nf-core/elprep/filter/tests/main.nf.test new file mode 100644 index 00000000..84f6e16c --- /dev/null +++ b/modules/nf-core/elprep/filter/tests/main.nf.test @@ -0,0 +1,120 @@ + +nextflow_process { + + name "Test Process ELPREP_FILTER" + script "../main.nf" + process "ELPREP_FILTER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "elprep" + tag "elprep/filter" + + test("test-elprep-filter") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + [], + [], + [] + ]) // meta, bam, bai, target_regions, bqsr_table, recall + input[1] = [[],[]] // reference sequences + input[2] = [ + [ id:'elfasta' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.elfasta', checkIfExists: true) + ] // meta2, reference_elfasta + input[3] = [ + [ id: 'sites' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites', checkIfExists: true) + ] // elsites + input[4] = true // haplotypecaller + input[5] = true // bqsr + input[6] = false // bqsr_tables_only + input[7] = true // get_activity_profile + input[8] = true // get_assembly_regions + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.logs }, // name is unstable + { assert 
snapshot( + process.out.bam.collect { [it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}"] }, + process.out.metrics.collect { [it[0], file(it[1]).readLines()[10..20]] }, + process.out.recall, + process.out.gvcf.collect { [ it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}" ] }, + process.out.table, + process.out.activity_profile, + process.out.assembly_regions, + process.out.versions + ).match() + } + ) + } + } + + test("test-elprep-filter-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'ref_seq'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] // reference sequences + input[2] = [ + [ id:'elfasta' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.elfasta', checkIfExists: true) + ] // meta2, reference_elfasta + input[3] = [ + [ id: 'sites' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites', checkIfExists: true) + ] // elsites + input[4] = true // haplotypecaller + input[5] = false // bqsr + input[6] = false // bqsr_tables_only + input[7] = true // get_activity_profile + input[8] = true // get_assembly_regions + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.logs }, // name is unstable + { assert snapshot( + process.out.bam, + process.out.metrics, + process.out.recall, + process.out.gvcf, + process.out.table, + process.out.activity_profile, + 
process.out.assembly_regions, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/elprep/filter/tests/main.nf.test.snap b/modules/nf-core/elprep/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..d4848abc --- /dev/null +++ b/modules/nf-core/elprep/filter/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "test-elprep-filter": { + "content": [ + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.recall:md5,9a7921cc49a7a3f6c20e0278eaf3f235" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz,variantsMD5:b74f219f1f3ca2e59d6edfabf503a6a9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.activity_profile.igv:md5,c4b77c1bebcffd7822cafb8b90f70cde" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly_regions.igv:md5,7ec2070b4d4af26532cffbc1c465ba93" + ] + ], + [ + "versions.yml:md5,8193703d0cedd662b76ea48940dac55d" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T15:16:40.979143203" + }, + "test-elprep-filter-stub": { + "content": [ + null, + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.activity_profile.igv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly_regions.igv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,8193703d0cedd662b76ea48940dac55d" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-22T10:45:09.343805413" + } +} \ No newline at end of file diff --git a/modules/nf-core/elprep/filter/tests/nextflow.config b/modules/nf-core/elprep/filter/tests/nextflow.config new file mode 100644 index 00000000..bcb2dae0 --- /dev/null +++ 
b/modules/nf-core/elprep/filter/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: ELPREP_FILTER { + ext.args = "--reference-confidence GVCF" + } +} diff --git a/nextflow.config b/nextflow.config index fe527315..c0a32e09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,6 +78,7 @@ params { multiqc_methods_description = null // References + elsites = null cmgg_config_base = "/conf/" igenomes_base = null //'s3://ngi-igenomes/igenomes' igenomes_ignore = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 53129997..79d18c26 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,6 +104,24 @@ "format": "path", "fa_icon": "fas fa-folder" }, + "elfasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.elfasta$", + "description": "Path to the ELFASTA genome file. This is used when `elprep` is part of the callers and will be automatically generated when missing.", + "fa_icon": "far fa-file-code" + }, + "elsites": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.elsites$", + "description": "Path to the elsites file. 
This is used when `elprep` is part of the callers.", + "fa_icon": "far fa-file-code" + }, "genomes_base": { "type": "string", "default": "/references/", diff --git a/nf-test.config b/nf-test.config index 6d58c41d..5c6eea4f 100644 --- a/nf-test.config +++ b/nf-test.config @@ -6,7 +6,8 @@ config { profile "nf_test,docker" plugins { - load "nft-bam@0.1.1" + load "nft-bam@0.4.0" + load "nft-vcf@1.0.7" } } diff --git a/subworkflows/local/bam_call_elprep/main.nf b/subworkflows/local/bam_call_elprep/main.nf new file mode 100644 index 00000000..6a010326 --- /dev/null +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -0,0 +1,75 @@ +// +// Call the variants using Elprep +// + +include { ELPREP_FILTER } from '../../../modules/nf-core/elprep/filter/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' + +include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' +include { VCF_DBSNP_VCFANNO } from '../vcf_dbsnp_vcfanno/main' + +workflow BAM_CALL_ELPREP { + take: + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai), path(bed) ] => sample BAM files and their indexes with the split bed files + ch_elfasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference + ch_elsites // channel: [optional] [ val(meta), path(elsites) ] + ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants + ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF + + main: + + def ch_versions = Channel.empty() + + ELPREP_FILTER( + ch_input.map { meta, bam, bai, bed -> + def new_meta = meta + [caller:'elprep'] + [ new_meta, bam, bai, bed, [], [], [] ] + }, + [[],[]], + ch_elfasta, + ch_elsites, + true, // haplotypecaller + false, + false, + false, + false + ) + ch_versions = ch_versions.mix(ELPREP_FILTER.out.versions.first()) + + VCF_CONCAT_BCFTOOLS( + ELPREP_FILTER.out.gvcf, + true + ) + ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) + + def ch_annotated = 
Channel.empty() + if(!(ch_dbsnp instanceof List)) { + VCF_DBSNP_VCFANNO( + VCF_CONCAT_BCFTOOLS.out.vcfs, + ch_dbsnp, + ch_dbsnp_tbi + ) + ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) + ch_annotated = VCF_DBSNP_VCFANNO.out.vcfs + } else { + ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs + } + + BCFTOOLS_STATS( + ch_annotated, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + [[],[]] + ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) + + def ch_reports = BCFTOOLS_STATS.out.stats.collect{ _meta, report -> report} + + emit: + gvcfs = ch_annotated // channel: [ val(meta), path(vcf), path(tbi) ] + reports = ch_reports // channel: [ path(stats) ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/local/bam_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf new file mode 100644 index 00000000..0081bf51 --- /dev/null +++ b/subworkflows/local/bam_call_vardictjava/main.nf @@ -0,0 +1,64 @@ +include { VARDICTJAVA } from '../../../modules/nf-core/vardictjava/main' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' +include { BCFTOOLS_REHEADER } from '../../../modules/nf-core/bcftools/reheader/main' +include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' + +include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' +include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' +include { VCF_DBSNP_VCFANNO } from '../vcf_dbsnp_vcfanno/main' + +workflow BAM_CALL_VARDICTJAVA { + take: + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai), path(bed) ] => sample BAM files and their indexes with the split bed files + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index + ch_dbsnp // channel: [optional]
[ path(vcf) ] => the dbsnp vcf file + ch_dbsnp_tbi // channel: [optional] [ path(tbi) ] => the dbsnp vcf index file + filter // boolean: filter the VCFs + + main: + def ch_versions = Channel.empty() + + VARDICTJAVA( + ch_input.map { meta, bam, bai, bed -> + def new_meta = meta + [caller:'vardict'] + [ new_meta, bam, bai, bed ] + }, + ch_fasta, + ch_fai + ) + ch_versions = ch_versions.mix(VARDICTJAVA.out.versions.first()) + + VCF_CONCAT_BCFTOOLS( + VARDICTJAVA.out.vcf, + true + ) + ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) + + def ch_annotated = Channel.empty() + if(!(ch_dbsnp instanceof List)) { + VCF_DBSNP_VCFANNO( + VCF_CONCAT_BCFTOOLS.out.vcfs, + ch_dbsnp, + ch_dbsnp_tbi + ) + ch_versions = ch_versions.mix(VCF_DBSNP_VCFANNO.out.versions) + ch_annotated = VCF_DBSNP_VCFANNO.out.vcfs + } else { + ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs + } + + def ch_vcfs = ch_annotated + .map { meta, vcf, tbi -> + def new_meta = meta + [family_samples: meta.sample] + [ new_meta, vcf, tbi ] + } + + emit: + vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] + +} diff --git a/subworkflows/local/cram_call_gatk4/main.nf b/subworkflows/local/cram_call_gatk4/main.nf index d0e14ebd..05c1543a 100644 --- a/subworkflows/local/cram_call_gatk4/main.nf +++ b/subworkflows/local/cram_call_gatk4/main.nf @@ -4,7 +4,7 @@ include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../modules/nf-core/gatk4/calibratedragstrmodel/main' include { GATK4_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/haplotypecaller/main' -include { BCFTOOLS_STATS as BCFTOOLS_STATS_SINGLE } from '../../../modules/nf-core/bcftools/stats/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' @@ -87,7 +87,7 @@ workflow CRAM_CALL_GATK4 { ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) - BCFTOOLS_STATS_SINGLE( +
BCFTOOLS_STATS( VCF_CONCAT_BCFTOOLS.out.vcfs, [[],[]], [[],[]], @@ -95,9 +95,9 @@ workflow CRAM_CALL_GATK4 { [[],[]], [[],[]] ) - ch_versions = ch_versions.mix(BCFTOOLS_STATS_SINGLE.out.versions.first()) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) - def ch_reports = BCFTOOLS_STATS_SINGLE.out.stats.collect{ _meta, report -> report} + def ch_reports = BCFTOOLS_STATS.out.stats.collect{ _meta, report -> report} emit: gvcfs = VCF_CONCAT_BCFTOOLS.out.vcfs // channel: [ val(meta), path(vcf), path(tbi) ] diff --git a/subworkflows/local/cram_call_genotype_gatk4/main.nf b/subworkflows/local/cram_call_genotype_gatk4/main.nf deleted file mode 100644 index 910baed6..00000000 --- a/subworkflows/local/cram_call_genotype_gatk4/main.nf +++ /dev/null @@ -1,89 +0,0 @@ -// -// Call and genotype variants with GATK4 tooling -// - -include { CRAM_CALL_GATK4 } from '../cram_call_gatk4/main' -include { GVCF_JOINT_GENOTYPE_GATK4 } from '../gvcf_joint_genotype_gatk4/main' -include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' - -workflow CRAM_CALL_GENOTYPE_GATK4 { - take: - ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files - ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary - ch_strtablefile // channel: [optional] [ path(strtablefile) ] => STR table file - ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants - ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF - dragstr // boolean: create a DragSTR model and run haplotypecaller with it - only_call // boolean: only run the variant 
calling - only_merge // boolean: run until the family merging - filter // boolean: filter the VCFs - scatter_count // integer: the amount of times the VCFs should be scattered - - main: - - def ch_versions = Channel.empty() - def ch_vcfs = Channel.empty() - def ch_reports = Channel.empty() - - CRAM_CALL_GATK4( - ch_input, - ch_fasta, - ch_fai, - ch_dict, - ch_strtablefile, - ch_dbsnp, - ch_dbsnp_tbi, - dragstr - ) - ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions) - ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports) - - def ch_gvcfs_ready = ch_gvcfs - .map { meta, gvcf, tbi -> - def new_meta = meta + [caller:"haplotypecaller"] - [ new_meta, gvcf, tbi ] - } - .mix(CRAM_CALL_GATK4.out.gvcfs) - - if(!only_call) { - - GVCF_JOINT_GENOTYPE_GATK4( - ch_gvcfs_ready, - ch_fasta, - ch_fai, - ch_dict, - ch_dbsnp, - ch_dbsnp_tbi, - only_merge, - scatter_count - ) - ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions) - - } - - if(!only_call && !only_merge) { - - if(filter) { - VCF_FILTER_BCFTOOLS( - GVCF_JOINT_GENOTYPE_GATK4.out.vcfs, - true - ) - ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) - - ch_vcfs = VCF_FILTER_BCFTOOLS.out.vcfs - } else { - ch_vcfs = GVCF_JOINT_GENOTYPE_GATK4.out.vcfs - } - - } - - emit: - vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - - reports = ch_reports // channel: [ path(reports) ] - versions = ch_versions // channel: [ versions.yml ] - -} diff --git a/subworkflows/local/cram_call_vardictjava/main.nf b/subworkflows/local/cram_call_vardictjava/main.nf deleted file mode 100644 index 91ab4fef..00000000 --- a/subworkflows/local/cram_call_vardictjava/main.nf +++ /dev/null @@ -1,135 +0,0 @@ -include { SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main' -include { VARDICTJAVA } from '../../../modules/nf-core/vardictjava/main' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' -include { BCFTOOLS_REHEADER } from 
'../../../modules/nf-core/bcftools/reheader/main' -include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' - -include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' -include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' - -workflow CRAM_CALL_VARDICTJAVA { - take: - ch_crams // channel: [mandatory] [ val(meta), path(cram), path(crai) ] => sample CRAM files and their indexes - ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index - ch_dbsnp // channel: [optional] [ path(vcf) ] => the dbnsp vcf file - ch_dbsnp_tbi // channel: [optional] [ path(tbi) ] => the dbsnp vcf index file - filter // boolean: filter the VCFs - - main: - def ch_versions = Channel.empty() - - def ch_cram_bam = ch_crams - .map { meta, cram, crai -> - def new_meta = meta + [caller:"vardict"] - [ new_meta, cram, crai ] - } - .branch { _meta, cram, _crai -> - bam: cram.extension == "bam" - cram: cram.extension == "cram" - } - - SAMTOOLS_CONVERT( - ch_cram_bam.cram, - ch_fasta, - ch_fai - ) - ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) - - def ch_vardict_crams = ch_input - .map { meta, cram, crai, bed -> - def new_meta = meta - meta.subMap("split_count") + [caller:"vardict", id:meta.sample] - [ new_meta, cram, crai, bed, meta.split_count ] - } - - def ch_vardict_input = ch_cram_bam.bam - .mix(SAMTOOLS_CONVERT.out.bam.join(SAMTOOLS_CONVERT.out.bai, failOnMismatch:true, failOnDuplicate:true)) - .combine(ch_vardict_crams, by:0) - .map { meta, bam, bai, _cram, _crai, bed, split_count -> - def new_meta = meta + [id:bed.baseName, split_count:split_count] - [ new_meta, bam, 
bai, bed ] - } - - VARDICTJAVA( - ch_vardict_input, - ch_fasta, - ch_fai - ) - ch_versions = ch_versions.mix(VARDICTJAVA.out.versions.first()) - - VCF_CONCAT_BCFTOOLS( - VARDICTJAVA.out.vcf, - false - ) - ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) - - def ch_annotated = Channel.empty() - if(!(ch_dbsnp instanceof List)) { - ch_dbsnp.map { _meta, dbsnp -> [ get_vcfanno_config(dbsnp) ] } - .collect() - .set { ch_vcfanno_toml } // Set needs to be used here due to some Nextflow bug - - ch_dbsnp.map { _meta, dbsnp -> dbsnp } - .combine(ch_dbsnp_tbi.map { _meta, tbi -> tbi }) - .collect() - .set { ch_vcfanno_resources } // Set needs to be used here due to some Nextflow bug - - VCFANNO( - VCF_CONCAT_BCFTOOLS.out.vcfs.map { meta, vcf -> [ meta, vcf, [], [] ] }, - ch_vcfanno_toml, - [], - ch_vcfanno_resources - ) - ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - - TABIX_BGZIP( - VCFANNO.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) - - ch_annotated = TABIX_BGZIP.out.output - } else { - ch_annotated = VCF_CONCAT_BCFTOOLS.out.vcfs - } - - def ch_filter_output = Channel.empty() - if(filter) { - VCF_FILTER_BCFTOOLS( - ch_annotated, - false - ) - ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) - ch_filter_output = VCF_FILTER_BCFTOOLS.out.vcfs - } else { - ch_filter_output = ch_annotated - } - - TABIX_TABIX( - ch_filter_output - ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - def ch_vcfs = ch_filter_output - .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) - .map { meta, vcf, tbi -> - def new_meta = meta + [family_samples: meta.sample] - [ new_meta, vcf, tbi ] - } - - emit: - vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - - versions = ch_versions // channel: [ path(versions.yml) ] - -} - -def get_vcfanno_config(vcf) { - def old_toml = file("${projectDir}/assets/dbsnp.toml", checkIfExists: true) - 
old_toml.copyTo("${workDir}/vcfanno/dbsnp.toml") - def new_toml = file("${workDir}/vcfanno/dbsnp.toml") - new_toml.text = old_toml.text.replace("DBSNP_FILE", vcf.getName()) - return new_toml -} diff --git a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf index dbce8201..320008f4 100644 --- a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf +++ b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf @@ -8,6 +8,7 @@ include { FILTER_BEDS } from '../../../modules/local/filte include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main' include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' include { TABIX_BGZIP as UNZIP_ROI } from '../../../modules/nf-core/tabix/bgzip/main' include { BEDTOOLS_INTERSECT } from '../../../modules/nf-core/bedtools/intersect/main' @@ -20,6 +21,7 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { ch_fasta // channel: [mandatory] [ path(fasta) ] => fasta reference ch_fai // channel: [mandatory] [ path(fai) ] => fasta reference index ch_default_roi // channel: [optional] [ path(roi) ] => bed containing regions of interest to be used as default + output_bam // boolean: Also output BAM files main: @@ -71,6 +73,22 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { .join(SAMTOOLS_INDEX.out.crai, failOnDuplicate: true, failOnMismatch: true) .mix(ch_merged_crams.indexed) + // + // Optionally convert the CRAM files to BAM + // + + def ch_ready_bams = Channel.empty() + if(output_bam) { + SAMTOOLS_CONVERT( + ch_ready_crams, + ch_fasta, + ch_fai + ) + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + + ch_ready_bams = SAMTOOLS_CONVERT.out.bam.join(SAMTOOLS_CONVERT.out.bai, failOnDuplicate:true, failOnMismatch:true) + } + // // Preprocess the ROI BED files => sort and 
merge overlapping regions // @@ -168,6 +186,7 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { emit: ready_crams = ch_ready_crams // [ val(meta), path(cram), path(crai) ] + ready_bams = ch_ready_bams // [ val(meta), path(bam), path(bai) ] ready_beds = ch_ready_beds // [ val(meta), path(bed) ] versions = ch_versions // [ path(versions) ] reports = ch_reports // [ path(reports) ] diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index b3f761f4..4a7ff91f 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -16,7 +16,7 @@ include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' workflow GVCF_JOINT_GENOTYPE_GATK4 { take: - ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => The GVCFs called with HaplotypeCaller + ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => The GVCFs ch_fasta // channel: [mandatory] [ path(fasta) ] => fasta reference ch_fai // channel: [mandatory] [ path(fai) ] => fasta reference index ch_dict // channel: [mandatory] [ path(dict) ] => sequence dictionary @@ -138,7 +138,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { } emit: - vcfs = ch_vcfs // [ val(meta), path(vcf) ] + vcfs = ch_vcfs // [ val(meta), path(vcf), path(tbi) ] versions = ch_versions // [ path(versions) ] } diff --git a/subworkflows/local/input_split_bedtools/main.nf b/subworkflows/local/input_split_bedtools/main.nf index 20977cb0..4aab4987 100644 --- a/subworkflows/local/input_split_bedtools/main.nf +++ b/subworkflows/local/input_split_bedtools/main.nf @@ -20,17 +20,20 @@ workflow INPUT_SPLIT_BEDTOOLS { def ch_split_output = ch_inputs .join(BEDTOOLS_SPLIT.out.beds, failOnDuplicate: true, failOnMismatch: true) - .map { meta, input, input_index, beds -> + .map { row -> + def meta = row[0] + def beds = row[-1] // Determine the amount of BED files per sample def bed_is_list = beds instanceof ArrayList 
def new_meta = meta + [split_count: bed_is_list ? beds.size() : 1] - [ new_meta, input, input_index, bed_is_list ? beds : [beds] ] + def bed_output = bed_is_list ? [beds] : [[beds]] + return [new_meta] + bed_output + row[1..-2] } - .transpose(by:3) // Create one channel entry for each BED file per sample - .map { meta, input, input_index, bed -> + .transpose(by:1) // Create one channel entry for each BED file per sample + .map { row -> // Set the base name of the BED file as the ID (this will look like sample_id.xxxx, where xxxx are numbers) - def new_meta = meta + [id:bed.baseName] - [ new_meta, input, input_index, bed ] + def new_meta = row[0] + [id:row[1].baseName] + return [ new_meta ] + row[2..-1] + [ row[1] ] } emit: diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf new file mode 100644 index 00000000..718b3067 --- /dev/null +++ b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf @@ -0,0 +1,46 @@ +include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' + +workflow VCF_DBSNP_VCFANNO { + take: + ch_input // channel: [mandatory] [ val(meta), path(vcf), path(tbi), ] => VCF files to be annotated + ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ] => the dbsnp vcf file + ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] => the dbsnp vcf index file + + main: + def ch_versions = Channel.empty() + + def ch_vcfanno_toml = ch_dbsnp.map { _meta, dbsnp -> [ get_vcfanno_config(dbsnp) ] } + .collect() + + def ch_vcfanno_resources = ch_dbsnp.map { _meta, dbsnp -> dbsnp } + .combine(ch_dbsnp_tbi.map { _meta, tbi -> tbi }) + .collect() + + VCFANNO( + ch_input.map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] }, + ch_vcfanno_toml, + [], + ch_vcfanno_resources + ) + ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) + + TABIX_BGZIPTABIX( + VCFANNO.out.vcf + ) + ch_versions =
ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) + + emit: + vcfs = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] + +} + +def get_vcfanno_config(vcf) { + def old_toml = file("${projectDir}/assets/dbsnp.toml", checkIfExists: true) + old_toml.copyTo("${workDir}/vcfanno/dbsnp.toml") + def new_toml = file("${workDir}/vcfanno/dbsnp.toml") + new_toml.text = old_toml.text.replace("DBSNP_FILE", vcf.getName()) + return new_toml +} diff --git a/tests/nextflow.config b/tests/nextflow.config index 59ffd5da..80312509 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -11,6 +11,7 @@ params { // References for test data fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" + elfasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.elfasta" fai = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta.fai" dict = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.dict" sdf = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" @@ -31,6 +32,13 @@ params { cram3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram" crai3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram.crai" + bam1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24143.bam" + bai1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24143.bam.bai" + bam2 = 
"https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24149.bam" + bai2 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24149.bam.bai" + bam3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24385.bam" + bai3 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/bams/NA24385.bam.bai" + vcf1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz" tbi1 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi" vcf2 = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz" @@ -58,7 +66,8 @@ params { igenomes_ignore = true genomes_ignore = true - validationSchemaIgnoreParams = 'genomes,igenomes_base,test_data,cram1,cram2,cram3,crai1,crai2,crai3,vcf1,vcf2,vcf3,tbi1,tbi2,tbi3,gvcf1,gvcf2,gvcf3,gtbi1,gtbi2,gtbi3,famvcf,famtbi,ped,bed,split1,split2,split3' + validationSchemaIgnoreParams = 'genomes,igenomes_base,test_data,cram1,cram2,cram3,crai1,crai2,crai3,vcf1,vcf2,vcf3,tbi1,tbi2,tbi3,gvcf1,gvcf2,gvcf3,gtbi1,gtbi2,gtbi3,famvcf,famtbi,ped,bed,split1,split2,split3,modules_testdata_base_path' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' } process { diff --git a/tests/subworkflows/local/bam_call_elprep/main.nf.test b/tests/subworkflows/local/bam_call_elprep/main.nf.test new file mode 100644 index 00000000..2f639e35 --- /dev/null +++ b/tests/subworkflows/local/bam_call_elprep/main.nf.test @@ -0,0 +1,107 @@ +nextflow_workflow { + + name "Test Workflow BAM_CALL_ELPREP" + script "subworkflows/local/bam_call_elprep/main.nf" + workflow "BAM_CALL_ELPREP" + + tag "subworkflows" + tag "subworkflows_local" + tag "bam_call_elprep" + tag "vcf_dbsnp_vcfanno" + + 
test("bam_call_elprep - default") { + + + when { + params { + callers = "elprep" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.elfasta, checkIfExists:true) + ]) + input[2] = [[],[]] + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.gvcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } + + test("bam_call_elprep - dbsnp") { + + + when { + params { + callers = "elprep" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), 
+ file(params.bai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.elfasta, checkIfExists:true) + ]) + input[2] = [[],[]] + input[3] = [[id:'dbsnp'], file(params.vcf1, checkIfExists:true)] + input[4] = [[id:'dbsnp'], file(params.tbi1, checkIfExists:true)] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.gvcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } +} diff --git a/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap b/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap new file mode 100644 index 00000000..ae4dbbbd --- /dev/null +++ b/tests/subworkflows/local/bam_call_elprep/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "bam_call_elprep - dbsnp": { + "content": [ + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "elprep" + }, + "NA24143.elprep.g.vcf.gz,variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", + "NA24143.elprep.g.vcf.gz.tbi" + ] + ], + [ + [ + "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-04T15:56:30.054330853" + }, + "bam_call_elprep - default": { + "content": [ + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "elprep" + }, + "NA24143.elprep.g.vcf.gz,variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", + "NA24143.elprep.g.vcf.gz.tbi" + ] + ], + [ + [ + "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-04T15:55:56.561058236" + } +} \ No newline at end of file diff --git 
a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test new file mode 100644 index 00000000..1b933dda --- /dev/null +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test @@ -0,0 +1,62 @@ +nextflow_workflow { + + name "Test Workflow BAM_CALL_VARDICTJAVA" + script "subworkflows/local/bam_call_vardictjava/main.nf" + workflow "BAM_CALL_VARDICTJAVA" + + tag "subworkflows" + tag "subworkflows_local" + tag "bam_call_vardictjava" + tag "vcf_concat_bcftools" + + test("bam_call_vardictjava - default") { + + when { + params { + callers = "vardict" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.bam1, checkIfExists:true), + file(params.bai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = [[],[]] + input[4] = [[],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } + +} diff --git a/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap 
new file mode 100644 index 00000000..c7011326 --- /dev/null +++ b/tests/subworkflows/local/bam_call_vardictjava/main.nf.test.snap @@ -0,0 +1,24 @@ +{ + "bam_call_vardictjava - default": { + "content": [ + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "vardict" + }, + "NA24143.vardict.vcf.gz,variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", + "NA24143.vardict.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T09:44:33.098049827" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_gatk4/main.nf.test b/tests/subworkflows/local/cram_call_gatk4/main.nf.test new file mode 100644 index 00000000..fa45e6a6 --- /dev/null +++ b/tests/subworkflows/local/cram_call_gatk4/main.nf.test @@ -0,0 +1,129 @@ +nextflow_workflow { + + name "Test Workflow CRAM_CALL_GATK4" + script "subworkflows/local/cram_call_gatk4/main.nf" + workflow "CRAM_CALL_GATK4" + + tag "subworkflows" + tag "subworkflows_local" + tag "cram_call_gatk4" + tag "vcf_concat_bcftools" + + test("cram_call_gatk4 - default") { + + + when { + params { + callers = "haplotypecaller" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + 
[id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = [[],[]] + input[7] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } + + test("cram_call_gatk4 - dragstr") { + + + when { + params { + callers = "haplotypecaller" + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split1, checkIfExists:true) + ],[ + [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split2, checkIfExists:true) + ],[ + [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], + file(params.cram1, checkIfExists:true), + file(params.crai1, checkIfExists:true), + file(params.split3, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = Channel.value([ + [id:"str"], + file(params.strtablefile, checkIfExists:true) + ]) + input[5] = [[],[]] + input[6] = [[],[]] + input[7] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], 
"${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] }, + workflow.out.reports + ).match() } + ) + } + + } + +} diff --git a/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap b/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap new file mode 100644 index 00000000..57de17e4 --- /dev/null +++ b/tests/subworkflows/local/cram_call_gatk4/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "cram_call_gatk4 - default": { + "content": [ + [ + + ], + [ + [ + "NA24143.haplotypecaller.bcftools_stats.txt:md5,09b4e7674e0f5b98b1e548df3002250e" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T09:53:12.737680049" + }, + "cram_call_gatk4 - dragstr": { + "content": [ + [ + + ], + [ + [ + "NA24143.haplotypecaller.bcftools_stats.txt:md5,c4dad5b8e05871dda66df42b1f6c89ff" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T09:54:07.696125828" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test deleted file mode 100644 index 37eca413..00000000 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test +++ /dev/null @@ -1,495 +0,0 @@ -nextflow_workflow { - - name "Test Workflow CRAM_CALL_GENOTYPE_GATK4" - script "subworkflows/local/cram_call_genotype_gatk4/main.nf" - workflow "CRAM_CALL_GENOTYPE_GATK4" - - tag "subworkflows" - tag "subworkflows_local" - tag "cram_call_genotype_gatk4" - tag "cram_call_gatk4" // This is also tested here - tag "gvcf_joint_genotype_gatk4" // This is also tested here - tag "vcf_filter_bcftools" // This is also tested here - tag "vcf_concat_bcftools" // This is also tested here - - test("cram_call_genotype_gatk4 - default - crams") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", 
family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.empty() - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - workflow.out.reports - ).match("default - crams") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - gvcfs") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.empty() - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - workflow.out.reports - ).match("default - gvcfs") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - family") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert 
snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("default - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - filter - family") { - - when { - params { - callers = "haplotypecaller" - filter = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = true - input[12] 
= 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("filter - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - only_call - family") { - - when { - params { - callers = "haplotypecaller" - only_call = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] 
- input[8] = false - input[9] = true - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_call - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - only_merge - family") { - - when { - params { - callers = "haplotypecaller" - only_merge = true - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = Channel.value([ - [id:"strtablefile"], - 
file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = true - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_merge - family") } - ) - } - - } - - test("cram_call_genotype_gatk4 - default - sample + family") { - - when { - params { - callers = "haplotypecaller" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24835.00001", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24835.00002", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24835.00003", sample:"NA24835", family:"Ashkenazim", family_samples:"NA24149,NA24385", split_count:3], - file(params.cram3, checkIfExists:true), - file(params.crai3, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"NA24149", family_samples:"NA24149,NA24385"], - file(params.gvcf2, checkIfExists:true), - file(params.gtbi2, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = Channel.value([ - [id:"dict"], - file(params.dict, checkIfExists:true) - ]) - input[5] = 
Channel.value([ - [id:"strtablefile"], - file(params.strtablefile, checkIfExists:true) - ]) - input[6] = [[],[]] - input[7] = [[],[]] - input[8] = false - input[9] = false - input[10] = false - input[11] = false - input[12] = 2 - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } }, - workflow.out.reports - ).match("only_merge - sample + family") } - ) - } - - } - -} diff --git a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap b/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap deleted file mode 100644 index a0aec425..00000000 --- a/tests/subworkflows/local/cram_call_genotype_gatk4/main.nf.test.snap +++ /dev/null @@ -1,164 +0,0 @@ -{ - "only_merge - family": { - "content": [ - [ - - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T11:20:45.005084818" - }, - "default - family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149,NA24385", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:35:02.44674969" - }, - "filter - family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149,NA24385", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - 
}, - "timestamp": "2024-09-05T17:35:46.768542501" - }, - "default - gvcfs": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:34:17.846266913" - }, - "default - crams": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24143.haplotypecaller.bcftools_stats.txt:md5,09b4e7674e0f5b98b1e548df3002250e" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:33:52.045772718" - }, - "only_call - family": { - "content": [ - [ - - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T11:20:04.714403906" - }, - "only_merge - sample + family": { - "content": [ - [ - [ - { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" - }, - "Ashkenazim.haplotypecaller.vcf.gz", - "Ashkenazim.haplotypecaller.vcf.gz.tbi" - ] - ], - [ - [ - "NA24835.haplotypecaller.bcftools_stats.txt:md5,5f42bee02b2bd0d2af2954292ec3b422" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-06T14:26:29.249708339" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test b/tests/subworkflows/local/cram_call_vardictjava/main.nf.test deleted file mode 100644 index 05798304..00000000 --- a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test +++ /dev/null @@ -1,201 +0,0 @@ 
-nextflow_workflow { - - name "Test Workflow CRAM_CALL_VARDICTJAVA" - script "subworkflows/local/cram_call_vardictjava/main.nf" - workflow "CRAM_CALL_VARDICTJAVA" - - tag "subworkflows" - tag "subworkflows_local" - tag "cram_call_vardictjava" - tag "vcf_concat_bcftools" - tag "vcf_filter_bcftools" - - test("cram_call_vardictjava - default") { - - - when { - params { - callers = "vardict" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = [[],[]] - input[5] = [[],[]] - input[6] = false - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } } - ).match("default") } - ) - } - - } - - test("cram_call_vardictjava - filter") { - - - when { - params { - filter = true - callers = "vardict" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = [[],[]] - input[5] = [[],[]] - input[6] = true - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? 
it : file(it).name } } - ).match("filter") } - ) - } - - } - - test("cram_call_vardictjava - family") { - // The family should not be merged here - - when { - params { - callers = "vardict" - } - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149"], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true) - ],[ - [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149"], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true) - ]) - input[1] = Channel.of([ - [id:"NA24143.00001", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24143.00002", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24143.00003", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram1, checkIfExists:true), - file(params.crai1, checkIfExists:true), - file(params.split3, checkIfExists:true) - ],[ - [id:"NA24149.00001", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true), - file(params.split1, checkIfExists:true) - ],[ - [id:"NA24149.00002", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - file(params.crai2, checkIfExists:true), - file(params.split2, checkIfExists:true) - ],[ - [id:"NA24149.00003", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", split_count:3], - file(params.cram2, checkIfExists:true), - 
file(params.crai2, checkIfExists:true), - file(params.split3, checkIfExists:true) - ]) - input[2] = Channel.value([ - [id:"fasta"], - file(params.fasta, checkIfExists:true) - ]) - input[3] = Channel.value([ - [id:"fai"], - file(params.fai, checkIfExists:true) - ]) - input[4] = [[],[]] - input[5] = [[],[]] - input[6] = false - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { it.collect { it instanceof Map ? it : file(it).name } } - ).match("family") } - ) - } - - } - -} diff --git a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap b/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap deleted file mode 100644 index a476cb3c..00000000 --- a/tests/subworkflows/local/cram_call_vardictjava/main.nf.test.snap +++ /dev/null @@ -1,79 +0,0 @@ -{ - "filter": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "vardict" - }, - "NA24143.vardict.vcf.gz", - "NA24143.vardict.vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:38:01.461442987" - }, - "default": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "vardict" - }, - "NA24143.vardict.vcf.gz", - "NA24143.vardict.vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-05T17:37:40.178107639" - }, - "family": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "vardict" - }, - "NA24143.vardict.vcf.gz", - "NA24143.vardict.vcf.gz.tbi" - ], - [ - { - "id": "NA24149", - "sample": "NA24149", - "family": "Ashkenazim", - "family_samples": "NA24149", - "caller": "vardict" - }, - "NA24149.vardict.vcf.gz", - "NA24149.vardict.vcf.gz.tbi" - ] - ] - ], - "meta": 
{ - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-08T16:58:11.678281371" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test index 0a69a066..511ca4d3 100644 --- a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test +++ b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test @@ -31,15 +31,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = false """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WGS") } @@ -71,15 +78,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = Channel.fromPath(params.bed, checkIfExists:true) + input[5] = true """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES common ROI") } @@ -111,15 +125,22 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = false """ } } then { + def fasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.fasta" assertAll( { assert workflow.success }, { assert snapshot( - workflow.out.ready_crams.collect { it.collect { it instanceof Map ? it : file(it).name } }, + workflow.out.ready_crams.collect { + [ it[0], it[1], file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] + }, workflow.out.ready_beds, workflow.out.reports ).match("default - WES") } @@ -158,6 +179,7 @@ nextflow_workflow { file(params.fai, checkIfExists:true) ]) input[4] = [] + input[5] = true """ } } @@ -168,7 +190,10 @@ nextflow_workflow { { assert workflow.success }, { assert snapshot( workflow.out.ready_crams.collect { - [ it[0], cram(it[1], fasta).reads.size(), file(it[2]).name ] + [ it[0], "${file(it[1]).name},readsMD5:${cram(it[1], fasta).getReadsMD5()}", file(it[2]).name ] + }, + workflow.out.ready_bams.collect { + [ it[0], "${file(it[1]).name},readsMD5:${bam(it[1]).getReadsMD5()}", file(it[2]).name ] }, workflow.out.ready_beds, workflow.out.reports diff --git a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap index cbd7535e..8fabe768 100644 --- a/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap +++ b/tests/subworkflows/local/cram_prepare_samtools_bedtools/main.nf.test.snap @@ -10,10 +10,23 @@ 
"family_samples": "NA24143", "duplicate_count": 2 }, - 798258, + "NA24143.cram,readsMD5:be28f434d6f7bcfa398488a6611d89c1", "NA24143.cram.crai" ] ], + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "duplicate_count": 2 + }, + "NA24143.bam,readsMD5:be28f434d6f7bcfa398488a6611d89c1", + "NA24143.bam.bai" + ] + ], [ [ { @@ -34,7 +47,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-23T11:46:20.577603661" + "timestamp": "2024-10-23T10:19:20.53195135" }, "default - WGS": { "content": [ @@ -47,9 +60,12 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] + ], + [ + ], [ [ @@ -71,7 +87,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-23T14:56:11.141634788" + "timestamp": "2024-10-23T10:17:48.433466279" }, "default - WES": { "content": [ @@ -84,9 +100,12 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] + ], + [ + ], [ [ @@ -108,7 +127,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-06T14:33:15.988619426" + "timestamp": "2024-10-23T10:18:36.993186258" }, "default - WES common ROI": { "content": [ @@ -121,10 +140,23 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.cram", + "/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram", "NA24143.cram.crai" ] ], + [ + [ + { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "duplicate_count": 1 + }, + "NA24143.bam,readsMD5:77afffb023e537869c5c6ebf31187ded", + "NA24143.bam.bai" + ] + ], [ [ { @@ -145,6 +177,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-06T14:32:49.756585296" + "timestamp": 
"2024-10-23T10:18:13.865281894" } } \ No newline at end of file diff --git a/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test new file mode 100644 index 00000000..36de5cf1 --- /dev/null +++ b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test @@ -0,0 +1,190 @@ +nextflow_workflow { + + name "Test Workflow GVCF_JOINT_GENOTYPE_GATK4" + script "subworkflows/local/gvcf_joint_genotype_gatk4/main.nf" + workflow "GVCF_JOINT_GENOTYPE_GATK4" + + tag "subworkflows" + tag "subworkflows_local" + tag "gvcf_joint_genotype_gatk4" + tag "vcf_concat_bcftools" + tag "input_split_bedtools" + + test("gvcf_joint_genotype_gatk4 - single_sample") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - family") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ],[ + [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + 
file(params.gvcf2, checkIfExists:true), + file(params.gtbi2, checkIfExists:true) + ] + ) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - only_merge") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = true + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs + ).match() } + ) + } + + } + + test("gvcf_joint_genotype_gatk4 - single_sample + family") { + + when { + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf1, checkIfExists:true), + file(params.gtbi1, checkIfExists:true) + ],[ + [id:"NA24149", sample:"NA24149", family:"Ashkenazim", family_samples:"NA24143,NA24149", caller:"haplotypecaller"], + file(params.gvcf2, checkIfExists:true), + file(params.gtbi2, checkIfExists:true) + ],[ + 
[id:"NA24385", sample:"NA24385", family:"NA24385", family_samples:"NA24385", caller:"haplotypecaller"], + file(params.gvcf3, checkIfExists:true), + file(params.gtbi3, checkIfExists:true) + ] + ) + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists:true) + ]) + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists:true) + ]) + input[4] = [[],[]] + input[5] = [[],[]] + input[6] = false + input[7] = 2 + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs.collect { [it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name] } + ).match() } + ) + } + + } +} diff --git a/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap new file mode 100644 index 00000000..b02a7a06 --- /dev/null +++ b/tests/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf.test.snap @@ -0,0 +1,87 @@ +{ + "gvcf_joint_genotype_gatk4 - single_sample + family": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + "Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4dea305eb71decb122709e75af9c833f", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ], + [ + { + "family": "NA24385", + "family_samples": "NA24385", + "caller": "haplotypecaller", + "id": "NA24385" + }, + "NA24385.haplotypecaller.vcf.gz,variantsMD5:4ffd515511f59e3561e3fb1b046d7675", + "NA24385.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:05:41.216399607" + }, + "gvcf_joint_genotype_gatk4 - single_sample": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + 
"Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4c6db9171912bcbbaefeec2a24968a", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:03:11.507733028" + }, + "gvcf_joint_genotype_gatk4 - only_merge": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T11:43:25.386070284" + }, + "gvcf_joint_genotype_gatk4 - family": { + "content": [ + [ + [ + { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim" + }, + "Ashkenazim.haplotypecaller.vcf.gz,variantsMD5:4dea305eb71decb122709e75af9c833f", + "Ashkenazim.haplotypecaller.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T13:03:57.301900285" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test new file mode 100644 index 00000000..75443075 --- /dev/null +++ b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test @@ -0,0 +1,47 @@ +nextflow_workflow { + + name "Test Workflow VCF_DBSNP_VCFANNO" + script "subworkflows/local/vcf_dbsnp_vcfanno/main.nf" + workflow "VCF_DBSNP_VCFANNO" + + tag "subworkflows" + tag "subworkflows_local" + tag "vcf_dbsnp_vcfanno" + + test("vcf_dbsnp_vcfanno - default") { + + when { + params { + annotate = true + } + workflow { + """ + input[0] = Channel.of([ + [id:"NA24143", family:"NA24143", family_samples:"NA24143", caller:"haplotypecaller"], + file(params.vcf1, checkIfExists:true), + file(params.tbi1, checkIfExists:true) + ]) + input[1] = Channel.value([ + [id:"dbnsp"], + file(params.vcf2, checkIfExists:true) + ]) + input[2] = Channel.value([ + [id:"dbnsp"], + file(params.tbi2, checkIfExists:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + 
workflow.out.vcfs.collect { [ it[0], "${file(it[1]).name},variantsMD5:${path(it[1]).vcf.variantsMD5}", file(it[2]).name ] } + ).match() } + ) + } + + } + +} diff --git a/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap new file mode 100644 index 00000000..43ea8318 --- /dev/null +++ b/tests/subworkflows/local/vcf_dbsnp_vcfanno/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "vcf_dbsnp_vcfanno - default": { + "content": [ + [ + [ + { + "id": "NA24143", + "family": "NA24143", + "family_samples": "NA24143", + "caller": "haplotypecaller" + }, + "NA24143.vcf.gz,variantsMD5:b4f76bc67ba0e159489393d4788349b3", + "NA24143.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T14:44:22.532988474" + } +} \ No newline at end of file diff --git a/workflows/germline.nf b/workflows/germline.nf index f7c91c18..1946490a 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -17,14 +17,17 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_ include { CRAM_PREPARE_SAMTOOLS_BEDTOOLS } from '../subworkflows/local/cram_prepare_samtools_bedtools/main' include { INPUT_SPLIT_BEDTOOLS } from '../subworkflows/local/input_split_bedtools/main' -include { CRAM_CALL_GENOTYPE_GATK4 } from '../subworkflows/local/cram_call_genotype_gatk4/main' -include { CRAM_CALL_VARDICTJAVA } from '../subworkflows/local/cram_call_vardictjava/main' +include { CRAM_CALL_GATK4 } from '../subworkflows/local/cram_call_gatk4/main' +include { GVCF_JOINT_GENOTYPE_GATK4 } from '../subworkflows/local/gvcf_joint_genotype_gatk4/main' +include { BAM_CALL_ELPREP } from '../subworkflows/local/bam_call_elprep/main' +include { BAM_CALL_VARDICTJAVA } from '../subworkflows/local/bam_call_vardictjava/main' include { VCF_EXTRACT_RELATE_SOMALIER } from '../subworkflows/local/vcf_extract_relate_somalier/main' include { VCF_PED_RTGTOOLS } from 
'../subworkflows/local/vcf_ped_rtgtools/main' include { VCF_ANNOTATION } from '../subworkflows/local/vcf_annotation/main' include { VCF_VALIDATE_SMALL_VARIANTS } from '../subworkflows/local/vcf_validate_small_variants/main' include { VCF_UPD_UPDIO } from '../subworkflows/local/vcf_upd_updio/main' include { VCF_ROH_AUTOMAP } from '../subworkflows/local/vcf_roh_automap/main' +include { VCF_FILTER_BCFTOOLS } from '../subworkflows/local/vcf_filter_bcftools/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,6 +37,7 @@ include { VCF_ROH_AUTOMAP } from '../subworkflows/local/vcf_ro include { SAMTOOLS_FAIDX as FAIDX } from '../modules/nf-core/samtools/faidx/main' include { GATK4_CREATESEQUENCEDICTIONARY as CREATESEQUENCEDICTIONARY } from '../modules/nf-core/gatk4/createsequencedictionary/main' +include { ELPREP_FASTATOELFASTA } from '../modules/nf-core/elprep/fastatoelfasta/main' include { GATK4_COMPOSESTRTABLEFILE as COMPOSESTRTABLEFILE } from '../modules/nf-core/gatk4/composestrtablefile/main' include { RTGTOOLS_FORMAT } from '../modules/nf-core/rtgtools/format/main' include { UNTAR } from '../modules/nf-core/untar/main' @@ -67,6 +71,7 @@ workflow GERMLINE { fasta // string: path to the reference fasta fai // string: path to the index of the reference fasta dict // string: path to the sequence dictionary file + elfasta // string: path to the elfasta reference file strtablefile // string: path to the strtable file sdf // string: path to the SDF directory dbsnp // string: path to the DBSNP VCF file @@ -97,6 +102,7 @@ workflow GERMLINE { automap_panel // string: path to the Automap panel file outdir // string: path to the output directory pedFiles // map: a map that has the family ID as key and a PED file as value + elsites // string: path to the elsites file for elprep // Boolean inputs dragstr // boolean: create a dragstr model and use it for haplotypecaller @@ -138,6 +144,7 @@ workflow GERMLINE { def ch_fasta_ready = 
Channel.fromPath(fasta).map{ fasta_file -> [[id:"reference"], fasta_file] }.collect() def ch_fai = fai ? Channel.fromPath(fai).map{ fai_file -> [[id:"reference"], fai_file] }.collect() : null def ch_dict = dict ? Channel.fromPath(dict).map{ dict_file -> [[id:"reference"], dict_file] }.collect() : null + def ch_elfasta = elfasta ? Channel.fromPath(elfasta).map { elfasta_file -> [[id:"reference"], elfasta_file]}.collect() : null def ch_strtablefile = strtablefile ? Channel.fromPath(strtablefile).map{ str_file -> [[id:"reference"], str_file] }.collect() : null def ch_sdf = sdf ? Channel.fromPath(sdf).map { sdf_file -> [[id:'reference'], sdf_file] }.collect() : null @@ -159,6 +166,8 @@ workflow GERMLINE { def ch_automap_repeats = automap_repeats ? Channel.fromPath(automap_repeats).map{ repeats -> [[id:"repeats"], repeats] }.collect() : [] def ch_automap_panel = automap_panel ? Channel.fromPath(automap_panel).map{ panel -> [[id:"automap_panel"], panel] }.collect() : [[],[]] + def ch_elsites = elsites ? 
Channel.fromPath(elsites).map{ elsites_file -> [[id:'elsites'], elsites_file] }.collect() : [[],[]] + // // Check for the presence of EnsemblVEP plugins that use extra files // @@ -260,6 +269,18 @@ workflow GERMLINE { ch_dict_ready = ch_dict } + def ch_elfasta_ready = Channel.empty() + def elprep_used = callers.contains("elprep") + if (!ch_elfasta && elprep_used) { + ELPREP_FASTATOELFASTA( + ch_fasta_ready + ) + ch_versions = ch_versions.mix(ELPREP_FASTATOELFASTA.out.versions) + ch_elfasta_ready = ELPREP_FASTATOELFASTA.out.elfasta + } else { + ch_elfasta_ready = ch_elfasta + } + // Reference STR table file def ch_strtablefile_ready = Channel.empty() if (dragstr && !ch_strtablefile) { @@ -354,11 +375,17 @@ workflow GERMLINE { def ch_gvcfs_ready = ch_gvcf_branch.no_tbi .join(TABIX_GVCF.out.tbi, failOnDuplicate:true, failOnMismatch:true) .mix(ch_gvcf_branch.tbi) + .combine(callers.intersect(GlobalVariables.gvcfCallers)) + .map { meta, gvcf, tbi, caller -> + def new_meta = meta + [caller:caller] + [ new_meta, gvcf, tbi ] + } // // Run sample preparation // + def create_bam_files = callers.intersect(GlobalVariables.bamCallers).size() > 0 // Only create BAM files when needed CRAM_PREPARE_SAMTOOLS_BEDTOOLS( ch_input.cram.filter { meta, _cram, _crai -> // Filter out files that already have a called GVCF when only GVCF callers are used @@ -370,7 +397,8 @@ workflow GERMLINE { }, ch_fasta_ready, ch_fai_ready, - ch_default_roi + ch_default_roi, + create_bam_files ) ch_versions = ch_versions.mix(CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.versions) @@ -378,42 +406,70 @@ workflow GERMLINE { // Split the BED files // + def ch_split_cram_bam = Channel.empty() + if(create_bam_files) { + ch_split_cram_bam = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + .join(CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_bams, failOnDuplicate:true, failOnMismatch:true) + } else { + ch_split_cram_bam = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + } + INPUT_SPLIT_BEDTOOLS( 
CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_beds.map { meta, bed -> [meta, bed, scatter_count] }, - CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + ch_split_cram_bam ) ch_versions = ch_versions.mix(INPUT_SPLIT_BEDTOOLS.out.versions) + def ch_caller_inputs = INPUT_SPLIT_BEDTOOLS.out.split + .multiMap { meta, cram, crai, bam=[], bai=[], bed -> + cram: [meta, cram, crai, bed] + bam: [meta, bam, bai, bed] + } + def ch_calls = Channel.empty() if("haplotypecaller" in callers) { // // Call variants with GATK4 HaplotypeCaller // - CRAM_CALL_GENOTYPE_GATK4( - INPUT_SPLIT_BEDTOOLS.out.split.filter { meta, _cram, _crai, _bed -> + CRAM_CALL_GATK4( + ch_caller_inputs.cram.filter { meta, _cram, _crai, _bed -> // Filter out the entries that already have a GVCF meta.type == "cram" }, - ch_gvcfs_ready, ch_fasta_ready, ch_fai_ready, ch_dict_ready, ch_strtablefile_ready, ch_dbsnp_ready, ch_dbsnp_tbi_ready, - dragstr, - only_call, - only_merge, - filter, - scatter_count + dragstr ) - ch_versions = ch_versions.mix(CRAM_CALL_GENOTYPE_GATK4.out.versions) - ch_reports = ch_reports.mix(CRAM_CALL_GENOTYPE_GATK4.out.reports) + ch_gvcfs_ready = ch_gvcfs_ready.mix(CRAM_CALL_GATK4.out.gvcfs) + ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions) + ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports) + } - ch_calls = ch_calls.mix(CRAM_CALL_GENOTYPE_GATK4.out.vcfs) + if("elprep" in callers) { + // + // Call variants with Elprep + // + + BAM_CALL_ELPREP( + ch_caller_inputs.bam.filter { meta, _bam, _bai, _bed -> + // Filter out the entries that already have a GVCF + meta.type == "cram" + }, + ch_elfasta_ready, + ch_elsites, + ch_dbsnp_ready, + ch_dbsnp_tbi_ready + ) + ch_gvcfs_ready = ch_gvcfs_ready.mix(BAM_CALL_ELPREP.out.gvcfs) + ch_versions = ch_versions.mix(BAM_CALL_ELPREP.out.versions) + ch_reports = ch_reports.mix(BAM_CALL_ELPREP.out.reports) } @@ -422,21 +478,39 @@ workflow GERMLINE { // Call variants with VarDict // - CRAM_CALL_VARDICTJAVA( - 
CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams, - INPUT_SPLIT_BEDTOOLS.out.split, + BAM_CALL_VARDICTJAVA( + ch_caller_inputs.bam, ch_fasta_ready, ch_fai_ready, ch_dbsnp_ready, ch_dbsnp_tbi_ready, filter ) - ch_versions = ch_versions.mix(CRAM_CALL_VARDICTJAVA.out.versions) + ch_versions = ch_versions.mix(BAM_CALL_VARDICTJAVA.out.versions) - ch_calls = ch_calls.mix(CRAM_CALL_VARDICTJAVA.out.vcfs) + ch_calls = ch_calls.mix(BAM_CALL_VARDICTJAVA.out.vcfs) } - def ch_called_variants = ch_calls + // Stop pipeline execution when only calls should happen + def ch_gvcfs_final = ch_gvcfs_ready.filter { !only_call } + + GVCF_JOINT_GENOTYPE_GATK4( + ch_gvcfs_final, + ch_fasta_ready, + ch_fai_ready, + ch_dict_ready, + ch_dbsnp_ready, + ch_dbsnp_tbi_ready, + only_merge, + scatter_count + ) + ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions) + ch_calls = ch_calls.mix(GVCF_JOINT_GENOTYPE_GATK4.out.vcfs) + + // Stop pipeline execution when only the merge should happen + def ch_calls_final = ch_calls.filter { !only_merge } + + def ch_called_variants = ch_calls_final .map { meta, vcf, tbi -> def new_meta = meta - meta.subMap(["type", "vardict_min_af"]) [ new_meta, vcf, tbi ] @@ -453,10 +527,22 @@ workflow GERMLINE { ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) ch_reports = ch_reports.mix(BCFTOOLS_STATS.out.stats.collect { _meta, report -> report }) + def ch_filtered_variants = Channel.empty() + if(filter) { + VCF_FILTER_BCFTOOLS( + ch_called_variants, + true + ) + ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) + ch_filtered_variants = VCF_FILTER_BCFTOOLS.out.vcfs + } else { + ch_filtered_variants = ch_called_variants + } + def ch_normalized_variants = Channel.empty() if(normalize) { BCFTOOLS_NORM( - ch_called_variants, + ch_filtered_variants, ch_fasta_ready, ) ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) @@ -469,7 +555,7 @@ workflow GERMLINE { ch_normalized_variants = BCFTOOLS_NORM.out.vcf 
.join(TABIX_NORMALIZE.out.tbi, failOnDuplicate:true, failOnMismatch:true) } else { - ch_normalized_variants = ch_called_variants + ch_normalized_variants = ch_filtered_variants } if(!only_merge && !only_call) {