diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..0cdc614 Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md index 7ad39d6..1eb87f6 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,7 @@ We thank the following people for their extensive assistance in the development **TODO** + Nagarajan Paramasivam @NagaComBio n.paramasivam@dkfz.de ## Contributions and Support @@ -193,7 +194,6 @@ If you would like to contribute to this pipeline, please see the [contributing g ## Citations - diff --git a/assets/schema_input.json b/assets/schema_input.json index 76ec657..dc524f6 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,54 +1,54 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/ghga-de/nf-platypusindelcalling/master/assets/schema_input.json", - "title": "nf-platypusindelcalling pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" - }, - "tumor": { - "type": "string", - "pattern": "^\\S+\\.bam$", - "errorMessage": "BAM file for tumors must be provided'" - }, - "tumor_index": { - "type": "string", - "pattern": "^\\S+\\.bai$", - "errorMessage": "BAI file matching to BAM for tumors must be provided'" - }, - "control": { - "errorMessage": "BAM file for as control matching to tumor, if there is", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bam$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - }, - "control_index": { - "errorMessage": "BAI file matching to BAM for as control matching to tumor, if there is", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bai$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - } - }, - "required": ["sample", "tumor", "tumor_index"] - } 
+ "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/ghga-de/nf-platypusindelcalling/master/assets/schema_input.json", + "title": "nf-platypusindelcalling pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "tumor": { + "type": "string", + "pattern": "^\\S+\\.bam$", + "errorMessage": "BAM file for tumors must be provided'" + }, + "tumor_index": { + "type": "string", + "pattern": "^\\S+\\.bai$", + "errorMessage": "BAI file matching to BAM for tumors must be provided'" + }, + "control": { + "errorMessage": "BAM file for as control matching to tumor, if there is", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bam$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "control_index": { + "errorMessage": "BAI file matching to BAM for as control matching to tumor, if there is", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bai$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": ["sample", "tumor", "tumor_index"] + } } diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 648e64c..f6ce1d6 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -37,7 +37,7 @@ def check_samplesheet(file_in, file_out): """ This function checks that the samplesheet follows the following structure: sample,tumor,tumor_index, control, control_index - sample_WithControl,tumor1.bam,tumot1.bai, control1.bam, control1.bai + sample_WithControl,tumor1.bam,tumor1.bai, control1.bam, control1.bai sample_WithoutControl,tumor2.bam,tumor2.bai,, For an example see: https://github.com/ghga-de/nf-platypusindelcalling/assets/samplesheet.csv diff --git a/bin/vcfparser.pyc b/bin/vcfparser.pyc index 7fde8dc..28511f9 100644 
Binary files a/bin/vcfparser.pyc and b/bin/vcfparser.pyc differ diff --git a/conf/modules.config b/conf/modules.config index a1499ed..d760965 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -106,7 +106,24 @@ process { pattern: "*.{txt}", mode: params.publish_dir_mode ] - } + } + withName: 'ENSEMBLVEP_DOWNLOAD' { + ext.args = { '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } + withName: 'ENSEMBLVEP_VEP' { + //ext.args ='--everything --filter_common --per_gene --total_length --offline' + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/${meta.id}/" }, + pattern: "*{gz,tbi,html}" + ] + ] + } } // // Don't publish results for these processes diff --git a/conf/test.config b/conf/test.config index e4d0d96..a255fb1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,19 +34,19 @@ params { skip_multiqc = false min_confidence_score = 0 - // Annovar - // Annovar needs to be build locally - buildver = "hg38" - dbtype = "wgEncodeGencodeCompV39" - segdupcol = "SEGDUP" - cytobandcol = "CYTOBAND" - geneannocols = '"ANNOVAR_FUNCTION,GENE,EXONIC_CLASSIFICATION,ANNOVAR_TRANSCRIPTS"' - annovar_path = "/Users/w620-admin/Desktop/Workflows/Annovar/annovar_Sept2022" - // Reference Files // genome = "GRCh38" + // Annotation with vep + annotation_tool = "vep" + species = "homo_sapiens" + vep_cache_version = 110 + vep_genome = 'GRCh38' + vep_version = '110' + vep_cache = null + download_cache = false // DO NOT Download annotation cache + // Annotation files k_genome ="${projectDir}/testdata/annotation_files/kgenomes_snvindels.GRCh38.27022019.sites.test.vcf.gz" dbsnp_indel ="${projectDir}/testdata/annotation_files/dbsnp_v151_GRCh38.INDEL.test.vcf.gz" @@ -100,4 +100,8 @@ process { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 
'memory' ) } } + // using vep online is only recommended for test purposes for a minimal set of variants! + withName: 'ENSEMBLVEP_VEP' { + ext.args ='--per_gene --total_length --database' + } } diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile new file mode 100644 index 0000000..0abf8dc --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/Dockerfile @@ -0,0 +1,31 @@ +FROM nfcore/base:1.14 +LABEL \ + author="Maxime Garcia" \ + description="VEP image for nf-core pipelines" \ + maintainer="maxime.garcia@scilifelab.se" + +# Install the conda environment +COPY environment.yml / +RUN conda env create -f /environment.yml && conda clean -a + +# Setup default ARG variables +ARG GENOME=GRCh38 +ARG SPECIES=homo_sapiens +ARG VEP_CACHE_VERSION=108 +ARG VEP_VERSION=108.2 + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-vep-${VEP_VERSION}/bin:$PATH + +# Download Genome +RUN vep_install \ + -a c \ + -c .vep \ + -s ${SPECIES} \ + -y ${GENOME} \ + --CACHE_VERSION ${VEP_CACHE_VERSION} \ + --CONVERT \ + --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-vep-${VEP_VERSION} > nf-core-vep-${VEP_VERSION}.yml diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh new file mode 100755 index 0000000..d3d41a8 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/build.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Build and push all containers + +build_push() { + GENOME=$1 + SPECIES=$2 + VEP_CACHE_VERSION=$3 + VEP_VERSION=$4 + + docker build \ + . 
\ + -t nfcore/vep:${VEP_VERSION}.${GENOME} \ + --build-arg GENOME=${GENOME} \ + --build-arg SPECIES=${SPECIES} \ + --build-arg VEP_CACHE_VERSION=${VEP_CACHE_VERSION} \ + --build-arg VEP_VERSION=${VEP_VERSION} + + docker push nfcore/vep:${VEP_VERSION}.${GENOME} +} + +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "108.2" +build_push "GRCh37" "homo_sapiens" "108" "108.2" +build_push "GRCh38" "homo_sapiens" "108" "108.2" +build_push "GRCm38" "mus_musculus" "102" "108.2" +build_push "GRCm39" "mus_musculus" "108" "108.2" +build_push "R64-1-1" "saccharomyces_cerevisiae" "108" "108.2" +build_push "UMD3.1" "bos_taurus" "94" "108.2" +build_push "WBcel235" "caenorhabditis_elegans" "108" "108.2" diff --git a/modules/nf-core/modules/ensemblvep/download/environment.yml b/modules/nf-core/modules/ensemblvep/download/environment.yml new file mode 100644 index 0000000..beebaca --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/download/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_download +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/modules/ensemblvep/download/main.nf b/modules/nf-core/modules/ensemblvep/download/main.nf new file mode 100644 index 0000000..f0776c4 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/download/main.nf @@ -0,0 +1,45 @@ +process ENSEMBLVEP_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'quay.io/biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(x) + + output: + path("vep_cache") , emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + vep_install \\ + --CACHEDIR vep_cache \\ + --SPECIES $params.species \\ + --ASSEMBLY $params.vep_genome \\ + --CACHE_VERSION $params.vep_cache_version \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir vep_cache + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/ensemblvep/download/meta.yml b/modules/nf-core/modules/ensemblvep/download/meta.yml new file mode 100644 index 0000000..a4277ad --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/download/meta.yml @@ -0,0 +1,45 @@ +name: ensemblvep_download +description: Ensembl Variant Effect Predictor (VEP). The cache downloading options are controlled through `task.ext.args`. +keywords: + - annotation + - cache + - download +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - assembly: + type: string + description: | + Genome assembly + - species: + type: string + description: | + Species + - cache_version: + type: string + description: | + cache version +output: + - cache: + type: file + description: cache + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/modules/ensemblvep/environment.yml b/modules/nf-core/modules/ensemblvep/environment.yml new file mode 100644 index 0000000..12e5917 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/environment.yml @@ -0,0 +1,10 @@ +# You can use this file to create a conda environment for this module: +# conda env create -f environment.yml +name: nf-core-vep-108.2 +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::ensembl-vep=108.2 diff --git a/modules/nf-core/modules/ensemblvep/filtervep/environment.yml b/modules/nf-core/modules/ensemblvep/filtervep/environment.yml new file mode 100644 index 0000000..d84dc89 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/filtervep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_filtervep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/modules/ensemblvep/filtervep/main.nf b/modules/nf-core/modules/ensemblvep/filtervep/main.nf new file mode 100644 index 0000000..53abf77 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/filtervep/main.nf @@ -0,0 +1,50 @@ +process ENSEMBLVEP_FILTERVEP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(input) + path (feature_file) + + output: + tuple val(meta), path("*.${extension}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + filter_vep \\ + $args \\ + --input_file $input \\ + --output_file ${prefix}.${extension} \\ + --only_matched + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} + diff --git a/modules/nf-core/modules/ensemblvep/filtervep/meta.yml b/modules/nf-core/modules/ensemblvep/filtervep/meta.yml new file mode 100644 index 0000000..bde3aa1 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/filtervep/meta.yml @@ -0,0 +1,46 @@ +name: ensemblvep_filtervep +description: Filter variants based on Ensembl Variant Effect Predictor (VEP) annotations. +keywords: + - annotation + - vcf + - tab + - filter +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. 
+ homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - input: + type: file + description: VCF/TAB file annotated with vep + pattern: "*.{vcf,tab,tsv,txt}" + - feature_file: + type: file + description: File containing features on separate lines. To be used with --filter option. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: VCF/TAB file + pattern: "*.{vcf,tab,txt,tsv}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/modules/ensemblvep/vep/environment.yml b/modules/nf-core/modules/ensemblvep/vep/environment.yml new file mode 100644 index 0000000..7a12774 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/vep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_vep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/modules/ensemblvep/vep/main.nf b/modules/nf-core/modules/ensemblvep/vep/main.nf new file mode 100644 index 0000000..1c7eca1 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/vep/main.nf @@ -0,0 +1,64 @@ +process ENSEMBLVEP_VEP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'quay.io/biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(vcf) + path(cache) + tuple path(fasta), path(index) + + output: + tuple val(meta), path("*.vcf.gz"), path("*.vcf.gz.tbi") , emit: vcf + path "*.summary.html" , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta $fasta" : "" + def dir_cache = args.contains("--offline") ? "--dir_cache ${cache} --cache" : "" + """ + vep \\ + -i $vcf \\ + -o ${prefix}.vep.vcf.gz \\ + --format vcf \\ + --vcf \\ + --compress_output bgzip \\ + $args \\ + $reference \\ + --assembly $params.vep_genome \\ + --species $params.species \\ + --cache_version $params.vep_cache_version \\ + $dir_cache \\ + --stats_file ${prefix}.summary.html + + tabix ${prefix}.vep.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.tab.gz + touch ${prefix}.json.gz + touch ${prefix}.summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/ensemblvep/vep/meta.yml b/modules/nf-core/modules/ensemblvep/vep/meta.yml new file mode 100644 index 0000000..d8ff8d1 --- /dev/null +++ b/modules/nf-core/modules/ensemblvep/vep/meta.yml @@ -0,0 +1,92 @@ +name: ensemblvep_vep +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. 
+keywords: + - annotation + - vcf + - json + - tab +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) + - genome: + type: string + description: | + which genome to annotate with + - species: + type: string + description: | + which species to annotate with + - cache_version: + type: integer + description: | + which version of the cache to annotate with + - cache: + type: file + description: | + path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" + - extra_files: + type: file + description: | + path to file(s) needed for plugins (optional) +output: + - vcf: + type: file + description: | + annotated vcf (optional) + pattern: "*.ann.vcf.gz" + - tab: + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab.gz" + - json: + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json.gz" + - report: + type: file + description: VEP report file + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/nextflow.config b/nextflow.config index d066f23..e7c342d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,8 +53,16 @@ params { genome = "GRCh38" // use igenome or refgenie genome bundle - GRCh38, GRCh37, hg38 or hg37 // Annovar files + annotation_tool = "vep" // or annovar annovar_path = null // path/to/annovar + vep_out_format = 'vcf' + outdir_cache = null // No output directory for cache + vep_cache = null // No directory for VEP cache + species = null + vep_include_fasta = true // Use fasta file for annotation with VEP + download_cache = false // DO NOT Download annotation cache + // Basic Annotation files k_genome = null // 1000k genome indels (integrated calls in vcf.gz format along with index) dbsnp_indel = null // dbSNP indels ( in vcf.gz format along with index) diff --git a/subworkflows/local/indel_annotation.nf b/subworkflows/local/indel_annotation.nf index 20c3375..0a8a3e5 100644 --- a/subworkflows/local/indel_annotation.nf +++ b/subworkflows/local/indel_annotation.nf @@ -4,11 +4,13 @@ params.options = [:] -include { ANNOTATE_VCF } from '../../modules/local/annotate_vcf.nf' addParams( options: params.options ) 
-include { ANNOVAR } from '../../modules/local/annovar.nf' addParams( options: params.options ) -include { INDEL_RELIABILITY_PIPE } from '../../modules/local/indel_reliability_pipe.nf' addParams( options: params.options ) -include { CONFIDENCE_ANNOTATION } from '../../modules/local/confidence_annotation.nf' addParams( options: params.options ) -include { ANNOTATION_PIPES } from '../../modules/local/annotation_pipes.nf' addParams( options: params.options ) +include { ANNOTATE_VCF } from '../../modules/local/annotate_vcf.nf' addParams( options: params.options ) +include { ANNOVAR } from '../../modules/local/annovar.nf' addParams( options: params.options ) +include { INDEL_RELIABILITY_PIPE } from '../../modules/local/indel_reliability_pipe.nf' addParams( options: params.options ) +include { CONFIDENCE_ANNOTATION } from '../../modules/local/confidence_annotation.nf' addParams( options: params.options ) +include { ANNOTATION_PIPES } from '../../modules/local/annotation_pipes.nf' addParams( options: params.options ) +include { ENSEMBLVEP_VEP } from '../../modules/nf-core/modules/ensemblvep/vep/main' addParams( options: params.options ) +include { ENSEMBLVEP_DOWNLOAD } from '../../modules/nf-core/modules/ensemblvep/download/main' addParams( options: params.options ) workflow INDEL_ANNOTATION { @@ -43,7 +45,8 @@ workflow INDEL_ANNOTATION { encode_tfbs // channel: [file.bed.gz, file.bed.gz.tbi] mirnas_sncrnas // channel: [file.bed.gz, file.bed.gz.tbi] chr_prefix // val channel: [prefix] - ref_type + ref + vep_cache main: @@ -72,25 +75,46 @@ workflow INDEL_ANNOTATION { .join(ANNOTATE_VCF.out.forannovar) .set{ input_ch} - // - // MODULE: ANNOVAR - // - // RUN annovar, processAnnovarOutput.pl and newCols2vcf.pl: annovar annotates and classifies the variants, - // perl scripts re-creates vcfs. 
- ANNOVAR( - input_ch, - annodb, - chr_prefix - ) - logs = logs.mix(ANNOVAR.out.log) - versions = versions.mix(ANNOVAR.out.versions) + if (params.annotation_tool.contains("annovar")){ + // + // MODULE: ANNOVAR + // + // RUN annovar, processAnnovarOutput.pl and newCols2vcf.pl: annovar annotates and classifies the variants, + // perl scripts re-creates vcfs. + ANNOVAR( + input_ch, + annodb, + chr_prefix + ) + logs = logs.mix(ANNOVAR.out.log) + versions = versions.mix(ANNOVAR.out.versions) + annotated_vcf = ANNOVAR.out.vcf + } + else{ + + if(params.download_cache){ + ENSEMBLVEP_DOWNLOAD( + input_ch.map{ it -> tuple( it[0], it[1])} + ) + versions = versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) + vep_cache = ENSEMBLVEP_DOWNLOAD.out.cache + } + + ENSEMBLVEP_VEP( + ANNOTATE_VCF.out.unziped_vcf, + vep_cache, + ref + ) + versions = versions.mix(ENSEMBLVEP_VEP.out.versions) + annotated_vcf = ENSEMBLVEP_VEP.out.vcf.map{ it -> tuple( it[0], it[1], [])} + } // // MODULE: INDEL_RELIABILITY_PIPE // // RUN annotate_vcf.pl : BED files are used to annotate variants INDEL_RELIABILITY_PIPE( - ANNOVAR.out.vcf, + annotated_vcf, repeatmasker, dacblacklist, dukeexcluded, @@ -105,6 +129,12 @@ workflow INDEL_ANNOTATION { // MODULE: CONFIDENCE_ANNOTATION // // RUN: confidenceAnnotation_Indels.py : Confidence annotation will be added to the variants + if (params.fasta.contains("38")){ + ref_type = "hg38" + } + else{ + ref_type = "hg37" + } input_ch = vcf_ch.join(INDEL_RELIABILITY_PIPE.out.vcf) input_ch = input_ch.map{ it -> tuple( it[0], it[3], it[4], it[5], it[6])} CONFIDENCE_ANNOTATION( diff --git a/testdata/.DS_Store b/testdata/.DS_Store new file mode 100644 index 0000000..a273d59 Binary files /dev/null and b/testdata/.DS_Store differ diff --git a/testdata/annotation_files/dbsnp_v151_GRCh38.INDEL.test.vcf.gz b/testdata/annotation_files/dbsnp_v151_GRCh38.INDEL.test.vcf.gz new file mode 100644 index 0000000..a240e1e Binary files /dev/null and 
b/testdata/annotation_files/dbsnp_v151_GRCh38.INDEL.test.vcf.gz differ diff --git a/workflows/platypusindelcalling.nf b/workflows/platypusindelcalling.nf index 9774ab0..dcd7d8a 100644 --- a/workflows/platypusindelcalling.nf +++ b/workflows/platypusindelcalling.nf @@ -14,8 +14,7 @@ def checkPathParamList = [ params.input, params.fasta, params.multiqc_config] -def checkPathParamList_annotation = [params.annovar_path, - params.local_control_wgs, +def checkPathParamList_annotation = [params.local_control_wgs, params.local_control_wes, params.k_genome, params.dbsnp_indel, @@ -47,6 +46,9 @@ if ((params.runIndelDeepAnnotation) && (!params.enchancer_file && !params.cpgisl log.error "Please specify at least one annotation file to perform INDEL Deep Annotation" exit 1 } +if (params.annotation_tool.contains("annovar")){ + file(params.annovar_path, checkIfExists: true) +} //// Check mandatory parameters @@ -55,13 +57,6 @@ ref = Channel.fromPath([params.fasta,params.fasta_fai], checkIfExists chr_prefix = Channel.value(params.chr_prefix) chrlength = params.chrom_sizes ? Channel.fromPath(params.chrom_sizes, checkIfExists: true) : Channel.empty() -if (params.fasta.contains("38")){ - ref_type = "hg38" -} -else{ - ref_type = "hg37" -} - // Input samplesheet if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } @@ -83,8 +78,11 @@ gnomadgenomes = params.gnomad_genomes ? Channel.fromPath([params.gno gnomadexomes = params.gnomad_exomes ? Channel.fromPath([params.gnomad_exomes, params.gnomad_exomes + '.tbi'], checkIfExists: true).collect() : Channel.of([],[]) // Annovar table folder -annodb = params.annovar_path ? Channel.fromPath(params.annovar_path + '/humandb/', checkIfExists: true ) +annodb = params.annovar_path ? Channel.fromPath(params.annovar_path + '/humandb/') : Channel.empty() +// VEP cache +vep_cache_db = params.vep_cache ? 
Channel.fromPath(params.vep_cache).collect() : [] + // Realiability files repeatmasker = params.repeat_masker ? Channel.fromPath([params.repeat_masker, params.repeat_masker + '.tbi'], checkIfExists: true).collect() : Channel.of([],[]) @@ -273,7 +271,8 @@ workflow PLATYPUSINDELCALLING { encode_tfbs, mirna_sncrnas, chr_prefix, - ref_type + ref, + vep_cache_db ) ch_versions = ch_versions.mix(INDEL_ANNOTATION.out.versions)