Skip to content

Commit

Permalink
Merge pull request #26 from ghga-de/fix-annotation-file-input
Browse files Browse the repository at this point in the history
final annotation fix
  • Loading branch information
kubranarci authored Dec 12, 2023
2 parents a1a8338 + 383c7d9 commit 84fbfdf
Show file tree
Hide file tree
Showing 13 changed files with 148 additions and 119 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ The pipeline has 6 main steps: Indel calling using platypus, basic annotations,

Reliability and confidence annotations: This is an optional step that performs mappability, hiseq, selfchain and repeat region checks to assess the reliability and confidence of those scores.


3. Deep Annotation (--runIndelDeepAnnotation True):

If basic annotations are applied, an optional extra step can add a number of further indel annotations from databases such as enhancer, COSMIC, miRBase and ENCODE.
Expand Down Expand Up @@ -207,7 +206,6 @@ We thank the following people for their extensive assistance in the development

- Nagarajan Paramasivam (@NagaComBio) [email protected]


**TODO**

<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
Expand Down
9 changes: 8 additions & 1 deletion modules/local/annotate_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ process ANNOTATE_VCF {

input:
tuple val(meta), path(vcf), path(vcf_tbi)
tuple val(meta2),path(kgenome),path(kgenome_i),path(dbsnpindel),path(dbsnpindel_i),path(exac),path(evs),path(evs_i),path(exac_i),path(localcontrolwgs),path(localcontrolwgs_i),path(localcontrolwes),path(localcontrolwes_i),path(gnomadgenomes),path(gnomadgenomes_i),path(gnomadexomes),path(gnomadexomes_i)
tuple path(kgenome),path(kgenome_i)
tuple path(dbsnpindel),path(dbsnpindel_i)
tuple path(exac),path(exac_i)
tuple path(evs),path(evs_i)
tuple path(localcontrolwgs),path(localcontrolwgs_i)
tuple path(localcontrolwes),path(localcontrolwes_i)
tuple path(gnomadgenomes),path(gnomadgenomes_i)
tuple path(gnomadexomes),path(gnomadexomes_i)
val (chrprefix)

output:
Expand Down
13 changes: 12 additions & 1 deletion modules/local/annotation_pipes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,18 @@ process ANNOTATION_PIPES {

input:
tuple val(meta), path(vcf), path(vcf_tbi)
tuple val(meta2),path(enchangers),path(enchangers_i),path(cpgislands),path(cpgislands_i),path(tfbscons),path(tfbscons_i),path(encode_dnase),path(encode_dnase_i),path(mirnas_snornas),path(mirnas_snornas_i),path(cosmic),file(cosmic_i),path(mirbase),path(mirbase_i),path(mir_targets),path(mir_targets_i),path(cgi_mountains),path(cgi_mountains_i),path(phastconselem),path(phastconselem_i),path(encode_tfbs),path(encode_tfbs_i),path(mirnas_sncrnas),path(mirnas_sncrnas_i)
tuple path(enchangers),path(enchangers_i)
tuple path(cpgislands),path(cpgislands_i)
tuple path(tfbscons),path(tfbscons_i)
tuple path(encode_dnase),path(encode_dnase_i)
tuple path(mirnas_snornas),path(mirnas_snornas_i)
tuple path(cosmic),file(cosmic_i)
tuple path(mirbase),path(mirbase_i)
tuple path(mir_targets),path(mir_targets_i)
tuple path(cgi_mountains),path(cgi_mountains_i)
tuple path(phastconselem),path(phastconselem_i)
tuple path(encode_tfbs),path(encode_tfbs_i)
tuple path(mirnas_sncrnas),path(mirnas_sncrnas_i)

output:
tuple val(meta), path('*.deepanno.vcf.gz'), path('*.deepanno.vcf.gz.tbi') , emit: vcf
Expand Down
5 changes: 2 additions & 3 deletions modules/local/annovar.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
//# Gene annotation with annovar
// PROCESS ANNOVAR table_annovar
// working database is annovar_Feb2016

process ANNOVAR {
tag "$meta.id"
Expand All @@ -11,8 +10,8 @@ process ANNOVAR {
'docker://kubran/odcf_platypusindelcalling:v1' :'kubran/odcf_platypusindelcalling:v1' }"

input:
tuple val(meta) , path(ch_vcf), path(annovar_bed)
each file(annovar_table)
tuple val(meta) , path(ch_vcf), path(annovar_bed)
path(annovar_table)
val(chrprefix)

output:
Expand Down
8 changes: 7 additions & 1 deletion modules/local/indel_reliability_pipe.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ process INDEL_RELIABILITY_PIPE {

input:
tuple val(meta),path(ch_vcf),path(ch_vcf_i)
tuple val(meta2),path(repeatmasker),path(repeatmasker_i),path(dacblacklist),path(dacblacklist_i),path(dukeexcluded),path(dukeexcluded_i),path(hiseqdepth),path(hiseqdepth_i),path(selfchain),path(selfchain_i),path(mapability),path(mapability_i),path(simpletandemrepeats),path(simpletandemrepeats_i)
tuple path(repeatmasker),path(repeatmasker_i)
tuple path(dacblacklist),path(dacblacklist_i)
tuple path(dukeexcluded),path(dukeexcluded_i)
tuple path(hiseqdepth),path(hiseqdepth_i)
tuple path(selfchain),path(selfchain_i)
tuple path(mapability),path(mapability_i)
tuple path(simpletandemrepeats),path(simpletandemrepeats_i)

output:
tuple val(meta), path('*.annotated.vcf.gz'), path('*.annotated.vcf.gz.tbi') , emit: vcf
Expand Down
9 changes: 6 additions & 3 deletions modules/local/sample_swap.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ process SAMPLE_SWAP {
input:
tuple val(meta) , path(ch_vcf), path(ch_vcf_i), val(tumorname), val(controlname)
tuple path(ref) , path(ref_fai)
each path(chrlength_file)
each path(genemodel)
tuple val(meta2),path(localcontroltindawgs), path(localcontroltindawgs_tbi),path(localcontroltindawes),path(localcontroltindawes_tbi),path(gnomadgenomes),path(gnomadgenomes_tbi),path(gnomadexomes),path(gnomadexomes_tbi)
path(chrlength_file)
path(genemodel)
tuple path(localcontroltindawgs), path(localcontroltindawgs_tbi)
tuple path(localcontroltindawes),path(localcontroltindawes_tbi)
tuple path(gnomadgenomes),path(gnomadgenomes_tbi)
tuple path(gnomadexomes),path(gnomadexomes_tbi)
val chrprefix

output:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/visualize.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process VISUALIZE {
input:
tuple val(meta) ,path(vcf)
tuple path(ref) ,path(ref_fai)
tuple val(meta2),path(repeatmasker) ,path(repeatmasker_tbi)
tuple path(repeatmasker) ,path(repeatmasker_tbi)

output:
tuple val(meta) , path('*.indel_somatic_functional_combined.pdf') , emit: pdf, optional: true
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ params {
crit_evs_maxmaf = 0 // default value: 1.0
crit_1kgenomes_maxmaf = 0 // default value: 0.01
crit_localcontrol_maxmaf = 0 // default value: 0.05
filter_non_clinic = false // default value: false

// screenshot options
max_var_screenshots = 100 // default value
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,12 @@
"default": "",
"fa_icon": "fas fa-annotation",
"description": "Max MAF for Local Control filtration. Only applies if annotation is done"
},
"filter_non_clinic": {
"type": "boolean",
"default": "",
"fa_icon": "fas fa-annotation",
"description": "If true, dbSNP filtration will be applied. Only applies if annotation is done"
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion run_pipeline.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
module load nextflow/22.07.1-edge
nextflow run main.nf -profile test,singularity --input testdata/samplesheet_test.csv
nextflow run main.nf -profile dkfz_cluster_38,singularity --input testdata/samplesheet_test.csv -resume
39 changes: 32 additions & 7 deletions subworkflows/local/indel_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,33 @@ include { ENSEMBLVEP_DOWNLOAD } from '../../modules/nf-core/modules/ensemblve
workflow INDEL_ANNOTATION {
take:
vcf_ch // channel: [val(meta), vcf.gz, vcf.gz.tbi ,val(tumorname), val(controlname) ]
annotate_vcf_ref // channel: [val(meta2),file(kgenome),file(kgenome_i),file(dbsnpindel),file(dbsnpindel_i),file(exac),file(exac_i),file(evs),file(evs_i),file(localcontrolwgs),file(localcontrolwgs_i),file(localcontrolwes),file(localcontrolwes_i),file(gnomadgenomes),file(gnomadgenomes_i),file(gnomadexomes),file(gnomadexomes_i)]
realibility_ref // channel: [val(meta2),file(repeatmasker),file(repeatmasker_i),file(dacblacklist),file(dacblacklist_i),file(dukeexcluded),file(dukeexcluded_i),file(hiseqdepth),file(hiseqdepth_i),file(selfchain),file(selfchain_i),file(mapability),file(mapability_i),file(simpletandemrepeats),file(simpletandemrepeats_i)]
deepanno_ref // channel: [val(meta2),file(enchangers),file(enchangers_i),file(cpgislands),file(cpgislands_i),file(tfbscons),file(tfbscons_i),tuple file(encode_dnase),file(encode_dnase_i),file(mirnas_snornas),file(mirnas_snornas_i),file(cosmic),file(cosmic_i),file(mirbase),file(mirbase_i),file(mir_targets),file(mir_targets_i),file(cgi_mountains),file(cgi_mountains_i),file(phastconselem),file(phastconselem_i),file(encode_tfbs),file(encode_tfbs_i),file(mirnas_sncrnas),file(mirnas_sncrnas_i)]
kgenome // channel: [file,index]
dbsnpindel // channel: [file,index]
exac // channel: [file,index]
evs // channel: [file,index]
localcontrolwgs // channel: [file,index]
localcontrolwes // channel: [file,index]
gnomadgenomes // channel: [file,index]
gnomadexomes // channel: [file,index]
repeatmasker // channel: [file,index]
dacblacklist // channel: [file,index]
dukeexcluded // channel: [file,index]
hiseqdepth // channel: [file,index]
selfchain // channel: [file,index]
mapability // channel: [file,index]
simpletandemrepeats // channel: [file,index]
enchangers // channel: [file,index]
cpgislands // channel: [file,index]
tfbscons // channel: [file,index]
encode_dnase // channel: [file,index]
mirnas_snornas // channel: [file,index]
cosmic // channel: [file,index]
mirbase // channel: [file,index]
mir_targets // channel: [file,index]
cgi_mountains // channel: [file,index]
phastconselem // channel: [file,index]
encode_tfbs // channel: [file,index]
mirnas_sncrnas // channel: [file,index]
chr_prefix // val channel: [prefix]
ref // channel [fasta,fai]
annodb // channel: [table_annovar_dir]
Expand All @@ -35,7 +59,7 @@ workflow INDEL_ANNOTATION {
input_ch = vcf_ch.map{ it -> tuple( it[0], it[1], it[2])}
ANNOTATE_VCF (
input_ch,
annotate_vcf_ref,
kgenome,dbsnpindel,exac,evs,localcontrolwgs,localcontrolwes,gnomadgenomes,gnomadexomes,
chr_prefix
)
versions = versions.mix(ANNOTATE_VCF.out.versions)
Expand Down Expand Up @@ -84,7 +108,7 @@ workflow INDEL_ANNOTATION {
// RUN annotate_vcf.pl : BED files are used to annotate variants
INDEL_RELIABILITY_PIPE(
annotated_vcf,
realibility_ref
repeatmasker,dacblacklist,dukeexcluded,hiseqdepth,selfchain,mapability,simpletandemrepeats
)
versions = versions.mix(INDEL_RELIABILITY_PIPE.out.versions)

Expand All @@ -101,7 +125,8 @@ workflow INDEL_ANNOTATION {
input_ch = vcf_ch.join(INDEL_RELIABILITY_PIPE.out.vcf)
input_ch = input_ch.map{ it -> tuple( it[0], it[3], it[4], it[5], it[6])}
CONFIDENCE_ANNOTATION(
input_ch, ref_type
input_ch,
ref_type
)
ann_vcf_ch = CONFIDENCE_ANNOTATION.out.vcf_ann
versions = versions.mix(CONFIDENCE_ANNOTATION.out.versions)
Expand All @@ -114,7 +139,7 @@ workflow INDEL_ANNOTATION {
//
ANNOTATION_PIPES (
ann_vcf_ch,
deepanno_ref
enchangers, cpgislands,tfbscons,encode_dnase,mirnas_snornas,cosmic,mirbase,mir_targets,cgi_mountains,phastconselem,encode_tfbs,mirnas_sncrnas
)
ann_vcf_ch = ANNOTATION_PIPES.out.vcf
versions = versions.mix(ANNOTATION_PIPES.out.versions)
Expand Down
4 changes: 3 additions & 1 deletion testdata/samplesheet_test.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
sample,tumor,tumor_index,control,control_index
test_withcontrol,testdata/WES_LL_T_1_test.bam,testdata/WES_LL_T_1_test.bam.bai,testdata/WES_LL_N_1_test.bam,testdata/WES_LL_N_1_test.bam.bai
test,testdata/WES_LL_T_1_test.bam,testdata/WES_LL_T_1_test.bam.bai,testdata/WES_LL_N_1_test.bam,testdata/WES_LL_N_1_test.bam.bai
test_2,testdata/WES_LL_T_1_test.bam,testdata/WES_LL_T_1_test.bam.bai,testdata/WES_LL_N_1_test.bam,testdata/WES_LL_N_1_test.bam.bai
test_3,testdata/WES_LL_T_1_test.bam,testdata/WES_LL_T_1_test.bam.bai,testdata/WES_LL_N_1_test.bam,testdata/WES_LL_N_1_test.bam.bai
Loading

0 comments on commit 84fbfdf

Please sign in to comment.