From 070a9cbefc1acbb22738bdcd2d89ae821efdcc78 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Sep 2024 13:50:31 +0200 Subject: [PATCH] fix an issue with the validation workflow --- .../local/vcf_validate_small_variants/main.nf | 32 ------------------- workflows/germline.nf | 21 ++++-------- 2 files changed, 7 insertions(+), 46 deletions(-) diff --git a/subworkflows/local/vcf_validate_small_variants/main.nf b/subworkflows/local/vcf_validate_small_variants/main.nf index eaa17579..63ca83fd 100644 --- a/subworkflows/local/vcf_validate_small_variants/main.nf +++ b/subworkflows/local/vcf_validate_small_variants/main.nf @@ -14,38 +14,6 @@ workflow VCF_VALIDATE_SMALL_VARIANTS { ch_versions = Channel.empty() - happy_vcf = Channel.empty() - happy_tbi = Channel.empty() - happy_indel_roc = Channel.empty() - happy_indel_roc_pass = Channel.empty() - happy_snp_roc = Channel.empty() - happy_snp_roc_pass = Channel.empty() - happy_roc = Channel.empty() - happy_summary = Channel.empty() - happy_extended_csv = Channel.empty() - - vcfeval_true_positive_vcf = Channel.empty() - vcfeval_true_positive_vcf_tbi = Channel.empty() - vcfeval_false_negative_vcf = Channel.empty() - vcfeval_false_negative_vcf_tbi = Channel.empty() - vcfeval_false_positive_vcf = Channel.empty() - vcfeval_false_positive_vcf_tbi = Channel.empty() - vcfeval_true_positive_baseline_vcf = Channel.empty() - vcfeval_true_positive_baseline_vcf_tbi = Channel.empty() - vcfeval_summary = Channel.empty() - vcfeval_phasing = Channel.empty() - vcfeval_snp_roc = Channel.empty() - vcfeval_non_snp_roc = Channel.empty() - vcfeval_weighted_roc = Channel.empty() - - rtgtools_snp_png_rocplot = Channel.empty() - rtgtools_non_snp_png_rocplot = Channel.empty() - rtgtools_weighted_png_rocplot = Channel.empty() - - rtgtools_snp_svg_rocplot = Channel.empty() - rtgtools_non_snp_svg_rocplot = Channel.empty() - rtgtools_weighted_svg_rocplot = Channel.empty() - ch_input = ch_vcf.join(ch_beds, failOnDuplicate: true, failOnMismatch: true) RTGTOOLS_VCFEVAL( diff --git a/workflows/germline.nf b/workflows/germline.nf index ec5987a7..fe3e04b6 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -342,12 +342,9 @@ workflow GERMLINE { gvcf: [new_meta, gvcf, tbi] // Optional channel containing the GVCFs and their optional indices cram: [new_meta, cram, crai] // Mandatory channel containing the CRAM files and their optional indices roi: [new_meta, roi_file] // Optional channel containing the ROI BED files for WES samples - family_samples: [meta.family, meta.family_samples.tokenize(",")] // A channel containing the samples per family } .set { ch_input } - ch_family_samples = ch_input.family_samples.distinct() - // // Create the GVCF index if it's missing // @@ -595,14 +592,15 @@ workflow GERMLINE { ch_input.truth_variants .map { meta, vcf, tbi, bed -> - [ groupKey(meta, meta.duplicate_count), vcf, tbi, bed ] + def new_meta = meta - meta.subMap("duplicate_count") + [ groupKey(new_meta, meta.duplicate_count), vcf, tbi, bed ] } .groupTuple() .map { meta, vcf, tbi, bed -> // Get only one VCF for samples that were given multiple times - one_vcf = vcf.find { vcf_file -> vcf_file != [] } ?: [] - one_tbi = tbi.find { tbi_file -> tbi_file != [] } ?: [] - one_bed = bed.find { bed_file -> bed_file != [] } ?: [] + def one_vcf = vcf.find { vcf_file -> vcf_file != [] } ?: [] + def one_tbi = tbi.find { tbi_file -> tbi_file != [] } ?: [] + def one_bed = bed.find { bed_file -> bed_file != [] } ?: [] [ meta, one_vcf, one_tbi, one_bed ] } .branch { meta, vcf, tbi, bed -> @@ -637,12 +635,7 @@ workflow GERMLINE { ch_final_vcfs .map { meta, vcf, tbi -> def new_meta = meta - meta.subMap("family_samples") - [ meta.family, new_meta, vcf, tbi ] - } - .combine(ch_family_samples, by:0) - .map { family, meta, vcf, tbi, samples -> - def sample = meta.sample ? [meta.sample] : samples - [ meta, vcf, tbi, sample ] + [ new_meta, vcf, tbi, meta.family_samples.tokenize(",") ] } .transpose(by: 3) .map { meta, vcf, tbi, sample -> @@ -654,7 +647,7 @@ workflow GERMLINE { ] [ new_meta, vcf, tbi ] } - .combine(ch_truths, by:0) + .join(ch_truths, failOnMismatch:true, failOnDuplicate:true) .filter { meta, vcf, tbi, truth_vcf, truth_tbi, truth_bed -> // Filter out all samples that have no truth VCF truth_vcf != []