From 25c43e7f34456f037d59efd2848bb16ea1ce6191 Mon Sep 17 00:00:00 2001 From: wdecoster Date: Tue, 2 Jul 2024 14:37:52 +0200 Subject: [PATCH] handle case with one read and --support 1 should solve issue reported in https://github.com/wdecoster/STRdust/issues/4 --- src/genotype.rs | 13 +++++++++++++ src/phase_insertions.rs | 4 ++-- src/vcf.rs | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/genotype.rs b/src/genotype.rs index c5c7c55..a151e3f 100644 --- a/src/genotype.rs +++ b/src/genotype.rs @@ -145,6 +145,19 @@ fn genotype_repeat( insertions.len().to_string(), )); } + // handle the special case where there is only 1 read for the repeat (but minimal support is 1 so it passes) + // no need to make a consensus or phase, just report the single read + if insertions.len() == 1 { + return Ok(crate::vcf::VCFRecord::single_read( + &insertions[0], + repeat, + &repeat_ref_seq, + all_insertions, + reads.ps, + flags, + )); + } + debug!("{repeat}: Phasing {} insertions", insertions.len(),); let phased = crate::phase_insertions::split(&insertions, repeat, args.find_outliers); match phased.hap2 { diff --git a/src/phase_insertions.rs b/src/phase_insertions.rs index b351139..32822c2 100644 --- a/src/phase_insertions.rs +++ b/src/phase_insertions.rs @@ -45,8 +45,8 @@ pub fn split( let mut clusters = vec![]; // create a vector with candidate haplotype-clusters let mut haplotype_clusters = vec![]; - // clusters have to represent at least 20% of the reads - let min_cluster_size = (insertions.len() as f32 / 10.0) as usize; + // clusters have to represent at least 10% of the reads, but never less than 1 + let min_cluster_size = max((insertions.len() as f32 / 10.0) as usize, 1); debug!("{repeat}: Minimum cluster size: {}", min_cluster_size); for (index, step) in dend.steps().iter().enumerate() { diff --git a/src/vcf.rs b/src/vcf.rs index 5f5960c..93a0db7 100644 --- a/src/vcf.rs +++ b/src/vcf.rs @@ -184,6 +184,44 @@ impl VCFRecord { allele: (".".to_string(), ".".to_string()), } } + + pub fn single_read( + seq: &str, + repeat: &crate::repeats::RepeatInterval, + repeat_ref_seq: &str, + all_insertions: Option>, + ps: Option, + flag: Vec, + ) -> VCFRecord { + let somatic_info_field = match all_insertions { + Some(somatic_insertions) => { + format!(";SEQS={}", somatic_insertions.join(",")) + } + None => "".to_string(), + }; + VCFRecord { + chrom: repeat.chrom.clone(), + start: repeat.start, + end: repeat.end, + ref_seq: repeat_ref_seq.to_string(), + alt_seq: Some(seq.to_string()), + length: ((seq.len() as u32 - (repeat.end - repeat.start)).to_string(), ".".to_string()), + full_length: ((seq.len() as u32).to_string(), ".".to_string()), + support: ("1".to_string(), ".".to_string()), + std_dev: (".".to_string(), ".".to_string()), + score: (".".to_string(), ".".to_string()), + somatic_info_field, + outliers: "".to_string(), + ps, + flags : if flag.is_empty() { + "".to_string() + } else { + format!("{};", flag.join(";")) + }, + allele: (".".to_string(), ".".to_string()), + } + + } } impl fmt::Display for VCFRecord {