diff --git a/.gitignore b/.gitignore index 01714d65..ea644ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ *.vcf* target *.log +*.jar # Intellij .idea/ diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Avocado.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Avocado.scala index 1aedc574..bc967692 100644 --- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Avocado.scala +++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Avocado.scala @@ -72,6 +72,9 @@ class AvocadoArgs extends Args4jBase with ParquetArgs { @Argument(metaVar = "CONFIG", required = true, usage = "avocado configuration file", index = 3) var configFile: String = _ + @Argument(metaVar = "NORMAL", required = false, usage = "ADAM normal data", index = 4) + var normalInput: String = _ + @option(name = "-debug", usage = "If set, prints a higher level of debug output.") var debug = false @@ -204,10 +207,18 @@ class Avocado(protected val args: AvocadoArgs) extends BDGSparkCommand[AvocadoAr log.info("Loading reads in from " + args.readInput) // load in reads from ADAM file - val reads: RDD[AlignmentRecord] = LoadReads.time { + var reads: RDD[AlignmentRecord] = LoadReads.time { Input(sc, args.readInput, reference, config) } + // load in reads from normal ADAM file if there is one + var normal: RDD[AlignmentRecord] = null + + if (args.normalInput != null) { + normal = Input(sc, args.normalInput, reference, config) + reads = sc.union(reads, normal) + } + // create stats/config item val stats = new AvocadoConfigAndStats(sc, args.debug, reads, reference) @@ -233,4 +244,4 @@ class Avocado(protected val args: AvocadoArgs) extends BDGSparkCommand[AvocadoAr args.disableDictionaryEncoding) } } -} +} \ No newline at end of file diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/algorithms/mutect/LikelihoodModel.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/algorithms/mutect/LikelihoodModel.scala new file mode 100644 index 00000000..f0c7b60f --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/algorithms/mutect/LikelihoodModel.scala @@ -0,0 +1,94 @@ +/* + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bdgenomics.avocado.algorithms.mutect + +import org.bdgenomics.avocado.models.AlleleObservation +import org.bdgenomics.adam.util.PhredUtils.phredToErrorProbability +import scala.math._ + +trait LikelihoodModel extends Serializable { + def logLikelihood(ref: String, + alt: String, + obs: Iterable[AlleleObservation], + f: Option[Double]): Double +} + +case class LogOdds(m1: LikelihoodModel, m2: LikelihoodModel) { + + def logOdds(ref: String, alt: String, + obs: Iterable[AlleleObservation], + f: Option[Double]): Double = + m1.logLikelihood(ref, alt, obs, f) - m2.logLikelihood(ref, alt, obs, f) +} + +object MutectLogOdds extends LogOdds(MfmModel, M0Model) { +} + +/** + * Use for the log odds that a normal is not a heterozygous site + */ +object MutectSomaticLogOdds extends LogOdds(M0Model, MHModel) { +} + +object M0Model extends LikelihoodModel { + + def logLikelihood(ref: String, + alt: String, + obs: Iterable[AlleleObservation], + f: Option[Double]): Double = + MfmModel.logLikelihood(ref, alt, obs, Some(0.0)) +} + +/** + * M_{m, 0.5} -- probability of a heterozygous site + */ +object MHModel extends LikelihoodModel { + + def logLikelihood(ref: String, + alt: String, + obs: Iterable[AlleleObservation], + f: Option[Double]): Double = + MfmModel.logLikelihood(ref, alt, obs, Some(0.5)) +} + +/** + * M_{m, f} + */ +object MfmModel extends LikelihoodModel { + + def P_bi(obs: AlleleObservation, r: String, m: String, f: Double): Double = { + val ei = phredToErrorProbability(obs.phred) + + if (obs.allele == r) { + f * (ei / 3.0) + (1.0 - f) * (1.0 - ei) + } else if (obs.allele == m) { + f * (1.0 - ei) + (1.0 - f) * (ei / 3.0) + } else { + ei / 3.0 + } + } + + def logLikelihood(ref: String, alt: String, + obs: Iterable[AlleleObservation], + f: Option[Double]): Double = { + val fEstimate: Double = f.getOrElse(obs.count(_.allele == alt).toDouble / obs.size) + obs.map { ob => log10(P_bi(ob, ref, alt, fEstimate)) }.sum + } +} + diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/discovery/ReadExplorer.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/discovery/ReadExplorer.scala index 2fa574af..d717dfe9 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/discovery/ReadExplorer.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/discovery/ReadExplorer.scala @@ -23,7 +23,8 @@ import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.ReferencePosition import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rich.RichAlignmentRecord +import org.bdgenomics.adam.rich.{ DecadentRead, RichAlignmentRecord } +import org.bdgenomics.adam.util.MdTag import org.bdgenomics.avocado.Timers._ import org.bdgenomics.avocado.models.{ AlleleObservation, Observation } import org.bdgenomics.avocado.stats.AvocadoConfigAndStats @@ -43,10 +44,38 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit val companion: ExplorerCompanion = ReadExplorer + def mdTagToMismatchPositions(mdTag: MdTag, cigar: List[CigarElement]): Seq[Int] = { + var idx = 0 + val insertions = cigar.map(c => { + (c, c.getLength) + }).map(kv => { + val r = (kv._1, idx) + idx += kv._2 + r + }).flatMap(kv => { + val (ce, i) = kv + if (ce.getOperator == CigarOperator.I) { + (0 until ce.getLength).map(_ + i) + } else { + Seq.empty + } + }) + + val deletions = mdTag.deletions + val oriPositions = mdTag.mismatches.keys + var mismatchPositions = oriPositions.zip(oriPositions) + for (iPos <- insertions) { + mismatchPositions = mismatchPositions.map({ case (p, i) => if (i <= iPos) (p + 1, i) else (p, i) }) + } + for ((dPos, _) <- deletions) { + mismatchPositions = mismatchPositions.map({ case (p, i) => if (i > dPos) (p - 1, i) else (p, i) }) + } + mismatchPositions.map({ case (p, i) => p.toInt }).toSeq + } + def readToObservations(r: (AlignmentRecord, Long)): Seq[Observation] = ExploringRead.time { val (read, readId) = r val richRead: RichAlignmentRecord = RichAlignmentRecord(read) - // get read start, contig, strand, sample, mapq, and sequence var pos: Long = read.getStart val contig: String = read.getContig.getContigName @@ -64,7 +93,12 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit // get cigar, md tag, and phred scores for bases val cigar: List[CigarElement] = richRead.samtoolsCigar.getCigarElements val quals = richRead.qualityScores - val mdTag = richRead.mdTag + val mdString = read.getMismatchingPositions + val mismatchPositions: Option[Seq[Int]] = if (mdString != null && mdString != "") + Some(mdTagToMismatchPositions(MdTag(read.getMismatchingPositions, + if (cigar.head.getOperator == CigarOperator.S) cigar.head.getLength else 0, + richRead.samtoolsCigar), cigar)) + else None // observations var observations = Seq[Observation]() @@ -72,6 +106,102 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit // position in the current read var readPos = 0 + // get the sum of mismatching bases + val qscores: Option[Seq[Int]] = mismatchPositions.map(l => { + l.map(p => { + quals(p) + }) + }) + + val mismatchQScoreSum = qscores.map(_.sum) + + // Helper function to get the unclipped read length (hard or soft) from CIGAR + def unclippedLenFromCigar(cigar: Cigar): Int = { + cigar.getCigarElements.map(ce => ce.getOperator match { + case CigarOperator.D | CigarOperator.N | CigarOperator.P => 0 + case _ => ce.getLength + }).sum + } + + def alignedLenFromCigar(cigar: Cigar): Int = { + cigar.getCigarElements.map(alignedElementLength).sum + } + + def alignedElementLength(ce: CigarElement): Int = { + ce.getOperator match { + case CigarOperator.D | CigarOperator.N | CigarOperator.P | CigarOperator.H | CigarOperator.S => 0 + case _ => ce.getLength + } + } + + // Helper function to calculate the length of an element, if it is a clipping element + def basesTrimmed(cigarElement: CigarElement): Int = { + cigarElement.getOperator match { + case CigarOperator.S | CigarOperator.H => cigarElement.getLength + case _ => 0 + } + } + + // Set up variables to help with tracking the distance from indels, and + // the distance from the current allele to the first and last trimmed base + // within this read. + val readLen = unclippedLenFromCigar(richRead.samtoolsCigar) + val trimmedFromStart = basesTrimmed(cigar.head) + val trimmedFromEnd = basesTrimmed(cigar.last) + var softclippedBases = 0 + val alignedLen = alignedLenFromCigar(richRead.samtoolsCigar) + + val cigarLenOps = cigar.zipWithIndex.map({ + case (ce: CigarElement, idx: Int) => + (idx, (ce.getLength, ce.getOperator)) + }).toMap + + val alignedLenFromCigars = cigar.map(alignedElementLength) + + // List of changes that are only insertions along with their lengths and pos (pos, (len, CigarOperator.I)) + val insertions = cigarLenOps.filter({ case (idx, (len, op)) => op == CigarOperator.I }) + // List of changes that are only deletions along with their lengths and pos (pos, (len, CigarOperator.D)) + val deletions = cigarLenOps.filter({ case (idx, (len, op)) => op == CigarOperator.D }) + + /** + * + * @param idx position of the insertion in the Cigar list + * @param len lenght of the insertion from the Cigar list + * @param del whether it is deletion or insertion + * @return + */ + def makeNaiveDistanceVec(idx: Int, len: Int, del: Boolean): Vector[Int] = { + // Finds the distance (num of bases in the read) before the event + val lpre = (0 until idx).map(alignedLenFromCigars(_)).sum + + // Finds the num of bases in the read after the event (at idx) + val lpost = ((idx + 1) until cigar.length).map(alignedLenFromCigars(_)).sum + + // Makes a list for every base pair in the read based on how far it is from this particular event + // eg. if it is an insertion, lpre = 5, lpost = 6, insertion len = 3, read length = 14 + // (5, 4, 3, 2, 1, 0, 0, 0, 1, 2, 3, 4, 5, 6) + val distanceVec = ((1 to lpre).reverse ++ (if (del) Vector.empty[Int] else Vector.fill(len)(0)) ++ (1 to lpost).map(-_)).toVector + assert(distanceVec.length == alignedLen) + distanceVec + } + + val insertionDistVecs = insertions.map({ case (idx, (len, _)) => makeNaiveDistanceVec(idx, len, false) }) + val deletionDistVecs = deletions.map({ case (idx, (len, _)) => makeNaiveDistanceVec(idx, len, true) }) + + val posToInsDist: Option[Vector[Int]] = if (insertionDistVecs.size > 0) Some(insertionDistVecs.transpose.map(l => l.minBy(Math.abs(_))).toVector) else None + val posToDelDist: Option[Vector[Int]] = if (deletionDistVecs.size > 0) Some(deletionDistVecs.transpose.map(l => l.minBy(Math.abs(_))).toVector) else None + + def getTags(read: RichAlignmentRecord): Option[Seq[org.bdgenomics.adam.models.Attribute]] = { + try { + Option(read.tags) + } catch { + case e: NullPointerException => None + } + } + + val tags: Option[Seq[org.bdgenomics.adam.models.Attribute]] = getTags(richRead) + val mateRescue: Boolean = tags.getOrElse(Seq()).exists(a => a.tag == "XT" && a.value == "M") + def processAlignmentMatch() { observations = AlleleObservation(ReferencePosition(contig, pos), 1, @@ -80,7 +210,15 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit mapq, negativeStrand, firstOfPair, - readPos, + readPos - softclippedBases, + alignedLen, + posToInsDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + posToDelDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + trimmedFromStart, + trimmedFromEnd, + readLen, + mismatchQScoreSum, + mateRescue, sample, readId) +: observations readPos += 1 @@ -100,13 +238,22 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit mapq, negativeStrand, firstOfPair, - readPos, + readPos - softclippedBases, + alignedLen, + posToInsDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + posToDelDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + trimmedFromStart, + trimmedFromEnd, + readLen, + mismatchQScoreSum, + mateRescue, sample, readId).asInstanceOf[Observation] +: observations // increment read pointers readPos += alleleLength pos += 1 + } else if (idx + 1 < cigar.length && cigar(idx + 1).getOperator == CigarOperator.D) { // the allele includes the matching base val alleleLength = 1 + cigar(idx + 1).getLength @@ -119,7 +266,15 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit mapq, negativeStrand, firstOfPair, - readPos, + readPos - softclippedBases, + alignedLen, + posToInsDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + posToDelDist.flatMap((lst: Vector[Int]) => Some(lst(readPos - softclippedBases))), + trimmedFromStart, + trimmedFromEnd, + readLen, + mismatchQScoreSum, + mateRescue, sample, readId).asInstanceOf[Observation] +: observations @@ -152,7 +307,9 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit // no op; handle inserts by looking ahead from match/mismatch operator } case CigarOperator.S => { - readPos += cigar(i).getLength + val clippedLen = cigar(i).getLength + readPos += clippedLen + softclippedBases += clippedLen } case CigarOperator.H => case _ => { @@ -177,4 +334,4 @@ class ReadExplorer(referenceObservations: RDD[Observation]) extends Explorer wit .flatMap(readToObservations) } ++ referenceObservations } -} +} \ No newline at end of file diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala index ba13568b..efec8e33 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala @@ -18,18 +18,9 @@ package org.bdgenomics.avocado.genotyping import org.apache.commons.configuration.{ HierarchicalConfiguration, SubnodeConfiguration } -import org.apache.spark.SparkContext._ import org.apache.spark.Logging -import org.apache.spark.rdd.{ InstrumentedRDD, RDD } -import org.bdgenomics.adam.models.{ - ReferencePosition, - ReferenceRegion, - SequenceDictionary, - SequenceRecord, - VariantContext -} +import org.bdgenomics.adam.models._ import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rdd.GenomicPositionPartitioner import org.bdgenomics.adam.util.PhredUtils import org.bdgenomics.avocado.Timers._ import org.bdgenomics.avocado.algorithms.em.EMForAlleles @@ -38,7 +29,7 @@ import org.bdgenomics.avocado.genotyping.annotators.VariantCallingAnnotator import org.bdgenomics.avocado.models.{ AlleleObservation, Observation } import org.bdgenomics.avocado.stats.AvocadoConfigAndStats import org.bdgenomics.avocado.algorithms.math.LogToPhred.log2phred -import org.bdgenomics.formats.avro.{ Contig, Genotype, GenotypeAllele, Variant, VariantCallingAnnotations } +import org.bdgenomics.formats.avro._ import scala.annotation.tailrec import scala.math.{ exp, expm1, log => mathLog, log1p, max, min, pow } @@ -97,14 +88,14 @@ object BiallelicGenotyper extends GenotyperCompanion { } class BiallelicGenotyper(sd: SequenceDictionary, - contigLengths: Map[String, Long], + val contigLengths: Map[String, Long], ploidy: Int = 2, useEM: Boolean = false, emitGVCF: Boolean = true, estimatedReferenceFrequency: Double = 0.999, maxIterations: Option[Int] = Some(10), tolerance: Option[Double] = Some(1e-3), - saturationThreshold: Double = 0.001) extends Genotyper with Logging { + saturationThreshold: Double = 0.001) extends SiteGenotyper with Logging { val companion: GenotyperCompanion = BiallelicGenotyper @@ -288,7 +279,7 @@ class BiallelicGenotyper(sd: SequenceDictionary, def genotypeSite(region: ReferenceRegion, referenceObservation: Observation, - alleleObservations: Iterable[AlleleObservation]): VariantContext = GenotypingSite.time { + alleleObservations: Iterable[AlleleObservation]): Option[VariantContext] = GenotypingSite.time { // get reference allele val reference = referenceObservation.allele @@ -355,128 +346,6 @@ class BiallelicGenotyper(sd: SequenceDictionary, }) // emit the variant context - VariantContext(variant, genotypes, None) - } - - def genotype(observations: RDD[Observation]): RDD[VariantContext] = { - val sortedRdd = observations.keyBy(_.pos) - .repartitionAndSortWithinPartitions(GenomicPositionPartitioner(observations.partitions.size, - contigLengths)) - val instrumentedRdd = { - import org.apache.spark.rdd.MetricsContext._ - sortedRdd.instrument - } - instrumentedRdd.mapPartitions(genotypeIterator) - } - - def updateObservations(region: ReferenceRegion, - obs: List[Observation]): (Observation, Iterable[AlleleObservation]) = { - // extract references - val ref = obs.flatMap(o => o match { - case ao: AlleleObservation => None - case oo: Observation => Some(oo) - }).sortBy(_.pos) - .map(_.allele) - .mkString - val refLen = ref.length - val refPos = ReferencePosition(region.referenceName, region.start) - - // put read observations together - val readObservations = obs.flatMap(o => o match { - case ao: AlleleObservation => Some(ao) - case oo: Observation => None - }) - - // if the reference length is greater than 1, we need to expand the values - val finalObservations = if (refLen > 1) { - readObservations.groupBy(_.readId) - .map(ro => { - // sort the read observations - val sortedRo = ro._2.sortBy(_.pos) - - // get the start and end position - val start = sortedRo.head.pos.pos - val end = sortedRo.last.pos.pos + sortedRo.last.length - - // reduce down to get the sequence - val sequence = (ref.take((start - region.start).toInt) + - sortedRo.map(_.allele).mkString + - ref.takeRight((region.end - end).toInt)) - val phred = sortedRo.map(_.phred).sum / sortedRo.length - - // make observation - AlleleObservation(refPos, - refLen, - sequence, - phred, - sortedRo.head.mapq, - sortedRo.head.onNegativeStrand, - sortedRo.head.firstOfPair, - sortedRo.head.offsetInRead, - sortedRo.head.sample, - sortedRo.head.readId) - }) - } else { - readObservations - } - - // emit reference observation - (new Observation(refPos, ref), finalObservations) - } - - def genotypeIterator(iter: Iterator[(ReferencePosition, Observation)]): Iterator[VariantContext] = { - // do we have any elements in this iterator? - if (iter.hasNext) { - // peek at the head element - val (headPos, headObs) = iter.next - - // and initialize state - var windowStart = headPos.pos - var windowEnd = windowStart + headObs.length - var obsList = List(headObs) - - def flush(): VariantContext = { - val region = ReferenceRegion(headPos.referenceName, windowStart, windowEnd) - - // update observations - val (refObs, obsIterable) = updateObservations(region, obsList) - - // compute genotypes - genotypeSite(region, - refObs, - obsIterable) - } - - // start processing observation windows! - iter.flatMap(kv => { - val (pos, obs) = kv - - // does this observation start past the end of this window? if so, flush our observations - // and rebuild the window - if (pos.pos >= windowEnd) { - // flush the window - val vc = flush() - - // reinitialize state - windowStart = pos.pos - windowEnd = windowStart + obs.length - obsList = List(obs) - - // emit variant context - Some(vc) - } else { - // make possible update to window end - windowEnd = max(windowEnd, pos.pos + obs.length) - - // append observation to list - obsList = obs :: obsList - - // nothing to emit - None - } - }) ++ Iterator(flush()) - } else { - Iterator() - } + Some(VariantContext(variant, genotypes, None)) } } diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/CallGenotypes.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/CallGenotypes.scala index 44c1ee7f..9c356a9b 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/CallGenotypes.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/CallGenotypes.scala @@ -27,7 +27,8 @@ import org.bdgenomics.avocado.stats.AvocadoConfigAndStats object CallGenotypes { val genotypers: Seq[GenotyperCompanion] = Seq(BiallelicGenotyper, - ExternalGenotyper) + ExternalGenotyper, + MutectGenotyper) def apply(genotyperAlgorithm: String, genotyperName: String, diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/MutectGenotyper.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/MutectGenotyper.scala new file mode 100644 index 00000000..5d4c255e --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/MutectGenotyper.scala @@ -0,0 +1,364 @@ +/* + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.genotyping + +import java.io.File +import org.apache.commons.configuration.{ HierarchicalConfiguration, SubnodeConfiguration } +import org.apache.spark.Logging +import org.apache.spark.broadcast.Broadcast +import org.bdgenomics.adam.models._ +import org.bdgenomics.avocado.algorithms.mutect._ +import org.bdgenomics.avocado.algorithms.math.{ LogUtils, LogBinomial } +import org.bdgenomics.avocado.models.{ AlleleObservation, Observation } +import org.bdgenomics.avocado.stats.AvocadoConfigAndStats +import org.bdgenomics.formats.avro.{ Contig, Genotype, Variant } + +import scala.annotation.tailrec + +object MutectGenotyper extends GenotyperCompanion { + + val genotyperName: String = "MutectGenotyper" + + protected def apply(stats: AvocadoConfigAndStats, + config: SubnodeConfiguration): Genotyper = { + println("\n") + println("\n") + println("\n") + println("\n") + println("\n") + println("\n") + println("\n") + println("\n") + println("Mutect") + + require(config.containsKey("normalId"), + "Normal sample ID is not defined in configuration file.") + require(config.containsKey("somaticId"), + "Somatic sample ID is not defined in configuration file.") + + // load snp table + val snpTable = if (config.containsKey("dbSNPFile")) { + Some(stats.sc.broadcast(SnpTable(new File(config.getString("dbSNPFile"))))) + } else { + None + } + + // load known mutations (like cosmic) file + val recurrentMutationsTable = if (config.containsKey("recurrentMutationsFile")) { + Some(stats.sc.broadcast(SnpTable(new File(config.getString("recurrentMutationsFile"))))) + } else { + None + } + + val noisyMutationsTable = if (config.containsKey("noisyMutationsFile")) { + Some(stats.sc.broadcast(SnpTable(new File(config.getString("noisyMutationsFile"))))) + } else { + None + } + + // pull out algorithm parameters and return genotyper + new MutectGenotyper(config.getString("normalId"), + config.getString("somaticId"), + stats.contigLengths, + config.getDouble("threshold", 6.3), + config.getDouble("somDbSnpThreshold", 5.5), + config.getDouble("somNovelThreshold", 2.2), + config.getInt("maxGapEvents", 3), + config.getDouble("minPassStringentFiltersTumor", 0.3), + config.getDouble("maxMapq0Fraction", 0.5), + config.getInt("minPhredSupportingMutant", 20), + config.getInt("indelNearnessThreshold", 5), + config.getInt("maxPhredSumMismatchingBases", 100), + config.getDouble("maxFractionBasesSoftClippedTumor", 0.3), + config.getDouble("maxNormalSupportingFracToTriggerQscoreCheck", 0.015), + config.getInt("maxNormalQscoreSumSupportingMutant", 20), + config.getInt("minMedianDistanceFromReadEnd", 10), + config.getInt("minMedianAbsoluteDeviationOfAlleleInRead", 3), + config.getBoolean("experimentalMutectIndelDetector", false), + config.getDouble("errorForPowerCalculations", 0.001), + config.getInt("minThetaForPowerCalc", 20), + None, + snpTable, + recurrentMutationsTable, + noisyMutationsTable) + } +} + +/** + * + * @param threshold Default value of 6.3 corresponds to a 3x10-6 mutation rate, + * see the Online Methods of the Mutect paper for more details. + * @param somDbSnpThreshold if the site is a known dbSnp site, apply this cutoff for somatic classification + * @param somNovelThreshold if the site is a novel variant, apply this cutoff for somatic classification + * + */ +class MutectGenotyper(normalId: String, + somaticId: String, + val contigLengths: Map[String, Long] = Map.empty, + threshold: Double = 6.3, + somDbSnpThreshold: Double = 5.5, + somNovelThreshold: Double = 2.2, + maxGapEventsThreshold: Int = 3, + minPassStringentFiltersTumor: Double = 0.3, + maxMapq0Fraction: Double = 0.5, + minPhredSupportingMutant: Int = 20, + indelNearnessThreshold: Int = 5, + maxPhredSumMismatchingBases: Int = 100, + maxFractionBasesSoftClippedTumor: Double = 0.3, + maxNormalSupportingFracToTriggerQscoreCheck: Double = 0.015, + maxNormalQscoreSumSupportingMutant: Int = 20, + minMedianDistanceFromReadEnd: Int = 10, + minMedianAbsoluteDeviationOfAlleleInRead: Int = 3, + experimentalMutectIndelDetector: Boolean = false, + errorForPowerCalculations: Double = 0.001, + minThetaForPowerCalc: Int = 20, + f: Option[Double] = None, + snpTable: Option[Broadcast[SnpTable]] = None, + recurrentMutationsTable: Option[Broadcast[SnpTable]] = None, + noisyMutationsTable: Option[Broadcast[SnpTable]] = None) extends SiteGenotyper with Logging { + + val companion: GenotyperCompanion = MutectGenotyper + + val model = MutectLogOdds + val somaticModel = MutectSomaticLogOdds + + def constructVariant(region: ReferenceRegion, + ref: String, + alt: String, + obs: Iterable[AlleleObservation]): VariantContext = { + + val contig = Contig.newBuilder() + .setContigName(region.referenceName) + .build() + + val variant = Variant.newBuilder() + .setStart(region.start) + .setContig(contig) + .setEnd(region.end) + .setReferenceAllele(ref) + .setAlternateAllele(alt) + .build() + + val genotypes = Seq() + + VariantContext(variant, genotypes, None) + } + + protected[genotyping] def genotypeSite(region: ReferenceRegion, + referenceObservation: Observation, + alleleObservation: Iterable[AlleleObservation]): Option[VariantContext] = { + + require(alleleObservation.forall(_.mismatchQScoreSum.isDefined), + "MD tags must be set in reads for MuTect to function properly. See `samtools calmd` for example.") + + val ref = referenceObservation.allele + // get all possible alleles for this mutation call + // ref.size and ref.length always seem to be 1 + val alleles: Set[String] = if (experimentalMutectIndelDetector) + Set(alleleObservation.map(_.allele).toSeq: _*) + else + Set(alleleObservation.map(_.allele).toSeq: _*).filter(_.length == 1) // only accept length 1 alleles + val pointMutation: Boolean = ref.size == 1 && alleles.exists(_.length == 1) + + if (experimentalMutectIndelDetector || pointMutation) { + + val tumorsRaw = alleleObservation.filter(a => a.sample == somaticId) + val normals = alleleObservation.filter(a => a.sample == normalId) + + // apply 3 stringent filters to the tumor alleles + val tumors = tumorsRaw.filterNot(ao => { + val clippedFilter = (ao.clippedBasesReadStart + ao.clippedBasesReadEnd) / + ao.unclippedReadLen.toDouble >= maxFractionBasesSoftClippedTumor + val noisyFilter = ao.mismatchQScoreSum.get >= maxPhredSumMismatchingBases + val mateRescueFilter = ao.mateRescue + clippedFilter || noisyFilter || mateRescueFilter + }) + + val rankedAlts: Seq[(Double, String)] = + (alleles - ref).map { alt => + (model.logOdds(ref, alt, alleleObservation, f), alt) + }.toSeq.sorted.reverse + + val passingOddsAlts = rankedAlts.filter(oa => oa._1 >= threshold) + + if (passingOddsAlts.size == 1) { + val alt = passingOddsAlts(0)._2 + + val normalNotHet = somaticModel.logOdds(ref, alt, normals, None) + + // is this a known site? + val dbSNPsite = snpTable.fold(false)(t => { + t.value.contains(ReferencePosition(region.referenceName, region.start)) + }) + + // is this a recurrent site, for example cosmic? + val recurrentMutSite = recurrentMutationsTable.fold(false)(t => { + t.value.contains(ReferencePosition(region.referenceName, region.start)) + }) + + // is this a noisy mutation site, for example one called previously using only a Panel of Normals (PON)? + val noisyMutSite = noisyMutationsTable.fold(false)(t => { + t.value.contains(ReferencePosition(region.referenceName, region.start)) + }) + + val passSomatic: Boolean = (dbSNPsite && normalNotHet >= somDbSnpThreshold) || (!dbSNPsite && normalNotHet >= somNovelThreshold) + + val passNoiseFilter: Boolean = (recurrentMutSite || !noisyMutSite) + + val nInsertions = tumors.map(ao => if (math.abs(ao.distanceToNearestReadInsertion.getOrElse(Int.MaxValue)) <= indelNearnessThreshold) 1 else 0).sum + val nDeletions = tumors.map(ao => if (math.abs(ao.distanceToNearestReadDeletion.getOrElse(Int.MaxValue)) <= indelNearnessThreshold) 1 else 0).sum + + val passIndel: Boolean = nInsertions < maxGapEventsThreshold && nDeletions < maxGapEventsThreshold && pointMutation + + val passStringentFilters = tumors.size.toDouble / tumorsRaw.size.toDouble > (1.0 - minPassStringentFiltersTumor) + + val passMapq0Filter = tumorsRaw.filter(_.mapq.getOrElse(0) == 0).size.toDouble / tumorsRaw.size.toDouble <= maxMapq0Fraction && + normals.filter(_.mapq.getOrElse(0) == 0).size.toDouble / normals.size.toDouble <= maxMapq0Fraction + + val onlyTumorMut = tumors.filter(_.allele == alt) + + val passMaxMapqAlt = if (onlyTumorMut.size > 0) onlyTumorMut.map(_.phred).max >= minPhredSupportingMutant else false + + val passMaxNormalSupport = normals.filter(_.allele == alt).size.toDouble / normals.size.toDouble <= maxNormalSupportingFracToTriggerQscoreCheck || + normals.filter(_.allele == alt).map(_.phred).sum < maxNormalQscoreSumSupportingMutant + + val tumorPos = tumors.filterNot(_.onNegativeStrand) + val tumorPosAlt = tumorPos.filter(_.allele == alt) + val tumorNeg = tumors.filter(_.onNegativeStrand) + val tumorNegAlt = tumorNeg.filter(_.allele == alt) + + val alleleFrac = onlyTumorMut.size.toDouble / tumors.size.toDouble + + val tumorPosDepth = tumorPos.size + val tumorNegDepth = tumorNeg.size + val tPosFrac = if (tumorPosDepth > 0) tumorPosAlt.size.toDouble / tumorPosDepth.toDouble else 0.0 + val tNegFrac = if (tumorNegDepth > 0) tumorNegAlt.size.toDouble / tumorNegDepth.toDouble else 0.0 + + val lodPos = model.logOdds(ref, alt, tumorPos, Some(tPosFrac)) + val lodNeg = model.logOdds(ref, alt, tumorNeg, Some(tNegFrac)) + + val powerPos = calculateStrandPower(tumorPosDepth, alleleFrac) + val powerNeg = calculateStrandPower(tumorNegDepth, alleleFrac) + + val passingStrandBias = (powerPos < 0.9 || lodPos >= minThetaForPowerCalc) && + (powerNeg < 0.9 || lodNeg >= minThetaForPowerCalc) + + // Only pass mutations that do not cluster at the ends of reads + val passEndClustering = if (onlyTumorMut.size > 0) { + val forwardPositions: Seq[Double] = onlyTumorMut.map(_.offsetInAlignment.toDouble).toSeq + val reversePositions: Seq[Double] = onlyTumorMut.map(ao => ao.alignedReadLen - ao.offsetInAlignment.toDouble - 1.0).toSeq + + val forwardMedian = median(forwardPositions) + val reverseMedian = median(reversePositions) + + val forwardMad = mad(forwardPositions, forwardMedian) + val reverseMad = mad(reversePositions, reverseMedian) + + (forwardMad > minMedianAbsoluteDeviationOfAlleleInRead || forwardMedian > minMedianAbsoluteDeviationOfAlleleInRead) && + (reverseMad > minMedianAbsoluteDeviationOfAlleleInRead || reverseMedian > minMedianAbsoluteDeviationOfAlleleInRead) + + } else false + + // Do all filters pass? + if (passSomatic && passIndel && passStringentFilters && passMapq0Filter && + passMaxMapqAlt && passMaxNormalSupport && passEndClustering && + passingStrandBias && passNoiseFilter) { + Option(constructVariant(region, ref, alt, alleleObservation)) + } else { + None + } + + } else { + // either there are 0 passing variants, or there are > 1 passing variants + None + } + } else { + log.info("length: " + ref.length) + log.info("size: " + ref.size) + log.info("Dropping site %s, as reference allele is an insertion or complex variant.".format(referenceObservation.pos)) + None + } + } + + def calculateStrandPower(depth: Int, f: Double): Double = { + /* The power to detect a mutant is a function of depth, and the mutant allele fraction (unstranded). + Basically you assume that the probability of observing a base error is uniform and 0.001 (phred score of 30). + You see how many reads you require to pass the LOD threshold of 2.0, and then you calculate the binomial + probability of observing that many or more reads would be observed given the allele fraction and depth. + You also correct for the fact that the real result is somewhere between the minimum integer number to pass, + and the number below it, so you scale your probability at k by 1 - (2.0 - lod_(k-1) )/(lod_(k) - lod_(k-1)). + */ + if (depth < 1 || f <= 0.0000001) { + 0.0 + } else { + + def getLod(k: Int): Double = { + val nref = depth - k + val tf = k / depth.toDouble + val prefk = nref.toDouble * math.log10(tf * errorForPowerCalculations + (1.0 - tf) * (1.0 - errorForPowerCalculations)) + val paltk = k.toDouble * math.log10(tf * (1.0 - errorForPowerCalculations) + (1.0 - tf) * errorForPowerCalculations) + val pkm = prefk + paltk + val pref0 = nref.toDouble * math.log10(1.0 - errorForPowerCalculations) + val palt0 = k.toDouble * math.log10(errorForPowerCalculations) + val p0 = pref0 + palt0 + pkm - p0 + } + + @tailrec + def findMinPassingK(begin: Int, end: Int, passingK: Option[(Int, Double)]): Option[(Int, Double)] = { + if (begin >= end) passingK + else { + val mid = (begin + end) / 2 + val lod = getLod(mid) + val passingKupdate = if (lod >= minThetaForPowerCalc) Some((mid, lod)) else passingK + if (lod >= minThetaForPowerCalc && begin < end - 1) findMinPassingK(begin, mid, passingKupdate) + else if (begin < end - 1) findMinPassingK(mid, end, passingKupdate) + else passingKupdate + } + } + + val kLodOpt = findMinPassingK(1, depth + 1, None) + + if (kLodOpt.isDefined) { + val (k, lod) = kLodOpt.get + val probabilities: Array[Double] = LogBinomial.calculateLogProbabilities(math.log(f), depth) + val binomials = probabilities.drop(k) + + val lodM1 = getLod(k - 1) + + val partialLogPkM1: Double = probabilities(k - 1) + math.log(1.0 - (minThetaForPowerCalc - lodM1) / (lod - lodM1)) + + math.exp(LogUtils.sumLogProbabilities(partialLogPkM1 +: binomials)) + + } else { + 0.0 + } + } + + } + + def median(s: Seq[Double]): Double = { + val (lower, upper) = s.sortWith(_ < _).splitAt(s.size / 2) + if (s.size % 2 == 0) (lower.last + upper.head) / 2.0 else upper.head + } + + def mad(s: Seq[Double], m: Double): Double = { + median(s.map(i => math.abs(i - m))) + } + +} diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SiteGenotyper.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SiteGenotyper.scala new file mode 100644 index 00000000..df9a4447 --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SiteGenotyper.scala @@ -0,0 +1,166 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.genotyping + +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.{ InstrumentedRDD, RDD } +import org.bdgenomics.adam.models._ +import org.bdgenomics.adam.rdd.GenomicPositionPartitioner +import org.bdgenomics.avocado.Timers._ +import org.bdgenomics.avocado.models.{ AlleleObservation, Observation } +import org.bdgenomics.formats.avro._ +import scala.math.max + +trait SiteGenotyper extends Genotyper { + + val contigLengths: Map[String, Long] + + final def genotype(observations: RDD[Observation]): RDD[VariantContext] = { + val sortedRdd = observations.keyBy(_.pos) + .repartitionAndSortWithinPartitions(GenomicPositionPartitioner(observations.partitions.size, + contigLengths)) + val instrumentedRdd = { + import org.apache.spark.rdd.MetricsContext._ + sortedRdd.instrument + } + instrumentedRdd.mapPartitions(genotypeIterator) + } + + protected[genotyping] def genotypeSite(region: ReferenceRegion, + referenceObservation: Observation, + alleleObservations: Iterable[AlleleObservation]): Option[VariantContext] + + private def genotypeIterator(iter: Iterator[(ReferencePosition, Observation)]): Iterator[VariantContext] = { + // do we have any elements in this iterator? + if (iter.hasNext) { + // peek at the head element + val (headPos, headObs) = iter.next + + // and initialize state + var windowStart = headPos.pos + var windowEnd = windowStart + headObs.length + var obsList = List(headObs) + + def flush(): Option[VariantContext] = { + val region = ReferenceRegion(headPos.referenceName, windowStart, windowEnd) + + // update observations + val (refObs, obsIterable) = updateObservations(region, obsList) + + // compute genotypes + genotypeSite(region, + refObs, + obsIterable) + } + + // start processing observation windows! + iter.flatMap(kv => { + val (pos, obs) = kv + + // does this observation start past the end of this window? if so, flush our observations + // and rebuild the window + if (pos.pos >= windowEnd) { + // flush the window + val vc = flush() + + // reinitialize state + windowStart = pos.pos + windowEnd = windowStart + obs.length + obsList = List(obs) + + // emit variant context + vc + } else { + // make possible update to window end + windowEnd = max(windowEnd, pos.pos + obs.length) + + // append observation to list + obsList = obs :: obsList + + // nothing to emit + None + } + }) ++ flush().toIterator + } else { + Iterator() + } + } + + private def updateObservations(region: ReferenceRegion, + obs: List[Observation]): (Observation, Iterable[AlleleObservation]) = { + // extract references + val ref = obs.flatMap(o => o match { + case ao: AlleleObservation => None + case oo: Observation => Some(oo) + }).sortBy(_.pos) + .map(_.allele) + .mkString + val refLen = ref.length + val refPos = ReferencePosition(region.referenceName, region.start) + + // put read observations together + val readObservations = obs.flatMap(o => o match { + case ao: AlleleObservation => Some(ao) + case oo: Observation => None + }) + + // if the reference length is greater than 1, we need to expand the values + val finalObservations = if (refLen > 1) { + readObservations.groupBy(_.readId) + .map(ro => { + // sort the read observations + val sortedRo = ro._2.sortBy(_.pos) + + // get the start and end position + val start = sortedRo.head.pos.pos + val end = sortedRo.last.pos.pos + sortedRo.last.length + + // reduce down to get the sequence + val sequence = (ref.take((start - region.start).toInt) + + sortedRo.map(_.allele).mkString + + ref.takeRight((region.end - end).toInt)) + val phred = sortedRo.map(_.phred).sum / sortedRo.length + + // make observation + AlleleObservation(refPos, + refLen, + sequence, + phred, + sortedRo.head.mapq, + sortedRo.head.onNegativeStrand, + sortedRo.head.firstOfPair, + sortedRo.head.offsetInAlignment, + sortedRo.head.alignedReadLen, + sortedRo.head.distanceToNearestReadInsertion, + sortedRo.head.distanceToNearestReadDeletion, + sortedRo.head.clippedBasesReadStart, + sortedRo.head.clippedBasesReadEnd, + sortedRo.head.unclippedReadLen, + sortedRo.head.mismatchQScoreSum, + sortedRo.head.mateRescue, + sortedRo.head.sample, + sortedRo.head.readId) + }) + } else { + readObservations + } + + // emit reference observation + (new Observation(refPos, ref), finalObservations) + } +} diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/input/Input.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/input/Input.scala index 6c8abcf7..6e6dca84 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/input/Input.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/input/Input.scala @@ -19,13 +19,12 @@ package org.bdgenomics.avocado.input import org.bdgenomics.formats.avro.{ AlignmentRecord, NucleotideContigFragment } import org.apache.commons.configuration.HierarchicalConfiguration -import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD - +import org.apache.spark.{ SparkContext } object Input { // all our input stages - val stages = List(AlignedReadsInputStage) + val stages = List(AlignedReadsInputStage, KeyedReadsInputStage) /** * Builds the input stage that corresponds to the given stage name, and returns the read data @@ -44,9 +43,7 @@ object Input { config: HierarchicalConfiguration): RDD[AlignmentRecord] = { // get input stage to use; if none is specified, default to input being aligned reads val stageName: String = config.getString("inputStage", "AlignedReads") - val stage = stages.find(_.stageName == stageName) - stage match { case Some(s: InputStage) => { val stageConfig = config.configurationAt(stageName) @@ -58,4 +55,4 @@ object Input { } } } -} +} \ No newline at end of file diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/input/KeyedReadsInputStage.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/input/KeyedReadsInputStage.scala new file mode 100644 index 00000000..1adc25cf --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/input/KeyedReadsInputStage.scala @@ -0,0 +1,56 @@ +package org.bdgenomics.avocado.input + +import htsjdk.samtools.ValidationStringency +import org.apache.avro.Schema +import org.bdgenomics.adam.instrumentation.Timers._ +import org.bdgenomics.formats.avro.{ AlignmentRecord, NucleotideContigFragment } +import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.adam.rdd.ADAMContext +import org.apache.commons.configuration.SubnodeConfiguration +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.hadoop.fs.{ FileStatus, FileSystem, Path } + +private[input] object KeyedReadsInputStage extends InputStage { + + val stageName = "KeyedReads" + + /** + * Sets up and loads data using this input stage. + * + * @param inputPath Path to input files. + * @param config Configuration for this input stage. + * @param reference RDD containing reference information. + * @return Returns an RDD of ADAM reads. + */ + def apply(sc: SparkContext, + inputPath: String, + config: SubnodeConfiguration, + reference: RDD[NucleotideContigFragment]): RDD[AlignmentRecord] = { + + println("Loading reads in from " + inputPath) + + val path: Path = new Path(inputPath) + var paths: Map[String, Path] = null + if (inputPath.equals(config.getString("normalFileName"))) { + paths = Map("normal" -> path) + } else { + paths = Map("tumor" -> path) + } + + loadKeyedReads(sc, paths) + } + + def loadKeyedReads(sc: SparkContext, paths: Map[String, Path]): RDD[AlignmentRecord] = { + val alignments = paths map { case (k, p) => (k, sc.loadAlignments(p.toString).map(a => getAlignment(a, k))) } + val alignedReads: Seq[RDD[AlignmentRecord]] = alignments.values.toList + val union = sc.union(alignedReads) + union + } + + def getAlignment(a: AlignmentRecord, key: String) = { + a.setRecordGroupDescription(key) + a + } + +} \ No newline at end of file diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/AlleleObservation.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/AlleleObservation.scala index 3347df6c..c9d9c5a4 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/AlleleObservation.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/AlleleObservation.scala @@ -26,7 +26,15 @@ case class AlleleObservation(override val pos: ReferencePosition, mapq: Option[Int], onNegativeStrand: Boolean, firstOfPair: Boolean, - offsetInRead: Int, + offsetInAlignment: Int, + alignedReadLen: Int, + distanceToNearestReadInsertion: Option[Int], + distanceToNearestReadDeletion: Option[Int], + clippedBasesReadStart: Int, + clippedBasesReadEnd: Int, + unclippedReadLen: Int, + mismatchQScoreSum: Option[Int], + mateRescue: Boolean, sample: String, readId: Long) extends Observation(pos, allele) { diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSites.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSites.scala new file mode 100644 index 00000000..0415631b --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSites.scala @@ -0,0 +1,79 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.postprocessing + +import java.io.File +import org.apache.commons.configuration.SubnodeConfiguration +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.bdgenomics.adam.models.{ + ReferencePosition, + SnpTable, + VariantContext +} +import org.bdgenomics.adam.rich.RichVariant +import org.bdgenomics.avocado.stats.AvocadoConfigAndStats +import org.bdgenomics.formats.avro.Genotype + +object FilterKnownSites extends PostprocessingStage { + + val stageName = "filterKnownSites" + + def apply(rdd: RDD[VariantContext], + stats: AvocadoConfigAndStats, + config: SubnodeConfiguration): RDD[VariantContext] = { + + val snpTable = SnpTable(new File(config.getString("knownSiteFile"))) + val datasetName = if (config.containsKey("datasetName")) { + Some(config.getString("datasetName")) + } else { + None + } + + // by default, we _discard_ known sites + val keepKnowns = config.getBoolean("keepKnowns", false) + + // build and apply filter + val filter = new FilterKnownSites(datasetName, snpTable, keepKnowns, Some(stats.sc)) + filter.filter(rdd) + } +} + +class FilterKnownSites(dataset: Option[String], + siteTable: SnpTable, + keepKnowns: Boolean, + sc: Option[SparkContext] = None) extends VariantAttributeFilter { + + val filterName = if (keepKnowns) { + dataset.fold("KNOWN_SITE")(d => "SITE_IN_%s".format(d)) + } else { + dataset.fold("UNKNOWN_SITE")(d => "SITE_NOT_IN_%s".format(d)) + } + + private val table = sc.fold(siteTable)(c => c.broadcast(siteTable).value) + + def filterFn(v: RichVariant): Boolean = { + val in = table.contains(ReferencePosition(v.variant.getContig.getContigName, + v.variant.getStart)) + if (keepKnowns) { + in + } else { + !in + } + } +} diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/PostprocessingStage.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/PostprocessingStage.scala index 6f111c2e..28f8b8ea 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/PostprocessingStage.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/postprocessing/PostprocessingStage.scala @@ -21,9 +21,10 @@ import org.apache.commons.configuration.SubnodeConfiguration import org.apache.spark.rdd.RDD import org.bdgenomics.formats.avro.{ Genotype, VariantCallingAnnotations } import org.bdgenomics.adam.models.VariantContext +import org.bdgenomics.adam.rich.RichVariant import org.bdgenomics.avocado.stats.AvocadoConfigAndStats -private[postprocessing] trait PostprocessingStage { +private[postprocessing] trait PostprocessingStage extends Serializable { val stageName: String @@ -33,6 +34,46 @@ private[postprocessing] trait PostprocessingStage { } +private[postprocessing] trait VariantAttributeFilter extends Serializable { + val filterName: String + + def filterFn(variant: RichVariant): Boolean + + def filter(rdd: RDD[VariantContext]): RDD[VariantContext] = { + rdd.map(vc => { + val passed = filterFn(vc.variant) + + val newGts = vc.genotypes.map(g => { + val stats = Option(g.getVariantCallingAnnotations) + .getOrElse(VariantCallingAnnotations.newBuilder().build()) + val variantIsPassing = Option(stats.getVariantIsPassing) + + // we only "apply" the filter if the variant call is currently passing + if (variantIsPassing.fold(true)(v => v)) { + + // add to filter list + val filterList = stats.getVariantFilters + filterList.add(filterName) + + // if we failed, update flag + if (!passed || variantIsPassing.isEmpty) { + stats.setVariantIsPassing(passed) + } + + g.setVariantCallingAnnotations(stats) + } + + g + }) + + new VariantContext(vc.position, + vc.variant, + newGts, + vc.databases) + }) + } +} + private[postprocessing] trait GenotypeAttributeFilter[T] extends GenotypeFilter { val filterName: String @@ -48,7 +89,7 @@ private[postprocessing] trait GenotypeAttributeFilter[T] extends GenotypeFilter val genotypesWithStats: Seq[Genotype] = keyed.filter(t => t._1.isDefined) .map(kv => { val stats = Option(kv._2.getVariantCallingAnnotations) - .fold(VariantCallingAnnotations.newBuilder().build())(v => v) + .getOrElse(VariantCallingAnnotations.newBuilder().build()) val variantIsPassing = Option(stats.getVariantIsPassing) // we only apply the filter if the variant call is currently passing diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MappingQualityFilter.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MappingQualityFilter.scala new file mode 100644 index 00000000..58c787b2 --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MappingQualityFilter.scala @@ -0,0 +1,33 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.preprocessing + +import org.apache.commons.configuration.SubnodeConfiguration +import org.apache.spark.rdd.RDD +import org.bdgenomics.formats.avro.AlignmentRecord + +object MappingQualityFilter extends PreprocessingStage { + override val stageName: String = "mapping_quality_filter" + + override def apply(rdd: RDD[AlignmentRecord], config: SubnodeConfiguration): RDD[AlignmentRecord] = { + // what is the threshold to apply? + val mqThreshold = config.getInt("mapQualityThreshold", 0) + + rdd.filter(rec => rec.getMapq >= mqThreshold) + } +} diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MateRescueFilter.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MateRescueFilter.scala new file mode 100644 index 00000000..1edfd3f0 --- /dev/null +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/MateRescueFilter.scala @@ -0,0 +1,33 @@ +/* + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.preprocessing + +import org.apache.commons.configuration.SubnodeConfiguration +import org.apache.spark.rdd.RDD +import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.adam.rich.RichAlignmentRecord._ + +object MateRescueFilter extends PreprocessingStage { + override val stageName: String = "mate_rescue_filter" + + override def apply(rdd: RDD[AlignmentRecord], config: SubnodeConfiguration): RDD[AlignmentRecord] = + rdd.filter { + case record => + !record.tags.exists(a => a.tag == "XT" && a.value == "M") + } +} diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/PreprocessingStage.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/PreprocessingStage.scala index 9c760b38..81473085 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/PreprocessingStage.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/PreprocessingStage.scala @@ -21,7 +21,7 @@ import org.apache.commons.configuration.SubnodeConfiguration import org.apache.spark.rdd.RDD import org.bdgenomics.formats.avro.AlignmentRecord -trait ReadFilter[T] extends PreprocessingStage { +trait ReadFilter[T] extends PreprocessingStage with Serializable { final def apply(rdd: RDD[AlignmentRecord], config: SubnodeConfiguration): RDD[AlignmentRecord] = { val threshold = getThreshold(config) diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/Preprocessor.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/Preprocessor.scala index 33d82eb6..0163f0b8 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/Preprocessor.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/preprocessing/Preprocessor.scala @@ -30,7 +30,9 @@ object Preprocessor { RealignIndels, BaseQualityFilter, ClippingFilter, - DuplicateFilter) + DuplicateFilter, + MappingQualityFilter, + MateRescueFilter) def apply(rdd: RDD[AlignmentRecord], stageName: String, @@ -41,7 +43,14 @@ object Preprocessor { val stageConfig = config.configurationAt(stageName) // find and run stage - val stage = stages.find(_.stageName == stageAlgorithm) + val stage = stages.find(_.stageName == stageName) + println("STAGE") + println("\n") + println("\n") + println("\n") + println("\n") + println("\n") + println(stage) assert(stage.isDefined, "Could not find stage with name: " + stageName) stage.get.apply(rdd, stageConfig) diff --git a/avocado-core/src/test/resources/NA12878_snp_A2G_chr20_225058.sam b/avocado-core/src/test/resources/NA12878_snp_A2G_chr20_225058.sam index 50cd1fe1..057bcb6a 100644 --- a/avocado-core/src/test/resources/NA12878_snp_A2G_chr20_225058.sam +++ b/avocado-core/src/test/resources/NA12878_snp_A2G_chr20_225058.sam @@ -1,5 +1,5 @@ @HD VN:1.0 GO:none SO:coordinate -@SQ SN:20 LN:63025520 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens +@SQ SN:chr20 LN:63025520 AS:hg19 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:20FUK.1 PL:illumina PU:20FUKAAXX100202.1 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI @RG ID:20FUK.2 PL:illumina PU:20FUKAAXX100202.2 LB:Solexa-18484 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI @RG ID:20FUK.3 PL:illumina PU:20FUKAAXX100202.3 LB:Solexa-18483 DT:2010-02-02T00:00:00-0500 SM:NA12878 CN:BI @@ -17,93 +17,93 @@ @RG ID:20GAV.7 PL:illumina PU:20GAVAAXX100126.7 LB:Solexa-18483 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI @RG ID:20GAV.8 PL:illumina PU:20GAVAAXX100126.8 LB:Solexa-18484 DT:2010-01-26T00:00:00-0500 SM:NA12878 CN:BI @PG ID:BWA VN:0.5.7 CL:tk -20GAVAAXX100126:1:28:3151:64774 83 20 224959 60 101M = 224659 -400 CATCAGAAAACTAAGAATCAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGG CE;D;5<7@>A9A:DD=?BDEBEDD?EBD@F?EEDEED8ACCC?@?B4A:5? MD:Z:10G86A3 PG:Z:BWA RG:Z:20GAV.6 AM:i:23 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:0<::>2:3=?9==:A<=<@@@>@@9>?:6:<74?@B7;BBABBA9?@AB;;?B=@51@ UQ:i:63 -20FUKAAXX100202:7:1:3289:160642 163 20 224962 60 101M = 225250 388 CAGAAAACTGAGAATCAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCC ?BCCBCECFDFEFFEEFEEEFEEEFFEEEDFEEFFFEEEDDFEDEEEFEFFEDFEEDEBEBFADCEEEEEC?CCDCCCDEEBDDECEEEDCEED?9EFEFBDEFDGEGDC@ECDFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHGHHHHHHHIHHHHHHHIIHHHGFHHGIIHHHHGHGHHIHHH UQ:i:0 -20GAVAAXX100126:5:43:18525:141692 163 20 224976 60 91M10S = 225253 377 TCAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCCTTAAAT <@BDBAEAEC?FB8EF?@D@:?C=DDDB;CDEFFGDF9.*BCE97=2:*9:BE@0BE3C59D########### MD:Z:82A8 PG:Z:BWA RG:Z:20GAV.5 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z::@?B@CBC647@<23;15)73@?<2?A1>66@########### UQ:i:15 -20GAVAAXX100126:6:42:3292:192725 163 20 224977 60 101M = 225216 339 CAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATT ?BDCBCDDEEFFDDEGEEFFFEDEEEFEEDEEFEEFDEDDCDECDAECE?EEEDECDCCFECEEEEEEEEBDEAACAEDEADEDCC=CCECDCEDDCDB@A MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBCBBBBBBBBBBBBBBABBBBBBABBBBBBBBBBBBBBBABAB@BABAB@B@BBBABBB@BBBAAABA@@@@B@@@A@A@A@@@@?@@@@@@;=@ UQ:i:0 -20FUKAAXX100202:8:6:15052:143441 611 20 224978 60 101M = 225303 425 AAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTC :=>??>>>A??===A?>A?;9<89(C=>>=>A??>??AB>>=B?B>C>48::B@@>@A?@B?B@=>B?DAC<9<:>?:B@DB<=??<> MD:Z:80A20 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:EAFFEEEEEEEEEEEEEEE>:@AA2FEEFEEEEEEEEEFEEEEEEEEEEEEEEEG=2==@EEEEEEEEEEEEEEEEEEEEAA?BDDDEFD2><>CEEEBEE UQ:i:24 -20FUKAAXX100202:2:45:14286:70208 163 20 224980 60 101M = 225299 419 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA ?BDCABEEEEEDFEEFFEECEDEFEEEEEFBFFEDFEEDECECFEEDEFDEEDEEEEEEEEEEEEEFEEECEECFEFC9EFEE=EEFE@EFDFEFFDDB?9 MD:Z:78A22 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHCHHGHHHHHHIB; UQ:i:24 -20FUKAAXX100202:5:7:2185:179696 83 20 224980 60 101M = 224722 -358 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA 5=B>F?B7=685?@C@FBCBBBACBACCCDDDABCEAEBEEFBDCEEEEEEECEEAEABA=DE?ECEA=EEEE=EEEFCFEFGEFFEFECBA MD:Z:78A22 PG:Z:BWA RG:Z:20FUK.5 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:4EEDAEB);>6DAGAG64650EED?HFGFFGGDFEGFFIGIEGGGFHGGHHIGHHHHHHHHHHHFHFFF=GHDHFHEHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:36 -20FUKAAXX100202:6:25:3074:166692 83 20 224980 60 101M = 224678 -402 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAA B@BCDBEBGGFDDFEEEF>EEFEGEEEEBEEEF@EFFFE>ECEFEEE@BEFCECEEEEEEE=EDAEEEEAEE?EEEEEFEEE=EEEFCFEEGEFFEFECBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HGGGGFIEHHGIIIHIIIEIGHHHGHHHFHHHHDHHHHHAHGHHHHHFFHHHHHHHHHHHHFHHFHHHHEHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:4:64:3230:34731 99 20 224981 60 101M = 225296 415 GATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAG ACCDEFFEFFEFEEFFFEEFEEFFEDEEFEFFEEFFFDECECFFDEDFDEEBDDEEEEEEDDDEEFEEECEECFDFC=EEEE=EEFEEEFEFEFFDECDC< MD:Z:77A23 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHB UQ:i:28 -20FUKAAXX100202:7:22:6194:63085 163 20 224984 60 101M = 225196 312 AGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTC ?BDDABDDFDEFFFEEECEEEEEEEFEFFEEFEEDECECFEEDEFDEEEEEEEECEEEEEEEFECECEECFEFCFEECE=DEFEEEDFFFGEDFCEFDBCA MD:Z:101 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHFHHHHHHHHHHHHFHHIHHHIHEHHHHFFHIHHHFIH UQ:i:0 -20GAVAAXX100126:6:21:19774:137536 1187 20 224986 60 101M = 225252 366 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCA >CDC>ADDEEFEEADECFDFGFFGFGEEFGDDFEAEAC:AEGAFDDEECGBDAE@8AC:@EE@EEADDC*DD>D?DA>DB@A MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:ABCAC@CCBCBAC?AB@BCCCCCCCCACCCACCBCCC?=@?1@CBB@CBBCCBBCC=@CCCCCCBAABCCBCB?;?@>CBBCACCBB@*=@CC8@C:?B:C UQ:i:0 -20GAVAAXX100126:6:46:1647:72507 163 20 224986 60 79M22S = 225275 389 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCA ?CDAABDDEEFFEBEEEFEEEEEFEFFEDFEEBD@D=EBA=EFDAE@DD@CADAFDA@?BEAD;<9B@E;856>/45;####################### MD:Z:72A6 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBCCBBCBBBBBBABBBBBBBBBBBBBBABBB@AAA@AB@@BBB@B@B@@@@A?BB>??@A@@:74;@?9739>444@####################### UQ:i:21 -20GAVAAXX100126:6:63:8325:180406 163 20 224986 60 101M = 225252 366 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCA ?CDCAAEDEEFFEBEEEFEEEEEFEFFEEFEEEEBEBFBDCEFDDE@DDBFCFEDDCCDEFDFECCFBFAAA@ABA?@A7?@>A<7=;?>@>@@ UQ:i:24 -20GAVAAXX100126:1:28:6582:10746 83 20 224988 60 101M = 224670 -418 TTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAG CDCDDFDFFF?FCEEDFEEEECFFFEEFFFDBDDADD@EEFDDCDCAEEEEEFCFECDEEEAEDBEEDB:EEEE=EEDFCFEEGEFFEGGEFFEEF@/?A> MD:Z:70A30 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHFHHHHHHHEHEHHHIHHHHGHHHHHHHHHEHGFGFEHHGHFHHHFHHHHHHHHHGHHHHEHHEHHHFGHHHHHHHHHHHHHHHHHHHHHHHHHH@3@@@ UQ:i:36 -20FUKAAXX100202:1:27:16949:104551 99 20 224990 60 101M = 225281 391 TCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ABDDEFGGEEFEEFEEEEEFEFFCEFEEEFCECFEEEEFDEEEEEEEEEEEEEEEEFEEECEECFEFCEDEEE=EEFEEEFEFFGCEBEFGF?FEEAFB@? MD:Z:101 PG:Z:BWA RG:Z:20FUK.1 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHIHHHHHHDGGHHHHBGHH@HGEC UQ:i:0 -20FUKAAXX100202:8:67:15860:121961 163 20 224990 60 101M = 225291 401 TCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGAGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ?ADCACEFEDEDEFEEEEDEEFFEEFEEEECECFEEDEFDEEEDEEEEEBED=EEEFEEECEECFEFC;>ECE=EEEECCDEFDDCEFBFGE?E?CBC@CB MD:Z:52T15A32 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHH@HHHHHHHHHHHHHHHIEHFHHHHFHFIDHHGFEHHGHHHBFEFCFGFH UQ:i:54 -20FUKAAXX100202:7:27:19938:190465 1171 20 224990 60 10S91M = 224722 -358 GGATAGATTTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA ###########=8A?A:=+:,C>B;CBB@9DEB>AFCBCCC9<:<9>@=@=7;8@<=:35?05643(78)E=CEEFCFEEFEEDBBCBB? MD:Z:68A22 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###########@;@AA54/5-FBD:FFFC5FHEAEHGEFFG45554DD@D;@<@?@444-4AAAAA445044/55578>=0HHHHHHHHHHHHHHHHHHHH UQ:i:22 -20FUKAAXX100202:6:23:8481:198660 147 20 224993 60 10S91M = 224653 -430 TAGAATTTCTAGAAAGTCCCTCCCCCTAAGGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCT ###########==>A;1",,4'66789:<+,64:->563/;=:855<63>;:@;=?7;73;,=;=:27>7:;>?5?C9E==EEFCFDDFEEEEFEBADBB? MD:Z:7T3T7A45A25 PG:Z:BWA RG:Z:20FUK.6 AM:i:25 NM:i:4 SM:i:25 MQ:i:60 OQ:Z:###########A@@?<1&1*2-44535550.385/403,/5=<=83<3:>;<6:9? MD:Z:101 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHHHHHHHHHHHHHHHBCHHGIGGGIEHIFHFIIFHIHGIGGGFFEGCGBGBG@?38998E UQ:i:0 -20FUKAAXX100202:7:67:14544:95064 83 20 224996 60 101M = 224692 -404 AAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAAC C?:??AAD;FDFCBCF?CCCDDEDEBEFEBEEDBECECEEDE?EECEEEEEEECEE;9;=>>FEEE=DEEFCFCEFEEEEFFEEFEEFEEFFEFEEDEDBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HCDDG8HGHFEFHAFFFFFHGHCHHHFHHFFGHHHHHGHEHHHHHHHHHHGHHBBBD@EHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:8:61:20600:128532 99 20 224998 60 101M = 225318 420 GTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTC AAD5?FDAEDDDEEE?>?BCDECFEEECDECFDB=C?:?>=58699FBEBCB MD:Z:58A42 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHHGHHHHHHHHHIHH@24444HHHIHH UQ:i:28 -20FUKAAXX100202:8:6:5363:193841 99 20 225000 60 101M = 225306 406 TCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTT AACEEEEEEFEFFEEFEEEECECFEEEEFEEEEDEEEEEEEEEDEDFEEECEECFEFC=EEEE=EEEEEEEEEEFFEFEFGEEFDEEECEDFDEFECDCCB MD:Z:58A42 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHIIHIIFGFGHIIIHGHIF UQ:i:28 -20GAVAAXX100126:1:42:8423:49219 163 20 225001 60 101M = 225287 386 CCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTG ?BE@BACDFEFFEFHDDEGEGEHDEFGHFFFDEDEGGGEGEEEEEFFFFD>FEGDFD=DEEE=EFFEEEFFEBEEGEEC9AEFDEDFBDDECEC>=EBDB@ MD:Z:57A43 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCCBCCCCCCCCCCCCBCCCCCCBCCCCBCBCBBCCCBCCBBBCBCBCB9BCBBBBBABBBBBCBBABBBBAAABBB>5>BAAAAA>?A@BA>69?A@AA UQ:i:28 -20GAVAAXX100126:8:42:10420:86541 163 20 225001 60 101M = 225273 372 CCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTG ?CDA@BCDFEFFDCGDDEFCEEGDFEGFFFDDEDDBCBCDBBDFBFEFECAECCDEC=CE?D;@D>BCDCCE@@EDC=DC@=D5CBDB=ACAEBC?DBCBA MD:Z:57A43 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCCCCCCBCCCBCBACCCCBCACBAACBBCBCBBA@BCBBBBBBABBAAABB@B?A@@@8BBA?AB=>AAB<@@=<<1?<>?<;@@A><>A@@@B UQ:i:28 -20FUKAAXX100202:8:4:8272:152285 99 20 225004 60 101M = 225283 379 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA AACDDFEGFEEFFEEFCFCFEEEEFEEEEEEEEEEEEEEDE>FEEECEECFEFCEEEEE=EEEEEDEEEEFFEEEFFCEFEEFG=FFFDFGGDGEFEDDDD MD:Z:101 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHH?IHIHHHHHHHHHHHHH UQ:i:0 -20FUKAAXX100202:4:43:20720:97549 1107 20 225004 60 101M = 224727 -377 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA 0CDE;EFFFEE=GEEEA?AAAACEFB;<>?24>;=49593<@?FEDE=CEEFCFEEFEEEEFFEEFEEEEEEFEFEEEFFEEFEFEDCBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:1HHH6HHHIHIBHGHGDBEDBFGHHFADCCAF=G>GFDCDD8:A>?44551DEDHHHHHGHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20FUKAAXX100202:6:41:13009:27153 83 20 225004 60 101M = 224727 -377 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA AADE@?FFEBDAADEDCEEDEEFEDEFCB?EEEEEEECEEEEEEEE@@;ABDEDFEDE=DEDFCFEEFEEEEFFEEFEEFEEEFEFEEEFFEEFEFEDCBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:EFHHEBHHFEGEEFHEFGHGIHHHFHHHEFHHHHHHHHHHHHHHHGFFBFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:8:61:9736:179657 99 20 225008 60 101M = 225317 409 CTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAA ACCCFEEFFFEFCFCFFEEEFEFEEEEEEEEEEDDDEEFEEECDECFDFCEEEDD=EEEEDDEEFEFFEFDFFEEFDEFFEFEFCFFFCFEFFEFFEB>C= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFGHG UQ:i:0 -20GAVAAXX100126:8:42:20446:128709 83 20 225008 60 101M = 224740 -368 CTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAA ABAEEDDFBFEEEEEFFEBBF>FAECBEECEEEAEEEEEEEAEDDEADDDFDEE=DEDFCFDEFEFFEFFEFFEEFEEFFEFEEEFFEEFEFFBBFFCDCA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:IHFHHHIHGHHHHHHHHHFGHEHGIHGHHGHHHIHHHHHHHEHHHHDHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHH UQ:i:0 -20GAVAAXX100126:1:65:2583:115560 163 20 225015 60 101M = 225303 388 TTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCC ?@ACAADBFDDEFGFFEEEEEGGFGFEEEEFGEFECEFCED@B=EEEE=DEEDCEEFEEFFFECCECDFEDECCCC9;BECAFBFDCECCCEBCC114A@@ MD:Z:43A57 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCBCBBCBBBBCBCBBBC@BCCBCBBBBBCBBBBBBBBAA9ABBBBBBABAA4:<@?@@?@A@@=?>@=>>1-/>>@ UQ:i:28 -20GAVAAXX100126:2:27:15261:77255 595 20 225015 29 36S65M = 224613 -466 AGGATAGAATTTCTAGAAATTTCCTTCCCCTAAACCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCA #####################################?97=;<<=>;8>/?@;>*<))<+==:<;'><>;+7>;==??/A<=::<971<@5A@AA@A+AA6 MD:Z:65 PG:Z:BWA RG:Z:20GAV.2 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:#####################################A:66?@?CABBCDDFEDECEDFGFFDDE:CFFGFFFEEEBFDFAEC>DECFCEDDECDEDECGCEEFDDD7??AECEDFBC7DECCA6AD=@ACDDBBA MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:AACCCBBCBBC@BBCBCCCBAAB2@CCCCBCCBBBBBBBCB>BBBBBBB>9@BCBAABAAAABAB?AABBBA3@>>BBBAB>@1@A>?=/>A7:FDFFFFFFEDEEFGEDECEEBEBBB?BCBE9CEEEDBEDBDFADEDCDD@ED?CECCDD?DB@@EACACEDEDB9DBADCDBCDA>? MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBBBBBCBCBB@CBCBCBCBB@BBCBBABBBBBBBAB@AAABAABAB?BBAAAB@@????@@@>>7?@>B@A???A8? UQ:i:0 -20GAVAAXX100126:8:67:15419:163515 99 20 225019 60 101M = 225314 395 ACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGG A@D@FFEEEFEFEEEEEEEEEEEEEEEFEEECDECFDFCEEEDD=DDEEDDEEFEFFEFDEFEEFDEEFEFEFCFFFCFDFFEFFFFFFBCCEFE@DECBB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHHEHHHHH UQ:i:0 -20GAVAAXX100126:4:25:4357:32084 83 20 225019 60 10S91M = 224705 -404 TAAAGCTTTCACACTTGCCTCAGTGTATATATGGGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAG ###########A7B=D8+:9;<,*%9.8-1:-:(;1>;=5653===DD>DDDE=DEDDCFDEFEFFEFFDFFEEFDEFFEFEEEFBEEFEGFBBFFEBBB? MD:Z:23T67 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###########F9F@F4345542+)424/85-4*514446042555HHDHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHCGHHFAFFF UQ:i:7 -20FUKAAXX100202:8:42:12533:83073 99 20 225020 60 101M = 225329 409 CACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA ABAEEEEEFEFEEEEEEEEEEEEEEEFEEECEECFDFCEEEEE=DEEEEDEEEEFFEEDEFEEFDEEFEFEFCFFFCFEFFEFGDGGGACFEFFEEEDCCC MD:Z:101 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHEGHHHHHHHHHHH UQ:i:0 -20FUKAAXX100202:4:28:14874:15140 83 20 225020 60 101M = 224800 -320 CACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA >6;:09BBA?D>C;CC9;<7548===.155782<@>?>A@AC=EEEFCFEEFEEEEFFEEFDEAEAEFDFEEEEEEEBEFE==EFEEEFEEEFEFEEECAA MD:Z:101 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:?455.4FDB?DDD?FD44461<:C<=22504AACCCFFGHHHHHHHHGHHHHHHHHHHHHEHFHHHHHHHHHHHFHHHEEHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:3:63:21001:169004 99 20 225021 60 101M = 225272 351 ACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAG A@DEDEEFEFEEEEEEEEEEEEEEEFEEECEECFDFC=EEDD=DDEEDDEEFEFFEFDFFEEFDEFFEFEFCEFFCFDFFEFFFFFC@?EBFEEDEDDCCB MD:Z:37A63 PG:Z:BWA RG:Z:20GAV.3 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFECHFHHHHHHHHHH UQ:i:28 -20GAVAAXX100126:5:21:14804:97200 147 20 225021 60 9S92M = 224706 -406 AGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCA ##########D@=>AA6=;A7:>AABBBBABBABBBBACBBBBBBBCCCCBCACBCABBCBBCBBCCBBBBBBB UQ:i:35 -20GAVAAXX100126:1:44:4735:79229 83 20 225023 60 101M = 224768 -355 TTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAA CB:>=D@=?DB@CCB?;789=819;99>>??<@?;?EEC=EEDFAFDEFEFFEFFDFFEEFEDFFDFDEDFFEEFEFFCCFFFFFFEEEFEEEFFEDACCA MD:Z:101 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HDBDBDF@DGGDFGDG=;=;>53445:DDAABCA@5HHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20FUKAAXX100202:6:41:11198:131837 163 20 225025 60 101M = 225350 425 GCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG ?ABB>BDD?DBBD=EEBDEECFEECADECFEFCEEEDE=DEEEDDEEEEFFEEDEFEDFDDEFEFEFCEFFCFEFFEFFFGGGDCFEFEEEEEEEFDEAD= MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHIIHHHGHIGHIHHIHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHGHHHHHHGHG UQ:i:0 -20GAVAAXX100126:6:7:12934:132509 99 20 225025 60 101M = 225345 420 GCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG @B?EDFEEEEEEE3EEEEEEEFEEECEFCFEFC=EEED=EDEEDDEEFEFFEFDEFEEFDEEFEFEFCFFFCFDFFEFFFFFFAAEEFEEEFEEEEDEDCB MD:Z:33A67 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHH8HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHH UQ:i:28 -20FUKAAXX100202:3:6:15018:84106 83 20 225025 60 101M = 224759 -366 GCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG 9?>?@B8;9;>=?>?A78<5@=B>8B<999;=@BC7E=CEEFCFEEFEEEEFFEEFDEFEEEFDFDEEEEEEFEFECAEEEEEFEEEFEFEEFEEFDEDCA MD:Z:101 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:;D<@B>::4:8===>>>5@:>ADB2F46554@DCG;HHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:4:6:16280:102045 163 20 225026 60 101M = 225252 326 CCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGC ?BDCCBDCCDFEFEFEDEECFEF4BDCDEDC;A?ACB5DDDB3>DBDDFEFEBCA>DE>9:=AADEC?D4?BBFADC8@?CE=DCD7>CAB>AA?; MD:Z:101 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCBAA@CABCBCBBBBBBABBB7AA?B?A>=@?@B@>BAA@4?@@BABABB@A;=AA=:5>?@@A?=?A@;@=6:=@?;A>=37@:@::::: UQ:i:0 -20FUKAAXX100202:2:42:3257:33410 99 20 225027 60 101M = 225299 372 CTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCA ACCEEEEEEEEEEEEEEEEFEEECEECFEFC=EEEE=EEEEEDEEEEFFEEDEFDEFDEEFEFEFCEFFCFEADEFGGGGF;AFBFF:EFFFF>E>DCA?C MD:Z:31A69 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHBEHFHH;HIHHH=H:FFGDH UQ:i:28 -20GAVAAXX100126:7:1:21210:15897 99 20 225028 29 101M = 225313 378 TCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAA 17=?>9;??=?=>5@<92?9B=:-1=2<;?ACB@>CC,=;;ECA>DBD>ADA=A@<105(;;88<8;48:999969< MD:Z:30A70 PG:Z:BWA RG:Z:20GAV.7 AM:i:29 NM:i:1 SM:i:29 MQ:i:29 OQ:Z:55555@@??DD=FD>22D@D99IF>CCFCCCG=7==DCGCCCGF@GCGG8CG4-454GGDDGGGEGGD?DC?115-5555555554444455? UQ:i:24 -20FUKAAXX100202:3:25:16247:13724 99 20 225030 60 101M = 225273 343 AGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA ABCDEEFFEEFEEEEEFEEECEECFEFCEEEEE=EEEEEDEEEEFFEEEEFEDFDEEFEFEFCEFFCFEFEEFFFFGGC@FCFEEEGFFEFFAGFDDDE?C MD:Z:101 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGEHFHHHHHHHIIHBHHIHIHGH UQ:i:0 -20GAVAAXX100126:6:64:9301:93116 163 20 225030 60 101M = 225336 406 AGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA ?BBBAACDEEFECDEFGFFFEEFCFEFCFEEEE@B@@@A@A?<0>A?@??88 UQ:i:0 -20FUKAAXX100202:6:4:19457:82619 83 20 225030 60 101M = 224720 -410 AGTGTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA A>B<>DBFCCF;57:<993"6>5=>>=E>EAE=EEEFCFEEFEEEEFFEEFDEFEEEFDFDDEEEEEFEFEBAEEEEEFEEEFEEEEFEEFEFFFEDECBA MD:Z:19A81 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:EFDAAFFHFDH42554452*44.554BH?HEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:1 -20GAVAAXX100126:4:5:21339:99108 163 20 225032 60 84M17S = 225350 418 TGTATATATGTGGCTATACCACTTACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATG ?BBCB@CCEDCCA8EEB;?CBBD*@=;DB9C;>??CA@2CD@6>D7A<66>BB=?@>@B7;?>=07>997@B?8>@>A>0<<@A?7?BB=-7<>>9<>@A>>2>?>38A8<:03=@A=<;;:>76<9<-2:864>1<?>>:E=EEDFCFEEFEEEEFFEEFEEFEDEDEFEEEEEDEEDCABCB? MD:Z:15A2A52 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:###############################CDDDE?=7>;265++24,555AA>A@HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:3 -20FUKAAXX100202:6:23:8496:198647 1171 20 225038 29 55S46M = 224653 -430 AGTAATTGTAAGAACTGCCCTCCCCCTAAAGCTTCAACCCTGGCCTCAGTGTATATATGTGGCTATCCCCCTGACGGCCCGCCAGTCATTAAATTCAAGCT ########################################################>?;@?8=;=:&7+5::;2.08(7;>BEFCEDDFEEEEFEB>DAB? MD:Z:11A2A5A1G23 PG:Z:BWA RG:Z:20FUK.6 AM:i:29 NM:i:4 SM:i:29 MQ:i:29 OQ:Z:########################################################AAADA544440415555055F7BFBGHHHGHHHHHHHHHHFHFHH UQ:i:47 -20GAVAAXX100126:2:63:4742:63598 99 20 225039 60 101M = 225351 411 ATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTC ABCCDEEFEEECEFCFEFC=EEEE=EEFECDEEFEFFEFDEF>DFDDEFEFEFCEFFCFDFFDCFFFFFBCEBEEEEFEEEFEFFFEEFCCFBEFBEEDCA MD:Z:19A81 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHGHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHEHHHHGHHHHHHHHHHHHHHHEHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHGHHHHH UQ:i:28 -20GAVAAXX100126:8:63:8623:62616 147 20 225041 60 7S94M = 224719 -415 TATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGAT ########DC:D5/7-C?A@A;-1D?09;DCEGCBEF@EBCFFFEDGBEECEDFEF=DDDD@CD?EDEDEDDDDFGFEGFFFEGFFGBCCFDDB>AACCC? MD:Z:17A76 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:########??:?5-2'@;6>>;'4B<0:@?BBBBBBBCBCBBBBBCBBBBBBBBCBCBBBB@BBCBBBCBCCCCBCCBCCCCBCCCCCCCCCCCCCCBCCB UQ:i:35 -20GAVAAXX100126:4:43:2300:91558 163 20 225042 60 89M12S = 225276 334 TGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACA ?BCBBACDCEECFEFC=EECEEA>9AD?############# MD:Z:16A72 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBBCCCCBBBBBBBBBBBBABAB@AA>BBBBBBBBBBB?A@BBAA@B@B?AAAAA@BABA?@?@@?A@>@?@=;@?@?>@:@?86<>@############# UQ:i:28 -20FUKAAXX100202:1:6:14171:30403 83 20 225044 60 101M = 224737 -407 GCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATA =<44:;BA@BC?@9DD>B=DFEFCFEFFEEEECFEEFEEEEEEFEFDE@EECEFEFEEBDCBB MD:Z:101 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHDHHHHHHHHHHHHHEBHHHHHHHHHHHHBHHGHHHHHHHHG?DDDDHHHHHH UQ:i:0 -20GAVAAXX100126:6:4:13415:44301 99 20 225045 60 101M = 225370 425 CTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATAT 6>2;>:AB@?9?:BADD=:DD@>@B0;;9;@??=@@=>@=?A@>>C?A7B78=:=@>A@>?BB=>??9<8;56;;;;<9;67=::=2@><<1C=BA=> MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:>>>>A>?CCA@>>CCGGEFGG?ADG25555DDDDDDDDDDADDD8AC>C7DDDFDG UQ:i:0 -20GAVAAXX100126:2:46:16245:173351 163 20 225049 60 101M = 225344 395 ACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTA ?@CC@DCDCEEEEE=EEFEDEFFFEGFFEEF?@A@@@@AAB=@>>A@@??AA@;A?@A UQ:i:0 -20GAVAAXX100126:3:62:9519:106030 147 20 225049 60 29S72M = 224778 -342 CACTTGCCTCAGTGTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA ##############################A:36><=:9<9D8ADCECEDFFDDDEFEECEDEDDEDGFFDDEEDAEFEFGEDDDDDCGDDDFCDABDCB? MD:Z:0A71 PG:Z:BWA RG:Z:20GAV.3 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##############################?6+2787:1;7A>=AABBBBBBBBBBBBBBBBBABBBBCBBBBBC:B@BBCBCCCCCCCBBBBCBCBBCCB UQ:i:2 -20FUKAAXX100202:2:64:13645:16057 147 20 225053 60 101M = 224750 -403 CTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTAACTT :175,6>)2>D@ED@FEEGDFEEFFEEFDEFEEEEEDACBC;A@@AFDBACEEEEFCDDDCEEEFEDFDEBF=EEEECEDDEFEFFBBBA>E@EEBABCC= MD:Z:5A95 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:525314C-:HFEHEEHHHHGHHHHHHHHGHHHHHIHFFFFF@DDADHGGGFHHHHHIHHIIHHHHHHHHHHHEHHHHIHHGHHHHHFFFGFHGHHHHHHHD UQ:i:21 -20GAVAAXX100126:7:45:18980:156799 1683 20 225054 60 50S51M = 224715 -389 TCCCTTAAAGCTTCCACACTTGCCTAGGTGAATATAGGTGGGTATCCCGCTGACAGGCCCCCAGTCATTAAATTCAAGCTCCAAGAGACAAACCCTTGAAA ###################################################BBB7.;:4$2DB> MD:Z:9G33T7 PG:Z:BWA RG:Z:20GAV.7 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:###################################################@:>50:;4(6B?BAAA?BBCBBBA=A=@2BBB:B?B:C@BAA+AA6:?A? UQ:i:10 -20FUKAAXX100202:5:41:18178:6583 83 20 225056 60 66S35M = 224710 -380 TCTAGAAAGTACCTTCCCCTAAAGCTTTCACTCTTGCCTCAGTGTATATATGTGGGTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG #########################################################################################;>=C=:C?DBD4 MD:Z:35 PG:Z:BWA RG:Z:20FUK.5 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:#########################################################################################@@4455444455 UQ:i:0 -20FUKAAXX100202:5:41:18201:6601 1107 20 225056 60 66S35M = 224710 -380 TCTAGAAAGTTCCTCCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ############################################################################@>@;B6A==C<:==AFAEC555555455445455 UQ:i:0 -20FUKAAXX100202:8:28:5396:197191 147 20 225056 60 45S56M = 224733 -378 AACCTTTCACACTGGCCTCAGTGTATATATGTGGCTATCCCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGC ##############################################4EECE=EEEFCFDEFEEEEFFEEFEEFEDEFDFDECEEEEFEFE@?EEDBADBA? MD:Z:2A53 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##############################################BHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHH UQ:i:36 -20FUKAAXX100202:3:63:10698:105044 83 20 225057 60 15S86M = 224789 -353 TGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACA ################DE=FEEEEEFEDDFEEEEFDDEABCDA? MD:Z:0C0A68 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:################################A=18GEHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHFHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:30 -20GAVAAXX100126:2:63:19101:65384 147 20 225057 60 48S53M = 224707 -402 TAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAG #################################################CD:D9DEDF>FEFFC>DEDFCDGEEEFFEHFHFDFDDDEFEFEDAA?>AAD? MD:Z:1A51 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:#################################################@B>A@BBBBCBBBBBCCCBBCCCBBACCBCCCCBCCCBCBCBBCCCCCCCCB UQ:i:34 -20GAVAAXX100126:8:63:19683:109321 147 20 225057 60 59S42M = 224677 -421 GTTCCTCCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTC ############################################################DD7B4ADEFCFEFFEDDFFEFDFFEEFBCGEFDCCBBBCB? MD:Z:1A40 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:############################################################BB;A@ABBBBBBBBBCCBBCCCBCBACCCCCCCBBBBBBBB UQ:i:35 -20GAVAAXX100126:6:47:6085:14272 163 20 225058 60 101M = 225307 349 GGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTAACTTGGTTT ?BCCB;CDEEDEFFDFGGGDFFGFEFFEFFEFEEAEEF?FAFDDEEBEFDFEEEFEDDEDEDEEEEFEEEDCECBDDECEDDFDEC=ECEDDECEDCCCAA MD:Z:0A100 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBABCCCCCCCCCCCCBBCBCBBBBBCCBBCBCBBCBBCBBBBBBBBBBBBABBABAABBBBBBBBB@AA@BBABBB@:B@A@B@BB@BA;?B UQ:i:30 -20FUKAAXX100202:4:47:20584:49257 83 20 225058 60 10S91M = 224761 -387 TACCACGGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTT ###########D=C=EEEFCFEEFEFEEFFEEFEECEEEFEFDEEEEDEFEFEBBEEEEEFEDEFDEEEFEEFEEEFEEEEEEEEEFFFGFEEEFEEFDCA MD:Z:91 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###########I@FIHIHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHIHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20FUKAAXX100202:7:65:8106:6840 83 20 225058 60 9S92M = 224798 -351 ACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTA ##########?>DEFEFEDEEEDEEEFE>AEEEEEFEDEFEEEEFEEFDEEFEEEEEEFEEFFFFFEEEFEEFFEBA MD:Z:92 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##########@AGGDEIHHHHHHHHHHIHHHHHHHHDHHHHHIHHHIHIHHHEGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 -20GAVAAXX100126:2:46:5468:93666 147 20 225058 60 27S74M = 224757 -374 GTTTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACAT ############################B3CEBCFEEEEEDDEFEEEGFEFEFFEFEDDECAABBC? MD:Z:74 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:############################A3A?CE;D;5<7@>A9A:DD=?BDEBEDD?EBD@F?EEDEED8ACCC?@?B4A:5? MD:Z:10G86A3 PG:Z:BWA RG:Z:20GAV.6 AM:i:23 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:0<::>2:3=?9==:A<=<@@@>@@9>?:6:<74?@B7;BBABBA9?@AB;;?B=@51@ UQ:i:63 +20FUKAAXX100202:7:1:3289:160642 163 chr20 224962 60 101M = 225250 388 CAGAAAACTGAGAATCAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCC ?BCCBCECFDFEFFEEFEEEFEEEFFEEEDFEEFFFEEEDDFEDEEEFEFFEDFEEDEBEBFADCEEEEEC?CCDCCCDEEBDDECEEEDCEED?9EFEFBDEFDGEGDC@ECDFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHGHHHHHHHIHHHHHHHIIHHHGFHHGIIHHHHGHGHHIHHH UQ:i:0 +20GAVAAXX100126:5:43:18525:141692 163 chr20 224976 60 91M10S = 225253 377 TCAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCCTTAAAT <@BDBAEAEC?FB8EF?@D@:?C=DDDB;CDEFFGDF9.*BCE97=2:*9:BE@0BE3C59D########### MD:Z:82A8 PG:Z:BWA RG:Z:20GAV.5 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z::@?B@CBC647@<23;15)73@?<2?A1>66@########### UQ:i:15 +20GAVAAXX100126:6:42:3292:192725 163 chr20 224977 60 101M = 225216 339 CAAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATT ?BDCBCDDEEFFDDEGEEFFFEDEEEFEEDEEFEEFDEDDCDECDAECE?EEEDECDCCFECEEEEEEEEBDEAACAEDEADEDCC=CCECDCEDDCDB@A MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBCBBBBBBBBBBBBBBABBBBBBABBBBBBBBBBBBBBBABAB@BABAB@B@BBBABBB@BBBAAABA@@@@B@@@A@A@A@@@@?@@@@@@;=@ UQ:i:0 +20FUKAAXX100202:8:6:15052:143441 611 chr20 224978 60 101M = 225303 425 AAGGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTC :=>??>>>A??===A?>A?;9<89(C=>>=>A??>??AB>>=B?B>C>48::B@@>@A?@B?B@=>B?DAC<9<:>?:B@DB<=??<> MD:Z:80A20 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:EAFFEEEEEEEEEEEEEEE>:@AA2FEEFEEEEEEEEEFEEEEEEEEEEEEEEEG=2==@EEEEEEEEEEEEEEEEEEEEAA?BDDDEFD2><>CEEEBEE UQ:i:24 +20FUKAAXX100202:2:45:14286:70208 163 chr20 224980 60 101M = 225299 419 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA ?BDCABEEEEEDFEEFFEECEDEFEEEEEFBFFEDFEEDECECFEEDEFDEEDEEEEEEEEEEEEEFEEECEECFEFC9EFEE=EEFE@EFDFEFFDDB?9 MD:Z:78A22 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHCHHGHHHHHHIB; UQ:i:24 +20FUKAAXX100202:5:7:2185:179696 83 chr20 224980 60 101M = 224722 -358 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA 5=B>F?B7=685?@C@FBCBBBACBACCCDDDABCEAEBEEFBDCEEEEEEECEEAEABA=DE?ECEA=EEEE=EEEFCFEFGEFFEFECBA MD:Z:78A22 PG:Z:BWA RG:Z:20FUK.5 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:4EEDAEB);>6DAGAG64650EED?HFGFFGGDFEGFFIGIEGGGFHGGHHIGHHHHHHHHHHHFHFFF=GHDHFHEHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:36 +20FUKAAXX100202:6:25:3074:166692 83 chr20 224980 60 101M = 224678 -402 GGATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAA B@BCDBEBGGFDDFEEEF>EEFEGEEEEBEEEF@EFFFE>ECEFEEE@BEFCECEEEEEEE=EDAEEEEAEE?EEEEEFEEE=EEEFCFEEGEFFEFECBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HGGGGFIEHHGIIIHIIIEIGHHHGHHHFHHHHDHHHHHAHGHHHHHFFHHHHHHHHHHHHFHHFHHHHEHHEHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:4:64:3230:34731 99 chr20 224981 60 101M = 225296 415 GATAGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAG ACCDEFFEFFEFEEFFFEEFEEFFEDEEFEFFEEFFFDECECFFDEDFDEEBDDEEEEEEDDDEEFEEECEECFDFC=EEEE=EEFEEEFEFEFFDECDC< MD:Z:77A23 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHB UQ:i:28 +20FUKAAXX100202:7:22:6194:63085 163 chr20 224984 60 101M = 225196 312 AGAATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTC ?BDDABDDFDEFFFEEECEEEEEEEFEFFEEFEEDECECFEEDEFDEEEEEEEECEEEEEEEFECECEECFEFCFEECE=DEFEEEDFFFGEDFCEFDBCA MD:Z:101 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHFHHHHHHHHHHHHFHHIHHHIHEHHHHFFHIHHHFIH UQ:i:0 +20GAVAAXX100126:6:21:19774:137536 1187 chr20 224986 60 101M = 225252 366 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCA >CDC>ADDEEFEEADECFDFGFFGFGEEFGDDFEAEAC:AEGAFDDEECGBDAE@8AC:@EE@EEADDC*DD>D?DA>DB@A MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:ABCAC@CCBCBAC?AB@BCCCCCCCCACCCACCBCCC?=@?1@CBB@CBBCCBBCC=@CCCCCCBAABCCBCB?;?@>CBBCACCBB@*=@CC8@C:?B:C UQ:i:0 +20GAVAAXX100126:6:46:1647:72507 163 chr20 224986 60 79M22S = 225275 389 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCA ?CDAABDDEEFFEBEEEFEEEEEFEFFEDFEEBD@D=EBA=EFDAE@DD@CADAFDA@?BEAD;<9B@E;856>/45;####################### MD:Z:72A6 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBCCBBCBBBBBBABBBBBBBBBBBBBBABBB@AAA@AB@@BBB@B@B@@@@A?BB>??@A@@:74;@?9739>444@####################### UQ:i:21 +20GAVAAXX100126:6:63:8325:180406 163 chr20 224986 60 101M = 225252 366 AATTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCA ?CDCAAEDEEFFEBEEEFEEEEEFEFFEEFEEEEBEBFBDCEFDDE@DDBFCFEDDCCDEFDFECCFBFAAA@ABA?@A7?@>A<7=;?>@>@@ UQ:i:24 +20GAVAAXX100126:1:28:6582:10746 83 chr20 224988 60 101M = 224670 -418 TTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAG CDCDDFDFFF?FCEEDFEEEECFFFEEFFFDBDDADD@EEFDDCDCAEEEEEFCFECDEEEAEDBEEDB:EEEE=EEDFCFEEGEFFEGGEFFEEF@/?A> MD:Z:70A30 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHFHHHHHHHEHEHHHIHHHHGHHHHHHHHHEHGFGFEHHGHFHHHFHHHHHHHHHGHHHHEHHEHHHFGHHHHHHHHHHHHHHHHHHHHHHHHHH@3@@@ UQ:i:36 +20FUKAAXX100202:1:27:16949:104551 99 chr20 224990 60 101M = 225281 391 TCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ABDDEFGGEEFEEFEEEEEFEFFCEFEEEFCECFEEEEFDEEEEEEEEEEEEEEEEFEEECEECFEFCEDEEE=EEFEEEFEFFGCEBEFGF?FEEAFB@? MD:Z:101 PG:Z:BWA RG:Z:20FUK.1 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHIHHHHHHDGGHHHHBGHH@HGEC UQ:i:0 +20FUKAAXX100202:8:67:15860:121961 163 chr20 224990 60 101M = 225291 401 TCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGAGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ?ADCACEFEDEDEFEEEEDEEFFEEFEEEECECFEEDEFDEEEDEEEEEBED=EEEFEEECEECFEFC;>ECE=EEEECCDEFDDCEFBFGE?E?CBC@CB MD:Z:52T15A32 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHH@HHHHHHHHHHHHHHHIEHFHHHHFHFIDHHGFEHHGHHHBFEFCFGFH UQ:i:54 +20FUKAAXX100202:7:27:19938:190465 1171 chr20 224990 60 10S91M = 224722 -358 GGATAGATTTTCTAGAAAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAA ###########=8A?A:=+:,C>B;CBB@9DEB>AFCBCCC9<:<9>@=@=7;8@<=:35?05643(78)E=CEEFCFEEFEEDBBCBB? MD:Z:68A22 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###########@;@AA54/5-FBD:FFFC5FHEAEHGEFFG45554DD@D;@<@?@444-4AAAAA445044/55578>=0HHHHHHHHHHHHHHHHHHHH UQ:i:22 +20FUKAAXX100202:6:23:8481:198660 147 chr20 224993 60 10S91M = 224653 -430 TAGAATTTCTAGAAAGTCCCTCCCCCTAAGGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCT ###########==>A;1",,4'66789:<+,64:->563/;=:855<63>;:@;=?7;73;,=;=:27>7:;>?5?C9E==EEFCFDDFEEEEFEBADBB? MD:Z:7T3T7A45A25 PG:Z:BWA RG:Z:20FUK.6 AM:i:25 NM:i:4 SM:i:25 MQ:i:60 OQ:Z:###########A@@?<1&1*2-44535550.385/403,/5=<=83<3:>;<6:9? MD:Z:101 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHHHHHHHHHHHHHHHBCHHGIGGGIEHIFHFIIFHIHGIGGGFFEGCGBGBG@?38998E UQ:i:0 +20FUKAAXX100202:7:67:14544:95064 83 chr20 224996 60 101M = 224692 -404 AAGTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAAC C?:??AAD;FDFCBCF?CCCDDEDEBEFEBEEDBECECEEDE?EECEEEEEEECEE;9;=>>FEEE=DEEFCFCEFEEEEFFEEFEEFEEFFEFEEDEDBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HCDDG8HGHFEFHAFFFFFHGHCHHHFHHFFGHHHHHGHEHHHHHHHHHHGHHBBBD@EHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:61:20600:128532 99 chr20 224998 60 101M = 225318 420 GTTCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTC AAD5?FDAEDDDEEE?>?BCDECFEEECDECFDB=C?:?>=58699FBEBCB MD:Z:58A42 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHHGHHHHHHHHHIHH@24444HHHIHH UQ:i:28 +20FUKAAXX100202:8:6:5363:193841 99 chr20 225000 60 101M = 225306 406 TCCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTT AACEEEEEEFEFFEEFEEEECECFEEEEFEEEEDEEEEEEEEEDEDFEEECEECFEFC=EEEE=EEEEEEEEEEFFEFEFGEEFDEEECEDFDEFECDCCB MD:Z:58A42 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIHHHHHHHHHHHHHHIIHIIFGFGHIIIHGHIF UQ:i:28 +20GAVAAXX100126:1:42:8423:49219 163 chr20 225001 60 101M = 225287 386 CCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTG ?BE@BACDFEFFEFHDDEGEGEHDEFGHFFFDEDEGGGEGEEEEEFFFFD>FEGDFD=DEEE=EFFEEEFFEBEEGEEC9AEFDEDFBDDECEC>=EBDB@ MD:Z:57A43 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCCBCCCCCCCCCCCCBCCCCCCBCCCCBCBCBBCCCBCCBBBCBCBCB9BCBBBBBABBBBBCBBABBBBAAABBB>5>BAAAAA>?A@BA>69?A@AA UQ:i:28 +20GAVAAXX100126:8:42:10420:86541 163 chr20 225001 60 101M = 225273 372 CCTTCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTG ?CDA@BCDFEFFDCGDDEFCEEGDFEGFFFDDEDDBCBCDBBDFBFEFECAECCDEC=CE?D;@D>BCDCCE@@EDC=DC@=D5CBDB=ACAEBC?DBCBA MD:Z:57A43 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCCCCCCCCCCCBCCCBCBACCCCBCACBAACBBCBCBBA@BCBBBBBBABBAAABB@B?A@@@8BBA?AB=>AAB<@@=<<1?<>?<;@@A><>A@@@B UQ:i:28 +20FUKAAXX100202:8:4:8272:152285 99 chr20 225004 60 101M = 225283 379 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA AACDDFEGFEEFFEEFCFCFEEEEFEEEEEEEEEEEEEEDE>FEEECEECFEFCEEEEE=EEEEEDEEEEFFEEEFFCEFEEFG=FFFDFGGDGEFEDDDD MD:Z:101 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHH?IHIHHHHHHHHHHHHH UQ:i:0 +20FUKAAXX100202:4:43:20720:97549 1107 chr20 225004 60 101M = 224727 -377 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA 0CDE;EFFFEE=GEEEA?AAAACEFB;<>?24>;=49593<@?FEDE=CEEFCFEEFEEEEFFEEFEEEEEEFEFEEEFFEEFEFEDCBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:1HHH6HHHIHIBHGHGDBEDBFGHHFADCCAF=G>GFDCDD8:A>?44551DEDHHHHHGHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20FUKAAXX100202:6:41:13009:27153 83 chr20 225004 60 101M = 224727 -377 TCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAA AADE@?FFEBDAADEDCEEDEEFEDEFCB?EEEEEEECEEEEEEEE@@;ABDEDFEDE=DEDFCFEEFEEEEFFEEFEEFEEEFEFEEEFFEEFEFEDCBA MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:EFHHEBHHFEGEEFHEFGHGIHHHFHHHEFHHHHHHHHHHHHHHHGFFBFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:8:61:9736:179657 99 chr20 225008 60 101M = 225317 409 CTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAA ACCCFEEFFFEFCFCFFEEEFEFEEEEEEEEEEDDDEEFEEECDECFDFCEEEDD=EEEEDDEEFEFFEFDFFEEFDEFFEFEFCFFFCFEFFEFFEB>C= MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFGHG UQ:i:0 +20GAVAAXX100126:8:42:20446:128709 83 chr20 225008 60 101M = 224740 -368 CTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAA ABAEEDDFBFEEEEEFFEBBF>FAECBEECEEEAEEEEEEEAEDDEADDDFDEE=DEDFCFDEFEFFEFFEFFEEFEEFFEFEEEFFEEFEFFBBFFCDCA MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:IHFHHHIHGHHHHHHHHHFGHEHGIHGHHGHHHIHHHHHHHEHHHHDHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHH UQ:i:0 +20GAVAAXX100126:1:65:2583:115560 163 chr20 225015 60 101M = 225303 388 TTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCC ?@ACAADBFDDEFGFFEEEEEGGFGFEEEEFGEFECEFCED@B=EEEE=DEEDCEEFEEFFFECCECDFEDECCCC9;BECAFBFDCECCCEBCC114A@@ MD:Z:43A57 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BCCBCBBCBBBBCBCBBBC@BCCBCBBBBBCBBBBBBBBAA9ABBBBBBABAA4:<@?@@?@A@@=?>@=>>1-/>>@ UQ:i:28 +20GAVAAXX100126:2:27:15261:77255 595 chr20 225015 29 36S65M = 224613 -466 AGGATAGAATTTCTAGAAATTTCCTTCCCCTAAACCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCA #####################################?97=;<<=>;8>/?@;>*<))<+==:<;'><>;+7>;==??/A<=::<971<@5A@AA@A+AA6 MD:Z:65 PG:Z:BWA RG:Z:20GAV.2 AM:i:29 NM:i:0 SM:i:29 MQ:i:29 OQ:Z:#####################################A:66?@?CABBCDDFEDECEDFGFFDDE:CFFGFFFEEEBFDFAEC>DECFCEDDECDEDECGCEEFDDD7??AECEDFBC7DECCA6AD=@ACDDBBA MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:AACCCBBCBBC@BBCBCCCBAAB2@CCCCBCCBBBBBBBCB>BBBBBBB>9@BCBAABAAAABAB?AABBBA3@>>BBBAB>@1@A>?=/>A7:FDFFFFFFEDEEFGEDECEEBEBBB?BCBE9CEEEDBEDBDFADEDCDD@ED?CECCDD?DB@@EACACEDEDB9DBADCDBCDA>? MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BCCBBBBBCBCBB@CBCBCBCBB@BBCBBABBBBBBBAB@AAABAABAB?BBAAAB@@????@@@>>7?@>B@A???A8? UQ:i:0 +20GAVAAXX100126:8:67:15419:163515 99 chr20 225019 60 101M = 225314 395 ACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGG A@D@FFEEEFEFEEEEEEEEEEEEEEEFEEECDECFDFCEEEDD=DDEEDDEEFEFFEFDEFEEFDEEFEFEFCFFFCFDFFEFFFFFFBCCEFE@DECBB MD:Z:101 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHHEHHHHH UQ:i:0 +20GAVAAXX100126:4:25:4357:32084 83 chr20 225019 60 10S91M = 224705 -404 TAAAGCTTTCACACTTGCCTCAGTGTATATATGGGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAG ###########A7B=D8+:9;<,*%9.8-1:-:(;1>;=5653===DD>DDDE=DEDDCFDEFEFFEFFDFFEEFDEFFEFEEEFBEEFEGFBBFFEBBB? MD:Z:23T67 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:###########F9F@F4345542+)424/85-4*514446042555HHDHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHCGHHFAFFF UQ:i:7 +20FUKAAXX100202:8:42:12533:83073 99 chr20 225020 60 101M = 225329 409 CACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA ABAEEEEEFEFEEEEEEEEEEEEEEEFEEECEECFDFCEEEEE=DEEEEDEEEEFFEEDEFEEFDEEFEFEFCFFFCFEFFEFGDGGGACFEFFEEEDCCC MD:Z:101 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHEGHHHHHHHHHHH UQ:i:0 +20FUKAAXX100202:4:28:14874:15140 83 chr20 225020 60 101M = 224800 -320 CACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA >6;:09BBA?D>C;CC9;<7548===.155782<@>?>A@AC=EEEFCFEEFEEEEFFEEFDEAEAEFDFEEEEEEEBEFE==EFEEEFEEEFEFEEECAA MD:Z:101 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:?455.4FDB?DDD?FD44461<:C<=22504AACCCFFGHHHHHHHHGHHHHHHHHHHHHEHFHHHHHHHHHHHFHHHEEHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:3:63:21001:169004 99 chr20 225021 60 101M = 225272 351 ACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAG A@DEDEEFEFEEEEEEEEEEEEEEEFEEECEECFDFC=EEDD=DDEEDDEEFEFFEFDFFEEFDEFFEFEFCEFFCFDFFEFFFFFC@?EBFEEDEDDCCB MD:Z:37A63 PG:Z:BWA RG:Z:20GAV.3 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFECHFHHHHHHHHHH UQ:i:28 +20GAVAAXX100126:5:21:14804:97200 147 chr20 225021 60 9S92M = 224706 -406 AGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCA ##########D@=>AA6=;A7:>AABBBBABBABBBBACBBBBBBBCCCCBCACBCABBCBBCBBCCBBBBBBB UQ:i:35 +20GAVAAXX100126:1:44:4735:79229 83 chr20 225023 60 101M = 224768 -355 TTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAA CB:>=D@=?DB@CCB?;789=819;99>>??<@?;?EEC=EEDFAFDEFEFFEFFDFFEEFEDFFDFDEDFFEEFEFFCCFFFFFFEEEFEEEFFEDACCA MD:Z:101 PG:Z:BWA RG:Z:20GAV.1 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HDBDBDF@DGGDFGDG=;=;>53445:DDAABCA@5HHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20FUKAAXX100202:6:41:11198:131837 163 chr20 225025 60 101M = 225350 425 GCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG ?ABB>BDD?DBBD=EEBDEECFEECADECFEFCEEEDE=DEEEDDEEEEFFEEDEFEDFDDEFEFEFCEFFCFEFFEFFFGGGDCFEFEEEEEEEFDEAD= MD:Z:101 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHIIHHHGHIGHIHHIHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHGHHHHHHGHG UQ:i:0 +20GAVAAXX100126:6:7:12934:132509 99 chr20 225025 60 101M = 225345 420 GCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG @B?EDFEEEEEEE3EEEEEEEFEEECEFCFEFC=EEED=EDEEDDEEFEFFEFDEFEEFDEEFEFEFCFFFCFDFFEFFFFFFAAEEFEEEFEEEEDEDCB MD:Z:33A67 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHH8HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHH UQ:i:28 +20FUKAAXX100202:3:6:15018:84106 83 chr20 225025 60 101M = 224759 -366 GCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAG 9?>?@B8;9;>=?>?A78<5@=B>8B<999;=@BC7E=CEEFCFEEFEEEEFFEEFDEFEEEFDFDEEEEEEFEFECAEEEEEFEEEFEFEEFEEFDEDCA MD:Z:101 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:;D<@B>::4:8===>>>5@:>ADB2F46554@DCG;HHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:4:6:16280:102045 163 chr20 225026 60 101M = 225252 326 CCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGC ?BDCCBDCCDFEFEFEDEECFEF4BDCDEDC;A?ACB5DDDB3>DBDDFEFEBCA>DE>9:=AADEC?D4?BBFADC8@?CE=DCD7>CAB>AA?; MD:Z:101 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:BBCBAA@CABCBCBBBBBBABBB7AA?B?A>=@?@B@>BAA@4?@@BABABB@A;=AA=:5>?@@A?=?A@;@=6:=@?;A>=37@:@::::: UQ:i:0 +20FUKAAXX100202:2:42:3257:33410 99 chr20 225027 60 101M = 225299 372 CTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCA ACCEEEEEEEEEEEEEEEEFEEECEECFEFC=EEEE=EEEEEDEEEEFFEEDEFDEFDEEFEFEFCEFFCFEADEFGGGGF;AFBFF:EFFFF>E>DCA?C MD:Z:31A69 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHBEHFHH;HIHHH=H:FFGDH UQ:i:28 +20GAVAAXX100126:7:1:21210:15897 99 chr20 225028 29 101M = 225313 378 TCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAA 17=?>9;??=?=>5@<92?9B=:-1=2<;?ACB@>CC,=;;ECA>DBD>ADA=A@<105(;;88<8;48:999969< MD:Z:30A70 PG:Z:BWA RG:Z:20GAV.7 AM:i:29 NM:i:1 SM:i:29 MQ:i:29 OQ:Z:55555@@??DD=FD>22D@D99IF>CCFCCCG=7==DCGCCCGF@GCGG8CG4-454GGDDGGGEGGD?DC?115-5555555554444455? UQ:i:24 +20FUKAAXX100202:3:25:16247:13724 99 chr20 225030 60 101M = 225273 343 AGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA ABCDEEFFEEFEEEEEFEEECEECFEFCEEEEE=EEEEEDEEEEFFEEEEFEDFDEEFEFEFCEFFCFEFEEFFFFGGC@FCFEEEGFFEFFAGFDDDE?C MD:Z:101 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGEHFHHHHHHHIIHBHHIHIHGH UQ:i:0 +20GAVAAXX100126:6:64:9301:93116 163 chr20 225030 60 101M = 225336 406 AGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA ?BBBAACDEEFECDEFGFFFEEFCFEFCFEEEE@B@@@A@A?<0>A?@??88 UQ:i:0 +20FUKAAXX100202:6:4:19457:82619 83 chr20 225030 60 101M = 224720 -410 AGTGTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACA A>B<>DBFCCF;57:<993"6>5=>>=E>EAE=EEEFCFEEFEEEEFFEEFDEFEEEFDFDDEEEEEFEFEBAEEEEEFEEEFEEEEFEEFEFFFEDECBA MD:Z:19A81 PG:Z:BWA RG:Z:20FUK.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:EFDAAFFHFDH42554452*44.554BH?HEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:1 +20GAVAAXX100126:4:5:21339:99108 163 chr20 225032 60 84M17S = 225350 418 TGTATATATGTGGCTATACCACTTACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATG ?BBCB@CCEDCCA8EEB;?CBBD*@=;DB9C;>??CA@2CD@6>D7A<66>BB=?@>@B7;?>=07>997@B?8>@>A>0<<@A?7?BB=-7<>>9<>@A>>2>?>38A8<:03=@A=<;;:>76<9<-2:864>1<?>>:E=EEDFCFEEFEEEEFFEEFEEFEDEDEFEEEEEDEEDCABCB? MD:Z:15A2A52 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:###############################CDDDE?=7>;265++24,555AA>A@HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:3 +20FUKAAXX100202:6:23:8496:198647 1171 chr20 225038 29 55S46M = 224653 -430 AGTAATTGTAAGAACTGCCCTCCCCCTAAAGCTTCAACCCTGGCCTCAGTGTATATATGTGGCTATCCCCCTGACGGCCCGCCAGTCATTAAATTCAAGCT ########################################################>?;@?8=;=:&7+5::;2.08(7;>BEFCEDDFEEEEFEB>DAB? MD:Z:11A2A5A1G23 PG:Z:BWA RG:Z:20FUK.6 AM:i:29 NM:i:4 SM:i:29 MQ:i:29 OQ:Z:########################################################AAADA544440415555055F7BFBGHHHGHHHHHHHHHHFHFHH UQ:i:47 +20GAVAAXX100126:2:63:4742:63598 99 chr20 225039 60 101M = 225351 411 ATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTC ABCCDEEFEEECEFCFEFC=EEEE=EEFECDEEFEFFEFDEF>DFDDEFEFEFCEFFCFDFFDCFFFFFBCEBEEEEFEEEFEFFFEEFCCFBEFBEEDCA MD:Z:19A81 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:HHHGHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHEHHHHGHHHHHHHHHHHHHHHEHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHGHHHHH UQ:i:28 +20GAVAAXX100126:8:63:8623:62616 147 chr20 225041 60 7S94M = 224719 -415 TATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGAT ########DC:D5/7-C?A@A;-1D?09;DCEGCBEF@EBCFFFEDGBEECEDFEF=DDDD@CD?EDEDEDDDDFGFEGFFFEGFFGBCCFDDB>AACCC? MD:Z:17A76 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:########??:?5-2'@;6>>;'4B<0:@?BBBBBBBCBCBBBBBCBBBBBBBBCBCBBBB@BBCBBBCBCCCCBCCBCCCCBCCCCCCCCCCCCCCBCCB UQ:i:35 +20GAVAAXX100126:4:43:2300:91558 163 chr20 225042 60 89M12S = 225276 334 TGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACA ?BCBBACDCEECFEFC=EECEEA>9AD?############# MD:Z:16A72 PG:Z:BWA RG:Z:20GAV.4 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBBCCCCBBBBBBBBBBBBABAB@AA>BBBBBBBBBBB?A@BBAA@B@B?AAAAA@BABA?@?@@?A@>@?@=;@?@?>@:@?86<>@############# UQ:i:28 +20FUKAAXX100202:1:6:14171:30403 83 chr20 225044 60 101M = 224737 -407 GCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATA =<44:;BA@BC?@9DD>B=DFEFCFEFFEEEECFEEFEEEEEEFEFDE@EECEFEFEEBDCBB MD:Z:101 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:HHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHDHHHHHHHHHHHHHEBHHHHHHHHHHHHBHHGHHHHHHHHG?DDDDHHHHHH UQ:i:0 +20GAVAAXX100126:6:4:13415:44301 99 chr20 225045 60 101M = 225370 425 CTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATAT 6>2;>:AB@?9?:BADD=:DD@>@B0;;9;@??=@@=>@=?A@>>C?A7B78=:=@>A@>?BB=>??9<8;56;;;;<9;67=::=2@><<1C=BA=> MD:Z:101 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:>>>>A>?CCA@>>CCGGEFGG?ADG25555DDDDDDDDDDADDD8AC>C7DDDFDG UQ:i:0 +20GAVAAXX100126:2:46:16245:173351 163 chr20 225049 60 101M = 225344 395 ACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTA ?@CC@DCDCEEEEE=EEFEDEFFFEGFFEEF?@A@@@@AAB=@>>A@@??AA@;A?@A UQ:i:0 +20GAVAAXX100126:3:62:9519:106030 147 chr20 225049 60 29S72M = 224778 -342 CACTTGCCTCAGTGTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGA ##############################A:36><=:9<9D8ADCECEDFFDDDEFEECEDEDDEDGFFDDEEDAEFEFGEDDDDDCGDDDFCDABDCB? MD:Z:0A71 PG:Z:BWA RG:Z:20GAV.3 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##############################?6+2787:1;7A>=AABBBBBBBBBBBBBBBBBABBBBCBBBBBC:B@BBCBCCCCCCCBBBBCBCBBCCB UQ:i:2 +20FUKAAXX100202:2:64:13645:16057 147 chr20 225053 60 101M = 224750 -403 CTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTAACTT :175,6>)2>D@ED@FEEGDFEEFFEEFDEFEEEEEDACBC;A@@AFDBACEEEEFCDDDCEEEFEDFDEBF=EEEECEDDEFEFFBBBA>E@EEBABCC= MD:Z:5A95 PG:Z:BWA RG:Z:20FUK.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:525314C-:HFEHEEHHHHGHHHHHHHHGHHHHHIHFFFFF@DDADHGGGFHHHHHIHHIIHHHHHHHHHHHEHHHHIHHGHHHHHFFFGFHGHHHHHHHD UQ:i:21 +20GAVAAXX100126:7:45:18980:156799 1683 chr20 225054 60 50S51M = 224715 -389 TCCCTTAAAGCTTCCACACTTGCCTAGGTGAATATAGGTGGGTATCCCGCTGACAGGCCCCCAGTCATTAAATTCAAGCTCCAAGAGACAAACCCTTGAAA ###################################################BBB7.;:4$2DB> MD:Z:9G33T7 PG:Z:BWA RG:Z:20GAV.7 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:###################################################@:>50:;4(6B?BAAA?BBCBBBA=A=@2BBB:B?B:C@BAA+AA6:?A? UQ:i:10 +20FUKAAXX100202:5:41:18178:6583 83 chr20 225056 60 66S35M = 224710 -380 TCTAGAAAGTACCTTCCCCTAAAGCTTTCACTCTTGCCTCAGTGTATATATGTGGGTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG #########################################################################################;>=C=:C?DBD4 MD:Z:35 PG:Z:BWA RG:Z:20FUK.5 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:#########################################################################################@@4455444455 UQ:i:0 +20FUKAAXX100202:5:41:18201:6601 1107 chr20 225056 60 66S35M = 224710 -380 TCTAGAAAGTTCCTCCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAG ############################################################################@>@;B6A==C<:==AFAEC555555455445455 UQ:i:0 +20FUKAAXX100202:8:28:5396:197191 147 chr20 225056 60 45S56M = 224733 -378 AACCTTTCACACTGGCCTCAGTGTATATATGTGGCTATCCCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGC ##############################################4EECE=EEEFCFDEFEEEEFFEEFEEFEDEFDFDECEEEEFEFE@?EEDBADBA? MD:Z:2A53 PG:Z:BWA RG:Z:20FUK.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:##############################################BHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHH UQ:i:36 +20FUKAAXX100202:3:63:10698:105044 83 chr20 225057 60 15S86M = 224789 -353 TGGCTATACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACA ################DE=FEEEEEFEDDFEEEEFDDEABCDA? MD:Z:0C0A68 PG:Z:BWA RG:Z:20FUK.3 AM:i:37 NM:i:2 SM:i:37 MQ:i:60 OQ:Z:################################A=18GEHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHFHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:30 +20GAVAAXX100126:2:63:19101:65384 147 chr20 225057 60 48S53M = 224707 -402 TAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAG #################################################CD:D9DEDF>FEFFC>DEDFCDGEEEFFEHFHFDFDDDEFEFEDAA?>AAD? MD:Z:1A51 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:#################################################@B>A@BBBBCBBBBBCCCBBCCCBBACCBCCCCBCCCBCBCBBCCCCCCCCB UQ:i:34 +20GAVAAXX100126:8:63:19683:109321 147 chr20 225057 60 59S42M = 224677 -421 GTTCCTCCCCCTAAAGCTTTCACACTTGCCTCAGTGTATATATGTGGCTATACCACTGACGGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTC ############################################################DD7B4ADEFCFEFFEDDFFEFDFFEEFBCGEFDCCBBBCB? MD:Z:1A40 PG:Z:BWA RG:Z:20GAV.8 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:############################################################BB;A@ABBBBBBBBBCCBBCCCBCBACCCCCCCBBBBBBBB UQ:i:35 +20GAVAAXX100126:6:47:6085:14272 163 chr20 225058 60 101M = 225307 349 GGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTAACTTGGTTT ?BCCB;CDEEDEFFDFGGGDFFGFEFFEFFEFEEAEEF?FAFDDEEBEFDFEEEFEDDEDEDEEEEFEEEDCECBDDECEDDFDEC=ECEDDECEDCCCAA MD:Z:0A100 PG:Z:BWA RG:Z:20GAV.6 AM:i:37 NM:i:1 SM:i:37 MQ:i:60 OQ:Z:BBBBBBBBBBABCCCCCCCCCCCCBBCBCBBBBBCCBBCBCBBCBBCBBBBBBBBBBBBABBABAABBBBBBBBB@AA@BBABBB@:B@A@B@BB@BA;?B UQ:i:30 +20FUKAAXX100202:4:47:20584:49257 83 chr20 225058 60 10S91M = 224761 -387 TACCACGGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTT ###########D=C=EEEFCFEEFEFEEFFEEFEECEEEFEFDEEEEDEFEFEBBEEEEEFEDEFDEEEFEEFEEEFEEEEEEEEEFFFGFEEEFEEFDCA MD:Z:91 PG:Z:BWA RG:Z:20FUK.4 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:###########I@FIHIHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHIHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20FUKAAXX100202:7:65:8106:6840 83 chr20 225058 60 9S92M = 224798 -351 ACCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACATGATTTTTCACATATTTTA ##########?>DEFEFEDEEEDEEEFE>AEEEEEFEDEFEEEEFEEFDEEFEEEEEEFEEFFFFFEEEFEEFFEBA MD:Z:92 PG:Z:BWA RG:Z:20FUK.7 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:##########@AGGDEIHHHHHHHHHHIHHHHHHHHDHHHHHIHHHIHIHHHEGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH UQ:i:0 +20GAVAAXX100126:2:46:5468:93666 147 chr20 225058 60 27S74M = 224757 -374 GTTTATATATGTGGCTATCCCACTGACAGGCCGCCAGTCATTAAATTCAAGCTCCAAGAGACAAACTCTTGAAAAAAAGGCAGCCTAGGAGAAAGCAACAT ############################B3CEBCFEEEEEDDEFEEEGFEFEFFEFEDDECAABBC? MD:Z:74 PG:Z:BWA RG:Z:20GAV.2 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 OQ:Z:############################A3A? -0.001901013984 + MathTestUtils.assertAlmostEqual(m0likelihood, -0.001901013984) + MathTestUtils.assertAlmostEqual(mflikelihood, m0likelihood) + + val mhlikelihood = MHModel.logLikelihood("C", "A", all_c, None) + // assuming f = 0.5 mhlikelihood + // in R + // log10(prod(0.5*(e/3) + 0.5*(1-e))) + // -3.01156717 + MathTestUtils.assertAlmostEqual(mhlikelihood, -3.01156717) + } + test("Likelihood of simple model, all errors, no reference or mutant sites") { + val m0likelihood = M0Model.logLikelihood("T", "G", all_c, None) + val mflikelihood = MfmModel.logLikelihood("T", "G", all_c, None) + val mhlikelihood = MHModel.logLikelihood("T", "G", all_c, None) + + // Assuming f = 0 or 0.5, mflikelihood or m0likelihood + // in R: + // options(digits=10) + // p = seq(30,39) + // e = 10^(-p/10) + // log10(prod(e/3)) + // > -39.27121 + MathTestUtils.assertAlmostEqual(m0likelihood, -39.27121255) + MathTestUtils.assertAlmostEqual(mflikelihood, m0likelihood) + MathTestUtils.assertAlmostEqual(mhlikelihood, m0likelihood) + + } + + test("Likelihood of simple model, all mutant, no error or reference sites") { + val m0likelihood = M0Model.logLikelihood("A", "C", all_c, None) + // Assuming f = 0, mflikelihood or m0likelihood + // in R: + // options(digits=10) + // p = seq(30,39) + // e = 10^(-p/10) + // log10(prod(e/3)) + // > -39.27121 + MathTestUtils.assertAlmostEqual(m0likelihood, -39.27121255) + + val mhlikelihood = MHModel.logLikelihood("C", "A", all_c, None) + // assuming f = 0.5 mhlikelihood + // in R + // log10(prod(0.5*(1-e) + 0.5*(e/3))) + // -3.01156717 + MathTestUtils.assertAlmostEqual(mhlikelihood, -3.01156717) + + } + + test("Likelihood/log10 likelihood of small mutant, first 3 reads (30,31,32) MfModel") { + val mflikelihood = MfmModel.logLikelihood("C", "A", some_muts, None) + val m03likelihood = MfmModel.logLikelihood("C", "A", some_muts, Some(0.3)) + val mhlikelihood = MHModel.logLikelihood("C", "A", some_muts, None) + val m0likelihood = M0Model.logLikelihood("C", "A", some_muts, None) + // f should be 0.3 + // in R + // pm = seq(30,32) + // pr = seq(33,39) + // em = 10^(-pm/10) + // er = 10^(-pr/10) + // log10(prod(0.3*(1-em) + 0.7*(em/3))*prod( 0.3*(er/3) + 0.7*(1-er))) + // -2.653910268 + MathTestUtils.assertAlmostEqual(mflikelihood, -2.653910268) + MathTestUtils.assertAlmostEqual(m03likelihood, mflikelihood) //same if f properly calculated here + + // the no mutants model in R + // log10(prod(0*(1-em) + 1*(em/3))*prod( 0*(er/3) + 1*(1-er))) + // -10.73221105 + MathTestUtils.assertAlmostEqual(m0likelihood, -10.73221105) + val logOddsMutant = MutectLogOdds.logOdds("C", "A", some_muts, None) + MathTestUtils.assertAlmostEqual(logOddsMutant, mflikelihood - m0likelihood) + + // the heterozygous site model in R: + // log10(prod(0.5*(1-em) + 0.5*(em/3))*prod( 0.5*(er/3) + 0.5*(1-er))) + // -3.01156717 + MathTestUtils.assertAlmostEqual(mhlikelihood, -3.01156717) + val logOddsHet = MutectSomaticLogOdds.logOdds("C", "A", some_muts, None) + MathTestUtils.assertAlmostEqual(logOddsHet, m0likelihood - mhlikelihood) + } + +} diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/discovery/ReadExplorerSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/discovery/ReadExplorerSuite.scala index b4ae4621..5d95ebbb 100644 --- a/avocado-core/src/test/scala/org/bdgenomics/avocado/discovery/ReadExplorerSuite.scala +++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/discovery/ReadExplorerSuite.scala @@ -34,8 +34,9 @@ class ReadExplorerSuite extends AvocadoFunSuite { .build()) .setMapq(40) .setSequence("ACTGA") - .setQual(":::::") + .setQual("::;?:") .setCigar("5M") + .setMismatchingPositions("2AA1") .setRecordGroupSample("sample1") .build() @@ -44,23 +45,31 @@ class ReadExplorerSuite extends AvocadoFunSuite { case ao: AlleleObservation => Some(ao) case _ => None }) - assert(observations.length === 5) - assert(observations.forall(_.phred == 25)) assert(observations.forall(_.mapq == Some(40))) assert(observations.forall(_.pos.referenceName == "chr1")) assert(observations.forall(_.sample == "sample1")) assert(observations.forall(!_.onNegativeStrand)) + assert(observations.forall(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.forall(_.distanceToNearestReadInsertion.isEmpty)) + assert(observations.forall(_.alignedReadLen == 5)) + assert(observations.head.mismatchQScoreSum === Some(30 + 26)) assert(observations.filter(_.pos.pos == 10L).length === 1) assert(observations.filter(_.pos.pos == 10L).head.allele === "A") + assert(observations.filter(_.pos.pos == 10L).head.phred === 25) assert(observations.filter(_.pos.pos == 11L).length === 1) assert(observations.filter(_.pos.pos == 11L).head.allele === "C") + assert(observations.filter(_.pos.pos == 11L).head.phred === 25) assert(observations.filter(_.pos.pos == 12L).length === 1) assert(observations.filter(_.pos.pos == 12L).head.allele === "T") + assert(observations.filter(_.pos.pos == 12L).head.phred === 26) assert(observations.filter(_.pos.pos == 13L).length === 1) assert(observations.filter(_.pos.pos == 13L).head.allele === "G") + assert(observations.filter(_.pos.pos == 13L).head.phred === 30) assert(observations.filter(_.pos.pos == 14L).length === 1) assert(observations.filter(_.pos.pos == 14L).head.allele === "A") + assert(observations.filter(_.pos.pos == 14L).head.phred === 25) + } sparkTest("observe a read with a deletion") { @@ -74,8 +83,9 @@ class ReadExplorerSuite extends AvocadoFunSuite { .build()) .setMapq(40) .setSequence("ACTGA") - .setQual(":::::") + .setQual(":::?:") .setCigar("2M2D3M") + .setMismatchingPositions("2^AC1A1") .setRecordGroupSample("sample1") .build() @@ -86,25 +96,35 @@ class ReadExplorerSuite extends AvocadoFunSuite { }) assert(observations.length === 5) - assert(observations.filter(_.allele != "_").forall(_.phred == 25)) + assert(observations.filter(_.pos.pos != 15L).filter(_.allele != "_").forall(_.phred == 25)) assert(observations.filter(_.allele == "_").forall(_.phred == 40)) assert(observations.forall(_.mapq == Some(40))) assert(observations.forall(_.pos.referenceName == "chr1")) assert(observations.forall(_.sample == "sample1")) assert(observations.forall(!_.onNegativeStrand)) + assert(observations.exists(_.distanceToNearestReadDeletion.isDefined)) + assert(observations.exists(_.distanceToNearestReadInsertion.isEmpty)) + assert(observations.head.mismatchQScoreSum === Some(30)) + assert(observations.forall(_.alignedReadLen == 5)) assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.distanceToNearestReadDeletion === Some(2)) assert(observations.filter(_.pos.pos == 10L).head.allele === "A") assert(observations.filter(_.pos.pos == 11L).length === 1) assert(observations.filter(_.pos.pos == 11L).head.allele === "C") + assert(observations.filter(_.pos.pos == 11L).head.distanceToNearestReadDeletion === Some(1)) assert(observations.filter(_.pos.pos == 11L).head.length === 3) assert(observations.filter(_.pos.pos == 12L).length === 0) assert(observations.filter(_.pos.pos == 13L).length === 0) assert(observations.filter(_.pos.pos == 14L).length === 1) assert(observations.filter(_.pos.pos == 14L).head.allele === "T") + assert(observations.filter(_.pos.pos == 14L).head.distanceToNearestReadDeletion === Some(-1)) assert(observations.filter(_.pos.pos == 15L).length === 1) assert(observations.filter(_.pos.pos == 15L).head.allele === "G") + assert(observations.filter(_.pos.pos == 15L).head.distanceToNearestReadDeletion === Some(-2)) assert(observations.filter(_.pos.pos == 16L).length === 1) assert(observations.filter(_.pos.pos == 16L).head.allele === "A") + assert(observations.filter(_.pos.pos == 16L).head.distanceToNearestReadDeletion === Some(-3)) + } sparkTest("observe a read with an insertion") { @@ -118,8 +138,9 @@ class ReadExplorerSuite extends AvocadoFunSuite { .build()) .setMapq(40) .setSequence("ACTGA") - .setQual(":::::") + .setQual(":::?:") .setCigar("1M2I2M") + .setMismatchingPositions("1C1") .setRecordGroupSample("sample1") .build() @@ -130,16 +151,24 @@ class ReadExplorerSuite extends AvocadoFunSuite { }) assert(observations.length === 3) - assert(observations.forall(_.phred == 25)) + assert(observations.filter(_.pos.pos != 11L).forall(_.phred == 25)) assert(observations.forall(_.mapq == Some(40))) assert(observations.forall(_.pos.referenceName == "chr1")) assert(observations.forall(_.sample == "sample1")) assert(observations.forall(!_.onNegativeStrand)) + assert(observations.exists(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.exists(_.distanceToNearestReadInsertion.isDefined)) + assert(observations.forall(_.alignedReadLen == 5)) + assert(observations.head.mismatchQScoreSum === Some(30)) assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.distanceToNearestReadInsertion === Some(1)) assert(observations.filter(_.pos.pos == 10L).head.allele === "ACT") assert(observations.filter(_.pos.pos == 11L).length === 1) + assert(observations.filter(_.pos.pos == 11L).head.distanceToNearestReadInsertion === Some(-1)) assert(observations.filter(_.pos.pos == 11L).head.allele === "G") + assert(observations.filter(_.pos.pos == 11L).head.phred === 30) assert(observations.filter(_.pos.pos == 12L).length === 1) + assert(observations.filter(_.pos.pos == 12L).head.distanceToNearestReadInsertion === Some(-2)) assert(observations.filter(_.pos.pos == 12L).head.allele === "A") } @@ -170,15 +199,168 @@ class ReadExplorerSuite extends AvocadoFunSuite { assert(observations.forall(_.pos.referenceName == "chr1")) assert(observations.forall(_.sample == "sample1")) assert(observations.forall(!_.onNegativeStrand)) + assert(observations.exists(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.exists(_.distanceToNearestReadInsertion.isEmpty)) + assert(observations.forall(_.alignedReadLen == 5)) + assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.allele === "A") + assert(observations.filter(_.pos.pos == 11L).length === 1) + assert(observations.filter(_.pos.pos == 11L).head.allele === "C") + assert(observations.filter(_.pos.pos == 12L).length === 1) + assert(observations.filter(_.pos.pos == 12L).head.allele === "T") + assert(observations.filter(_.pos.pos == 13L).length === 1) + assert(observations.filter(_.pos.pos == 13L).head.allele === "G") + assert(observations.filter(_.pos.pos == 14L).length === 1) + assert(observations.filter(_.pos.pos == 14L).head.allele === "A") + } + + sparkTest("observe a read with soft clipping") { + val re = new ReadExplorer(sc.parallelize(Seq[Observation]())) + + val read = AlignmentRecord.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContig(Contig.newBuilder() + .setContigName("chr1") + .build()) + .setMapq(40) + .setSequence("GGGACTGAGGG") + .setQual(":::?:::::::") + .setCigar("3S5M3S") + .setMismatchingPositions("0A4") + .setRecordGroupSample("sample1") + .build() + + val observations = re.readToObservations((read, 0L)) + .flatMap(o => o match { + case ao: AlleleObservation => Some(ao) + case _ => None + }) + + assert(observations.length === 5) + assert(observations.filter(_.pos.pos != 10L).forall(_.phred == 25)) + assert(observations.forall(_.mapq == Some(40))) + assert(observations.forall(_.pos.referenceName == "chr1")) + assert(observations.forall(_.sample == "sample1")) + assert(observations.forall(!_.onNegativeStrand)) + assert(observations.forall(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.forall(_.distanceToNearestReadInsertion.isEmpty)) + assert(observations.head.mismatchQScoreSum === Some(30)) + assert(observations.forall(_.alignedReadLen == 5)) assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.offsetInAlignment === 0) assert(observations.filter(_.pos.pos == 10L).head.allele === "A") + assert(observations.filter(_.pos.pos == 10L).head.phred === 30) assert(observations.filter(_.pos.pos == 11L).length === 1) + assert(observations.filter(_.pos.pos == 11L).head.offsetInAlignment === 1) assert(observations.filter(_.pos.pos == 11L).head.allele === "C") assert(observations.filter(_.pos.pos == 12L).length === 1) + assert(observations.filter(_.pos.pos == 12L).head.offsetInAlignment === 2) assert(observations.filter(_.pos.pos == 12L).head.allele === "T") assert(observations.filter(_.pos.pos == 13L).length === 1) + assert(observations.filter(_.pos.pos == 13L).head.offsetInAlignment === 3) assert(observations.filter(_.pos.pos == 13L).head.allele === "G") assert(observations.filter(_.pos.pos == 14L).length === 1) + assert(observations.filter(_.pos.pos == 14L).head.offsetInAlignment === 4) assert(observations.filter(_.pos.pos == 14L).head.allele === "A") } + + sparkTest("observe a read with soft clipping and insertion") { + val re = new ReadExplorer(sc.parallelize(Seq[Observation]())) + + val read = AlignmentRecord.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContig(Contig.newBuilder() + .setContigName("chr1") + .build()) + .setMapq(40) + .setSequence("GGGACTGAGG") + .setQual(":::?:::::::") + .setCigar("3S1M2I2M2S") + .setMismatchingPositions("0A4") + .setRecordGroupSample("sample1") + .build() + + val observations = re.readToObservations((read, 0L)) + .flatMap(o => o match { + case ao: AlleleObservation => Some(ao) + case _ => None + }) + + assert(observations.length === 3) + assert(observations.filter(_.pos.pos != 10L).forall(_.phred == 25)) + assert(observations.forall(_.mapq == Some(40))) + assert(observations.forall(_.pos.referenceName == "chr1")) + assert(observations.forall(_.sample == "sample1")) + assert(observations.forall(!_.onNegativeStrand)) + assert(observations.forall(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.exists(_.distanceToNearestReadInsertion.isDefined)) + //changed + assert(observations.head.mismatchQScoreSum === Some(25)) + assert(observations.forall(_.alignedReadLen == 5)) + assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.distanceToNearestReadInsertion === Some(1)) + assert(observations.filter(_.pos.pos == 10L).head.allele === "ACT") + assert(observations.filter(_.pos.pos == 11L).length === 1) + assert(observations.filter(_.pos.pos == 11L).head.distanceToNearestReadInsertion === Some(-1)) + assert(observations.filter(_.pos.pos == 11L).head.allele === "G") + // changed + assert(observations.filter(_.pos.pos == 11L).head.phred === 25) + assert(observations.filter(_.pos.pos == 12L).length === 1) + assert(observations.filter(_.pos.pos == 12L).head.distanceToNearestReadInsertion === Some(-2)) + assert(observations.filter(_.pos.pos == 12L).head.allele === "A") + } + + sparkTest("observe a read with hard clipping") { + val re = new ReadExplorer(sc.parallelize(Seq[Observation]())) + + val read = AlignmentRecord.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContig(Contig.newBuilder() + .setContigName("chr1") + .build()) + .setMapq(40) + .setSequence("ACTGA") + .setQual("::::?") + .setCigar("3H5M3H") + .setMismatchingPositions("4C0") + .setRecordGroupSample("sample1") + .build() + + val observations = re.readToObservations((read, 0L)) + .flatMap(o => o match { + case ao: AlleleObservation => Some(ao) + case _ => None + }) + + assert(observations.length === 5) + assert(observations.filter(_.pos.pos != 14L).forall(_.phred == 25)) + assert(observations.forall(_.mapq == Some(40))) + assert(observations.forall(_.pos.referenceName == "chr1")) + assert(observations.forall(_.sample == "sample1")) + assert(observations.forall(!_.onNegativeStrand)) + assert(observations.forall(_.distanceToNearestReadDeletion.isEmpty)) + assert(observations.forall(_.distanceToNearestReadInsertion.isEmpty)) + assert(observations.head.mismatchQScoreSum === Some(30)) + assert(observations.forall(_.alignedReadLen == 5)) + assert(observations.filter(_.pos.pos == 10L).length === 1) + assert(observations.filter(_.pos.pos == 10L).head.offsetInAlignment === 0) + assert(observations.filter(_.pos.pos == 10L).head.allele === "A") + assert(observations.filter(_.pos.pos == 11L).length === 1) + assert(observations.filter(_.pos.pos == 11L).head.offsetInAlignment === 1) + assert(observations.filter(_.pos.pos == 11L).head.allele === "C") + assert(observations.filter(_.pos.pos == 12L).length === 1) + assert(observations.filter(_.pos.pos == 12L).head.offsetInAlignment === 2) + assert(observations.filter(_.pos.pos == 12L).head.allele === "T") + assert(observations.filter(_.pos.pos == 13L).length === 1) + assert(observations.filter(_.pos.pos == 13L).head.offsetInAlignment === 3) + assert(observations.filter(_.pos.pos == 13L).head.allele === "G") + assert(observations.filter(_.pos.pos == 14L).length === 1) + assert(observations.filter(_.pos.pos == 14L).head.offsetInAlignment === 4) + assert(observations.filter(_.pos.pos == 14L).head.allele === "A") + assert(observations.filter(_.pos.pos == 14L).head.phred === 30) + } + } diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala index eb08d8e6..5e769613 100644 --- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala +++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala @@ -45,6 +45,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 1L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -55,6 +58,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 2L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -65,6 +71,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 3L)) @@ -94,6 +103,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 1L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -104,6 +116,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 2L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -114,6 +129,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 3L)) @@ -143,6 +161,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 1L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -153,6 +174,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 2L), AlleleObservation(ReferencePosition("ctg", 0L), @@ -163,6 +187,9 @@ class BiallelicGenotyperSuite extends FunSuite { true, true, 1, + 10, + None, None, + 0, 0, 1, None, false, "mySample", 3L)) diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/MutectGenotyperSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/MutectGenotyperSuite.scala new file mode 100644 index 00000000..686a5e8d --- /dev/null +++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/MutectGenotyperSuite.scala @@ -0,0 +1,342 @@ +/* + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bdgenomics.avocado.genotyping + +import org.bdgenomics.adam.models.ReferencePosition +import org.bdgenomics.avocado.models.{ AlleleObservation, Observation } +import org.scalatest.FunSuite + +class MutectGenotyperSuite extends FunSuite { + val pos = ReferencePosition("ctg", 101l) + val mt = new MutectGenotyper("normal", "tumor") + + val ref = new Observation(ReferencePosition("ctg", 101L), "C") + + //Demo data + val allC = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = "C", + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID * 10, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMuts = for (readID <- 1 to 20) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID * 10 % 100, + 100, + None, Some(-50), + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val allMuts = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = "A", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID * 10, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsEndClusteredBeginning = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsSoftClipped = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + None, None, + 20, 20, 100, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsNoisyReads = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + None, None, + 0, 0, 100, Some(101), false, + sample = "tumor", + readId = 1L) + } + + val someMutsNearInsertion = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + Some(3), None, + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsNearDeletion = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + None, Some(3), + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsMateRescue = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID, + 100, + None, Some(3), + 0, 0, 1, Some(0), true, + sample = "tumor", + readId = 1L) + } + + val someMutsEndClusteredEnd = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 3) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = 100 - readID, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsStrandBiasNeg = for (readID <- 1 to 50) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 25) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID <= 25, + firstOfPair = true, + offsetInAlignment = readID * 10 % 100, + 100, + None, Some(-50), + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val someMutsStrandBiasPos = for (readID <- 1 to 50) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID <= 25) "A" else "C", //Mutants = A x 3 w/ scores (30,31,32) + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID > 25, + firstOfPair = true, + offsetInAlignment = readID * 10 % 100, + 100, + None, Some(-50), + 0, 0, 1, Some(0), false, + sample = "tumor", + readId = 1L) + } + + val normalAllC = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = "C", + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID * 10, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "normal", + readId = 1L) + } + val normalHet = for (readID <- 1 to 10) yield { + AlleleObservation(pos = ReferencePosition("ctg", 0L), + length = 1, + allele = if (readID % 2 == 0) "C" else "A", + phred = 30 + (readID - 1), // 30 to 39 + mapq = Some(30), + onNegativeStrand = readID % 2 == 0, + firstOfPair = true, + offsetInAlignment = readID * 10, + 100, + None, None, + 0, 0, 1, Some(0), false, + sample = "normal", + readId = 1L) + } + + val noMutClean = allC ++ normalAllC + val noMutHet = allC ++ normalHet + val mutClean = someMuts ++ normalAllC + val mutHet = someMuts ++ normalHet + val mutEndClusterBeginning = someMutsEndClusteredBeginning ++ normalAllC + val mutEndClusterEnd = someMutsEndClusteredEnd ++ normalAllC + val mutHeavilyClipped = someMutsSoftClipped ++ normalAllC + val mutNearInsertion = someMutsNearInsertion ++ normalAllC + val mutNearDeletion = someMutsNearDeletion ++ normalAllC + val mutNoisyReads = someMutsNoisyReads ++ normalAllC + val mutMateRescue = someMutsMateRescue ++ normalAllC + val mutStrandBiasNeg = someMutsStrandBiasNeg ++ normalAllC + val mutStrandBiasPos = someMutsStrandBiasPos ++ normalAllC + val mutAllMutants = allMuts ++ normalAllC + + test("Sites with no mutations should not have any variants returned") { + val resultClean = mt.genotypeSite(pos, ref, noMutClean) + assert(resultClean.isEmpty, "Result should be empty") + val resultHet = mt.genotypeSite(pos, ref, noMutHet) + assert(resultHet.isEmpty, "Although normal is het, tumor has no mutants and there should be no result") + val resultEmptyTumor = mt.genotypeSite(pos, ref, normalHet) + assert(resultEmptyTumor.isEmpty, "A position with no tumor reads should not be classified somatic.") + } + + test("Sites with mutations should return a variant") { + val result = mt.genotypeSite(pos, ref, mutClean) + assert(result.isDefined, "Result should not be empty") + val variant = result.get.variant.variant + assert(variant.getAlternateAllele === "A", "Alternate allele should be a") + assert(variant.getReferenceAllele === "C", "Reference allele should be c") + assert(variant.getContig.getContigName === "ctg", "Contig should be 'ctg'") + assert(variant.getStart === 101l, "Variant should be at position 101") + assert(variant.getEnd === 102l, "Variant should be at position 101") + } + + test("Sites with tumor mutations that are heterozygous in normal should not return a variant") { + val result = mt.genotypeSite(pos, ref, mutHet) + assert(result.isEmpty, "Site should not be classified somatic, strong het in normal") + val resultMissingNormal = mt.genotypeSite(pos, ref, someMuts) + assert(resultMissingNormal.isEmpty, "When there is no normal coverage, the site should not be classified somatic") + } + + test("Mutations that cluster near beginning/end of alignment should be discarded") { + val beginning = mt.genotypeSite(pos, ref, mutEndClusterBeginning) + val end = mt.genotypeSite(pos, ref, mutEndClusterEnd) + assert(beginning.isEmpty, "Mutation clustering at beginning of the alignment should fail mutect") + assert(end.isEmpty, "Mutation clustering at end of the alignment should fail mutect") + + } + + test("Test noisy read filter") { + val result = mt.genotypeSite(pos, ref, mutNoisyReads) + assert(result.isEmpty) + } + + test("Test heavily clipped filter") { + val result = mt.genotypeSite(pos, ref, mutHeavilyClipped) + assert(result.isEmpty) + } + + test("Test near insertion filter") { + val result = mt.genotypeSite(pos, ref, mutNearInsertion) + assert(result.isEmpty) + } + + test("Test near deletion filter") { + val result = mt.genotypeSite(pos, ref, mutNearDeletion) + assert(result.isEmpty) + } + + test("Test mate rescue filter") { + val result = mt.genotypeSite(pos, ref, mutMateRescue) + assert(result.isEmpty) + } + + test("Test strand bias filter") { + val resultNeg = mt.genotypeSite(pos, ref, mutStrandBiasNeg) + val resultPos = mt.genotypeSite(pos, ref, mutStrandBiasPos) + assert(resultNeg.isEmpty, "Fails negative strand bias filter") + assert(resultPos.isEmpty, "Fails positive strand bias filter") + } + + test("Works even when all alleles are mutant") { + val result = mt.genotypeSite(pos, ref, mutAllMutants) + assert(result.isDefined) + } + +} diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/annotators/VariantCallingAnnotatorSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/annotators/VariantCallingAnnotatorSuite.scala index f92b156f..73c364be 100644 --- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/annotators/VariantCallingAnnotatorSuite.scala +++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/annotators/VariantCallingAnnotatorSuite.scala @@ -31,6 +31,10 @@ class VariantCallingAnnotatorSuite extends FunSuite { strand, true, 1, + 10, + None, None, + 0, 0, 1, None, + false, "mySample", 1L) } diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSitesSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSitesSuite.scala new file mode 100644 index 00000000..244a3b22 --- /dev/null +++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/postprocessing/FilterKnownSitesSuite.scala @@ -0,0 +1,51 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.avocado.postprocessing + +import org.bdgenomics.adam.models.SnpTable +import org.bdgenomics.adam.rich.RichVariant._ +import org.bdgenomics.formats.avro.{ Contig, Variant } +import org.scalatest.FunSuite + +class FilterKnownSitesSuite extends FunSuite { + + val snpTable = new SnpTable(Map("chr1" -> Set(10000L))) + val keepFilter = new FilterKnownSites(None, snpTable, true) + val discardFilter = new FilterKnownSites(None, snpTable, false) + val ctg = Contig.newBuilder() + .setContigName("chr1") + .build() + + test("process a known site correctly") { + val v = Variant.newBuilder() + .setContig(ctg) + .setStart(10000L) + .build() + assert(keepFilter.filterFn(v)) + assert(!discardFilter.filterFn(v)) + } + + test("process an unknown site correctly") { + val v = Variant.newBuilder() + .setContig(ctg) + .setStart(10001L) + .build() + assert(!keepFilter.filterFn(v)) + assert(discardFilter.filterFn(v)) + } +} diff --git a/avocado-sample-configs/keyed.properties b/avocado-sample-configs/keyed.properties new file mode 100644 index 00000000..0a788b1a --- /dev/null +++ b/avocado-sample-configs/keyed.properties @@ -0,0 +1,33 @@ +{ + KeyedReads = { + normalFileName = ../resources/NA12878_pre_normal.sorted.adam + } + readExplorer = { + } + mutectGenotyper = { + normalId = normal; + somaticId = tumor; + } + nonRef = { + } + + mapping_quality_filter = { + } + mate_rescue_filter = { + } + duplicateFilter = { + } + inputStage = KeyedReads; + + preprocessorNames = (mate_rescue_filter, duplicateFilter); + preprocessorAlgorithms = (MateRescueFilter,DuplicateFilter); + + explorerName = readExplorer; + explorerAlgorithm = ReadExplorer; + + genotyperName = mutectGenotyper; + genotyperAlgorithm = MutectGenotyper; + + postprocessorNames = ( nonRef ); + postprocessorAlgorithms = ( filterReferenceCalls ); +} diff --git a/docs/source/03_architecture.md b/docs/source/03_architecture.md index 16213b6b..e8df3bbc 100644 --- a/docs/source/03_architecture.md +++ b/docs/source/03_architecture.md @@ -188,3 +188,17 @@ average genome coverage. At most, one of the `absoluteDepth` or `relativeDepth` options can be specified. If neither are specified, we default to `relativeDepth = 0.75`. + +### Filter Known Sites + +This filter can be configured to either only keep variant calls that occur at known sites, +or to discard variant calls that occur at known sites. + +For configuration, the filter is named `filterKnownSites`. It has the following options: + +* `knownSiteFile`: The path to a VCF that describes the known sites. +* `datasetName`: This field is optional. If provided, it is used to describe where +the known sites are from (e.g., 1000G, dbSNP). +* `keepKnowns`: This field is optional and defaults to false. If true, we will keep all +sites that overlap a known variant. If false, we discard all sites that overlap a known +variant. \ No newline at end of file diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/05/temp_shuffle_360f7bf6-13e1-41c7-af74-71c5a648c959 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/05/temp_shuffle_360f7bf6-13e1-41c7-af74-71c5a648c959 new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/05/temp_shuffle_b6695f91-dc24-494f-b3c8-e15cea57eff1 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/05/temp_shuffle_b6695f91-dc24-494f-b3c8-e15cea57eff1 new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/07/temp_shuffle_24977519-1583-40be-8d4d-7232217b4b6b b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/07/temp_shuffle_24977519-1583-40be-8d4d-7232217b4b6b new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/1f/temp_shuffle_19e43401-b742-4aa7-82be-7025483f4f52 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/1f/temp_shuffle_19e43401-b742-4aa7-82be-7025483f4f52 new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/27/temp_shuffle_92e6205a-f10e-4e86-a753-2f09cfbe3931 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/27/temp_shuffle_92e6205a-f10e-4e86-a753-2f09cfbe3931 new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/27/temp_shuffle_e93d3d46-3ad4-4fe6-bf81-6d7450bdbb11 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/27/temp_shuffle_e93d3d46-3ad4-4fe6-bf81-6d7450bdbb11 new file mode 100644 index 00000000..e69de29b diff --git a/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/3e/temp_shuffle_5de0d875-5f60-4f93-b240-ce03f5a67817 b/tmp/spark-09171a3b-14ff-45ef-82f5-e98fc7ec2931/blockmgr-254e3048-f58c-499e-b65a-8791df92d7ff/3e/temp_shuffle_5de0d875-5f60-4f93-b240-ce03f5a67817 new file mode 100644 index 00000000..e69de29b