Skip to content

Commit

Permalink
Merge pull request #228 from Ferlab-Ste-Justine/enrich-variant-checkpoint
Browse files Browse the repository at this point in the history

feat: Add more checkpoint in enrich variants
  • Loading branch information
jecos authored May 23, 2024
2 parents 1d72c98 + 7cdeb97 commit 15e73c0
Showing 1 changed file with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ case class Variants(rc: RuntimeETLContext, participantId: Column = col("particip
val variantsCheckpoint = if (checkpoint) variants.checkpoint() else variants

variantsCheckpoint
.withFrequencies(participantId, affectedStatus, snv, splits)
.withFrequencies(participantId, affectedStatus, snv, splits, checkpoint)
.withPopulations(data(thousand_genomes.id), data(topmed_bravo.id), data(gnomad_genomes_v2.id), data(gnomad_exomes_v2.id), data(gnomad_genomes_v3.id))
.withDbSNP(data(dbsnp.id))
.withClinvar(data(clinvar.id))
Expand Down Expand Up @@ -206,11 +206,15 @@ object Variants {
}


def withFrequencies(participantId: Column, affectedStatus: Column, snv: DataFrame, splits: Seq[OccurrenceSplit]): DataFrame = splits match {
def withFrequencies(participantId: Column, affectedStatus: Column, snv: DataFrame, splits: Seq[OccurrenceSplit], checkpoint: Boolean = false): DataFrame = splits match {
case Nil => df
case _ =>
val variantWithFreq = snv.split(participantId = participantId, affectedStatus = affectedStatus, splits)
df.joinByLocus(variantWithFreq, "left")
if (checkpoint) {
df.joinByLocus(variantWithFreq.checkpoint(), "left").checkpoint()
} else {
df.joinByLocus(variantWithFreq, "left")
}
}

def withSpliceAi(spliceai: DataFrame)(implicit spark: SparkSession): DataFrame = {
Expand Down

0 comments on commit 15e73c0

Please sign in to comment.