Skip to content

Commit

Permalink
Merge pull request #258 from Ferlab-Ste-Justine/fix/clin-2536
Browse files (browse the repository at this point in the history)
fix: CLIN-2536 use Glow to register spark before read
  • Loading branch information
meek0 authored Jan 8, 2025
2 parents 430715c + c872357 commit a508dc3
Showing 1 changed file with 3 additions and 1 deletion.
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@ import bio.ferlab.datalake.commons.config.{DatasetConf, RepartitionByRange, Runt
import bio.ferlab.datalake.spark3.etl.v4.SimpleETLP
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits._
import bio.ferlab.datalake.spark3.implicits.GenomicImplicits.columns._
import io.projectglow.Glow
import mainargs.{ParserForMethods, main}
import org.apache.spark.sql.DataFrame

Expand All @@ -16,7 +17,8 @@ case class GnomadV4(rc: RuntimeETLContext) extends SimpleETLP(rc) {

// Diff hunk for `extract`: the +/- markers were lost in this rendering, so the
// removed line and the added lines appear together in the body below.
// The method returns a single-entry map keyed by gnomad_vcf.id.
override def extract(lastRunValue: LocalDateTime = minValue,
currentRunValue: LocalDateTime = LocalDateTime.now()): Map[String, DataFrame] = {
// NOTE(review): presumably the one line this commit deleted (the page reports
// "1 deletion"); it read the dataset via gnomad_vcf.read without calling Glow first.
Map(gnomad_vcf.id -> gnomad_vcf.read)
// Presumably added by this commit: register the Spark session with Glow before
// reading, as the commit title states.
val sess = Glow.register(spark)
// Presumably added by this commit: load gnomad_vcf.location with the "vcf" format,
// using the value returned by Glow.register rather than gnomad_vcf.read.
Map(gnomad_vcf.id -> sess.read.format("vcf").load(gnomad_vcf.location))
}

override def transformSingle(data: Map[String, DataFrame],
Expand Down

0 comments on commit a508dc3

Please sign in to comment.