From 06c233fdab252ad6051893ee5f4d8a33424eef32 Mon Sep 17 00:00:00 2001 From: Kunal Gosar Date: Tue, 24 Oct 2017 17:25:53 -0700 Subject: [PATCH] SparkSession Creation fix for compatibility with pySpark (#21) --- bin/pygnocchi | 6 +++--- .../fnothaft/gnocchi/sql/GnocchiSession.scala | 17 +++++++++++++++-- .../bdgenomics/gnocchi/gnocchiSession.py | 10 +++++----- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/bin/pygnocchi b/bin/pygnocchi index 11efe426..b3808ae1 100644 --- a/bin/pygnocchi +++ b/bin/pygnocchi @@ -30,14 +30,14 @@ else ASSEMBLY_DIR="$SCRIPT_DIR/gnocchi-assembly/target" fi -num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^gnocchi-assembly_[0-9A-Za-z\.-]*\.jar$" | grep -v javadoc | grep -v sources | wc -l)" +num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^gnocchi[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources | wc -l)" if [ "$num_jars" -eq "0" ]; then - echo "Failed to find Gnocchi assembly in $ASSEMBLY_DIR." 1>&2 + echo "Failed to find Gnocchi cli assembly in $ASSEMBLY_DIR." 1>&2 echo "You need to build Gnocchi before running this program." 1>&2 exit 1 fi -ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^gnocchi-assembly_[0-9A-Za-z\.-]*\.jar$" | grep -v javadoc | grep -v sources || true)" +ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^gnocchi[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources || true)" if [ "$num_jars" -gt "1" ]; then echo "Found multiple Gnocchi cli assembly jars in $ASSEMBLY_DIR:" 1>&2 echo "$ASSEMBLY_JARS" 1>&2 diff --git a/gnocchi-core/src/main/scala/net/fnothaft/gnocchi/sql/GnocchiSession.scala b/gnocchi-core/src/main/scala/net/fnothaft/gnocchi/sql/GnocchiSession.scala index 82cee004..ac349fdf 100755 --- a/gnocchi-core/src/main/scala/net/fnothaft/gnocchi/sql/GnocchiSession.scala +++ b/gnocchi-core/src/main/scala/net/fnothaft/gnocchi/sql/GnocchiSession.scala @@ -32,9 +32,22 @@ import scala.collection.JavaConversions._ object GnocchiSession { // Add GnocchiContext methods - implicit def sparkContextToGnocchiContext(sc: SparkContext): GnocchiSession = + implicit def sparkContextToGnocchiSession(sc: SparkContext): GnocchiSession = new GnocchiSession(sc) + /** + * Creates a GnocchiSession from SparkSession. Sets the active session to the + * input session. + * + * + * @param ss SparkSession + * @return GnocchiSession + */ + def GnocchiSessionFromSession(ss: SparkSession): GnocchiSession = { + SparkSession.setActiveSession(ss) + new GnocchiSession(ss.sparkContext) + } + } class GnocchiSession(@transient val sc: SparkContext) extends Serializable with Logging { @@ -293,4 +306,4 @@ class GnocchiSession(@transient val sc: SparkContext) extends Serializable with phenoCovarDF.withColumn("phenoName", lit(phenoName)).as[Phenotype].collect().map(x => (x.sampleId, x)).toMap } -} \ No newline at end of file +} diff --git a/gnocchi-python/bdgenomics/gnocchi/gnocchiSession.py b/gnocchi-python/bdgenomics/gnocchi/gnocchiSession.py index 7e278460..07ecced2 100644 --- a/gnocchi-python/bdgenomics/gnocchi/gnocchiSession.py +++ b/gnocchi-python/bdgenomics/gnocchi/gnocchiSession.py @@ -20,11 +20,11 @@ class GnocchiSession(object): """ """ - def __init__(self, sc): - self._sc = sc - self._jvm = sc._jvm - session = sc._jvm.net.fnothaft.gnocchi.sql.GnocchiSession(sc._jsc.sc()) - self.__jgs = sc._jvm.net.fnothaft.gnocchi.api.java.JavaGnocchiSession(session) + def __init__(self, ss): + self._sc = ss.sparkContext + self._jvm = self._sc._jvm + session = self._jvm.net.fnothaft.gnocchi.sql.GnocchiSession.GnocchiSessionFromSession(ss._jsparkSession) + self.__jgs = self._jvm.net.fnothaft.gnocchi.api.java.JavaGnocchiSession(session) def filterSamples(self, genotypesDataset, mind, ploidy): dataset = self.__jgs.filterSamples(genotypesDataset.get(), mind, ploidy)