From 9d78a74347d382a00503ec69c3fa22bb109ad569 Mon Sep 17 00:00:00 2001
From: Sushant Raikar
Date: Tue, 10 Aug 2021 16:40:30 -0700
Subject: [PATCH] BDP-7396: TransportUdf Determine scala version in CoralSpark and return respective ivy link (#119)

---
 coral-spark/build.gradle                      |   3 +-
 coral-spark/spark_itest.gradle                |  32 +++++
 .../coral/spark/TransportableUDFMap.java      | 115 ++++++++++++------
 .../TransportableUDFMapTest.java              |  70 +++++++++++
 .../TransportableUDFMapTest.java              |  64 ++++++++++
 .../linkedin/coral/spark/CoralSparkTest.java  |   2 +-
 gradle/dependencies.gradle                    |  12 +-
 7 files changed, 254 insertions(+), 44 deletions(-)
 create mode 100644 coral-spark/spark_itest.gradle
 create mode 100644 coral-spark/src/spark3test/java/com.linkedin.coral.spark/TransportableUDFMapTest.java
 create mode 100644 coral-spark/src/sparktest/java/com.linkedin.coral.spark/TransportableUDFMapTest.java

diff --git a/coral-spark/build.gradle b/coral-spark/build.gradle
index 8eaacd20e..8d2d1fea1 100644
--- a/coral-spark/build.gradle
+++ b/coral-spark/build.gradle
@@ -1,4 +1,5 @@
 apply from: "$rootDir/gradle/java-publication.gradle"
+apply from: "spark_itest.gradle"
 
 dependencies {
   compile deps.'gson'
@@ -6,6 +7,7 @@ dependencies {
   compile deps.'slf4j-log4j12'
   compile project(':coral-hive')
 
+  compileOnly deps.'spark'.'sql'
   testCompile(deps.'hive'.'hive-exec-core') {
     exclude group: 'org.apache.avro', module: 'avro-tools'
     // These exclusions are required to prevent duplicate classes since we include
@@ -13,7 +15,6 @@ dependencies {
     exclude group: 'org.apache.calcite', module: 'calcite-core'
     exclude group: 'org.apache.calcite', module: 'calcite-avatica'
   }
-  testCompile deps.'hadoop'.'hadoop-mapreduce-client-core'
   testCompile deps.'kryo'
 }
 
diff --git a/coral-spark/spark_itest.gradle b/coral-spark/spark_itest.gradle
new file mode 100644
index 000000000..1b38dd333
--- /dev/null
+++ b/coral-spark/spark_itest.gradle
@@ -0,0 +1,32 @@
+def itests = ['spark', 'spark3']
+
+for (String itest : itests) {
+  sourceSets {
+    create("${itest}test", {
+      java {
+        compileClasspath += main.output
+        runtimeClasspath += main.output
+      }
+    })
+  }
+
+  configurations {
+    getByName("${itest}testCompile").extendsFrom(testCompile)
+    getByName("${itest}testRuntime").extendsFrom(testRuntime)
+  }
+
+  tasks.register("${itest}Test", Test) {
+    description = "Run ${itest} integration tests"
+    testClassesDirs = project.sourceSets.getByName("${itest}test").output.classesDirs
+    classpath = project.sourceSets.getByName("${itest}test").runtimeClasspath
+    shouldRunAfter test
+    useTestNG()
+  }
+
+  check.dependsOn(tasks.getByName("${itest}Test"))
+}
+
+dependencies {
+  sparktestCompile(deps.'spark'.'hive')
+  spark3testCompile(deps.'spark3'.'hive')
+}
diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/TransportableUDFMap.java b/coral-spark/src/main/java/com/linkedin/coral/spark/TransportableUDFMap.java
index 0d4c0fa2c..5797d6a70 100644
--- a/coral-spark/src/main/java/com/linkedin/coral/spark/TransportableUDFMap.java
+++ b/coral-spark/src/main/java/com/linkedin/coral/spark/TransportableUDFMap.java
@@ -6,14 +6,17 @@
 package com.linkedin.coral.spark;
 
 import java.net.URI;
-import java.net.URISyntaxException;
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.linkedin.coral.spark.containers.SparkUDFInfo;
+import com.linkedin.coral.spark.exceptions.UnsupportedUDFException;
 
 
 /**
@@ -28,88 +31,103 @@
   private TransportableUDFMap() {
   }
 
-  private static final Map<String, SparkUDFInfo> UDF_MAP = new HashMap();
-  public static final String STANDARD_UDFS_DALI_UDFS_URL =
-      "ivy://com.linkedin.standard-udfs-dali-udfs:standard-udfs-dali-udfs:1.0.4?classifier=spark";
+  private static final Logger LOG = LoggerFactory.getLogger(TransportableUDFMap.class);
+  private static final Map<String, Map<ScalaVersion, SparkUDFInfo>> UDF_MAP = new HashMap();
+  public static final String DALI_UDFS_IVY_URL_SPARK_2_11 =
+      "ivy://com.linkedin.standard-udfs-dali-udfs:standard-udfs-dali-udfs:2.0.0?classifier=spark_2.11";
+  public static final String DALI_UDFS_IVY_URL_SPARK_2_12 =
+      "ivy://com.linkedin.standard-udfs-dali-udfs:standard-udfs-dali-udfs:2.0.0?classifier=spark_2.12";
+
+  enum ScalaVersion {
+    SCALA_2_11,
+    SCALA_2_12
+  }
 
   static {
     // LIHADOOP-48502: The following UDFs are the legacy Hive UDF. Since they have been converted to
     // Transport UDF, we point their class files to the corresponding Spark jar.
     add("com.linkedin.dali.udf.date.hive.DateFormatToEpoch", "com.linkedin.stdudfs.daliudfs.spark.DateFormatToEpoch",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.dali.udf.date.hive.EpochToDateFormat", "com.linkedin.stdudfs.daliudfs.spark.EpochToDateFormat",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.dali.udf.date.hive.EpochToEpochMilliseconds",
-        "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.dali.udf.isguestmemberid.hive.IsGuestMemberId",
-        "com.linkedin.stdudfs.daliudfs.spark.IsGuestMemberId", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.IsGuestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     // LIHADOOP-49851 add the transportudf spark version for lookup UDF
     add("com.linkedin.dali.udf.istestmemberid.hive.IsTestMemberId",
-        "com.linkedin.stdudfs.daliudfs.spark.IsTestMemberId", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.IsTestMemberId", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.dali.udf.maplookup.hive.MapLookup", "com.linkedin.stdudfs.daliudfs.spark.MapLookup",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.dali.udf.sanitize.hive.Sanitize", "com.linkedin.stdudfs.daliudfs.spark.Sanitize",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     // LIHADOOP-49851 add the transportudf spark version for lookup UDF
     add("com.linkedin.dali.udf.watbotcrawlerlookup.hive.WATBotCrawlerLookup",
-        "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     // LIHADOOP-48502: The following UDFs are already defined using Transport UDF.
     // The class name is the corresponding Hive UDF.
     // We point their class files to the corresponding Spark jar file.
     add("com.linkedin.stdudfs.daliudfs.hive.DateFormatToEpoch", "com.linkedin.stdudfs.daliudfs.spark.DateFormatToEpoch",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.EpochToDateFormat", "com.linkedin.stdudfs.daliudfs.spark.EpochToDateFormat",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.EpochToEpochMilliseconds",
-        "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.EpochToEpochMilliseconds", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.GetProfileSections",
-        "com.linkedin.stdudfs.daliudfs.spark.GetProfileSections", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.GetProfileSections", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.stringudfs.hive.InitCap", "com.linkedin.stdudfs.stringudfs.spark.InitCap",
-        "ivy://com.linkedin.standard-udfs-common-sql-udfs:standard-udfs-string-udfs:0.0.7?classifier=spark");
+        "ivy://com.linkedin.standard-udfs-common-sql-udfs:standard-udfs-string-udfs:0.0.7?classifier=spark", null);
 
     add("com.linkedin.stdudfs.daliudfs.hive.IsGuestMemberId", "com.linkedin.stdudfs.daliudfs.spark.IsGuestMemberId",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.IsTestMemberId", "com.linkedin.stdudfs.daliudfs.spark.IsTestMemberId",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.MapLookup", "com.linkedin.stdudfs.daliudfs.spark.MapLookup",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.PortalLookup", "com.linkedin.stdudfs.daliudfs.spark.PortalLookup",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.daliudfs.hive.Sanitize", "com.linkedin.stdudfs.daliudfs.spark.Sanitize",
-        STANDARD_UDFS_DALI_UDFS_URL);
+        DALI_UDFS_IVY_URL_SPARK_2_11, DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.stdudfs.userinterfacelookup.hive.UserInterfaceLookup",
         "com.linkedin.stdudfs.userinterfacelookup.spark.UserInterfaceLookup",
-        "ivy://com.linkedin.standard-udf-userinterfacelookup:userinterfacelookup-std-udf:0.0.9?classifier=spark");
+        "ivy://com.linkedin.standard-udf-userinterfacelookup:userinterfacelookup-std-udf:0.0.9?classifier=spark", null);
 
     add("com.linkedin.stdudfs.daliudfs.hive.WatBotCrawlerLookup",
-        "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", STANDARD_UDFS_DALI_UDFS_URL);
+        "com.linkedin.stdudfs.daliudfs.spark.WatBotCrawlerLookup", DALI_UDFS_IVY_URL_SPARK_2_11,
+        DALI_UDFS_IVY_URL_SPARK_2_12);
 
     add("com.linkedin.jemslookup.udf.hive.JemsLookup", "com.linkedin.jemslookup.udf.spark.JemsLookup",
-        "ivy://com.linkedin.jobs-udf:jems-udfs:1.0.0?classifier=spark");
+        "ivy://com.linkedin.jobs-udf:jems-udfs:1.0.0?classifier=spark", null);
 
     add("com.linkedin.stdudfs.parsing.hive.UserAgentParser", "com.linkedin.stdudfs.parsing.spark.UserAgentParser",
-        "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:2.0.1?classifier=spark");
+        "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:2.0.1?classifier=spark", null);
 
     add("com.linkedin.stdudfs.parsing.hive.Ip2Str", "com.linkedin.stdudfs.parsing.spark.Ip2Str",
-        "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:2.0.1?classifier=spark");
+        "ivy://com.linkedin.standard-udfs-parsing:parsing-stdudfs:2.0.1?classifier=spark", null);
   }
 
   /**
@@ -119,20 +137,37 @@ private TransportableUDFMap() {
    * @return Optional
    */
   static Optional<SparkUDFInfo> lookup(String className) {
-    return Optional.ofNullable(UDF_MAP.get(className));
+    ScalaVersion scalaVersion = getScalaVersion();
+    return Optional.ofNullable(UDF_MAP.get(className)).map(scalaMap -> Optional.ofNullable(scalaMap.get(scalaVersion))
+        .orElseThrow(() -> new UnsupportedUDFException(String.format(
+            "Transport UDF for class '%s' is not supported for scala %s, please contact " + "the UDF owner for upgrade",
+            className, scalaVersion.toString()))));
+  }
+
+  public static void add(String className, String sparkClassName, String artifactoryUrlSpark211,
+      String artifactoryUrlSpark212) {
+    Map<ScalaVersion, SparkUDFInfo> scalaToTransportUdfMap = new HashMap<ScalaVersion, SparkUDFInfo>() {
+      {
+        put(ScalaVersion.SCALA_2_11, artifactoryUrlSpark211 == null ? null : new SparkUDFInfo(sparkClassName, null,
+            Collections.singletonList(URI.create(artifactoryUrlSpark211)), SparkUDFInfo.UDFTYPE.TRANSPORTABLE_UDF));
+        put(ScalaVersion.SCALA_2_12, artifactoryUrlSpark212 == null ? null : new SparkUDFInfo(sparkClassName, null,
+            Collections.singletonList(URI.create(artifactoryUrlSpark212)), SparkUDFInfo.UDFTYPE.TRANSPORTABLE_UDF));
+      }
+    };
+    UDF_MAP.put(className, scalaToTransportUdfMap);
   }
 
-  public static void add(String className, String sparkClassName, String artifcatoryUrl) {
+  static ScalaVersion getScalaVersion() {
     try {
-      URI url = new URI(artifcatoryUrl);
-      List<URI> listOfUris = new LinkedList<>();
-      listOfUris.add(url);
-
-      // Set the function name to null here because it is determined dynamically from the enclosing SqlOperand
-      UDF_MAP.put(className,
-          new SparkUDFInfo(sparkClassName, null, listOfUris, SparkUDFInfo.UDFTYPE.TRANSPORTABLE_UDF));
-    } catch (URISyntaxException e) {
-      throw new RuntimeException(String.format("Artifactory URL is malformed %s", artifcatoryUrl), e);
+      String sparkVersion = SparkSession.active().version();
+      if (sparkVersion.matches("2\\.[\\d\\.]*"))
+        return ScalaVersion.SCALA_2_11;
+      if (sparkVersion.matches("3\\.[\\d\\.]*"))
+        return ScalaVersion.SCALA_2_12;
+      throw new IllegalStateException(String.format("Unsupported Spark Version %s", sparkVersion));
+    } catch (IllegalStateException | NoClassDefFoundError ex) {
+      LOG.warn("Couldn't determine Spark version, falling back to scala_2.11", ex);
+      return ScalaVersion.SCALA_2_11;
     }
   }
diff --git a/coral-spark/src/spark3test/java/com.linkedin.coral.spark/TransportableUDFMapTest.java b/coral-spark/src/spark3test/java/com.linkedin.coral.spark/TransportableUDFMapTest.java
new file mode 100644
index 000000000..8b85fc4dd
--- /dev/null
+++ b/coral-spark/src/spark3test/java/com.linkedin.coral.spark/TransportableUDFMapTest.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2018-2021 LinkedIn Corporation. All rights reserved.
+ * Licensed under the BSD-2 Clause license.
+ * See LICENSE in the project root for license information.
+ */
+package com.linkedin.coral.spark;
+
+import org.apache.spark.sql.SparkSession;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.linkedin.coral.spark.exceptions.UnsupportedUDFException;
+
+
+public class TransportableUDFMapTest {
+
+  @Test
+  public void testScalaVersionWithSparkSession() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    Assert.assertEquals(TransportableUDFMap.getScalaVersion(), TransportableUDFMap.ScalaVersion.SCALA_2_12);
+    ss.close();
+  }
+
+  @Test
+  public void testDefaultScalaVersion() {
+    // If SparkSession is not active, getScalaVersion should return Scala2.11
+    Assert.assertEquals(TransportableUDFMap.getScalaVersion(), TransportableUDFMap.ScalaVersion.SCALA_2_11);
+  }
+
+  @Test
+  public void testLookupWithSparkSession() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    String stdDaliUdf = "com.linkedin.dali.udf.date.hive.EpochToDateFormat";
+    Assert.assertTrue(TransportableUDFMap.lookup(stdDaliUdf).get().getArtifactoryUrls().get(0).toString()
+        .contains("classifier=spark_2.12"));
+    ss.close();
+  }
+
+  @Test
+  public void testDefaultLookup() {
+    // If SparkSession is not active, lookup should return Scala2.11
+    String stdDaliUdf = "com.linkedin.dali.udf.date.hive.EpochToDateFormat";
+    Assert.assertTrue(TransportableUDFMap.lookup(stdDaliUdf).get().getArtifactoryUrls().get(0).toString()
+        .contains("classifier=spark_2.11"));
+  }
+
+  @Test
+  public void testLookupDoesNotExist() {
+    String stdClassName = "com.linkedin.dali.udf.date.hive.UdfDoesNotExist";
+    Assert.assertFalse(TransportableUDFMap.lookup(stdClassName).isPresent());
+  }
+
+  @Test
+  public void testLookupExistButNotReadyForSpark3() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    try {
+      String stdClassName = "com.transport.test.hive.Spark3Fail";
+      TransportableUDFMap.add(stdClassName, "com.transport.test.spark.Spark3Fail",
+          "com.transport:spark-udf:0.0.0?classifier=spark", null);
+      TransportableUDFMap.lookup(stdClassName);
+    } catch (Exception ex) {
+      Assert.assertTrue(ex instanceof UnsupportedUDFException);
+    } finally {
+      ss.close();
+    }
+  }
+}
diff --git a/coral-spark/src/sparktest/java/com.linkedin.coral.spark/TransportableUDFMapTest.java b/coral-spark/src/sparktest/java/com.linkedin.coral.spark/TransportableUDFMapTest.java
new file mode 100644
index 000000000..cf2cbcf58
--- /dev/null
+++ b/coral-spark/src/sparktest/java/com.linkedin.coral.spark/TransportableUDFMapTest.java
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2018-2021 LinkedIn Corporation. All rights reserved.
+ * Licensed under the BSD-2 Clause license.
+ * See LICENSE in the project root for license information.
+ */
+package com.linkedin.coral.spark;
+
+import org.apache.spark.sql.SparkSession;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+public class TransportableUDFMapTest {
+
+  @Test
+  public void testScalaVersionWithSparkSession() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    Assert.assertEquals(TransportableUDFMap.getScalaVersion(), TransportableUDFMap.ScalaVersion.SCALA_2_11);
+    ss.close();
+  }
+
+  @Test
+  public void testDefaultScalaVersion() {
+    // If SparkSession is not active, getScalaVersion should return Scala2.11
+    Assert.assertEquals(TransportableUDFMap.getScalaVersion(), TransportableUDFMap.ScalaVersion.SCALA_2_11);
+  }
+
+  @Test
+  public void testLookupWithSparkSession() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    String stdDaliUdf = "com.linkedin.dali.udf.date.hive.EpochToDateFormat";
+    Assert.assertTrue(TransportableUDFMap.lookup(stdDaliUdf).get().getArtifactoryUrls().get(0).toString()
+        .contains("classifier=spark_2.11"));
+    ss.close();
+  }
+
+  @Test
+  public void testDefaultLookup() {
+    // If SparkSession is not active, lookup should return Scala2.11
+    String stdDaliUdf = "com.linkedin.dali.udf.date.hive.EpochToDateFormat";
+    Assert.assertTrue(TransportableUDFMap.lookup(stdDaliUdf).get().getArtifactoryUrls().get(0).toString()
+        .contains("classifier=spark_2.11"));
+  }
+
+  @Test
+  public void testLookupDoesNotExist() {
+    String stdClassName = "com.linkedin.dali.udf.date.hive.UdfDoesNotExist";
+    Assert.assertFalse(TransportableUDFMap.lookup(stdClassName).isPresent());
+  }
+
+  @Test
+  public void testLookupExistButNotReadyForSpark3() {
+    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]")
+        .enableHiveSupport().getOrCreate();
+    String stdClassName = "com.transport.test.hive.Spark3Fail";
+    TransportableUDFMap.add(stdClassName, "com.transport.test.spark.Spark3Fail",
+        "com.transport:spark-udf:0.0.0?classifier=spark_2.11", null);
+    assert TransportableUDFMap.lookup(stdClassName).get().getArtifactoryUrls().get(0).toString()
+        .contains("classifier=spark_2.11");
+    ss.close();
+  }
+}
diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
index 820b0480b..34d03a18c 100644
--- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
+++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java
@@ -49,7 +49,7 @@ public void beforeClass() throws HiveException, MetaException {
 
     UnsupportedHiveUDFsInSpark.add("com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF");
     TransportableUDFMap.add("com.linkedin.coral.hive.hive2rel.CoralTestUDF", "com.linkedin.coral.spark.CoralTestUDF",
-        "ivy://com.linkedin.coral.spark.CoralTestUDF");
+        "ivy://com.linkedin.coral.spark.CoralTestUDF", null);
   }
 
   @Test
diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle
index ccade4cb6..e16a439bb 100644
--- a/gradle/dependencies.gradle
+++ b/gradle/dependencies.gradle
@@ -12,7 +12,9 @@ def versions = [
   'hive': '1.2.2',
   'hadoop': '2.7.0',
   'pig': '0.15.0',
-  'ivy': '2.4.0'
+  'ivy': '2.4.0',
+  'spark': '2.4.0',
+  'spark3': '3.1.1'
 ]
 
 ext.deps = [
@@ -33,7 +35,6 @@ ext.deps = [
     'hive-metastore': "org.apache.hive:hive-metastore:${versions['hive']}",
     'hive-exec-core': "org.apache.hive:hive-exec:${versions['hive']}:core",
     'hive-common': "org.apache.hive:hive-common:${versions['hive']}"
-
   ],
   'hadoop': [
     'hadoop-common': "org.apache.hadoop:hadoop-common:${versions['hadoop']}",
@@ -43,5 +44,12 @@ ext.deps = [
   'pig': [
     'pig': "org.apache.pig:pig:${versions['pig']}:h2",
     'pigunit': "org.apache.pig:pigunit:${versions['pig']}"
+  ],
+  'spark': [
+    'sql': "org.apache.spark:spark-sql_2.11:${versions['spark']}",
+    'hive': "org.apache.spark:spark-hive_2.11:${versions['spark']}"
+  ],
+  'spark3': [
+    'hive': "org.apache.spark:spark-hive_2.12:${versions['spark3']}"
   ]
 ]