diff --git a/.gitignore b/.gitignore index 6b756767d02..b4ba88390c2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.swp *~ .DS_Store +.bsp .cache .classpath .ensime @@ -24,6 +25,8 @@ .settings /lib/ build/apache-maven* +build/sbt-launch-*.jar +build/sbt-config/repositories-local cache checkpoint conf/*.cmd diff --git a/.rat-excludes b/.rat-excludes index 8e359143bfa..2ed4a78fbd8 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -34,4 +34,5 @@ NOTICE* assets/** build/apache-maven-*/** build/scala-*/** +build/sbt-config/** **/benchmarks/** diff --git a/build/sbt b/build/sbt new file mode 100755 index 00000000000..8251f889e91 --- /dev/null +++ b/build/sbt @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SELF=$(cd $(dirname $0) && pwd) +. 
"$SELF/util.sh" + +# Check if repositories-local file exists, otherwise use repositories +SBT_REPOSITORIES_LOCAL_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories-local" +SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + +if [ -f "$SBT_REPOSITORIES_LOCAL_CONFIG" ]; then + SBT_OPTS="${SBT_OPTS:="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_LOCAL_CONFIG"}" +else + SBT_OPTS="${SBT_OPTS:="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"}" +fi + +export SBT_OPTS + +. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" +declare -r default_sbt_opts="-Xss64m" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_default_mem, which is $(get_mem_opts $sbt_default_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. 
+ -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? 
+onExit diff --git a/build/sbt-config/repositories b/build/sbt-config/repositories new file mode 100644 index 00000000000..4121b268283 --- /dev/null +++ b/build/sbt-config/repositories @@ -0,0 +1,16 @@ +[repositories] + local + local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext] + local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/} + # The system property value of `celeborn.sbt.default.artifact.repository` is + # fetched from the environment variable `DEFAULT_ARTIFACT_REPOSITORY` and + # assigned within the build/sbt-launch-lib.bash script. + private: ${celeborn.sbt.default.artifact.repository-file:///dev/null} + gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/ + maven-central + typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/ + typesafe-releases: https://repo.typesafe.com/typesafe/releases/ diff --git a/build/sbt-config/repositories-cn.template b/build/sbt-config/repositories-cn.template new file mode 100644 index 00000000000..5cbd50a9aa6 --- /dev/null +++ b/build/sbt-config/repositories-cn.template @@ -0,0 +1,16 @@ +# As a Chinese 
developer facing network issues, you can accelerate the download +# speed of bootstrap/plugin/dependencies jar packages by executing the +# following command to configure the mirror: +# +# ``` +# cp build/sbt-config/repositories-cn.template build/sbt-config/repositories-local +# ``` + +[repositories] + local + # The system property value of `celeborn.sbt.default.artifact.repository` is + # fetched from the environment variable `DEFAULT_ARTIFACT_REPOSITORY` and + # assigned within the build/sbt-launch-lib.bash script. + private: ${celeborn.sbt.default.artifact.repository-file:///dev/null} + aliyun-maven: https://maven.aliyun.com/nexus/content/groups/public/ + huawei-central: https://mirrors.huaweicloud.com/repository/maven/ diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..858bb2f80c7 --- /dev/null +++ b/build/sbt-launch-lib.bash @@ -0,0 +1,212 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? 
# Base directory for the launcher: ~/.sbt when HOME is set, otherwise the
# directory that contains $script_path.
# NOTE(review): script_path is not assigned in this part of the file;
# presumably the sourcing script provides it -- confirm.
if test -z "$HOME"; then
  declare -r script_dir="$(dirname "$script_path")"
else
  declare -r script_dir="$HOME/.sbt"
fi

# Argument buckets filled by the add* helpers below.
declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
declare -a maven_profiles
# Default memory size in megabytes (consumed by get_mem_opts).
declare sbt_default_mem=4096

# Pick the java executable: prefer $JAVA_HOME/bin/java when it is executable,
# otherwise fall back to whatever `java` resolves to on PATH.
if test -x "$JAVA_HOME/bin/java"; then
  # printf rather than `echo -e`, so backslashes inside JAVA_HOME are printed
  # verbatim instead of being interpreted as escape sequences.
  printf 'Using %s as default JAVA_HOME.\n' "$JAVA_HOME"
  echo "Note, this will be overridden by -java-home if it is set."
  declare java_cmd="$JAVA_HOME/bin/java"
else
  declare java_cmd=java
fi

# Print all arguments to stderr.
echoerr () {
  echo 1>&2 "$@"
}

# Log to stderr when $verbose or $debug is non-empty.
# Returns non-zero when logging is disabled, so callers should not make this
# the last command of a function whose exit status matters.
vlog () {
  [[ $verbose || $debug ]] && echoerr "$@"
}

# Log to stderr when $debug is non-empty (same return-status caveat as vlog).
dlog () {
  [[ $debug ]] && echoerr "$@"
}

# Locate the sbt launcher jar, downloading it on first use.
#
# Reads:   ./project/build.properties (sbt.version), DEFAULT_ARTIFACT_REPOSITORY
# Sets:    SBT_VERSION, URL1, JAR, sbt_jar (and JAR_DL while downloading)
# Exports: SBT_OPTS (only when DEFAULT_ARTIFACT_REPOSITORY is set)
# Exits:   with status 1 when the version cannot be determined, when neither
#          curl nor wget is available, or when the download fails.
acquire_sbt_jar () {
  # Only accept a real `sbt.version = x` property line (not a comment that
  # merely mentions sbt.version), strip surrounding whitespace and stop at the
  # first match.
  SBT_VERSION=$(awk -F '=' '
    /^[ \t]*sbt\.version[ \t]*=/ { gsub(/[ \t\r]/, "", $2); print $2; exit }
  ' ./project/build.properties)
  if [[ -z "$SBT_VERSION" ]]; then
    printf 'Unable to determine sbt.version from ./project/build.properties\n'
    exit 1
  fi

  # DEFAULT_ARTIFACT_REPOSITORY env variable can be used to fetch artifacts
  # from internal repos only.
  # Ex:
  #   DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/
  if [ -n "$DEFAULT_ARTIFACT_REPOSITORY" ]; then
    export SBT_OPTS="$SBT_OPTS -Dceleborn.sbt.default.artifact.repository=${DEFAULT_ARTIFACT_REPOSITORY}"
  fi

  # Normalise the repository root so that it works with or without a trailing
  # slash.
  local repo_root="${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}"
  URL1="${repo_root%/}/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar"
  JAR="build/sbt-launch-${SBT_VERSION}.jar"

  sbt_jar=$JAR

  # Download the sbt launch jar if it hasn't been downloaded yet.
  if [[ ! -f "$sbt_jar" ]]; then
    printf 'Attempting to fetch sbt\n'
    # Download to a temporary name and rename on success, so an interrupted
    # transfer is never mistaken for a complete jar.
    JAR_DL="${JAR}.part"
    if command -v curl > /dev/null 2>&1; then
      curl --fail --location --silent "${URL1}" > "${JAR_DL}" &&\
        mv "${JAR_DL}" "${JAR}"
    elif command -v wget > /dev/null 2>&1; then
      wget --quiet "${URL1}" -O "${JAR_DL}" &&\
        mv "${JAR_DL}" "${JAR}"
    else
      printf 'You do not have curl or wget installed, please install sbt manually from https://www.scala-sbt.org/\n'
      exit 1
    fi
    if [[ ! -f "${JAR}" ]]; then
      # We failed to download: drop the partial file and give up.
      rm -f "${JAR_DL}"
      printf 'Our attempt to download sbt locally to %s failed. Please install sbt manually from https://www.scala-sbt.org/\n' "${JAR}"
      exit 1
    fi
    printf 'Launching sbt from %s\n' "${JAR}"
  fi
}

# Run the given command line; its exit status is the function's status.
# When $verbose or $debug is set, first print the arguments one to a line,
# quoting any that contain spaces.
execRunner () {
  if [[ $verbose || $debug ]]; then
    echo "# Executing command line:"
    local arg
    for arg in "$@"; do
      if [[ $arg == *' '* ]]; then
        printf '"%s"\n' "$arg"
      else
        printf '%s\n' "$arg"
      fi
    done
    echo ""
  fi

  "$@"
}

# Append one argument to the JVM argument list.
addJava () {
  dlog "[addJava] arg = '$1'"
  java_args+=( "$1" )
}

# Record a maven profile and re-export the space-separated list of all
# profiles recorded so far as SBT_MAVEN_PROFILES.
enableProfile () {
  dlog "[enableProfile] arg = '$1'"
  maven_profiles+=( "$1" )
  # Join with single spaces regardless of the caller's IFS.
  local IFS=' '
  export SBT_MAVEN_PROFILES="${maven_profiles[*]}"
}

# Append one entry to the list of sbt commands.
addSbt () {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=( "$1" )
}

# Append one argument to the list of residual (unprocessed) arguments.
addResidual () {
  dlog "[residual] arg = '$1'"
  residual_args+=( "$1" )
}

# Add a JDWP agent option listening on the given port ($1), without suspending
# the JVM at startup.
addDebugger () {
  addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1"
}

# A blunt attempt to move some memory settings in concert
# so they need not be tuned individually.
+get_mem_opts () { + local mem=${1:-$sbt_default_mem} + local codecache=$(( $mem / 8 )) + (( $codecache > 128 )) || codecache=128 + (( $codecache < 2048 )) || codecache=2048 + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec (Compile / sourceManaged).value) + ) + + lazy val commonUnitTestDependencies = Seq( + "org.mockito" % "mockito-core" % "4.11.0" % "test", + "org.scalatest" %% "scalatest" % "3.2.16" % "test", + "junit" % "junit" % "4.12" % "test", + // https://www.scala-sbt.org/1.x/docs/Testing.html + "com.github.sbt" % "junit-interface" % "0.13.3" % "test") +} + +object CelebornBuild extends sbt.internal.BuildDef { + override def projectDefinitions(baseDirectory: File): Seq[Project] = { + Seq( + CelebornCommon.common, + CelebornClient.client, + CelebornService.service, + CelebornWorker.worker, + CelebornMaster.master) ++ maybeSparkClientModules + } + + // ThisBuild / parallelExecution := false + + // scalaVersion := "2.11.12" + + // autoScalaLibrary := false + + crossScalaVersions := Nil + + // load user-defined Profiles + // loadProfiles() +} + +object Utils { + val profiles = { + val profiles = Properties.envOrNone("SBT_MAVEN_PROFILES") + .orElse(Properties.propOrNone("sbt.maven.profiles")) match { + case None => Seq("sbt") + case Some(v) => + 
v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq + } + if (profiles.contains("jdwp-test-debug")) { + sys.props.put("test.jdwp.enabled", "true") + } + profiles + } + + val SPARK_VERSION = profiles.filter(_.startsWith("spark")).headOption + + lazy val sparkClientProjects = SPARK_VERSION match { + case Some("spark-2.4") => Some(Spark24) + case Some("spark-3.0") => Some(Spark30) + case Some("spark-3.1") => Some(Spark31) + case Some("spark-3.2") => Some(Spark32) + case Some("spark-3.3") => Some(Spark33) + case Some("spark-3.4") => Some(Spark34) + case _ => None + } + + lazy val maybeSparkClientModules: Seq[Project] = sparkClientProjects.map(_.modules).getOrElse(Seq.empty) + + def defaultScalaVersion(): String = { + // 1. Inherit the scala version of the spark project + // 2. if the spark profile not specified, using the DEFAULT_SCALA_VERSION + val v = sparkClientProjects.map(_.sparkProjectScalaVersion).getOrElse(DEFAULT_SCALA_VERSION) + require(ALL_SCALA_VERSIONS.contains(v), s"found not allow scala version: $v") + v + } +} + +object CelebornCommon { + lazy val common = Project("celeborn-common", file("common")) + .settings ( + commonSettings, + protoSettings, + libraryDependencies ++= Seq( + "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf", + "com.google.code.findbugs" % "jsr305" % findbugsVersion, + "com.google.guava" % "guava" % guavaVersion, + "commons-io" % "commons-io" % commonsIoVersion, + "io.dropwizard.metrics" % "metrics-core" % metricsVersion, + "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion, + "io.dropwizard.metrics" % "metrics-jvm" % metricsVersion, + "io.netty" % "netty-all" % nettyVersion, + "org.apache.commons" % "commons-crypto" % commonsCryptoVersion, + "org.apache.commons" % "commons-lang3" % commonsLang3Version, + "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, + "org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion, + "org.apache.ratis" % "ratis-client" % 
ratisVersion, + "org.apache.ratis" % "ratis-common" % ratisVersion, + "org.fusesource.leveldbjni" % "leveldbjni-all" % leveldbJniVersion, + "org.roaringbitmap" % "RoaringBitmap" % roaringBitmapVersion, + "org.scala-lang" % "scala-reflect" % scalaVersion.value, + "org.slf4j" % "jcl-over-slf4j" % slf4jVersion, + "org.slf4j" % "jul-to-slf4j" % slf4jVersion, + "org.slf4j" % "slf4j-api" % slf4jVersion, + "org.yaml" % "snakeyaml" % snakeyamlVersion, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version % "test", + "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version % "test", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies, + + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "org" / "apache" / "celeborn" / "package.scala" + streams.value.log.info(s"geneate version information file ${file.toPath}") + IO.write(file, + s"""package org.apache + | + |package object celeborn { + | val VERSION = "${version.value}" + |} + |""".stripMargin) + Seq(file) + // generate version task depends on PB generate to avoid concurrency generate source files + }.dependsOn(Compile / PB.generate), + + // a task to show current profiles + printProfiles := { + val message = profiles.mkString("", " ", "") + println("compile with profiles: %s".format(message)) + } + ) + + lazy val printProfiles = taskKey[Unit]("Prints Profiles") +} + +object CelebornClient { + lazy val client = Project("celeborn-client", file("client")) + // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies + .dependsOn(CelebornCommon.common % "test->test;compile->compile") + .settings ( + commonSettings, + libraryDependencies ++= Seq( + "io.netty" % "netty-all" % nettyVersion, + "com.google.guava" % "guava" % guavaVersion, + "org.lz4" % "lz4-java" % 
lz4JavaVersion, + "com.github.luben" % "zstd-jni" % zstdJniVersion, + "org.apache.commons" % "commons-lang3" % commonsLang3Version, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version % "test", + "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version % "test", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) +} + +object CelebornService { + lazy val service = Project("celeborn-service", file("service")) + .dependsOn(CelebornCommon.common) + .settings ( + commonSettings, + libraryDependencies ++= Seq( + "com.google.code.findbugs" % "jsr305" % findbugsVersion, + "commons-io" % "commons-io" % commonsIoVersion, + "io.netty" % "netty-all" % nettyVersion, + "javax.servlet" % "javax.servlet-api" % javaxServletVersion, + "org.apache.commons" % "commons-crypto" % commonsCryptoVersion, + "org.slf4j" % "slf4j-api" % slf4jVersion, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version % "test", + "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version % "test", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) +} + +object CelebornMaster { + lazy val master = Project("celeborn-master", file("master")) + .dependsOn(CelebornCommon.common, CelebornService.service) + .settings ( + commonSettings, + protoSettings, + libraryDependencies ++= Seq( + "com.google.guava" % "guava" % guavaVersion, + "com.google.protobuf" % "protobuf-java" % protoVersion, + "io.netty" % "netty-all" % nettyVersion, + "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, + "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version, + "org.apache.logging.log4j" % 
"log4j-slf4j-impl" % log4j2Version, + "org.apache.ratis" % "ratis-client" % ratisVersion, + "org.apache.ratis" % "ratis-common" % ratisVersion, + "org.apache.ratis" % "ratis-grpc" % ratisVersion, + "org.apache.ratis" % "ratis-netty" % ratisVersion, + "org.apache.ratis" % "ratis-server" % ratisVersion, + "org.apache.ratis" % "ratis-shell" % ratisVersion, + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) +} + +object CelebornWorker { + lazy val worker = Project("celeborn-worker", file("worker")) + .dependsOn(CelebornCommon.common, CelebornService.service) + .dependsOn(CelebornClient.client % "test->test;compile->compile") + .dependsOn(CelebornMaster.master % "test->test;compile->compile") + .settings ( + commonSettings, + libraryDependencies ++= Seq( + "com.google.guava" % "guava" % guavaVersion, + "commons-io" % "commons-io" % commonsIoVersion, + "io.netty" % "netty-all" % nettyVersion, + "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version, + "org.fusesource.leveldbjni" % "leveldbjni-all" % leveldbJniVersion, + "org.roaringbitmap" % "RoaringBitmap" % roaringBitmapVersion, + "org.mockito" %% "mockito-scala-scalatest" % scalatestMockitoVersion % "test", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) +} + +//////////////////////////////////////////////////////// +// Spark Client // +//////////////////////////////////////////////////////// + +object Spark24 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-2" + val sparkClientProjectName = "celeborn-client-spark-2" + val 
sparkClientShadedProjectPath = "client-spark/spark-2-shaded" + val sparkClientShadedProjectName = "celeborn-client-spark-2-shaded" + + // val jacksonVersion = "2.5.7" + // val jacksonDatabindVersion = "2.6.7.3" + val lz4JavaVersion = "1.4.0" + val sparkProjectScalaVersion = "2.11.12" + // scalaBinaryVersion + // val scalaBinaryVersion = "2.11" + val sparkVersion = "2.4.8" + val zstdJniVersion = "1.4.4-3" +} + +object Spark30 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-3" + val sparkClientProjectName = "celeborn-client-spark-3" + val sparkClientShadedProjectPath = "client-spark/spark-3-shaded" + val sparkClientShadedProjectName = "celeborn-client-spark-3-shaded" + + val lz4JavaVersion = "1.7.1" + val sparkProjectScalaVersion = "2.12.10" + + val sparkVersion = "3.0.3" + val zstdJniVersion = "1.4.4-3" +} + +object Spark31 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-3" + val sparkClientProjectName = "celeborn-client-spark-3" + val sparkClientShadedProjectPath = "client-spark/spark-3-shaded" + val sparkClientShadedProjectName = "celeborn-client-spark-3-shaded" + + val lz4JavaVersion = "1.7.1" + val sparkProjectScalaVersion = "2.12.10" + + val sparkVersion = "3.1.3" + val zstdJniVersion = "1.4.8-1" +} + +object Spark32 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-3" + val sparkClientProjectName = "celeborn-client-spark-3" + val sparkClientShadedProjectPath = "client-spark/spark-3-shaded" + val sparkClientShadedProjectName = "celeborn-client-spark-3-shaded" + + val lz4JavaVersion = "1.7.1" + val sparkProjectScalaVersion = "2.12.15" + + val sparkVersion = "3.2.4" + val zstdJniVersion = "1.5.0-4" +} + +object Spark33 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-3" + val sparkClientProjectName = "celeborn-client-spark-3" + val sparkClientShadedProjectPath = "client-spark/spark-3-shaded" + val sparkClientShadedProjectName = 
"celeborn-client-spark-3-shaded" + + // val jacksonVersion = "2.13.4" + // val jacksonDatabindVersion = "2.13.4.2" + val lz4JavaVersion = "1.8.0" + val sparkProjectScalaVersion = "2.12.15" + // scalaBinaryVersion + // val scalaBinaryVersion = "2.12" + val sparkVersion = "3.3.2" + val zstdJniVersion = "1.5.2-1" +} + +object Spark34 extends SparkClientProjects { + + val sparkClientProjectPath = "client-spark/spark-3" + val sparkClientProjectName = "celeborn-client-spark-3" + val sparkClientShadedProjectPath = "client-spark/spark-3-shaded" + val sparkClientShadedProjectName = "celeborn-client-spark-3-shaded" + + val lz4JavaVersion = "1.8.0" + val sparkProjectScalaVersion = "2.12.17" + + val sparkVersion = "3.4.1" + val zstdJniVersion = "1.5.2-5" + + lazy val deps = Seq( + // Spark Use `log4j-slf4j2-impl` instead of `log4j-slf4j-impl` in SPARK-40511 + // to fix the error: + // ``` + // java.lang.NoSuchMethodError: org.apache.logging.slf4j.Log4jLoggerFactory.(Lorg/apache/logging/slf4j/Log4jMarkerFactory;)V + // ``` + "org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0" % "test" + ) + + override def sparkCommon: Project = { + super.sparkCommon + .settings(libraryDependencies ++= deps) + } + + override def sparkClient: Project = { + super.sparkClient + .settings(libraryDependencies ++= deps) + } + + override def sparkIt: Project = { + super.sparkIt + .settings(libraryDependencies ++= deps) + } +} + +trait SparkClientProjects { + + val sparkClientProjectPath: String + val sparkClientProjectName: String + val sparkClientShadedProjectPath: String + val sparkClientShadedProjectName: String + + val lz4JavaVersion: String + val sparkProjectScalaVersion: String + val sparkVersion: String + val zstdJniVersion: String + + def modules: Seq[Project] = Seq(sparkCommon, sparkClient, sparkIt, sparkClientShade) + + def sparkCommon: Project = { + Project("celeborn-spark-common", file("client-spark/common")) + .dependsOn(CelebornCommon.common) + // ref: 
https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies + .dependsOn(CelebornClient.client % "test->test;compile->compile") + .settings ( + commonSettings, + libraryDependencies ++= Seq( + "org.apache.spark" %% "spark-core" % sparkVersion % "provided", + "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) + } + + def sparkClient: Project = { + Project(sparkClientProjectName, file(sparkClientProjectPath)) + .dependsOn(CelebornCommon.common, sparkCommon) + // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies + .dependsOn(CelebornClient.client % "test->test;compile->compile") + .settings ( + commonSettings, + libraryDependencies ++= Seq( + "org.apache.spark" %% "spark-core" % sparkVersion % "provided", + "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) + } + + def sparkIt: Project = { + Project("celeborn-spark-it", file("tests/spark-it")) + // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies + .dependsOn(CelebornCommon.common % "test->test;compile->compile") + .dependsOn(CelebornClient.client % "test->test;compile->compile") + .dependsOn(CelebornMaster.master % "test->test;compile->compile") + .dependsOn(CelebornWorker.worker % "test->test;compile->compile") + .dependsOn(sparkClient % "test->test;compile->compile") + .settings ( + commonSettings, + libraryDependencies ++= Seq( + 
"org.apache.spark" %% "spark-core" % sparkVersion % "test", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test", + "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests", + "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests", + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin( + "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ) ++ commonUnitTestDependencies + ) + } + + def sparkClientShade: Project = { + Project(sparkClientShadedProjectName, file(sparkClientShadedProjectPath)) + .dependsOn(sparkClient) + .settings ( + commonSettings, + + // align final shaded jar name with maven. + (assembly / assemblyJarName) := { + val extension = artifact.value.extension + s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$extension" + }, + + (assembly / test) := { }, + + (assembly / logLevel) := Level.Info, + + // Exclude `scala-library` from assembly. 
+ (assembly / assemblyPackageScala / assembleArtifact) := false, + + (assembly / assemblyExcludedJars) := { + val cp = (assembly / fullClasspath).value + cp filter { v => + val name = v.data.getName + !(name.startsWith("celeborn-") || name.startsWith("protobuf-java-") || + name.startsWith("guava-") || name.startsWith("netty-") || name.startsWith("commons-lang3-")) + } + }, + + (assembly / assemblyShadeRules) := Seq( + ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll, + ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll, + ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll, + ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll + ), + + (assembly / assemblyMergeStrategy) := { + case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard + // Drop all proto files that are not needed as artifacts of the build. 
+ case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard + case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard + // Drop netty jnilib + case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard + // rename netty native lib + case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" => CustomMergeStrategy.rename( _ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so" ) + case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" => CustomMergeStrategy.rename( _ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so" ) + case _ => MergeStrategy.first + } + ) + } +} diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 00000000000..41f6be16879 --- /dev/null +++ b/project/build.properties @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +sbt.version=1.9.3 diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100644 index 00000000000..d6d602ae1c6 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1") + +addSbtPlugin("com.thesamet" % "sbt-protoc" % "1.0.6") diff --git a/version.sbt b/version.sbt new file mode 100644 index 00000000000..699bb889ff8 --- /dev/null +++ b/version.sbt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ThisBuild / version := "0.4.0-SNAPSHOT"