Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CELEBORN-836][BUILD] Initial support sbt #1757

Closed
wants to merge 44 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
014dded
sbt
cfmcgrady Jun 26, 2023
ed700ce
pure sbt
cfmcgrady Jul 20, 2023
847404b
add spark 3 client
cfmcgrady Jul 24, 2023
5bdc2cc
CelebornBuild.scala
cfmcgrady Jul 24, 2023
42e0893
refactor to support spark 2.4
cfmcgrady Jul 24, 2023
b37570b
auto set scalaVersion
cfmcgrady Jul 24, 2023
40396eb
refactor SparkClientProjects
cfmcgrady Jul 24, 2023
dd0eb5e
set lz4/zstd version
cfmcgrady Jul 24, 2023
aa01dd8
enabled junit
cfmcgrady Jul 24, 2023
8da3e32
commonUnitTestDependencies
cfmcgrady Jul 24, 2023
0cf7dc1
ignore sbt-launch-*.jar
cfmcgrady Jul 25, 2023
8e638b2
add default repositories
cfmcgrady Jul 25, 2023
2996822
add license header
cfmcgrady Jul 25, 2023
3cea3e4
revert client-spark/common/pom.xml
cfmcgrady Jul 25, 2023
0313dd3
add license header
cfmcgrady Jul 25, 2023
19f9845
add license header
cfmcgrady Jul 25, 2023
3161a8b
extract dependencies version
cfmcgrady Jul 25, 2023
23b64e6
spark-3.0 support
cfmcgrady Jul 25, 2023
65f688d
spark-3.1 support
cfmcgrady Jul 25, 2023
b7db214
spark-3.2 support
cfmcgrady Jul 25, 2023
0dc5c83
spark-3.4 support
cfmcgrady Jul 25, 2023
bc9bf27
addresse comment
cfmcgrady Jul 25, 2023
af73dae
add worker module
cfmcgrady Jul 25, 2023
8de82fd
add spark-it module
cfmcgrady Jul 25, 2023
e6022e2
revert common/src/test/scala/org/apache/celeborn/common/CelebornConfS…
cfmcgrady Jul 25, 2023
40af990
Xmx2048m -> Xmx4g && add missing deps
cfmcgrady Jul 25, 2023
913bdd7
rat exclude repositories
cfmcgrady Jul 25, 2023
55592f3
fix spark-3.4 CI
cfmcgrady Jul 25, 2023
3b41963
fix spark-common CI for spark 3.4
cfmcgrady Jul 25, 2023
2e6852a
address comment
cfmcgrady Jul 25, 2023
9980561
order
cfmcgrady Jul 25, 2023
5a9aa09
fix project version
cfmcgrady Jul 25, 2023
cb7bec1
revert common/src/main/java/org/apache/celeborn/common/network/protoc…
cfmcgrady Jul 25, 2023
26edf32
address comment
cfmcgrady Jul 25, 2023
bc9a89e
add CN repositories
cfmcgrady Jul 26, 2023
8f16921
add repositories-cn.template
cfmcgrady Jul 26, 2023
4b2dcea
address comment
cfmcgrady Jul 26, 2023
668577e
add DEFAULT_ARTIFACT_REPOSITORY to repositories
cfmcgrady Jul 26, 2023
8f21700
address comment
cfmcgrady Jul 26, 2023
48a1aa9
set system property sbt.celeborn.default.artifact.repository
cfmcgrady Jul 26, 2023
5c8507d
correct path
cfmcgrady Jul 27, 2023
960a4d5
align final shaded jar name with maven
cfmcgrady Jul 27, 2023
bb161d7
add `celeborn-` prefix for project name
cfmcgrady Jul 27, 2023
fab0da0
address comment
cfmcgrady Jul 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
.settings
/lib/
build/apache-maven*
build/sbt-launch-*.jar
cache
checkpoint
conf/*.cmd
Expand Down
152 changes: 152 additions & 0 deletions build/sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

SELF=$(cd $(dirname $0) && pwd)
. "$SELF/util.sh"

# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
# that we can run Hive to generate the golden answer. This is not required for normal development
# or testing.
if [ -n "$HIVE_HOME" ]; then
for i in "$HIVE_HOME"/lib/*
do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
done
export HADOOP_CLASSPATH
fi

SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories"
export SBT_OPTS="${SBT_OPTS:="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"}"

. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
declare -r default_sbt_opts="-Xss64m"

usage() {
cat <<EOM
Usage: $script_name [options]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this is not the same as sbt official options, we may want to migrate it to the official one in the future.


-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_default_mem, which is $(get_mem_opts $sbt_default_mem))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode

# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt

# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME

# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, it is
prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
-PmavenProfiles Enable a maven profile for the build.

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}

process_my_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
-no-share) addJava "$noshare_opts" && shift ;;
-no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
-sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
-sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
-debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-batch) exec </dev/null && shift ;;

-sbt-create) sbt_create=true && shift ;;

*) addResidual "$1" && shift ;;
esac
done

# Now, ensure sbt version is used.
[[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
}

loadConfigFile() {
cat "$1" | sed '/^\#/d'
}

# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

exit_status=127
saved_stty=""

restoreSttySettings() {
stty $saved_stty
saved_stty=""
}

onExit() {
if [[ "$saved_stty" != "" ]]; then
restoreSttySettings
fi
exit $exit_status
}

saveSttySettings() {
saved_stty=$(stty -g 2>/dev/null)
if [[ ! $? ]]; then
saved_stty=""
fi
}

if [ ! -z "${SPARK_LOCAL_HOSTNAME}" ]; then
echo "Using SPARK_LOCAL_HOSTNAME=$SPARK_LOCAL_HOSTNAME" 1>&2
fi
if [ ! -z "${SPARK_LOCAL_IP}" ]; then
echo "Using SPARK_LOCAL_IP=$SPARK_LOCAL_IP" 1>&2
fi

saveSttySettings
trap onExit INT

run "$@"

exit_status=$?
onExit
12 changes: 12 additions & 0 deletions build/sbt-config/repositories
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[repositories]
local
local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext]
local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}
gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/
maven-central
typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/
typesafe-releases: https://repo.typesafe.com/typesafe/releases/
209 changes: 209 additions & 0 deletions build/sbt-launch-lib.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# A library to simplify using the SBT launcher from other packages.
# Note: This should be used by tools like giter8/conscript etc.

# TODO - Should we merge the main SBT script with this library?

if test -z "$HOME"; then
declare -r script_dir="$(dirname "$script_path")"
else
declare -r script_dir="$HOME/.sbt"
fi

declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
declare -a maven_profiles
declare sbt_default_mem=4096

if test -x "$JAVA_HOME/bin/java"; then
echo -e "Using $JAVA_HOME as default JAVA_HOME."
echo "Note, this will be overridden by -java-home if it is set."
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi

echoerr () {
echo 1>&2 "$@"
}
vlog () {
[[ $verbose || $debug ]] && echoerr "$@"
}
dlog () {
[[ $debug ]] && echoerr "$@"
}

acquire_sbt_jar () {
SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
# DEFAULT_ARTIFACT_REPOSITORY env variable can be used to only fetch
# artifacts from internal repos only.
# Ex:
# DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/
URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar
JAR=build/sbt-launch-${SBT_VERSION}.jar

sbt_jar=$JAR

if [[ ! -f "$sbt_jar" ]]; then
# Download sbt launch jar if it hasn't been downloaded yet
if [ ! -f "${JAR}" ]; then
# Download
printf "Attempting to fetch sbt\n"
JAR_DL="${JAR}.part"
if [ $(command -v curl) ]; then
curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\
mv "${JAR_DL}" "${JAR}"
elif [ $(command -v wget) ]; then
wget --quiet ${URL1} -O "${JAR_DL}" &&\
mv "${JAR_DL}" "${JAR}"
else
printf "You do not have curl or wget installed, please install sbt manually from https://www.scala-sbt.org/\n"
exit -1
fi
fi
if [ ! -f "${JAR}" ]; then
# We failed to download
printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from https://www.scala-sbt.org/\n"
exit -1
fi
printf "Launching sbt from ${JAR}\n"
fi
}

execRunner () {
# print the arguments one to a line, quoting any containing spaces
[[ $verbose || $debug ]] && echo "# Executing command line:" && {
for arg; do
if printf "%s\n" "$arg" | grep -q ' '; then
printf "\"%s\"\n" "$arg"
else
printf "%s\n" "$arg"
fi
done
echo ""
}

"$@"
}

addJava () {
dlog "[addJava] arg = '$1'"
java_args=( "${java_args[@]}" "$1" )
}

enableProfile () {
dlog "[enableProfile] arg = '$1'"
maven_profiles=( "${maven_profiles[@]}" "$1" )
export SBT_MAVEN_PROFILES="${maven_profiles[@]}"
}

addSbt () {
dlog "[addSbt] arg = '$1'"
sbt_commands=( "${sbt_commands[@]}" "$1" )
}
addResidual () {
dlog "[residual] arg = '$1'"
residual_args=( "${residual_args[@]}" "$1" )
}
addDebugger () {
addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1"
}

# a ham-fisted attempt to move some memory settings in concert
# so they need not be dicked around with individually.
get_mem_opts () {
local mem=${1:-$sbt_default_mem}
local codecache=$(( $mem / 8 ))
(( $codecache > 128 )) || codecache=128
(( $codecache < 2048 )) || codecache=2048

echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m"
}

require_arg () {
local type="$1"
local opt="$2"
local arg="$3"
if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
echo "$opt requires <$type> argument" 1>&2
exit 1
fi
}

is_function_defined() {
declare -f "$1" > /dev/null
}

process_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-h|-help) usage; exit 1 ;;
-v|-verbose) verbose=1 && shift ;;
-d|-debug) debug=1 && shift ;;

-ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
-mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
-jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
-batch) exec </dev/null && shift ;;

-sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
-sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
-java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;;

-D*) addJava "$1" && shift ;;
-J*) addJava "${1:2}" && shift ;;
-P*) enableProfile "$1" && shift ;;
*) addResidual "$1" && shift ;;
esac
done

is_function_defined process_my_args && {
myargs=("${residual_args[@]}")
residual_args=()
process_my_args "${myargs[@]}"
}
}

run() {
# no jar? download it.
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
# still no jar? uh-oh.
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
exit 1
}

# process the combined args, then reset "$@" to the residuals
process_args "$@"
set -- "${residual_args[@]}"
argumentCount=$#

# run sbt
execRunner "$java_cmd" \
$(get_mem_opts $sbt_mem) \
${SBT_OPTS:-$default_sbt_opts} \
${java_opts} \
${java_args[@]} \
-jar "$sbt_jar" \
"${sbt_commands[@]}" \
"${residual_args[@]}"
}
Loading