Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HUDI-5387] Add bundle validation for hudi-cli-bundle #12882

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions packaging/bundle-validation/ci_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,12 @@ if [ -z "$STAGING_REPO_NUM" ] && [ -z "$MAVEN_BASE_URL" ]; then
echo 'Adding built bundle jars for validation'
if [[ "$SCALA_PROFILE" != 'scala-2.13' ]]; then
  # For Scala 2.13, Flink is not supported, so skip the Flink, Kafka-connect,
  # and metaserver bundle validation for that profile.
  cp "${GITHUB_WORKSPACE}"/packaging/hudi-flink-bundle/target/hudi-*-"$HUDI_VERSION".jar "$TMP_JARS_DIR"/
  cp "${GITHUB_WORKSPACE}"/packaging/hudi-kafka-connect-bundle/target/hudi-*-"$HUDI_VERSION".jar "$TMP_JARS_DIR"/
  cp "${GITHUB_WORKSPACE}"/packaging/hudi-metaserver-server-bundle/target/hudi-*-"$HUDI_VERSION".jar "$TMP_JARS_DIR"/
fi
# The CLI bundle is built for every Scala profile, so copy it unconditionally here.
# (It was previously also copied inside the guard above, producing a redundant
# duplicate copy on non-2.13 profiles.)
cp "${GITHUB_WORKSPACE}"/packaging/hudi-cli-bundle/target/hudi-*-"$HUDI_VERSION".jar "$TMP_JARS_DIR"/
cp ${GITHUB_WORKSPACE}/packaging/hudi-hadoop-mr-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
cp ${GITHUB_WORKSPACE}/packaging/hudi-spark-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/
Expand All @@ -145,27 +147,33 @@ if [ -z "$STAGING_REPO_NUM" ] && [ -z "$MAVEN_BASE_URL" ]; then
else
echo 'Adding environment variables for bundles in the release candidate or artifact'

# Default (Scala-version-agnostic) bundle artifact names; the Scala-suffixed
# variants (e.g. hudi-cli-bundle_2.12) are assigned per Spark/Scala profile below.
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle
HUDI_HADOOP_MR_BUNDLE_NAME=hudi-hadoop-mr-bundle
HUDI_KAFKA_CONNECT_BUNDLE_NAME=hudi-kafka-connect-bundle
HUDI_METASERVER_SERVER_BUNDLE_NAME=hudi-metaserver-server-bundle

if [[ ${SPARK_PROFILE} == 'spark3.3' ]]; then
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12
HUDI_SPARK_BUNDLE_NAME=hudi-spark3.3-bundle_2.12
HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
elif [[ ${SPARK_PROFILE} == 'spark3.4' ]]; then
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12
HUDI_SPARK_BUNDLE_NAME=hudi-spark3.4-bundle_2.12
HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
elif [[ ${SPARK_PROFILE} == 'spark3.5' && ${SCALA_PROFILE} == 'scala-2.12' ]]; then
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12
HUDI_SPARK_BUNDLE_NAME=hudi-spark3.5-bundle_2.12
HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
elif [[ ${SPARK_PROFILE} == 'spark3.5' && ${SCALA_PROFILE} == 'scala-2.13' ]]; then
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.13
HUDI_SPARK_BUNDLE_NAME=hudi-spark3.5-bundle_2.13
HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.13
HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.13
elif [[ ${SPARK_PROFILE} == 'spark3' ]]; then
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12
HUDI_SPARK_BUNDLE_NAME=hudi-spark3-bundle_2.12
HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12
HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12
Expand All @@ -188,6 +196,7 @@ else
fi

echo "Downloading bundle jars from base URL - $REPO_BASE_URL ..."
# Jars follow the Maven repository layout:
#   $REPO_BASE_URL/<artifact>/<version>/<artifact>-<version>.jar
wget -q $REPO_BASE_URL/$HUDI_CLI_BUNDLE_NAME/$HUDI_VERSION/$HUDI_CLI_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/
wget -q $REPO_BASE_URL/$HUDI_FLINK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_FLINK_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/
wget -q $REPO_BASE_URL/$HUDI_HADOOP_MR_BUNDLE_NAME/$HUDI_VERSION/$HUDI_HADOOP_MR_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/
wget -q $REPO_BASE_URL/$HUDI_KAFKA_CONNECT_BUNDLE_NAME/$HUDI_VERSION/$HUDI_KAFKA_CONNECT_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/
Expand Down
5 changes: 5 additions & 0 deletions packaging/bundle-validation/cli/commands.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
create --path file:///tmp/hudi-bundles/tests/table --tableName trips --tableType COPY_ON_WRITE
connect --path file:///tmp/hudi-bundles/tests/table
desc
commits show
exit
22 changes: 22 additions & 0 deletions packaging/bundle-validation/cli/conf/hudi-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Environment setup sourced by the Hudi CLI launcher. Each variable keeps a
# pre-existing value when one is set and otherwise falls back to its default.
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-/etc/hadoop/conf}"
export SPARK_CONF_DIR="${SPARK_CONF_DIR:-/etc/spark/conf}"
# Re-export so a CLIENT_JAR set by the caller is visible to child processes.
export CLIENT_JAR="${CLIENT_JAR}"
69 changes: 69 additions & 0 deletions packaging/bundle-validation/cli/hudi-cli-with-bundle.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launches the Hudi CLI (org.apache.hudi.cli.Main) with the hudi-cli-bundle
# and hudi-spark-bundle jars on the classpath.
#
# Environment variables (all optional):
#   CLI_BUNDLE_JAR    hudi-cli-bundle jar; inferred from $DIR/target when unset
#   SPARK_BUNDLE_JAR  hudi-spark-bundle jar; inferred from ../hudi-spark-bundle/target when unset
#   HUDI_CONF_DIR     CLI config dir; defaults to $DIR/conf
#   SPARK_HOME        Spark installation; defaults to /usr/local/spark
#   CLIENT_JAR        optional extra client jar, set via conf/hudi-env.sh

JAKARTA_EL_VERSION=3.0.3
# Absolute directory containing this script, so it can be invoked from anywhere.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
echo "DIR is ${DIR}"

if [ -z "$CLI_BUNDLE_JAR" ]; then
  echo "Inferring CLI_BUNDLE_JAR path assuming this script is under Hudi repo"
  # Exclude the sources/javadoc jars produced alongside the bundle jar.
  CLI_BUNDLE_JAR=$(ls "$DIR"/target/hudi-cli-bundle*.jar | grep -v source | grep -v javadoc)
fi

if [ -z "$SPARK_BUNDLE_JAR" ]; then
  echo "Inferring SPARK_BUNDLE_JAR path assuming this script is under Hudi repo"
  SPARK_BUNDLE_JAR=$(ls "$DIR"/../hudi-spark-bundle/target/hudi-spark*-bundle*.jar | grep -v source | grep -v javadoc)
fi

echo "CLI_BUNDLE_JAR: $CLI_BUNDLE_JAR"
echo "SPARK_BUNDLE_JAR: $SPARK_BUNDLE_JAR"

# Fail fast BEFORE downloading auxiliary jars when either bundle jar is missing.
if [ -z "$CLI_BUNDLE_JAR" ] || [ -z "$SPARK_BUNDLE_JAR" ]; then
  echo "Make sure to generate both the hudi-cli-bundle.jar and hudi-spark-bundle.jar before running this script." >&2
  # BUG FIX: a bare 'exit' returned status 0 here, so callers (e.g. CI
  # validation) would treat this failure as success; exit non-zero instead.
  exit 1
fi

if [ -z "$HUDI_CONF_DIR" ]; then
  echo "HUDI_CONF_DIR not set, setting HUDI_CONF_DIR"
  HUDI_CONF_DIR="${DIR}"/conf
fi

echo "HUDI_CONF_DIR: $HUDI_CONF_DIR"

# hudi aux lib contains jakarta.el jars, which need to be put directly on class path
HUDI_AUX_LIB="${DIR}"/auxlib

if [ ! -d "$HUDI_AUX_LIB" ]; then
  echo "Downloading necessary auxiliary jars for Hudi CLI to $HUDI_AUX_LIB"
  wget "https://repo1.maven.org/maven2/org/glassfish/jakarta.el/$JAKARTA_EL_VERSION/jakarta.el-$JAKARTA_EL_VERSION.jar" -P "$HUDI_AUX_LIB"
  wget "https://repo1.maven.org/maven2/jakarta/el/jakarta.el-api/$JAKARTA_EL_VERSION/jakarta.el-api-$JAKARTA_EL_VERSION.jar" -P "$HUDI_AUX_LIB"
fi

# Pull in HADOOP_CONF_DIR / SPARK_CONF_DIR / CLIENT_JAR defaults.
. "${DIR}"/conf/hudi-env.sh

if [ -z "$SPARK_HOME" ]; then
  echo "SPARK_HOME not set, setting to /usr/local/spark"
  export SPARK_HOME="/usr/local/spark"
fi

if [ -z "$CLIENT_JAR" ]; then
  echo "Client jar location not set, please set it in conf/hudi-env.sh"
fi

# '/*' entries in -cp are wildcard-expanded by the JVM itself; the argument is
# quoted so the shell does not glob them first.
CLASSPATH="${HUDI_CONF_DIR}:${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR}"
echo "Running : java -cp ${CLASSPATH} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main $*"
java -cp "$CLASSPATH" "-DSPARK_CONF_DIR=${SPARK_CONF_DIR}" "-DHADOOP_CONF_DIR=${HADOOP_CONF_DIR}" org.apache.hudi.cli.Main "$@"
47 changes: 47 additions & 0 deletions packaging/bundle-validation/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ ln -sf $JARS_DIR/hudi-spark*.jar $JARS_DIR/spark.jar
ln -sf $JARS_DIR/hudi-utilities-bundle*.jar $JARS_DIR/utilities.jar
ln -sf $JARS_DIR/hudi-utilities-slim*.jar $JARS_DIR/utilities-slim.jar
ln -sf $JARS_DIR/hudi-metaserver-server-bundle*.jar $JARS_DIR/metaserver.jar
ln -sf $JARS_DIR/hudi-cli-bundle*.jar $JARS_DIR/cli.jar

##
# Function to change Java runtime version by changing JAVA_HOME
Expand Down Expand Up @@ -268,11 +269,57 @@ test_metaserver_bundle () {
kill $DERBY_PID $HIVE_PID $METASEVER_PID
}

##
# Function to test the hudi-cli bundle.
# Runs a scripted CLI session (create/connect/desc/commits/exit from
# cli/commands.txt), then verifies both the on-disk table layout and the
# CLI output captured in a log file.
#
# env vars
#   SPARK_HOME: path to the spark directory
#   JARS_DIR:   directory holding the symlinked bundle jars (cli.jar, spark.jar)
#   WORKDIR:    bundle-validation working dir containing the cli/ helpers
##
test_cli_bundle () {
    echo "::warning::validate.sh setting up CLI bundle validation"

    # Temporary directory holding the CLI session output; grepped below.
    CLI_TEST_DIR="/tmp/hudi-bundles/tests/log"
    mkdir -p "$CLI_TEST_DIR"

    # Environment consumed by cli/hudi-cli-with-bundle.sh.
    export SPARK_HOME="$SPARK_HOME"   # ensure it is exported to the child script
    export CLI_BUNDLE_JAR="$JARS_DIR"/cli.jar
    export SPARK_BUNDLE_JAR="$JARS_DIR"/spark.jar

    # Feed the scripted commands on stdin; tee keeps a copy of all output
    # (stdout+stderr) for the verification steps below.
    echo "Executing Hudi CLI commands..."
    "$WORKDIR"/cli/hudi-cli-with-bundle.sh < "$WORKDIR"/cli/commands.txt 2>&1 | tee "$CLI_TEST_DIR"/output.txt

    # The 'create' command must have initialized the table's .hoodie metadata dir.
    if [ ! -d "/tmp/hudi-bundles/tests/table/.hoodie" ]; then
        echo "::error::validate.sh CLI bundle validation failed - Table directory not created"
        return 1
    fi

    # The 'connect' command logs this line once table metadata loads successfully.
    if ! grep -q "Metadata for table trips loaded" "$CLI_TEST_DIR"/output.txt; then
        echo "::error::validate.sh CLI bundle validation failed - Table connection failed"
        return 1
    fi

    echo "::warning::validate.sh CLI bundle validation was successful"
    return 0
}

############################
# Execute tests
############################

echo "::warning::validate.sh validating cli bundle"
# Abort the whole validation run as soon as the CLI bundle check fails.
if ! test_cli_bundle; then
  exit 1
fi
echo "::warning::validate.sh done validating cli bundle"

echo "::warning::validate.sh validating spark & hadoop-mr bundle"
test_spark_hadoop_mr_bundles
if [ "$?" -ne 0 ]; then
Expand Down
Loading