diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index d8d1ba93b247b..7b7c897d7460d 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -133,10 +133,11 @@ if [ -z "$STAGING_REPO_NUM" ] && [ -z "$MAVEN_BASE_URL" ]; then echo 'Adding built bundle jars for validation' if [[ "$SCALA_PROFILE" != 'scala-2.13' ]]; then # For Scala 2.13, Flink is not support, so skipping the Flink bundle validation cp ${GITHUB_WORKSPACE}/packaging/hudi-flink-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-kafka-connect-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-metaserver-server-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ fi + cp ${GITHUB_WORKSPACE}/packaging/hudi-cli-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-hadoop-mr-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-spark-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-utilities-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ @@ -145,27 +146,33 @@ if [ -z "$STAGING_REPO_NUM" ] && [ -z "$MAVEN_BASE_URL" ]; then else echo 'Adding environment variables for bundles in the release candidate or artifact' + HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle HUDI_HADOOP_MR_BUNDLE_NAME=hudi-hadoop-mr-bundle HUDI_KAFKA_CONNECT_BUNDLE_NAME=hudi-kafka-connect-bundle HUDI_METASERVER_SERVER_BUNDLE_NAME=hudi-metaserver-server-bundle if [[ ${SPARK_PROFILE} == 'spark3.3' ]]; then + HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12 HUDI_SPARK_BUNDLE_NAME=hudi-spark3.3-bundle_2.12 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 elif [[ ${SPARK_PROFILE} == 'spark3.4' ]]; then + 
HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12 HUDI_SPARK_BUNDLE_NAME=hudi-spark3.4-bundle_2.12 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 elif [[ ${SPARK_PROFILE} == 'spark3.5' && ${SCALA_PROFILE} == 'scala-2.12' ]]; then + HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12 HUDI_SPARK_BUNDLE_NAME=hudi-spark3.5-bundle_2.12 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 elif [[ ${SPARK_PROFILE} == 'spark3.5' && ${SCALA_PROFILE} == 'scala-2.13' ]]; then + HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.13 HUDI_SPARK_BUNDLE_NAME=hudi-spark3.5-bundle_2.13 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.13 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.13 elif [[ ${SPARK_PROFILE} == 'spark3' ]]; then + HUDI_CLI_BUNDLE_NAME=hudi-cli-bundle_2.12 HUDI_SPARK_BUNDLE_NAME=hudi-spark3-bundle_2.12 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 @@ -188,6 +196,7 @@ else fi echo "Downloading bundle jars from base URL - $REPO_BASE_URL ..." 
+ wget -q $REPO_BASE_URL/$HUDI_CLI_BUNDLE_NAME/$HUDI_VERSION/$HUDI_CLI_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/ wget -q $REPO_BASE_URL/$HUDI_FLINK_BUNDLE_NAME/$HUDI_VERSION/$HUDI_FLINK_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/ wget -q $REPO_BASE_URL/$HUDI_HADOOP_MR_BUNDLE_NAME/$HUDI_VERSION/$HUDI_HADOOP_MR_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/ wget -q $REPO_BASE_URL/$HUDI_KAFKA_CONNECT_BUNDLE_NAME/$HUDI_VERSION/$HUDI_KAFKA_CONNECT_BUNDLE_NAME-$HUDI_VERSION.jar -P $TMP_JARS_DIR/ diff --git a/packaging/bundle-validation/cli/commands.txt b/packaging/bundle-validation/cli/commands.txt new file mode 100644 index 0000000000000..eaebd50d17279 --- /dev/null +++ b/packaging/bundle-validation/cli/commands.txt @@ -0,0 +1,5 @@ +create --path file:///tmp/hudi-bundles/tests/table --tableName trips --tableType COPY_ON_WRITE +connect --path file:///tmp/hudi-bundles/tests/table +desc +commits show +exit \ No newline at end of file diff --git a/packaging/bundle-validation/cli/conf/hudi-env.sh b/packaging/bundle-validation/cli/conf/hudi-env.sh new file mode 100644 index 0000000000000..5d2cc36c2bbd6 --- /dev/null +++ b/packaging/bundle-validation/cli/conf/hudi-env.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Set the necessary environment variables +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop/conf"} +export SPARK_CONF_DIR=${SPARK_CONF_DIR:-"/etc/spark/conf"} +export CLIENT_JAR=${CLIENT_JAR} diff --git a/packaging/bundle-validation/cli/hudi-cli-with-bundle.sh b/packaging/bundle-validation/cli/hudi-cli-with-bundle.sh new file mode 100755 index 0000000000000..ecb5adc0fb643 --- /dev/null +++ b/packaging/bundle-validation/cli/hudi-cli-with-bundle.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +JAKARTA_EL_VERSION=3.0.3 +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +echo "DIR is ${DIR}" + +if [ -z "$CLI_BUNDLE_JAR" ]; then + echo "Inferring CLI_BUNDLE_JAR path assuming this script is under Hudi repo" + CLI_BUNDLE_JAR=`ls $DIR/target/hudi-cli-bundle*.jar | grep -v source | grep -v javadoc` +fi + +if [ -z "$SPARK_BUNDLE_JAR" ]; then + echo "Inferring SPARK_BUNDLE_JAR path assuming this script is under Hudi repo" + SPARK_BUNDLE_JAR=`ls $DIR/../hudi-spark-bundle/target/hudi-spark*-bundle*.jar | grep -v source | grep -v javadoc` +fi + +echo "CLI_BUNDLE_JAR: $CLI_BUNDLE_JAR" +echo "SPARK_BUNDLE_JAR: $SPARK_BUNDLE_JAR" + +if [ -z "$HUDI_CONF_DIR" ]; then + echo "HUDI_CONF_DIR not set, setting HUDI_CONF_DIR" + HUDI_CONF_DIR="${DIR}"/conf +fi + +echo "HUDI_CONF_DIR: $HUDI_CONF_DIR" + +# hudi aux lib contains jakarta.el jars, which need to be put directly on class path +HUDI_AUX_LIB="${DIR}"/auxlib + +if [ ! -d $HUDI_AUX_LIB ]; then + echo "Downloading necessary auxiliary jars for Hudi CLI to $HUDI_AUX_LIB" + wget https://repo1.maven.org/maven2/org/glassfish/jakarta.el/$JAKARTA_EL_VERSION/jakarta.el-$JAKARTA_EL_VERSION.jar -P $HUDI_AUX_LIB + wget https://repo1.maven.org/maven2/jakarta/el/jakarta.el-api/$JAKARTA_EL_VERSION/jakarta.el-api-$JAKARTA_EL_VERSION.jar -P $HUDI_AUX_LIB +fi + +. "${DIR}"/conf/hudi-env.sh + +if [ -z "$CLI_BUNDLE_JAR" ] || [ -z "$SPARK_BUNDLE_JAR" ]; then + echo "Make sure to generate both the hudi-cli-bundle.jar and hudi-spark-bundle.jar before running this script." 
+ exit 1 +fi + +if [ -z "$SPARK_HOME" ]; then + echo "SPARK_HOME not set, setting to /usr/local/spark" + export SPARK_HOME="/usr/local/spark" +fi + +if [ -z "$CLIENT_JAR" ]; then + echo "Client jar location not set, please set it in conf/hudi-env.sh" +fi + +echo "Running : java -cp ${HUDI_CONF_DIR}:${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main $@" +java -cp ${HUDI_CONF_DIR}:${HUDI_AUX_LIB}/*:${SPARK_HOME}/*:${SPARK_HOME}/jars/*:${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${CLI_BUNDLE_JAR}:${SPARK_BUNDLE_JAR}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.apache.hudi.cli.Main "$@" diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh index 84459cf0d1752..83e5dae77542f 100755 --- a/packaging/bundle-validation/validate.sh +++ b/packaging/bundle-validation/validate.sh @@ -42,6 +42,7 @@ ln -sf $JARS_DIR/hudi-spark*.jar $JARS_DIR/spark.jar ln -sf $JARS_DIR/hudi-utilities-bundle*.jar $JARS_DIR/utilities.jar ln -sf $JARS_DIR/hudi-utilities-slim*.jar $JARS_DIR/utilities-slim.jar ln -sf $JARS_DIR/hudi-metaserver-server-bundle*.jar $JARS_DIR/metaserver.jar +ln -sf $JARS_DIR/hudi-cli-bundle*.jar $JARS_DIR/cli.jar ## # Function to change Java runtime version by changing JAVA_HOME @@ -268,11 +269,57 @@ test_metaserver_bundle () { kill $DERBY_PID $HIVE_PID $METASEVER_PID } +## +# Function to test the hudi-cli bundle. 
+# It creates a test table and connects to it using CLI commands +# +# env vars +# SPARK_HOME: path to the spark directory +# CLI_BUNDLE_JAR: path to the hudi cli bundle jar +# SPARK_BUNDLE_JAR: path to the hudi spark bundle jar +## +test_cli_bundle() { + echo "::warning::validate.sh setting up CLI bundle validation" + + # Create a temporary directory for CLI commands output + CLI_TEST_DIR="/tmp/hudi-bundles/tests/log" + mkdir -p $CLI_TEST_DIR + + # Set required environment variables + export SPARK_HOME=$SPARK_HOME + export CLI_BUNDLE_JAR=$JARS_DIR/cli.jar + export SPARK_BUNDLE_JAR=$JARS_DIR/spark.jar + + # Execute with debug output + echo "Executing Hudi CLI commands..." + $WORKDIR/cli/hudi-cli-with-bundle.sh < $WORKDIR/cli/commands.txt 2>&1 | tee $CLI_TEST_DIR/output.txt + + # Verify table was created + if [ ! -d "/tmp/hudi-bundles/tests/table/.hoodie" ]; then + echo "::error::validate.sh CLI bundle validation failed - Table directory not created" + return 1 + fi + + if ! grep -q "Metadata for table trips loaded" $CLI_TEST_DIR/output.txt; then + echo "::error::validate.sh CLI bundle validation failed - Table connection failed" + return 1 + fi + + echo "::warning::validate.sh CLI bundle validation was successful" + return 0 +} ############################ # Execute tests ############################ +echo "::warning::validate.sh validating cli bundle" +test_cli_bundle +if [ "$?" -ne 0 ]; then + exit 1 +fi +echo "::warning::validate.sh done validating cli bundle" + echo "::warning::validate.sh validating spark & hadoop-mr bundle" test_spark_hadoop_mr_bundles if [ "$?" -ne 0 ]; then