From 5d19e2c4e776914c883508893e960b3fead086fa Mon Sep 17 00:00:00 2001 From: mck Date: Fri, 2 Aug 2024 08:45:38 +0200 Subject: [PATCH] Improve how we detect and can configure DSE/HCD Fixes - separate gzipping of tarball - default cqlshUsername and cqlshPassword to unset, and don't specify them if not set, - fail-fast if cqlshUsername is set but not cqlshPassword - remove append-redirect when redirecting to `/dev/null` - fixes copying of etc/* files (can't use glob). - fixes running checks from a macos bastion Fixes in tests - install docker-compose in gha - only build `integration-cassandra.docker` once - added tests that the collected tarballs can be untarred --- .github/workflows/ds-collector-ci.yml | 2 + ds-collector-tests/cluster-dse-k8s.make | 1 + ds-collector-tests/cluster-vanilla-k8s.make | 1 + .../cluster-vanilla-ssh-docker.make | 3 ++ ds-collector-tests/docker-compose.yml | 6 --- ds-collector/ds-collector | 52 ++++++++++++------- 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/.github/workflows/ds-collector-ci.yml b/.github/workflows/ds-collector-ci.yml index db92e1d..018bf6a 100644 --- a/.github/workflows/ds-collector-ci.yml +++ b/.github/workflows/ds-collector-ci.yml @@ -26,6 +26,8 @@ jobs: working-directory: ds-collector-tests run: | sudo apt-get install -y binfmt-support qemu qemu-user-static + sudo curl -L "https://github.com/docker/compose/releases/download/v2.29.1/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose + sudo chmod uga+x /usr/local/bin/docker-compose echo "Testing ds-collector" make -f cluster-vanilla-ssh-docker.make diff --git a/ds-collector-tests/cluster-dse-k8s.make b/ds-collector-tests/cluster-dse-k8s.make index 4d9def8..d8375ab 100755 --- a/ds-collector-tests/cluster-dse-k8s.make +++ b/ds-collector-tests/cluster-dse-k8s.make @@ -21,6 +21,7 @@ ${TESTS}: test_%: ./collector/ds-collector -T -p -f /tmp/datastax/test-collector-dse-k8s.conf -n cluster2-dc1-default-sts-0 ./collector/ds-collector -X -f 
/tmp/datastax/test-collector-dse-k8s.conf -n cluster2-dc1-default-sts-0 if ! ls /tmp/datastax/ | grep -q ".tar.gz" ; then echo "Failed to generate artefacts in the K8s cluster "; ls -l /tmp/datastax/ ; exit 1 ; fi + for f in $$(ls /tmp/datastax/*.tar.gz) ; do if ! tar -xf "$$f" ; then echo "Failed to untar artefact $$f in the K8s cluster " ; exit 1 ; fi ; done setup: diff --git a/ds-collector-tests/cluster-vanilla-k8s.make b/ds-collector-tests/cluster-vanilla-k8s.make index 7d44e8c..0074631 100755 --- a/ds-collector-tests/cluster-vanilla-k8s.make +++ b/ds-collector-tests/cluster-vanilla-k8s.make @@ -18,6 +18,7 @@ ${TESTS}: test_%: ./collector/ds-collector -T -p -f /tmp/datastax/test-collector-k8s.conf -n cluster1-dc1-default-sts-0 ./collector/ds-collector -X -f /tmp/datastax/test-collector-k8s.conf -n cluster1-dc1-default-sts-0 if ! ls /tmp/datastax/ | grep -q ".tar.gz" ; then echo "Failed to generate artefacts in the K8s cluster " ; ls -l /tmp/datastax/ ; exit 1 ; fi + for f in $$(ls /tmp/datastax/*.tar.gz) ; do if ! tar -xf "$$f" ; then echo "Failed to untar artefact $$f in the K8s cluster " ; exit 1 ; fi ; done setup: diff --git a/ds-collector-tests/cluster-vanilla-ssh-docker.make b/ds-collector-tests/cluster-vanilla-ssh-docker.make index 7aa1d3f..fb5848a 100755 --- a/ds-collector-tests/cluster-vanilla-ssh-docker.make +++ b/ds-collector-tests/cluster-vanilla-ssh-docker.make @@ -18,6 +18,7 @@ ${TESTS_SSH}: test_ssh_%: docker exec -t ds-collector-tests_bastion_1 /collector/ds-collector -X -f /ds-collector-tests/$* -n ds-collector-tests_cassandra-00_1 # test archives exist if ! ( docker exec ds-collector-tests_bastion_1 ls /tmp/datastax/ ) | grep -q ".tar.gz" ; then echo "Failed to generate artefacts in the SSH cluster" ; ( docker exec ds-collector-tests_bastion_1 ls /tmp/datastax/ ) ; exit 1 ; fi + for f in $$(ls /tmp/datastax/*.tar.gz) ; do if !
tar -xf "$$f" ; then echo "Failed to untar artefact $$f in the ssh cluster " ; exit 1 ; fi ; done # ds-collector over SSH with verbose mode @echo "\n Testing SSH verbose $* \n" docker exec -t ds-collector-tests_bastion_1 /collector/ds-collector -v -T -f /ds-collector-tests/$* -n ds-collector-tests_cassandra-00_1 @@ -25,6 +26,7 @@ ${TESTS_SSH}: test_ssh_%: docker exec -t ds-collector-tests_bastion_1 /collector/ds-collector -v -X -f /ds-collector-tests/$* -n ds-collector-tests_cassandra-00_1 # test archives exist if ! ( docker exec ds-collector-tests_bastion_1 ls /tmp/datastax/ ) | grep -q ".tar.gz" ; then echo "Failed to generate artefacts in the SSH cluster" ; ( docker exec ds-collector-tests_bastion_1 ls /tmp/datastax/ ) ; exit 1 ; fi + for f in $$(ls /tmp/datastax/*.tar.gz) ; do if ! tar -xf "$$f" ; then echo "Failed to untar artefact $$f in the ssh cluster " ; exit 1 ; fi ; done ${TESTS_DOCKER}: test_docker_%: # ds-collector over docker @@ -38,6 +40,7 @@ ${TESTS_DOCKER}: test_docker_%: ./collector/ds-collector -X -f $* -n ds-collector-tests_cassandra-00_1 # test archives exist if ! ls /tmp/datastax/ | grep -q ".tar.gz" ; then echo "Failed to generate artefacts in the docker cluster " ; ls -l /tmp/datastax/ ; exit 1 ; fi + for f in $$(ls /tmp/datastax/*.tar.gz) ; do if ! tar -xf "$$f" ; then echo "Failed to untar artefact $$f in the docker cluster " ; exit 1 ; fi ; done setup: diff --git a/ds-collector-tests/docker-compose.yml b/ds-collector-tests/docker-compose.yml index 1a52150..e766d0a 100644 --- a/ds-collector-tests/docker-compose.yml +++ b/ds-collector-tests/docker-compose.yml @@ -20,9 +20,6 @@ services: retries: 60 cassandra-01: - build: - context: . - dockerfile: integration-cassandra.docker image: ds-collector:integration-cassandra container_name: ds-collector-tests_cassandra-01_1 depends_on: @@ -43,9 +40,6 @@ services: retries: 60 cassandra-02: - build: - context: .
- dockerfile: integration-cassandra.docker image: ds-collector:integration-cassandra container_name: ds-collector-tests_cassandra-02_1 depends_on: diff --git a/ds-collector/ds-collector b/ds-collector/ds-collector index ca0ccf3..647a5ec 100755 --- a/ds-collector/ds-collector +++ b/ds-collector/ds-collector @@ -21,6 +21,7 @@ # date # df # ethtool +# gzip # hostname # iostat # ip @@ -43,7 +44,7 @@ # uptime # # On a debian/ubuntu server these can be installed by running: -# `apt-get install -y procps ethtool iproute2 lsof net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl` +# `apt-get install -y procps ethtool gzip iproute2 lsof net-tools sysstat pciutils ntp ntpstat numactl lvm2 curl` # @@ -276,6 +277,10 @@ bastion_checks() { fi command -v ip >/dev/null 2>&1 || command -v ifconfig >/dev/null 2>&1 || { echo >&2 "ip or ifconfig needs to be installed"; exit 1; } command -v timeout >/dev/null 2>&1 || { echo >&2 "timeout needs to be installed (on macos do 'brew install coreutils')"; exit 1; } + + if [ -n "${cqlshUsername}" ] ; then + [ -n "${cqlshPassword}" ] || { echo >&2 "The cqlsh username is configured but not the password" ; exit 1 ; } + fi [ "$use_docker" = "true" ] || [ "$use_k8s" = "true" ] || command -v ssh >/dev/null 2>&1 || { echo >&2 "ssh needs to be installed"; exit 1; } [ "$use_docker" = "true" ] || [ "$use_k8s" = "true" ] || command -v scp >/dev/null 2>&1 || { echo >&2 "scp needs to be installed"; exit 1; } [ "$use_docker" = "true" ] || [ "$use_k8s" = "true" ] || ! [ -n "$sshPassword" ] || command -v sshpass >/dev/null 2>&1 || { echo >&2 "sshpass needs to be installed"; exit 1; } @@ -283,7 +288,7 @@ bastion_checks() { ! 
[ "$use_k8s" = "true" ] || command -v kubectl >/dev/null 2>&1 || { echo >&2 "kubectl needs to be installed"; exit 1; } # detect if df supports --portability DF_OPT="" - ( df --help | grep -q "\-\-portability" ) && DF_OPT="--portability" + ( ( df --help 2>/dev/null ) | grep -q "\-\-portability" ) && DF_OPT="--portability" [ $(df $DF_OPT "$baseDir" | tail -n +2 | awk '{print $4}') -ge 1000000 ] || { echo >&2 "There must be at least 1GB free at $baseDir"; exit 1; } # revert the prometheus jarfile from text back to jarfile [[ -f "${script_directory}/${prometheus}" ]] || command -v xxd >/dev/null 2>&1 || { echo >&2 "xxd needs to be installed"; exit 1; } @@ -540,14 +545,14 @@ collect_ethernet_device() { sub_dir "network" # legacy eth network names for ethernet_device in eth{0..9} ;do - ip a show $ethernet_device >> /dev/null 2>&1 + ip a show $ethernet_device > /dev/null 2>&1 if [ "$?" = "0" ] ; then ethtool -i $ethernet_device > "$artifactSubDir/ethtool-$ethernet_device.txt" fi done # udev en network names for ethernet_device in $(ip -4 a | grep ": en" | cut -d":" -f2) ;do - ip a show $ethernet_device >> /dev/null 2>&1 + ip a show $ethernet_device > /dev/null 2>&1 if [ "$?" = "0" ] ; then ethtool -i $ethernet_device > "$artifactSubDir/ethtool-$ethernet_device.txt" fi @@ -628,7 +633,9 @@ collect_info_setup() { update_path update_env - cqlshOpts="${cqlshOpts} --username=$cqlshUsername --password=$cqlshPassword" + if [ -n "${cqlshUsername}" ] ; then + cqlshOpts="${cqlshOpts} --username=${cqlshUsername} --password=${cqlshPassword}" + fi if [ "$cqlshSSL" != "false" ]; then echo "enabling ssl for cqlsh" cqlshOpts="$cqlshOpts --ssl" @@ -674,21 +681,20 @@ collect_info_setup() { # DSE if [ "$is_dse" == "true" ]; then - [ "/etc/cassandra" = "${configHome}" ] || echo "Overridding configHome (ignoring previous value of ${configHome})" - # Check DSE package install echo "DSE install: Checking install type..." 
if [ -z "$dse_root_dir" ] && [ -d "/etc/dse" ] && [ -d "/etc/dse/cassandra" ] && [ -f "/etc/default/dse" ] && [ -d "/usr/share/dse/" ]; then + [ -n "${configHome}" ] && echo "Overridding configHome (ignoring previous value: ${configHome})" echo "DSE install: package directories successfully found. Proceeding..." dse_root_dir="/etc/dse/" dse_bin_dir="/usr/bin/" dse_conf_dir="${dseConfigHome:-/etc/dse/}" configHome="/etc/dse/cassandra" - elif [ -d "$dse_root_dir" ] && [ -d "$dse_root_dir/resources/cassandra/conf" ] && [ -d "$dse_root_dir/resources/dse/conf" ]; then + elif [ -d "${dse_root_dir}" ] && ( [ -d "${configHome}" ] || [ -d "${dse_root_dir}/resources/cassandra/conf" ] ) && ( [ -d "${dseConfigHome}" ] || [ -d "${dse_root_dir}/resources/dse/conf" ] ) ; then echo "DSE install: tarball directories successfully found. Proceeding..." - dse_bin_dir="$dse_root_dir/bin/" - dse_conf_dir="${dseConfigHome:-$dse_root_dir/resources/dse/conf/}" - configHome="$dse_root_dir/resources/cassandra/conf" + dse_bin_dir="${dse_root_dir}/bin/" + dse_conf_dir="${dseConfigHome:-${dse_root_dir}/resources/dse/conf/}" + configHome="${configHome:-${dse_root_dir}/resources/cassandra/conf}" else echo "DSE install: no package or tarball directories found, or no tarball directory specified." exit 1 @@ -721,6 +727,7 @@ collect_info_setup() { fi else skip_dse="true" + configHome="${configHome:-/etc/cassandra}" fi } @@ -762,10 +769,14 @@ archive_artifacts() { fi # shellcheck disable=SC1001 # NF is number of fields, which the blanks in front of the first / is the first field - artifactName=$(echo "$artifactDir"|awk -F\/ '{print $NF}') - tar -zcvf "$artifactDir.tar.gz" "$artifactName" >> /dev/null 2>&1 && rm -rf "$artifactDir" + artifactName=$(echo "${artifactDir}"|awk -F\/ '{print $NF}') + tar -cf "${artifactDir}.tar" "$artifactName" statusState=$? 
- echo "$artifactDir.tar.gz" > "$baseDir/$artifactFile" + if [ 0 = ${statusState} ] ; then + rm -rf "$artifactDir" + gzip "${artifactDir}.tar" + echo "${artifactDir}.tar.gz" > "${baseDir}/${artifactFile}" + fi print_status_state } @@ -912,7 +923,8 @@ node_cleanup() { node_connect "rm -f \"${baseDir}/${dstat}\"" node_connect "rm -f \"${baseDir}/collect-info\"" for f in ${script_directory}/etc/*; do - node_connect "rm -f \"${baseDir}/etc/${f}\"" + f_remote="${f##*/}" + node_connect "rm -f \"${baseDir}/etc/${f_remote}\"" done } @@ -990,7 +1002,7 @@ get_info() { node_connect "mkdir -p $baseDir/etc" for f in ${script_directory}/etc/*; do - node_push "$f" "$baseDir/etc" + node_push "$f" "$baseDir/etc/" done if [ "$copyConfig" ]; then @@ -1000,7 +1012,10 @@ get_info() { node_connect "chmod +x \"${baseDir}/$targetFile\"" node_connect "chmod +x \"${baseDir}/$dstat\"" node_connect "chmod +x \"${baseDir}/collect-info\"" - node_connect "chmod +x \"${baseDir}/etc/*\"" + for f in ${script_directory}/etc/*; do + f_remote="${f##*/}" + node_connect "chmod +x \"${baseDir}/etc/${f_remote}\"" + done if [ "$sudo_script_on_node" ]; then node_connect "sudo $baseDir/$targetFile ${collectionArgs[*]}" @@ -1061,7 +1076,6 @@ targetFile=$(basename $0) # base cassandra logHome="/var/log/cassandra" -configHome="/etc/cassandra" #dataHome="/var/lib/cassandra/data" # expected base ssh options @@ -1083,8 +1097,6 @@ jmxPort="7199" bucket="collector-dead-drop" #useS3Auth="true" -cqlshUsername="cassandra" -cqlshPassword="cassandra" cqlshSSL="false" # initialize arrays