Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
stopAll and general improvements (#109)
Browse files Browse the repository at this point in the history
- Stop/Start with new stopAll checkbox in Truecharts, charts.
- Improve message for heavyscript mount
- Suppress false error after exiting container shell
- Add an error message for when an application fails to enter the desired state even after a rollback
- Fix restart app function to accept multiple deployments
  • Loading branch information
Heavybullets8 authored May 27, 2023
1 parent 8659d83 commit 1e13073
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 108 deletions.
26 changes: 18 additions & 8 deletions functions/app/start_app_prompt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,27 @@ start_app_prompt(){

echo -e "Starting ${blue}$app_name${reset}..."


# Check if app is a cnpg instance, or an operator instance
output=$(check_filtered_apps "$app_name")

if [[ $output == "${app_name},cnpg" ]]; then
scale_resources "$app_name" 120 "$replica_count"
#TODO: Add a check to ensure the pods are running
echo -e "${yellow}Sent the command to start all pods in: $app_name${reset}"
echo -e "${yellow}However, HeavyScript cannot monitor the new applications${reset}"
echo -e "${yellow}with the new postgres backend to ensure it worked..${reset}"
elif cli -c 'app chart_release scale release_name='\""$app_name"\"\ 'scale_options={"replica_count": '"$replica_count}" &> /dev/null; then
# Initialize a flag
cli_success=true

if [[ $output == "${app_name},stopAll-on" ]]; then
if ! cli -c 'app chart_release scale release_name='\""$app_name"\"\ 'scale_options={"replica_count": '"1}" > /dev/null; then
cli_success=false
fi
if ! cli -c "app chart_release update chart_release=\"$app_name\" values={\"global\": {\"stopAll\": false}}" > /dev/null; then
cli_success=false
fi
else
if ! cli -c 'app chart_release scale release_name='\""$app_name"\"\ 'scale_options={"replica_count": '"$replica_count}" > /dev/null; then
cli_success=false
fi
fi

# Check if all cli commands were successful
if $cli_success; then
echo -e "${blue}$app_name ${green}Started${reset}"
echo -e "${green}Replica count set to ${blue}$replica_count${reset}"
else
Expand Down
38 changes: 38 additions & 0 deletions functions/backup_restore/database/cnpg_dump.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,44 @@ get_current_replica_counts() {
k3s kubectl get deploy -n ix-"$app_name" -o json | jq -r '[.items[] | select(.metadata.labels.cnpg != "true" and (.metadata.name | contains("-cnpg-main-") | not)) | {(.metadata.name): .spec.replicas}] | add'
}

# Block until no live pod of the given deployment is listed in the app's
# namespace, or until the timeout elapses.
#
# Arguments:
#   $1 - application name; pods are queried in the "ix-<app>" namespace
#   $2 - timeout in seconds
#   $3 - deployment name whose pods must be gone
# Returns:
#   0 once no matching pod is listed, 1 when the timeout elapses first
wait_for_pods_to_stop() {
    local app_name timeout deployment_name
    app_name="$1"
    timeout="$2"
    # Must be read from $3: with an empty name the final grep pattern collapses
    # to "$", which matches every remaining pod instead of only this deployment.
    deployment_name="$3"

    SECONDS=0
    while true; do
        # Pipeline, stage by stage:
        #   1. list pods whose phase is neither Succeeded nor Failed
        #   2. drop names ending in "-<digit>"
        #   3. strip the last two dash-separated fields from each name
        #   4. drop anything belonging to cnpg
        #   5. succeed only if a remaining name ends with the deployment name
        if ! k3s kubectl get pods -n ix-"$app_name" \
            --field-selector=status.phase!=Succeeded,status.phase!=Failed -o=name \
            | grep -vE -- '-[[:digit:]]$' \
            | rev | cut -d- -f3- | rev \
            | grep -vE -- "-cnpg$|-cnpg-" \
            | grep -qE -- "$deployment_name$"; then
            break
        fi
        if [[ "$SECONDS" -gt "$timeout" ]]; then
            return 1
        fi
        sleep 1
    done
}

# Scale one deployment of an app; when scaling to zero, also wait for its
# pods to stop.
#
# Arguments:
#   $1 - application name (namespace is "ix-<app>")
#   $2 - timeout handed to wait_for_pods_to_stop
#   $3 - replica count; when empty or unset, the output of pull_replicas is used
#   $4 - deployment name
# Returns:
#   when the replica count is 0: 0 if wait_for_pods_to_stop succeeds, else 1
#   otherwise: 0
scale_deployments() {
    local app_name="$1"
    local timeout="$2"
    local replicas="${3:-$(pull_replicas "$app_name")}"
    local deployment_name="$4"

    # Only the named deployment is scaled
    k3s kubectl scale "deployments/${deployment_name}" -n "ix-${app_name}" --replicas="${replicas}"

    # Nothing to wait for unless the deployment is being shut down
    [[ $replicas -eq 0 ]] || return 0

    if wait_for_pods_to_stop "$app_name" "$timeout" "$deployment_name"; then
        return 0
    fi
    return 1
}

dump_database() {
app="$1"
output_dir="$2/${app}"
Expand Down
8 changes: 7 additions & 1 deletion functions/pod/container_shell_or_logs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,20 @@ cmd_execute_shell() {
read -rsn1 -d ' ' ; echo
clear -x
title
if ! k3s kubectl exec -n "ix-$app_name" "${pod}" -c "$container" -it -- sh -c '[ -e /bin/bash ] && exec /bin/bash || exec /bin/sh'; then
k3s kubectl exec -n "ix-$app_name" "${pod}" -c "$container" -it -- sh -c '[ -e /bin/bash ] && exec /bin/bash || exec /bin/sh' 2> >(grep -v "command terminated with exit code 130" >&2)
status=$?
if [[ $status -eq 130 ]]; then
echo "Received exit code 130, ignoring it."
elif [[ $status -ne 0 ]]; then
echo -e "${red}This container does not accept shell access, try a different one.${reset}"
fi
break
done
}
export -f cmd_execute_shell



cmd_execute_logs() {
local lines
while true
Expand Down
3 changes: 2 additions & 1 deletion functions/pvc/mount.sh
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ mount_app_func(){
echo -e "${bold}Unmount Manually with:${reset}\n${blue}zfs set mountpoint=legacy \"$full_path\" && rmdir /mnt/*/mounted_pvc/$data_name${reset}"
fi
echo
echo -e "Or use the Unmount All option"
echo -e "${bold}Or use the Unmount All option:${reset}"
echo -e "${blue}heavyscript pvc --unmount${reset}"

#Ask if user would like to mount something else
while true
Expand Down
19 changes: 19 additions & 0 deletions functions/update/commander.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ skip_previously_failed_version() {
return 1
}

# Append "<app>,<status>" entries to the global apps_with_status array for
# every app that check_filtered_apps reports as "stopAll-on" or "operator".
#
# Globals:
#   array            (read)    - entries whose text before the first comma is
#                                passed to check_filtered_apps
#   apps_with_status (written) - receives the matching "<app>,<status>" pairs
#
# NOTE(review): check_rollback_availability also looks for "<app>,cnpg"
# entries, which this filter never stores - confirm whether that is intended.
get_apps_with_status() {
    local name state

    # Each output line of check_filtered_apps is "<app>,<status>"
    while IFS=, read -r name state; do
        case "$state" in
            stopAll-on | operator)
                apps_with_status+=("${name},${state}")
                ;;
        esac
    done < <(check_filtered_apps "${array[@]/,*}")
}

# Skip if the image update should be ignored
skip_image_update() {
if [[ $old_full_ver == "$new_full_ver" && $ignore_image_update == true ]]; then
Expand Down Expand Up @@ -179,8 +192,10 @@ handle_concurrency() {


commander() {
apps_with_status=()
mapfile -t array < <(get_app_info)
echo_updates_header

display_update_status
process_apps

Expand All @@ -190,5 +205,9 @@ commander() {
return
fi

if [[ $rollback == true ]]; then
get_apps_with_status
fi

handle_concurrency
}
22 changes: 14 additions & 8 deletions functions/update/post_process.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,23 @@ handle_rollback() {
fi
}

check_rollback_availability() {
# Check if app is a cnpg instance, or an operator instance
output=$(check_filtered_apps "$app_name")
# Queue the two messages reported when an application is still not up after
# a rollback. Appends to the global echo_array.
failed_rollback() {
    echo_array+=(
        "Error: Application did not come up even after a rollback"
        "Manual intervention is required\nStopping, then Abandoning"
    )
}

# Check if the output contains the desired namespace and "cnpg" or "operator"
if [[ $output == "${app_name},cnpg" ]]; then
echo_array+=("Error: $app_name contains a cnpg instance, and cannot be rolled back")
return 1
elif [[ $output == "${app_name},operator" ]]; then
# Decide whether the app named by the global $app_name may be rolled back.
#
# Globals:
#   app_name         (read)    - application being checked
#   apps_with_status (read)    - "<app>,<status>" entries to search
#   echo_array       (written) - receives the explanation when blocked
# Returns:
#   1 when apps_with_status holds "<app>,operator" or "<app>,cnpg"
#     (case-insensitive, whole-entry match); 0 otherwise
check_rollback_availability() {
    local kind

    # Operator is tested first, so it wins if both entries are present
    for kind in operator cnpg; do
        # NUL-delimited records let grep -z/-x compare whole array entries
        printf '%s\0' "${apps_with_status[@]}" | grep -iFxqz "${app_name},${kind}" || continue

        case "$kind" in
            operator)
                echo_array+=("Error: $app_name contains an operator instance, and cannot be rolled back")
                ;;
            cnpg)
                echo_array+=("Error: $app_name contains a CNPG deployment, and cannot be rolled back")
                echo_array+=("You can attempt a force rollback by shutting down the application with heavyscript")
                echo_array+=("Then rolling back from the GUI")
                ;;
        esac
        return 1
    done

    return 0
}

post_process(){
Expand Down Expand Up @@ -92,6 +97,7 @@ post_process(){
SECONDS=0
continue
else
failed_rollback
update_stop_handler 'Stopping...'
break
fi
Expand Down
6 changes: 5 additions & 1 deletion functions/update/pre_process.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,11 @@ pre_process() {
fi

if [[ $rollback == true || "$startstatus" == "STOPPED" ]]; then
if ! check_replicas; then
if printf '%s\0' "${apps_with_status[@]}" | grep -iFxqz "${app_name},stopAll-on"; then
echo_array+=("Stopped")
echo_array
return
elif ! check_replicas; then
echo_array
return
fi
Expand Down
16 changes: 11 additions & 5 deletions utils/check_filtered_apps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
check_filtered_apps() {
# Define a function to process each app name
process_app_name() {
appname=$1
local app_name=$1

# Run the command and directly check if the values are true, and include the reason
midclt call chart.release.get_instance "$appname" | jq -r '
midclt call chart.release.get_instance "$app_name" | jq -r '
if .config.operator.enabled == true then
.name + ",operator"
else
Expand All @@ -17,6 +16,13 @@ check_filtered_apps() {
.name + ",cnpg"
else
empty
end,
if .config.global.stopAll == true then
.name + ",stopAll-on"
elif .config.global.stopAll == false then
.name + ",stopAll-off"
else
empty
end
| select(length > 0)
'
Expand Down Expand Up @@ -48,9 +54,9 @@ check_filtered_apps() {
fi

# Process the app names with a maximum of 5 concurrent processes
for appname in "${app_names[@]}"; do
for app_name in "${app_names[@]}"; do
wait_for_slot
process_app_name "$appname" &
process_app_name "$app_name" &
done

# Wait for any remaining jobs to finish
Expand Down
36 changes: 8 additions & 28 deletions utils/resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,13 @@ pull_replicas() {
midclt call chart.release.get_instance "$app_name" | jq '.config.controller.replicas // .config.workload.main.replicas // .pod_status.desired'
}

# Scale either one deployment or every deployment of an app whose name does
# not contain "-cnpg-main-"; when scaling to zero, wait for the pods to stop.
#
# Arguments:
#   $1 - application name (namespace is "ix-<app>")
#   $2 - timeout handed to wait_for_pods_to_stop
#   $3 - replica count; when empty or unset, the output of pull_replicas is used
#   $4 - optional deployment name; empty means "all applicable deployments"
# Returns:
#   when the replica count is 0: 0 if wait_for_pods_to_stop succeeds, else 1
#   otherwise: 0
scale_resources() {
    local app_name="$1"
    local timeout="$2"
    local replicas="${3:-$(pull_replicas "$app_name")}"
    local deployment_name="$4"
    local namespace="ix-${app_name}"

    if [[ -n "$deployment_name" ]]; then
        # A specific deployment was requested: scale only that one
        k3s kubectl scale deployments/"$deployment_name" -n "$namespace" --replicas="$replicas"
    else
        # No deployment given: list them all, filter, and scale each in turn
        k3s kubectl get deployments -n "$namespace" -o json \
            | jq -r '.items[] | select(.metadata.name | contains("-cnpg-main-") | not) | .metadata.name' \
            | xargs -r -I{} k3s kubectl scale deployments/{} -n "$namespace" --replicas="$replicas"
    fi

    # Waiting only makes sense when everything is being shut down
    [[ $replicas -eq 0 ]] || return 0

    if wait_for_pods_to_stop "$app_name" "$timeout" "$deployment_name"; then
        return 0
    fi
    return 1
}


# Restart every non-CNPG deployment of the app named by the global $app_name.
#
# Globals:
#   app_name (read) - application whose "ix-<app>" namespace is inspected
# Returns:
#   0 when every rollout restart succeeds (or there is nothing to restart),
#   1 as soon as one rollout restart fails
restart_app(){
    local dep_name
    local -a dep_names=()

    # There are no good labels to use to identify the deployment, so we have to simply filter out the cnpg deployment for now
    # sed drops the header row and keeps only the first column (the name)
    mapfile -t dep_names < <(k3s kubectl -n ix-"$app_name" get deploy | grep -vE -- '(-cnpg-)' | sed -e '1d' -e 's/ .*//')

    for dep_name in "${dep_names[@]}"; do
        # Ignore blank lines so an empty listing restarts nothing
        [[ -n "$dep_name" ]] || continue
        if ! k3s kubectl -n ix-"$app_name" rollout restart deploy "$dep_name" &>/dev/null; then
            return 1
        fi
    done
    return 0
}
84 changes: 28 additions & 56 deletions utils/stop_app.sh
Original file line number Diff line number Diff line change
@@ -1,41 +1,5 @@
#!/bin/bash


# Poll the app's namespace until its pods are gone or the timeout elapses.
#
# Arguments:
#   $1 - application name; pods are queried in the "ix-<app>" namespace
#   $2 - timeout in seconds
#   $3 - optional deployment name; when empty, any non-CNPG pod counts
# Returns:
#   0 once no matching pod is listed, 1 when the timeout elapses first
wait_for_pods_to_stop() {
    local app_name="$1" timeout="$2" deployment_name="$3"
    # Shared kubectl query: pods whose phase is neither Succeeded nor Failed
    local -a list_pods=(
        kubectl get pods -n ix-"$app_name"
        --field-selector=status.phase!=Succeeded,status.phase!=Failed -o=name
    )

    SECONDS=0
    while :; do
        if [[ -z "$deployment_name" ]]; then
            # No deployment given: stop waiting once no non-CNPG pod remains
            # (names ending in "-<digit>" are not counted)
            k3s "${list_pods[@]}" \
                | grep -v -- '-cnpg-' \
                | grep -vqE -- '-[[:digit:]]$' || break
        else
            # Deployment given: strip the last two dash-separated fields from
            # each pod name and look for one ending with the deployment name
            k3s "${list_pods[@]}" \
                | grep -vE -- '-[[:digit:]]$' \
                | rev | cut -d- -f3- | rev \
                | grep -vE -- "-cnpg$|-cnpg-" \
                | grep -qE -- "$deployment_name$" || break
        fi

        [[ "$SECONDS" -gt $timeout ]] && return 1
        sleep 1
    done
}


get_app_status() {
local app_name stop_type
app_name="$1"
Expand All @@ -46,7 +10,8 @@ get_app_status() {
else
cli -m csv -c 'app chart_release query name,status' | \
grep -- "^$app_name," | \
awk -F ',' '{print $2}'
awk -F ',' '{print $2}' | \
tr -d " \t\r"
fi
}

Expand Down Expand Up @@ -84,30 +49,37 @@ stop_app() {
app_name="$2"
timeout="150"

# Check if app is a cnpg instance, or an operator instance
output=$(check_filtered_apps "$app_name")

# Check if the output contains the desired namespace and "cnpg" or "operator"
if [[ $output == "${app_name},cnpg" ]]; then
scale_resources "$app_name" "$timeout" 0 && return 0 || return 1
elif [[ $output == "${app_name},operator" ]]; then
return 3
fi

handle_timeout() {
local timeout_result=$1
if [[ $timeout_result -eq 0 ]]; then
return 0
elif [[ $timeout_result -eq 124 ]]; then
return 2
else
return 1
fi
}

status=$(get_app_status "$app_name" "$stop_type")

if [[ "$status" == "STOPPED" ]]; then
return 0
fi

timeout "${timeout}s" cli -c 'app chart_release scale release_name='\""$app_name"\"\ 'scale_options={"replica_count": 0}' &> /dev/null
timeout_result=$?

if [[ $timeout_result -eq 0 ]]; then
return 0
elif [[ $timeout_result -eq 124 ]]; then
return 2
fi
output=$(check_filtered_apps "$app_name")

return 1
# Check if the output contains the desired namespace and "cnpg" or "operator"
case $output in
"${app_name},stopAll-on" | "${app_name},stopAll-off")
timeout "${timeout}s" cli -c "app chart_release update chart_release=\"$app_name\" values={\"global\": {\"stopAll\": true}}" > /dev/null
handle_timeout $?
;;
"${app_name},operator")
return 3
;;
*)
timeout "${timeout}s" cli -c 'app chart_release scale release_name='\""$app_name"\"\ 'scale_options={"replica_count": 0}' > /dev/null
handle_timeout $?
;;
esac
}

0 comments on commit 1e13073

Please sign in to comment.