Skip to content

Commit

Permalink
MGMT-18628: fix better waiting for deployment
Browse files Browse the repository at this point in the history
  • Loading branch information
eifrach committed Oct 31, 2024
1 parent ce2eb1d commit a391da2
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 46 deletions.
6 changes: 3 additions & 3 deletions deploy/operator/capi/deploy_capi_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ for manifest in $(find ${__dir}/generated -type f); do
tee < "${manifest}" >(oc apply -f -)
done

wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "ImageCreated" "5m" "${SPOKE_NAMESPACE}"
wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "condition=ImageCreated" "5m" "${SPOKE_NAMESPACE}"

echo "Waiting until at least ${SPOKE_CONTROLPLANE_AGENTS} agents are available..."

Expand Down Expand Up @@ -233,8 +233,8 @@ hypershift_cli hypershift create cluster agent --name $ASSISTED_CLUSTER_NAME --b
# Wait for a hypershift hostedcontrolplane to report ready status
wait_for_resource "hostedcontrolplane/${ASSISTED_CLUSTER_NAME}" "${SPOKE_NAMESPACE}-${ASSISTED_CLUSTER_NAME}"
wait_for_boolean_field "hostedcontrolplane/${ASSISTED_CLUSTER_NAME}" status.ready "${SPOKE_NAMESPACE}-${ASSISTED_CLUSTER_NAME}"
wait_for_condition "nodepool/$ASSISTED_CLUSTER_NAME" "Ready" "10m" "$SPOKE_NAMESPACE"
wait_for_condition "hostedcluster/$ASSISTED_CLUSTER_NAME" "Available" "10m" "$SPOKE_NAMESPACE"
wait_for_condition "nodepool/$ASSISTED_CLUSTER_NAME" "condition=Ready" "10m" "$SPOKE_NAMESPACE"
wait_for_condition "hostedcluster/$ASSISTED_CLUSTER_NAME" "condition=Available" "10m" "$SPOKE_NAMESPACE"

# Scale up
echo "Scaling the hosted cluster up to contain ${SPOKE_CONTROLPLANE_AGENTS} worker nodes"
Expand Down
12 changes: 6 additions & 6 deletions deploy/operator/hypershift/deploy_hypershift_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ oc get hostedcluster ${ASSISTED_CLUSTER_NAME} -n ${HYPERSHIFT_AGENT_NS} || \

echo "Wait for a running hypershift cluster with no worker nodes"
wait_for_pods "$SPOKE_NAMESPACE"
wait_for_condition "nodepool/$ASSISTED_CLUSTER_NAME" "Ready" "10m" "$HYPERSHIFT_AGENT_NS"
wait_for_condition "hostedcluster/$ASSISTED_CLUSTER_NAME" "Available" "10m" "$HYPERSHIFT_AGENT_NS"
wait_for_condition "nodepool/$ASSISTED_CLUSTER_NAME" "condition=Ready" "10m" "$HYPERSHIFT_AGENT_NS"
wait_for_condition "hostedcluster/$ASSISTED_CLUSTER_NAME" "condition=Available" "10m" "$HYPERSHIFT_AGENT_NS"

echo "Extract spoke kubeconfig"
oc extract -n $HYPERSHIFT_AGENT_NS secret/$ASSISTED_CLUSTER_NAME-admin-kubeconfig --to=- > /tmp/$ASSISTED_CLUSTER_NAME-kubeconfig
Expand All @@ -79,7 +79,7 @@ oc --kubeconfig $SPOKE_KUBECONFIG apply -f ${__root}/hack/crds
echo "Apply HypershiftAgentServiceConfig on hub"
ansible-playbook "${playbooks_dir}/hasc-playbook.yaml"
oc apply -f ${playbooks_dir}/generated/hasc.yaml -n $SPOKE_NAMESPACE
wait_for_condition "hypershiftagentserviceconfigs/hypershift-agent" "DeploymentsHealthy" "20m" "$SPOKE_NAMESPACE"
wait_for_condition "hypershiftagentserviceconfigs/hypershift-agent" "condition=DeploymentsHealthy" "20m" "$SPOKE_NAMESPACE"

echo "Create assisted secrets"
oc --kubeconfig $SPOKE_KUBECONFIG get secret "${ASSISTED_PULLSECRET_NAME}" -n "${SPOKE_NAMESPACE}" || \
Expand All @@ -95,7 +95,7 @@ oc --kubeconfig $SPOKE_KUBECONFIG apply -f ${playbooks_dir}/generated/agentClust
oc --kubeconfig $SPOKE_KUBECONFIG apply -f ${playbooks_dir}/generated/infraEnv.yaml -n $SPOKE_NAMESPACE

echo "Wait for InfraEnv ImageCreated"
KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "ImageCreated" "5m" "${SPOKE_NAMESPACE}"
KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "condition=ImageCreated" "5m" "${SPOKE_NAMESPACE}"
export ISO_DOWNLOAD_URL=$(oc get --kubeconfig $SPOKE_KUBECONFIG -n $SPOKE_NAMESPACE infraenv $ASSISTED_INFRAENV_NAME -o jsonpath='{.status.isoDownloadURL}')

echo "Apply BareMetalHost on hub"
Expand All @@ -113,10 +113,10 @@ oc --kubeconfig $SPOKE_KUBECONFIG -n $SPOKE_NAMESPACE patch agent $agent_name -p

echo "Waiting until cluster is installed"

KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "Stopped" "90m" "${SPOKE_NAMESPACE}"
KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "condition=Stopped" "90m" "${SPOKE_NAMESPACE}"
echo "Cluster installation has been stopped (either for good or bad reasons)"

KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "Completed" "90m" "${SPOKE_NAMESPACE}"
KUBECONFIG=$SPOKE_KUBECONFIG wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "condition=Completed" "90m" "${SPOKE_NAMESPACE}"
echo "Cluster has been installed successfully!"


Expand Down
2 changes: 1 addition & 1 deletion deploy/operator/setup_assisted_operator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ EOCR
oc patch -n ${ASSISTED_NAMESPACE} agentserviceconfig agent --type merge -p '{"spec":{"osImages":'"${OS_IMAGES_CAMELCASE}"'}}'

wait_for_operator "assisted-service-operator" "${ASSISTED_NAMESPACE}"
wait_for_condition "agentserviceconfigs/agent" "ReconcileCompleted" "5m"
wait_for_condition "agentserviceconfigs/agent" "condition=ReconcileCompleted" "5m"
wait_for_deployment "assisted-service" "${ASSISTED_NAMESPACE}" "5m"
wait_for_pod "assisted-image-service" "${ASSISTED_NAMESPACE}" "app=assisted-image-service"

Expand Down
2 changes: 1 addition & 1 deletion deploy/operator/setup_hive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ spec:
featureSet: Custom
EOF

wait_for_condition "hiveconfig.hive.openshift.io/hive" "Ready" "10m"
wait_for_condition "hiveconfig.hive.openshift.io/hive" "condition=Ready" "10m"
}

if [ -z "$@" ] || ! declare -F "$@"; then
Expand Down
83 changes: 51 additions & 32 deletions deploy/operator/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ function wait_for_crd() {
crd="$1"
namespace="${2:-}"

wait_for_condition "crd/${crd}" "Established" "60s" "${namespace}"
wait_for_condition "crd/${crd}" "condition=Established" "60s" "${namespace}"
}

function remote_agents() {
Expand All @@ -26,41 +26,40 @@ function installed_remote_agents() {
export -f installed_remote_agents

# Wait until the operator behind a subscription has been installed.
# Arguments:
#   $1 - subscription name
#   $2 - namespace of the subscription (defaults to empty)
#
# NOTE(review): this span comes from a rendered commit diff without +/- markers.
# The polling loop up to the first closing brace looks like the removed
# implementation and the statements after it look like the added replacement
# body -- confirm against the repository before reading it as a single function.
function wait_for_operator() {
subscription="$1"
namespace="${2:-}"
echo "Waiting for operator ${subscription} to get installed on namespace ${namespace}..."

# Poll up to 60 times, sleeping 10 seconds between attempts.
for _ in $(seq 1 60); do
# "|| true" makes a failed lookup yield an empty csv instead of a failing status.
csv=$(oc -n "${namespace}" get subscription "${subscription}" -o jsonpath='{.status.installedCSV}' || true)
# Succeed only once the subscription names a CSV and that CSV's phase is "Succeeded".
if [[ -n "${csv}" ]]; then
if [[ "$(oc -n "${namespace}" get csv "${csv}" -o jsonpath='{.status.phase}')" == "Succeeded" ]]; then
echo "ClusterServiceVersion (${csv}) is ready"
return 0
fi
fi

sleep 10
done

# All attempts were used up without seeing a succeeded CSV.
echo "Timed out waiting for csv to become ready!"
return 1
}
subscription="$1"
namespace="${2:-}"

# Delegate to wait_for_condition with a jsonpath expression on the subscription's
# status.state; the quoted and unquoted pieces form one word:
# jsonpath={..status.state}=AtLatestKnown. Presumably "30s" is the timeout -- the
# parameter assignments of wait_for_condition are not visible here; TODO confirm.
wait_for_condition "subscriptions.operators.coreos.com/${subscription}" jsonpath='{..status.state}'=AtLatestKnown "30s" "${namespace}"

# Read the name of the installed ClusterServiceVersion from the subscription.
csv=$(oc get subscriptions.operators.coreos.com/${subscription} --namespace=${namespace} -o jsonpath='{..status.installedCSV}')
echo "Waiting for CSV ${csv} installation"
# NOTE(review): [[ $(...) ]] is true when the command substitution yields
# non-empty output; it does not test the exit status of "oc wait" directly.
if ! [[ $(oc wait "clusterserviceversions.operators.coreos.com/${csv}" --namespace=${namespace} --for=jsonpath='{.status.phase}'="Succeeded" --timeout=30s) ]]
then
echo "ERROR: CSV installation has failed"
# Dump the CSV as JSON to help diagnose the failure.
oc get "clusterserviceversions.operators.coreos.com/${csv}" --namespace=${namespace} -o json
# NOTE(review): exit ends the whole shell running this function, whereas the
# polling loop above used "return 1".
exit 1
fi
}

# Wait for pods in a namespace, optionally filtered by label selector, to
# report the Ready condition, by delegating to wait_for_condition with "30m".
# Arguments:
#   $1 - pod name (stored in "pod" but not read anywhere in this function)
#   $2 - namespace (defaults to empty)
#   $3 - label selector (defaults to empty)
function wait_for_pod() {
pod="$1"
namespace="${2:-}"
selector="${3:-}"

# NOTE(review): this span comes from a rendered commit diff without +/- markers;
# the two calls below look like the removed line ("Ready") and its replacement
# ("condition=Ready") rather than two consecutive waits -- confirm against the
# repository.
wait_for_condition "pod" "Ready" "30m" "${namespace}" "${selector}"
wait_for_condition "pod" "condition=Ready" "30m" "${namespace}" "${selector}"
}

# Wait until every pod in a namespace is Ready.
# Arguments:
#   $1 - namespace whose pods are checked
#
# NOTE(review): this span comes from a rendered commit diff without +/- markers.
# The while loop looks like the removed implementation and the "oc wait" block
# after it looks like the added replacement -- confirm against the repository.
function wait_for_pods(){
# Collect the Ready-condition status of every pod, de-duplicate the values, and
# keep looping until the only remaining value is "True". The loop has no upper
# bound on iterations.
while [[ $(oc get pods -n $1 -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}'| tr ' ' '\n' | sort -u) != "True" ]]; do
echo "Waiting for pods in namespace $1 to be ready"
# Print the container statuses (pretty-printed by jq) on every iteration.
oc get pods -n $1 -o 'jsonpath={..status.containerStatuses}' | jq "."
sleep 5;
done
echo "Pods in namespace $1 are ready"
namespace=$1

# Single "oc wait" over all pods in the namespace with a one-minute timeout.
# NOTE(review): [[ $(...) ]] is true when the command substitution yields
# non-empty output; it does not test the exit status of "oc wait" directly.
if [[ $(oc wait --namespace "${namespace}" --all --for=condition=Ready pod --timeout 1m) ]]; then
# NOTE(review): the "}" after the closing quote is part of echo's argument, so
# the printed message ends with a literal "}".
echo "All Pods in namespace ${namespace} are ready"}
else
echo "ERROR: Failed waiting for pods"
# debug output
oc get pods --namespace "${namespace}"
# NOTE(review): exit ends the whole shell running this function, not just the
# function itself.
exit 1
fi
}

function wait_for_deployment() {
Expand All @@ -70,11 +69,22 @@ function wait_for_deployment() {

echo "Waiting for (deployment) on namespace (${namespace}) with name (${deployment}) to be created..."
for i in {1..40}; do
oc get deployment "${deployment}" --namespace="${namespace}" |& grep -ivE "(no resources found|not found)" && break || sleep 10
oc get deployments.apps "${deployment}" --namespace="${namespace}" |& grep -ivE "(no resources found|not found)" && break || sleep 10
done
if [ $i -eq 40 ]; then
echo "ERROR: failed Waiting for (deployment) on namespace (${namespace}) with name (${deployment}) to be created..."
exit 1
fi

echo "Waiting for (deployment) on namespace (${namespace}) with name (${deployment}) to rollout..."
oc rollout status "deploy/${deployment}" -n "${namespace}" --timeout="${timeout}"
REPLICAS=$(oc get deployments.apps --namespace="${namespace}" "${deployment}" -o jsonpath='{..status.replicas}')
if ! [[ $(oc --namespace="${namespace}" wait --for=jsonpath='{..status.availableReplicas}'="${REPLICAS}" --timeout=5m "deployments.apps/${deployment}") ]];
then
echo "ERROR: Deployment failed"
oc get --namespace="${namespace}" "deployments.apps/${deployment}" -o json
exit 1
fi

}

function hash() {
Expand All @@ -97,13 +107,22 @@ function wait_for_condition() {
namespace="${4:-}"
selector="${5:-}"

counter=1
echo "Waiting for (${object}) on namespace (${namespace}) with labels (${selector}) to be created..."
for i in {1..40}; do
oc get ${object} --selector="${selector}" --namespace=${namespace} |& grep -ivE "(no resources found|not found)" && break || sleep 10
until [[ $(oc get ${object} --selector="${selector}" --namespace="${namespace}" 2> /dev/null ) ]]
do
if [[ "${counter}" -eq 30 ]];
then
echo "ERROR: failed Waiting for (${object}) on namespace (${namespace}) with labels (${selector}) to become (${condition})..."
oc get ${object} --selector="${selector}" --namespace="${namespace}" -o json
exit 1
break
fi
((counter++)) && sleep 2
done

echo "Waiting for (${object}) on namespace (${namespace}) with labels (${selector}) to become (${condition})..."
oc wait -n "${namespace}" --for=condition=${condition} --selector "${selector}" ${object} --timeout=${timeout}
oc wait -n "${namespace}" --all --for=${condition} ${object} --timeout=${timeout} --selector "${selector}"
}

function wait_for_object_amount() {
Expand Down
6 changes: 3 additions & 3 deletions deploy/operator/ztp/deploy_spoke_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ for manifest in $(find ${__dir}/generated -type f); do
oc apply -f "${manifest}"
done
wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "ImageCreated" "5m" "${SPOKE_NAMESPACE}"
wait_for_condition "infraenv/${ASSISTED_INFRAENV_NAME}" "condition=ImageCreated" "5m" "${SPOKE_NAMESPACE}"
echo "Waiting until at least ${SPOKE_CONTROLPLANE_AGENTS} agents are available..."
Expand Down Expand Up @@ -130,10 +130,10 @@ if [ ${SPOKE_CONTROLPLANE_AGENTS} -ne 1 ] && [ "${USER_MANAGED_NETWORKING}" == "
fi
fi
wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "Stopped" "90m" "${SPOKE_NAMESPACE}"
wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "condition=Stopped" "90m" "${SPOKE_NAMESPACE}"
echo "Cluster installation has been stopped (either for good or bad reasons)"
wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "Completed" "1m" "${SPOKE_NAMESPACE}"
wait_for_condition "agentclusterinstall/${ASSISTED_AGENT_CLUSTER_INSTALL_NAME}" "condition=Completed" "1m" "${SPOKE_NAMESPACE}"
echo "Cluster has been installed successfully!"
wait_for_boolean_field "clusterdeployment/${ASSISTED_CLUSTER_DEPLOYMENT_NAME}" spec.installed "${SPOKE_NAMESPACE}"
Expand Down

0 comments on commit a391da2

Please sign in to comment.