From 6a7106e69ccc03c0377d562e78a5d3aa09f4e5bc Mon Sep 17 00:00:00 2001
From: Michal Orzechowski
Date: Sun, 23 Jul 2017 16:22:32 +0200
Subject: [PATCH] long awaited commit of wait_for script

---
 Dockerfile | 26 +++++
 Makefile | 16 ++++
 README.md | 270 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 wait_for.sh | 215 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 526 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile
 create mode 100644 Makefile
 create mode 100755 wait_for.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..83907bd
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,26 @@
+FROM alpine
+
+LABEL maintainer="Michal Orzechowski"
+
+ARG VCS_REF
+ARG BUILD_DATE
+
+# Metadata
+LABEL org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-url="https://github.com/groundnuty/k8s-wait-for" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.docker.dockerfile="/Dockerfile"
+
+ENV KUBE_LATEST_VERSION="v1.6.4"
+
+# curl is needed only to download kubectl, so it is installed as the virtual package
+# "deps" and removed afterwards; -f makes a failed download fail the build.
+RUN apk add --no-cache ca-certificates jq \
+    && apk add --no-cache -t deps curl \
+    && curl -fL https://storage.googleapis.com/kubernetes-release/release/${KUBE_LATEST_VERSION}/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl \
+    && chmod +x /usr/local/bin/kubectl \
+    && apk del --purge deps
+
+COPY wait_for.sh /usr/local/bin/wait_for.sh
+
+ENTRYPOINT ["wait_for.sh"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..797f50e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+TAG = $(shell git describe --tags --always)
+PREFIX = groundnuty
+
+all: push
+
+container: image
+
+image:
+	docker build -t $(PREFIX)/k8s-wait-for . # Build new image and automatically tag it as latest
+	docker tag $(PREFIX)/k8s-wait-for $(PREFIX)/k8s-wait-for:$(TAG) # Add the version tag to the latest image
+
+push: image
+	docker push $(PREFIX)/k8s-wait-for # Push image tagged as latest to repository
+	docker push $(PREFIX)/k8s-wait-for:$(TAG) # Push version tagged image to repository (since this image is already pushed it will simply create or update version tag)
+
+clean:
\ No newline at end of file
diff --git a/README.md b/README.md
index c3efb43..be2024e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,270 @@
 # k8s-wait-for
-A simple script that allows to wait for a k8s service, job or pods to enter desired state
+
+A simple script that allows waiting for a k8s service, job or pods to enter the desired state.
+
+## Using
+
+Please consult `wait_for.sh -h` for detailed documentation.
+
+## Example
+
+A complex Kubernetes deployment manifest (generated by [helm](https://github.com/kubernetes/helm)), which uses the old JSON syntax for init container declaration. This deployment waits for one job to finish and 2 pods to enter the ready state.
+ +~~~~ +--- +# Source: cross-support-job-3p/charts/onedata-3p/charts/oneprovider-krakow/templates/deployment.yaml +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: develop-oneprovider-krakow + labels: + app: develop-oneprovider-krakow + chart: oneprovider-krakow + release: develop + heritage: Tiller + annotations: + version: "0.2.7" +spec: + replicas: + template: + metadata: + labels: + app: develop-oneprovider-krakow + chart: "oneprovider-krakow" + release: "develop" + heritage: "Tiller" + annotations: + version: "0.2.7" + annotations: + pod.beta.kubernetes.io/init-containers: '[ + { + "name": "wait-for-volume-s3-init", + "image": "groundnuty/k8s-wait-for:0.1", + "imagePullPolicy": "Always", + "imagePullSecrets": [], + "args": [ + "job", "develop-volume-s3-krakow-init" + ] + }, + { + "name": "wait-for-volume-ceph", + "image": "groundnuty/k8s-wait-for:0.1", + "imagePullPolicy": "Always", + "imagePullSecrets": [], + "args": [ + "pod", "-lapp=develop-volume-ceph-krakow" + ] + }, + { + "name": "wait-for-volume-gluster", + "image": "groundnuty/k8s-wait-for:0.1", + "imagePullPolicy": "Always", + "imagePullSecrets": [], + "args": [ + "pod", "-lapp=develop-volume-gluster-krakow" + ] + }]' + spec: + hostname: node1 + subdomain: develop-oneprovider-krakow + imagePullSecrets: [] + volumes: + - name: nfs + persistentVolumeClaim: + claimName: develop-oneprovider-krakow-nfs-pvc + containers: + - name: oneprovider-krakow + image: docker.onedata.org/oneprovider:ID-75d8645bec + imagePullPolicy: Always + resources: + requests: + cpu: 2 + memory: 4Gi + ports: + - containerPort: 53 + - containerPort: 80 + - containerPort: 443 + - containerPort: 5555 + - containerPort: 5556 + - containerPort: 6665 + - containerPort: 6666 + - containerPort: 7443 + - containerPort: 8443 + - containerPort: 8876 + - containerPort: 8877 + - containerPort: 9443 + lifecycle: + preStop: + exec: + command: + - "sh" + - "-c" + - > + op_panel stop ; + op_worker stop ; + cluster_manager stop ; 
+ /etc/init.d/couchbase-server stop ; + pkill -f oneprovider.py ; + readinessProbe: + exec: + command: + - "/bin/bash" + - "-c" + - > + service=provider ; + user="$(curl -k -u a:b -sS --tlsv1.2 -X GET 'https://localhost:9443/api/v3/onepanel/users/user')"; + if [[ "$user" == "" ]] ; then exit 1; else exit 0; fi ; + env: + - name: ONEPANEL_LOG_LEVEL + value: "info" + - name: ONEPANEL_BATCH_MODE + value: "true" + - name: ONEPROVIDER_CONFIG + value: | + cluster: + domainName: "develop-oneprovider-krakow.develop.svc.dev.onedata.uk.to" + nodes: + n1: + hostname: node1 + managers: + mainNode: n1 + nodes: + - n1 + workers: + nodes: + - n1 + databases: + nodes: + - n1 + storages: + posix: + type: posix + mountPoint: /volumes/storage + nfs: + type: posix + mountPoint: /volumes/nfs + s3: + type: s3 + hostname: develop-volume-s3-krakow:8000 + bucketName: test + accessKey: accessKey + secretKey: verySecretKey + insecure: true + ceph: + type: ceph + username: client.k8s + key: A + monitorHostname: develop-volume-ceph-krakow + clusterName: ceph + poolName: test + gluster: + type: glusterfs + hostname: develop-volume-gluster-krakow + volume: test + transport: tcp + oneprovider: + register: true + name: develop-oneprovider-krakow + redirectionPoint: https://develop-oneprovider-krakow.develop.svc.dev.onedata.uk.to + geoLatitude: 50.0647 + geoLongitude: 19.945 + # TODO: make it possible for onedata services to communicate using + # system configured DNS. this will allow to put here just service name + # instead of FQDN + onezone: + domainName: develop-onezone.develop.svc.dev.onedata.uk.to + onepanel: + users: + admin: + password: a + userRole: a + user: + password: a + userRole: a + volumeMounts: + - mountPath: /volumes/nfs + name: nfs +~~~~ + +## Complex deployment use case + +This container is used extensively in deployments of the Onedata system [onedata/charts](https://github.com/onedata/charts) for the purpose of specifying dependencies.
 It leverages Kubernetes [init containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/), thus providing: + +- a detailed event log in `kubectl describe <pod>`, showing which init container the pod is hanging on at the moment. +- a comprehensive view in `kubectl get pods` output where init containers are shown in a form `Init:<ready>/<total>` + +Example output from the deployment run of ~16 pods with dependencies just after deployment: + +~~~bash +NAME READY STATUS RESTARTS AGE +develop-cross-support-job-3p-krk-3-lis-c-b4nv1 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-krk-3-par-c-lis-n-z7x6w 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-krk-3-x9719 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-krk-g-par-3-ztvz0 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-krk-g-v5lf2 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-krk-n-par-3-pnbcm 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-lis-3-cpj3f 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-par-n-8zdt2 0/1 Init:0/1 0 11s +develop-cross-support-job-3p-par-n-lis-c-kqdf0 0/1 Init:0/1 0 11s +develop-oneclient-krakow-2773392814-wc1dv 0/1 Init:0/3 0 11s +develop-oneclient-lisbon-3267879054-2v6cg 0/1 Init:0/3 0 9s +develop-oneclient-paris-2076479302-f6hh9 0/1 Init:0/3 0 9s +develop-onedata-cli-krakow-1801798075-b5wpj 0/1 Init:0/1 0 11s +develop-onedata-cli-lisbon-139116355-fwtjv 0/1 Init:0/1 0 10s +develop-onedata-cli-paris-2662312307-9z9l1 0/1 Init:0/1 0 11s +develop-oneprovider-krakow-3634465102-tftc6 0/1 Pending 0 10s +develop-oneprovider-lisbon-3034775369-8n31x 0/1 Init:0/3 0 8s +develop-oneprovider-paris-3034358951-19mhf 0/1 Init:0/3 0 10s +develop-onezone-304145816-dmxn1 0/1 ContainerCreating 0 11s +develop-volume-ceph-krakow-479580114-mkd1d 0/1 ContainerCreating 0 11s +develop-volume-ceph-lisbon-1249181958-1f0mt 0/1 ContainerCreating 0 9s +develop-volume-ceph-paris-400443052-dc347 0/1 ContainerCreating 0 9s +develop-volume-gluster-krakow-761992225-sj06m 0/1 Running 0 11s
+develop-volume-gluster-lisbon-3947152141-jlmvb 0/1 Running 0 8s +develop-volume-gluster-paris-3588749681-9bnw8 0/1 ContainerCreating 0 11s +develop-volume-nfs-krakow-2528947555-6mxzt 1/1 Running 0 10s +develop-volume-nfs-lisbon-3473018547-7nljf 0/1 ContainerCreating 0 11s +develop-volume-nfs-paris-2956540513-4bdzt 0/1 ContainerCreating 0 11s +develop-volume-s3-krakow-23786741-pdxtj 0/1 Running 0 9s +develop-volume-s3-krakow-init-gqmmp 0/1 Init:0/1 0 11s +develop-volume-s3-lisbon-3912793669-d4xh5 0/1 Running 0 10s +develop-volume-s3-lisbon-init-mq9nk 0/1 Init:0/1 0 11s +develop-volume-s3-paris-124394749-qwt18 0/1 Running 0 8s +develop-volume-s3-paris-init-jb4k3 0/1 Init:0/1 0 11s +~~~ + +1 min after, you can see the changes in the *Status* column: + +~~~bash +develop-cross-support-job-3p-krk-3-lis-c-b4nv1 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-krk-3-par-c-lis-n-z7x6w 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-krk-3-x9719 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-krk-g-par-3-ztvz0 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-krk-g-v5lf2 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-krk-n-par-3-pnbcm 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-lis-3-cpj3f 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-par-n-8zdt2 0/1 Init:0/1 0 1m +develop-cross-support-job-3p-par-n-lis-c-kqdf0 0/1 Init:0/1 0 1m +develop-oneclient-krakow-2773392814-wc1dv 0/1 Init:0/3 0 1m +develop-oneclient-lisbon-3267879054-2v6cg 0/1 Init:0/3 0 58s +develop-oneclient-paris-2076479302-f6hh9 0/1 Init:0/3 0 58s +develop-onedata-cli-krakow-1801798075-b5wpj 0/1 Init:0/1 0 1m +develop-onedata-cli-lisbon-139116355-fwtjv 0/1 Init:0/1 0 59s +develop-onedata-cli-paris-2662312307-9z9l1 0/1 Init:0/1 0 1m +develop-oneprovider-krakow-3634465102-tftc6 0/1 Init:1/3 0 59s +develop-oneprovider-lisbon-3034775369-8n31x 0/1 Init:2/3 0 57s +develop-oneprovider-paris-3034358951-19mhf 0/1 PodInitializing 0 59s +develop-onezone-304145816-dmxn1 0/1 Running 0 1m 
+develop-volume-ceph-krakow-479580114-mkd1d 1/1 Running 0 1m
+develop-volume-ceph-lisbon-1249181958-1f0mt 1/1 Running 0 58s
+develop-volume-ceph-paris-400443052-dc347 1/1 Running 0 58s
+develop-volume-gluster-krakow-761992225-sj06m 1/1 Running 0 1m
+develop-volume-gluster-lisbon-3947152141-jlmvb 1/1 Running 0 57s
+develop-volume-gluster-paris-3588749681-9bnw8 1/1 Running 0 1m
+develop-volume-nfs-krakow-2528947555-6mxzt 1/1 Running 0 59s
+develop-volume-nfs-lisbon-3473018547-7nljf 1/1 Running 0 1m
+develop-volume-nfs-paris-2956540513-4bdzt 1/1 Running 0 1m
+develop-volume-s3-krakow-23786741-pdxtj 1/1 Running 0 58s
+develop-volume-s3-lisbon-3912793669-d4xh5 1/1 Running 0 59s
+develop-volume-s3-paris-124394749-qwt18 1/1 Running 0 57s
+~~~
diff --git a/wait_for.sh b/wait_for.sh
new file mode 100755
index 0000000..d5a13c8
--- /dev/null
+++ b/wait_for.sh
@@ -0,0 +1,215 @@
+#!/usr/bin/env sh
+
+# This script is aimed to be POSIX-compliant and style consistent with help of these tools:
+# - https://github.com/koalaman/shellcheck
+# - https://github.com/openstack-dev/bashate
+
+# The state helpers run inside command substitutions (subshells) and cannot end the
+# script with 'exit', so they send TERM to the top-level shell, which exits with status 1.
+trap "exit 1" TERM
+TOP_PID=$$
+
+KUBECTL_ARGS="" # additional arguments appended to every kubectl call
+WAIT_TIME=2 # seconds
+DEBUG=0 # 1 prints the parsed state, 2 also prints the raw command output
+
+# Prints the help text and exits with status 1.
+usage() {
+cat <<EOF
+This script waits until a job, pod or service enters the ready state.
+
+${0##*/} job [<job name> | -l<kubectl selector>]
+${0##*/} pod [<pod name> | -l<kubectl selector>]
+${0##*/} service [<service name> | -l<kubectl selector>]
+
+Examples:
+Wait for all pods with a following label to enter 'Ready' state:
+${0##*/} pod -lapp=develop-volume-gluster-krakow
+
+Wait for all the pods in that job to have a 'Succeeded' state:
+${0##*/} job develop-volume-s3-krakow-init
+
+Wait for all selected pods to enter the 'Ready' state:
+${0##*/} pod -l"release in (develop), chart notin (cross-support-job-3p)"
+
+Wait for all pods matched by the selector of that service to enter the 'Ready' state:
+${0##*/} service develop-volume-s3-krakow
+
+Pass additional arguments, given after the name or selector, to kubectl:
+${0##*/} pod -lapp=develop-volume-gluster-krakow --namespace=develop
+EOF
+exit 1
+}
+
+# Job or set of pods is considered ready if all of them are ready.
+# Prints the 'Ready' condition status of every pod that is not ready yet,
+# so the output is empty once all of the pods are ready.
+# $1 - pod name or -l<selector>; $2 - optional additional kubectl flags
+# example output with 3 pods, where 2 are not ready would be: "FalseFalse"
+get_pod_state() {
+    get_pod_state_name="$1"
+    get_pod_state_flags="$2"
+    # An empty name is dropped instead of reaching kubectl as an empty argument.
+    get_pod_state_output1=$(kubectl get pods ${get_pod_state_name:+"$get_pod_state_name"} $get_pod_state_flags $KUBECTL_ARGS -o go-template='{{- if .items -}}
+    {{- range .items -}}
+        {{- range .status.conditions -}}
+            {{- if and (eq .type "Ready") (eq .status "False") -}}
+            {{ .status }}
+            {{- end -}}
+        {{- end -}}
+    {{- end -}}
+{{- else if .status -}}
+    {{- range .status.conditions -}}
+        {{- if and (eq .type "Ready") (eq .status "False") -}}
+        {{ .status }}
+        {{- end -}}
+    {{- end -}}
+{{- else -}}
+    {{- printf "No resources found.\n" -}}
+{{- end -}}' 2>&1)
+    if [ $? -ne 0 ]; then
+        if expr "$get_pod_state_output1" : '.*not found$' 1>/dev/null ; then
+            echo "No pods found, waiting for them to be created..." >&2
+            echo "$get_pod_state_output1" >&2
+        else
+            echo "$get_pod_state_output1" >&2
+            kill -s TERM $TOP_PID
+        fi
+    elif [ $DEBUG -ge 2 ]; then
+        echo "$get_pod_state_output1" >&2
+    fi
+    get_pod_state_output2=$(printf "%s" "$get_pod_state_output1" | xargs )
+    if [ $DEBUG -ge 1 ]; then
+        echo "$get_pod_state_output2" >&2
+    fi
+    echo "$get_pod_state_output2"
+}
+
+# Service or set of services is considered ready if all of the pods matched by the service selector are considered ready
+# example output with 2 services each matching a single not ready pod would be: "FalseFalse"
+get_service_state() {
+    get_service_state_name="$1"
+    get_service_state_output=$(kubectl get service "$get_service_state_name" $KUBECTL_ARGS -ojson 2>&1)
+    if [ $? -ne 0 ]; then
+        echo "$get_service_state_output" >&2
+        kill -s TERM $TOP_PID
+        return 1
+    fi
+    # one line per service, holding all of its selector labels as "key=value,key=value"
+    get_service_state_selectors=$(printf "%s" "$get_service_state_output" | jq -cr 'if . | has("items") then .items[] else . end | [ .spec.selector | to_entries[] | "\(.key)=\(.value)" ] | join(",")')
+    get_service_state_states=""
+    for get_service_state_selector in $get_service_state_selectors ; do
+        get_service_state_state=$(get_pod_state "-l$get_service_state_selector")
+        get_service_state_states="${get_service_state_states}${get_service_state_state}"
+    done
+    echo "$get_service_state_states"
+}
+
+# Job or set of jobs is considered ready if all of them succeeded at least once
+# example output with 2 still running jobs would be "0 0"
+# this function considers the line:
+# Pods Statuses: 0 Running / 1 Succeeded / 0 Failed
+# in a 'kubectl describe' job output.
+get_job_state() {
+    get_job_state_name="$1"
+    get_job_state_output=$(kubectl describe jobs $get_job_state_name $KUBECTL_ARGS 2>&1)
+    if [ $? -ne 0 ]; then
+        echo "$get_job_state_output" >&2
+        kill -s TERM $TOP_PID
+    elif [ $DEBUG -ge 2 ]; then
+        echo "$get_job_state_output" >&2
+    fi
+    if [ "$get_job_state_output" = "" ]; then
+        echo "wait_for.sh: No jobs found!" >&2
+        kill -s TERM $TOP_PID
+    fi
+    get_job_state_output1=$(printf "%s" "$get_job_state_output" | sed -nr 's#.*/ (0+) .*/.*#\1#p' 2>&1)
+    if [ $? -ne 0 ]; then
+        echo "$get_job_state_output" >&2
+        echo "$get_job_state_output1" >&2
+        kill -s TERM $TOP_PID
+    elif [ $DEBUG -ge 2 ]; then
+        echo "$get_job_state_output1" >&2
+    fi
+    get_job_state_output2=$(printf "%s" "$get_job_state_output1" | xargs )
+    if [ $DEBUG -ge 1 ]; then
+        echo "$get_job_state_output2" >&2
+    fi
+    echo "$get_job_state_output2"
+}
+
+# Polls get_service_state until it prints nothing, then reports the service as ready.
+wait_for_service() {
+    wait_for_service_name="$1"
+    while [ "$(get_service_state "$wait_for_service_name")" != "" ] ; do
+        wait_for "service" "$wait_for_service_name"
+    done
+    ready "service" "$wait_for_service_name"
+}
+
+# Polls get_pod_state until it prints nothing, then reports the pod(s) as ready.
+wait_for_pod() {
+    wait_for_pod_name="$1"
+    while [ "$(get_pod_state "$wait_for_pod_name")" != "" ] ; do
+        wait_for "pod" "$wait_for_pod_name"
+    done
+    ready "pod" "$wait_for_pod_name"
+}
+
+# Polls get_job_state until it prints nothing, then reports the job(s) as ready.
+wait_for_job() {
+    wait_for_job_name="$1"
+    while [ "$(get_job_state "$wait_for_job_name")" != "" ] ; do
+        wait_for "job" "$wait_for_job_name"
+    done
+    ready "job" "$wait_for_job_name"
+}
+
+# Prints a progress message for the given resource and sleeps for WAIT_TIME seconds.
+wait_for() {
+    wait_for_resource="$1"
+    wait_for_name="$2"
+    echo "Waiting for $wait_for_resource $wait_for_name $KUBECTL_ARGS..."
+    sleep $WAIT_TIME
+}
+
+# Prints the final message announcing that the given resource is ready.
+ready() {
+    printf "%s %s %s is ready.\n" "$1" "$2" "$KUBECTL_ARGS"
+}
+
+# Parses the command line: <resource type> <name or selector> [kubectl arguments...]
+# and waits for the requested resource; exits with 0 once it is ready.
+main() {
+    # both the resource type and its name (or selector) are required
+    if [ $# -lt 2 ]; then
+        usage
+    fi
+
+    main_resource="$1"
+    main_name="$2"
+    shift 2
+
+    # everything after the name is passed through to kubectl
+    KUBECTL_ARGS="${*}"
+
+    case $main_resource in
+        pod)
+            wait_for_pod "$main_name"
+            ;;
+        job)
+            wait_for_job "$main_name"
+            ;;
+        service)
+            wait_for_service "$main_name"
+            ;;
+        *)
+            printf 'ERROR: Unknown resource type: %s\n' "$main_resource" >&2
+            usage
+            ;;
+    esac
+
+    exit 0
+}
+
+main "$@"