Multicluster #8427
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Multicluster | |
# Any change in triggers needs to be reflected in the concurrency group. | |
on: | |
### FOR TESTING PURPOSES | |
# This workflow runs in the context of `main`, and ignores changes to | |
# workflow files in PRs. For testing changes to this workflow from a PR: | |
# - Make sure the PR uses a branch from the base repository (requires write | |
# privileges). It will not work with a branch from a fork (missing secrets). | |
# - Uncomment the `pull_request` event below, commit separately with a `DO | |
# NOT MERGE` message, and push to the PR. As long as the commit is present, | |
# any push to the PR will trigger this workflow. | |
# - Don't forget to remove the `DO NOT MERGE` commit once satisfied. The run | |
# will disappear from the PR checks: please provide a direct link to the | |
# successful workflow run (can be found from Actions tab) in a comment. | |
# | |
# pull_request: {} | |
### | |
pull_request_target: {} | |
# Run every 6 hours | |
schedule: | |
- cron: '0 3/6 * * *' | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || 'scheduled' }} | |
cancel-in-progress: true | |
env: | |
zone: us-west2-a | |
# renovate: datasource=github-releases depName=cilium/cilium | |
cilium_version: v1.14.5 | |
kubectl_version: v1.23.6 | |
USE_GKE_GCLOUD_AUTH_PLUGIN: True | |
jobs: | |
installation-and-connectivity: | |
name: Multicluster Installation and Connectivity Test | |
if: ${{ github.repository == 'cilium/cilium-cli' }} | |
runs-on: ubuntu-22.04 | |
timeout-minutes: 45 | |
strategy: | |
fail-fast: false | |
steps: | |
- name: Checkout | |
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | |
# Note: These names currently approach the limit of 40 characters | |
- name: Set mode-specific names | |
run: | | |
echo "clusterNameBase=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}" >> $GITHUB_ENV | |
echo "clusterName1=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-1" >> $GITHUB_ENV | |
echo "clusterName2=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-2" >> $GITHUB_ENV | |
echo "firewallRuleName=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}" >> $GITHUB_ENV | |
- name: Install kubectl | |
run: | | |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl" | |
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256" | |
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check | |
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl | |
kubectl version --client | |
- name: Set up gcloud credentials | |
id: 'auth' | |
uses: google-github-actions/auth@67e9c72af6e0492df856527b474995862b7b6591 # v2.0.0 | |
with: | |
credentials_json: '${{ secrets.GCP_PR_SA_KEY }}' | |
- name: Set up gcloud CLI | |
uses: google-github-actions/setup-gcloud@5a5f7b85fca43e76e53463acaa9d408a03c98d3a # v2.0.1 | |
with: | |
project_id: ${{ secrets.GCP_PR_PROJECT_ID }} | |
version: "405.0.0" | |
- name: Install gke-gcloud-auth-plugin | |
run: | | |
gcloud components install gke-gcloud-auth-plugin | |
- name: Display gcloud CLI info | |
run: | | |
gcloud info | |
- name: Set up Go | |
uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 | |
with: | |
# renovate: datasource=golang-version depName=go | |
go-version: 1.21.5 | |
- name: Set up job variables | |
id: vars | |
run: | | |
if [ ${{ github.event.issue.pull_request || github.event.pull_request }} ]; then | |
PR_API_JSON=$(curl \ | |
-H "Accept: application/vnd.github.v3+json" \ | |
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ | |
${{ github.event.issue.pull_request.url || github.event.pull_request.url }}) | |
SHA=$(echo "$PR_API_JSON" | jq -r ".head.sha") | |
OWNER=$(echo "$PR_API_JSON" | jq -r ".number") | |
else | |
SHA=${{ github.sha }} | |
OWNER=${{ github.sha }} | |
fi | |
echo "sha=${SHA}" >> $GITHUB_OUTPUT | |
echo "owner=${OWNER}" >> $GITHUB_OUTPUT | |
- name: Create GKE cluster 2 | |
run: | | |
gcloud container clusters create ${{ env.clusterName2 }} \ | |
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \ | |
--zone ${{ env.zone }} \ | |
--enable-ip-alias \ | |
--create-subnetwork="range=/26" \ | |
--cluster-ipv4-cidr="/21" \ | |
--services-ipv4-cidr="/24" \ | |
--image-type COS_CONTAINERD \ | |
--num-nodes 2 \ | |
--machine-type e2-custom-2-4096 \ | |
--disk-type pd-standard \ | |
--disk-size 10GB \ | |
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \ | |
--preemptible \ | |
--async | |
- name: Create GKE cluster 1 | |
run: | | |
gcloud container clusters create ${{ env.clusterName1 }} \ | |
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \ | |
--zone ${{ env.zone }} \ | |
--enable-ip-alias \ | |
--create-subnetwork="range=/26" \ | |
--cluster-ipv4-cidr="/21" \ | |
--services-ipv4-cidr="/24" \ | |
--image-type COS_CONTAINERD \ | |
--num-nodes 2 \ | |
--machine-type e2-custom-2-4096 \ | |
--disk-type pd-standard \ | |
--disk-size 10GB \ | |
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \ | |
--preemptible \ | |
--async | |
- name: Wait for clusters to be provisioned | |
run: | | |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do | |
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 10 | |
done | |
- name: Get cluster 2 credentials | |
run: | | |
gcloud container clusters get-credentials ${{ env.clusterName2 }} --zone ${{ env.zone }} | |
- name: Create gcloud-free kubeconfig for cluster 2 | |
run: | | |
.github/get-kubeconfig.sh | |
mv kubeconfig kubeconfig-cluster2 | |
- name: Get cluster 1 credentials | |
run: | | |
gcloud container clusters get-credentials ${{ env.clusterName1 }} --zone ${{ env.zone }} | |
- name: Allow cross-cluster traffic | |
run: | | |
TAG1=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName1 }}-[0-9a-z]*-all$" --format="value(name)") | |
TAG2=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName2 }}-[0-9a-z]*-all$" --format="value(name)") | |
gcloud compute firewall-rules describe $TAG1 | |
gcloud compute firewall-rules describe $TAG2 | |
gcloud compute firewall-rules create ${{ env.firewallRuleName }} --allow tcp,udp,icmp,sctp,esp,ah --priority=999 --source-ranges=10.0.0.0/9 --target-tags=${TAG1/-all/-node},${TAG2/-all/-node} | |
gcloud compute firewall-rules describe ${{ env.firewallRuleName }} | |
- name: Create gcloud-free kubeconfig for cluster 1, merge kubeconfigs and put them in configmap | |
run: | | |
.github/get-kubeconfig.sh | |
mv kubeconfig kubeconfig-cluster1 | |
go run .github/tools/kubeconfig-merger/main.go kubeconfig-cluster1 kubeconfig-cluster2 kubeconfig | |
kubectl create configmap cilium-cli-kubeconfig -n kube-system --from-file kubeconfig | |
- name: Load cilium test script in configmap | |
run: | | |
kubectl create configmap cilium-cli-test-script -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/multicluster.sh | |
- name: Create cilium-cli test job | |
run: | | |
helm install .github/cilium-cli-test-job-chart \ | |
--generate-name \ | |
--set tag=${{ steps.vars.outputs.sha }} \ | |
--set cilium_version=${{ env.cilium_version }} \ | |
--set job_name=cilium-cli \ | |
--set test_script_cm=cilium-cli-test-script \ | |
--set cluster_name_1=${{ env.clusterName1 }} \ | |
--set cluster_name_2=${{ env.clusterName2 }} | |
- name: Wait for test job | |
env: | |
timeout: 30m | |
run: | | |
# Background wait for job to complete or timeout | |
kubectl -n kube-system wait job/cilium-cli --for=condition=complete --timeout=${{ env.timeout }} & | |
complete_pid=$! | |
# Background wait for job to fail | |
(kubectl -n kube-system wait job/cilium-cli --for=condition=failed --timeout=${{ env.timeout }} && exit 1) & | |
failed_pid=$! | |
# Active wait for whichever background process ends first | |
wait -n $complete_pid $failed_pid | |
EXIT_CODE=$? | |
# Retrieve job logs | |
kubectl logs --timestamps -n kube-system job/cilium-cli | |
exit ${EXIT_CODE} | |
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently | |
- name: Post-test information gathering | |
if: ${{ !success() }} | |
run: | | |
echo "=== Install latest stable CLI ===" | |
curl -sSL --remote-name-all https://github.com/cilium/cilium-cli/releases/latest/download/cilium-linux-amd64.tar.gz{,.sha256sum} | |
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum | |
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/bin | |
rm cilium-linux-amd64.tar.gz{,.sha256sum} | |
cilium version | |
echo "=== Retrieve cluster1 state ===" | |
export KUBECONFIG=kubeconfig-cluster1 | |
kubectl get pods --all-namespaces -o wide | |
cilium status | |
cilium clustermesh status | |
cilium sysdump --output-filename cilium-sysdump-cluster1 | |
echo "=== Retrieve cluster2 state ===" | |
export KUBECONFIG=kubeconfig-cluster2 | |
kubectl get pods --all-namespaces -o wide | |
cilium status | |
cilium clustermesh status | |
cilium sysdump --output-filename cilium-sysdump-cluster2 | |
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently | |
- name: Clean up GKE | |
if: ${{ always() }} | |
run: | | |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do | |
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15 | |
done | |
gcloud container clusters delete ${{ env.clusterName1 }} --zone ${{ env.zone }} --quiet --async | |
gcloud container clusters delete ${{ env.clusterName2 }} --zone ${{ env.zone }} --quiet --async | |
gcloud compute firewall-rules delete ${{ env.firewallRuleName }} --quiet | |
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently | |
- name: Upload artifacts | |
if: ${{ !success() }} | |
uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 | |
with: | |
name: cilium-sysdump-out.zip | |
path: | | |
cilium-sysdump-cluster1.zip | |
cilium-sysdump-cluster2.zip | |
retention-days: 5 |