diff --git a/.github/workflows/application-signals-java-e2e-test.yml b/.github/workflows/application-signals-java-e2e-test.yml deleted file mode 100644 index 8d500428..00000000 --- a/.github/workflows/application-signals-java-e2e-test.yml +++ /dev/null @@ -1,335 +0,0 @@ -# This is a reusable workflow for running the Java E2E test for App Signals. -# It is meant to be called from another workflow. -# This E2E test is responsible for validating setting up a sample application on an EKS cluster and enabling -# App Signals using the staging image of the CloudWatch Agent Operator. It validates the generated telemetry -# including logs, metrics, and traces, then cleans up the cluster. The testing resources can be found in the -# ADOT java instrumentation repo: https://github.com/aws-observability/aws-otel-java-instrumentation/tree/main/testing -# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview -name: App Signals Enablement Java E2E Testing -on: - workflow_dispatch: - workflow_call: - inputs: - # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency - test-java-cluster-name: - required: true - type: string - tag: - description: 'Staging Artifact Tag' - required: false - default: 'staging' - type: string - -permissions: - id-token: write - contents: read - -env: - AWS_DEFAULT_REGION: us-east-1 - TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }} - SAMPLE_APP_NAMESPACE: sample-app-namespace - SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }} - SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }} - METRIC_NAMESPACE: ApplicationSignals - LOG_GROUP: /aws/application-signals/data - ECR_OPERATOR_STAGING_REPO: ${{ vars.ECR_OPERATOR_STAGING_REPO }} - APPLICATION_SIGNALS_ADOT_IMAGE: 611364707713.dkr.ecr.us-west-2.amazonaws.com/adot-autoinstrumentation-java-operator-staging:1.33.0-SNAPSHOT-91cbba8 - APPLICATION_SIGNALS_CW_AGENT_IMAGE: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integration-test:eca8174758d95308006632ec4d5533d765db9ca8 - -jobs: - appsignals-java-e2e-test: - runs-on: ubuntu-latest - steps: - # This step avoids code duplication for terraform templates and the validator - # To simplify, we get the entire repo - - name: Get testing resources from aws-application-signals-test-framework - uses: actions/checkout@v4 - with: - repository: aws-observability/aws-application-signals-test-framework - ref: ga-release - - - name: Download enablement script - uses: actions/checkout@v4 - with: - repository: aws-observability/application-signals-demo - ref: main - path: enablement-script - sparse-checkout: | - scripts/eks/appsignals/enable-app-signals.sh - scripts/eks/appsignals/clean-app-signals.sh - sparse-checkout-cone-mode: false - - - name: Generate testing id - run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ env.TEST_ACCOUNT }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }} - aws-region: ${{ env.AWS_DEFAULT_REGION }} - - # local directory to store the kubernetes config - - name: Create kubeconfig directory - run: mkdir -p ${{ github.workspace }}/.kube - - - name: Set KUBECONFIG environment variable - run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV - - - name: Set up kubeconfig - run: aws eks update-kubeconfig --name ${{ inputs.test-java-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }} - - - name: Install eksctl - run: | - mkdir ${{ github.workspace }}/eksctl - curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" - tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz - echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH - - - name: Create role for AWS access from the sample app - id: create_service_account - run: | - eksctl create iamserviceaccount \ - --name service-account-${{ env.TESTING_ID }} \ - --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-java-cluster-name }} \ - --role-name eks-s3-access-${{ env.TESTING_ID }} \ - --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ - --region ${{ env.AWS_DEFAULT_REGION }} \ - --approve - - - name: Set up terraform - uses: hashicorp/setup-terraform@v3 - with: - terraform_wrapper: false - - - name: Deploy sample app via terraform - working-directory: terraform/eks - run: | - terraform init - terraform validate - terraform apply -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \ - -var="eks_cluster_context_name=$(kubectl config current-context)" \ - -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ - -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}" \ - -var="sample_remote_app_image=${{ env.SAMPLE_APP_REMOTE_SERVICE_IMAGE }}" - - # Enable App Signals on the test cluster - - name: Enable App Signals - working-directory: enablement-script/scripts/eks/appsignals - run: | - ./enable-app-signals.sh \ - ${{ inputs.test-java-cluster-name }} \ - ${{ env.AWS_DEFAULT_REGION }} \ - ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Save CloudWatch Agent Operator image to environment before patching - run: | - echo "OLD_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV - - - name: Patch the CloudWatch Agent Operator image and restart CloudWatch pods - run: | - kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "${{ env.ECR_OPERATOR_STAGING_REPO }}:${{ inputs.tag }}"}, {"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - - name: Patch the CloudWatch Agent image and restart CloudWatch pods - run: | - kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ env.APPLICATION_SIGNALS_CW_AGENT_IMAGE }}"}]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - - name: Patch the ADOT image and restart CloudWatch pods - run: | - kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \ - -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--auto-instrumentation-java-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - # Application pods need to be restarted for the - # app signals instrumentation to take effect - - name: Restart the app pods - run: kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Wait for sample app pods to come up - run: | - kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }} \ - - - name: Get remote service deployment name and IP - run: | - echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV - echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV - - - name: Log pod ADOT image ID - run: | - kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \ - jq '.items[0].status.initContainerStatuses[0].imageID' - - - name: Log pod CWAgent image ID - run: | - kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=cloudwatch-agent -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - - name: Log pod Fluent Bit image ID - run: | - kubectl get pods -n amazon-cloudwatch -l k8s-app=fluent-bit -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - - name: Log pod CWAgent Operator image ID and save image to the environment - run: | - kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - echo "NEW_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV - -# - name: Check if CW Agent Operator image has changed -# run: | -# if [ ${{ env.OLD_CW_AGENT_OPERATOR_IMAGE }} = ${{ env.NEW_CW_AGENT_OPERATOR_IMAGE }} ]; then -# echo "Operator image did not change" -# exit 1 -# fi - - - name: Get the sample app endpoint - run: | - echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV - working-directory: terraform/eks - - - name: Wait for app endpoint to come online - id: endpoint-check - run: | - attempt_counter=0 - max_attempts=30 - until $(curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}); do - if [ ${attempt_counter} -eq ${max_attempts} ];then - echo "Max attempts reached" - exit 1 - fi - - printf '.' - attempt_counter=$(($attempt_counter+1)) - sleep 10 - done - - # This steps increases the speed of the validation by creating the telemetry data in advance - - name: Call all test APIs - continue-on-error: true - run: | - curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call" - curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}" - curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}" - curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call" - - - name: Build Gradle - run: ./gradlew - - # Validation for app signals telemetry data - - name: Call endpoint and validate generated EMF logs - id: log-validation - if: steps.endpoint-check.outcome == 'success' && !cancelled() - run: ./gradlew validator:run --args='-c eks/log-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-java-cluster-name }} - --service-name sample-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - - name: Call endpoints and validate generated metrics - id: metric-validation - if: (success() || steps.log-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c eks/metric-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-java-cluster-name }} - --service-name sample-application-${{ env.TESTING_ID }} - --remote-service-name sample-remote-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - - name: Call endpoints and validate generated traces - id: trace-validation - if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() - run: ./gradlew validator:run --args='-c eks/trace-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-java-cluster-name }} - --service-name sample-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - # Clean up Procedures - - - name: Remove log group deletion command - if: always() - working-directory: enablement-script/scripts/eks/appsignals - run: | - delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP }}' --region \$REGION" - sed -i "s#$delete_log_group##g" clean-app-signals.sh - - - name: Clean Up App Signals - if: always() - continue-on-error: true - working-directory: enablement-script/scripts/eks/appsignals - run: | - ./clean-app-signals.sh \ - ${{ inputs.test-java-cluster-name }} \ - ${{ env.AWS_DEFAULT_REGION }} \ - ${{ env.SAMPLE_APP_NAMESPACE }} - - # This step also deletes lingering resources from previous test runs - - name: Delete all sample app resources - if: always() - continue-on-error: true - timeout-minutes: 10 - run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Terraform destroy - if: always() - continue-on-error: true - working-directory: terraform/eks - run: | - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \ - -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ - -var="sample_app_image=${{ env.SAMPLE_APP_IMAGE }}" - - - name: Remove aws access service account - if: always() - continue-on-error: true - run: | - eksctl delete iamserviceaccount \ - --name service-account-${{ env.TESTING_ID }} \ - --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-java-cluster-name }} \ - --region ${{ env.AWS_DEFAULT_REGION }} diff --git a/.github/workflows/application-signals-python-e2e-test.yml b/.github/workflows/application-signals-python-e2e-test.yml deleted file mode 100644 index 70b5551a..00000000 --- a/.github/workflows/application-signals-python-e2e-test.yml +++ /dev/null @@ -1,339 +0,0 @@ -# This is a reusable workflow for running the Python E2E test for App Signals. -# It is meant to be called from another workflow. -# This E2E test is responsible for validating setting up a sample application on an EKS cluster and enabling -# App Signals using the staging image of the CloudWatch Agent Operator. It validates the generated telemetry -# including logs, metrics, and traces, then cleans up the cluster. The testing resources can be found in the -# ADOT python test framework repo: https://github.com/aws-observability/aws-application-signals-test-framework/tree/main -# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview -name: App Signals Enablement Python E2E Testing -on: - workflow_call: - inputs: - # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency - test-python-cluster-name: - required: true - type: string - tag: - description: 'Staging Artifact Tag' - required: false - default: 'staging' - type: string - -permissions: - id-token: write - contents: read - -env: - AWS_DEFAULT_REGION: us-east-1 - TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }} - SAMPLE_APP_NAMESPACE: python-sample-app-namespace - APP_SIGNALS_PYTHON_E2E_FE_SA_IMG: appsignals-python-django-main-service - APP_SIGNALS_PYTHON_E2E_RE_SA_IMG: appsignals-python-django-remote-service - METRIC_NAMESPACE: ApplicationSignals - LOG_GROUP: /aws/application-signals/data - ECR_OPERATOR_STAGING_REPO: ${{ vars.ECR_OPERATOR_STAGING_REPO }} - APPLICATION_SIGNALS_ADOT_IMAGE: 637423224110.dkr.ecr.us-east-1.amazonaws.com/aws-observability/adot-autoinstrumentation-python-staging:0.2.0-408d938 - APPLICATION_SIGNALS_CW_AGENT_IMAGE: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integration-test:eca8174758d95308006632ec4d5533d765db9ca8 - -jobs: - appsignals-python-e2e-test: - runs-on: ubuntu-latest - steps: - - name: Download enablement script - uses: actions/checkout@v4 - with: - repository: aws-observability/application-signals-demo - ref: main - path: enablement-script - sparse-checkout: | - scripts/eks/appsignals/enable-app-signals.sh - scripts/eks/appsignals/clean-app-signals.sh - sparse-checkout-cone-mode: false - -# Resolve conflict to clean up the redundant configuration files deployed by helm. - - name: Resolve Add-on configuration conflict - working-directory: enablement-script/scripts/eks/appsignals - run: | - sed -i 's/aws eks create-addon \\/aws eks create-addon \\\n --resolve-conflicts OVERWRITE \\/' enable-app-signals.sh - - - name: Generate testing id - run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ env.TEST_ACCOUNT }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }} - aws-region: ${{ env.AWS_DEFAULT_REGION }} - - # local directory to store the kubernetes config - - name: Create kubeconfig directory - run: mkdir -p ${{ github.workspace }}/.kube - - - name: Set KUBECONFIG environment variable - run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV - - - name: Set up kubeconfig - run: aws eks update-kubeconfig --name ${{ inputs.test-python-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }} - - - name: Install eksctl - run: | - mkdir ${{ github.workspace }}/eksctl - curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" - tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz - echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH - - - name: Create role for AWS access from the sample app - id: create_service_account - run: | - eksctl create iamserviceaccount \ - --name srvc-acc-${{ env.TESTING_ID }} \ - --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-python-cluster-name }} \ - --role-name eks-s3-access-python-${{ env.TESTING_ID }} \ - --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ - --region ${{ env.AWS_DEFAULT_REGION }} \ - --approve - - # This step avoids code duplication for terraform templates and the validator - # To simplify, we get the entire repo - - name: Get testing resources from aws-application-signals-test-framework - uses: actions/checkout@v4 - with: - repository: aws-observability/aws-application-signals-test-framework - ref: ga-python - path: aws-application-signals-test-framework - - - name: Set up terraform - uses: hashicorp/setup-terraform@v3 - with: - terraform_wrapper: false - - - name: Deploy sample app via terraform - working-directory: aws-application-signals-test-framework/terraform/python/eks - run: | - terraform init - terraform validate - terraform apply -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-python-cluster-name }}" \ - -var="eks_cluster_context_name=$(kubectl config current-context)" \ - -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=srvc-acc-${{ env.TESTING_ID }}" \ - -var="python_app_image=${{ env.TEST_ACCOUNT }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ env.APP_SIGNALS_PYTHON_E2E_FE_SA_IMG }}:latest" \ - -var="python_remote_app_image=${{ env.TEST_ACCOUNT }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ env.APP_SIGNALS_PYTHON_E2E_RE_SA_IMG }}:latest" - - # Enable App Signals on the test cluster - - name: Enable App Signals - working-directory: enablement-script/scripts/eks/appsignals - run: | - ./enable-app-signals.sh \ - ${{ inputs.test-python-cluster-name }} \ - ${{ env.AWS_DEFAULT_REGION }} \ - ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Save CloudWatch Agent Operator image to environment before patching - run: | - echo "OLD_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV - - - name: Patch the CloudWatch Agent Operator image and restart CloudWatch pods - run: | - kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "${{ env.ECR_OPERATOR_STAGING_REPO }}:${{ inputs.tag }}"}, {"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - - name: Patch the CloudWatch Agent image and restart CloudWatch pods - run: | - kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ env.APPLICATION_SIGNALS_CW_AGENT_IMAGE }}"}]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - - name: Patch the ADOT image and restart CloudWatch pods - run: | - kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \ - -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/2", "value": "--auto-instrumentation-python-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]' - kubectl delete pods --all -n amazon-cloudwatch - sleep 10 - kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch - - # Application pods need to be restarted for the - # app signals instrumentation to take effect - - name: Restart the app pods - run: | - kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }} - kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} - kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Get remote service deployment name and IP - run: | - echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV - echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV - - - name: Log pod ADOT image ID - run: | - kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \ - jq '.items[0].status.initContainerStatuses[0].imageID' - - - name: Log pod CWAgent image ID - run: | - kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=cloudwatch-agent -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - - name: Log pod Fluent Bit image ID - run: | - kubectl get pods -n amazon-cloudwatch -l k8s-app=fluent-bit -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - - name: Log pod CWAgent Operator image ID and save image to the environment - run: | - kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].imageID' - - echo "NEW_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \ - jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV - -# - name: Check if CW Agent Operator image has changed -# run: | -# if [ ${{ env.OLD_CW_AGENT_OPERATOR_IMAGE }} = ${{ env.NEW_CW_AGENT_OPERATOR_IMAGE }} ]; then -# echo "Operator image did not change" -# exit 1 -# fi - - - name: Get the sample app endpoint - run: | - echo "APP_ENDPOINT=$(terraform output python_app_endpoint)" >> $GITHUB_ENV - working-directory: aws-application-signals-test-framework/terraform/python/eks - - - name: Wait for app endpoint to come online - id: endpoint-check - run: | - attempt_counter=0 - max_attempts=30 - until $(curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}); do - if [ ${attempt_counter} -eq ${max_attempts} ];then - echo "Max attempts reached" - exit 1 - fi - - printf '.' - attempt_counter=$(($attempt_counter+1)) - sleep 10 - done - - # This steps increases the speed of the validation by creating the telemetry data in advance - - name: Call all test APIs - continue-on-error: true - run: | - curl -S -s -o /dev/null "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"; echo - curl -S -s -o /dev/null "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"; echo - curl -S -s -o /dev/null "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"; echo - curl -S -s -o /dev/null "http://${{ env.APP_ENDPOINT }}/client-call"; echo - - # Validation for app signals telemetry data - - name: Call endpoint and validate generated EMF logs - id: log-validation - if: steps.endpoint-check.outcome == 'success' && !cancelled() - working-directory: aws-application-signals-test-framework/ - run: ./gradlew validator:run --args='-c python/eks/log-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-python-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - - name: Call endpoints and validate generated metrics - id: metric-validation - if: (success() || steps.log-validation.outcome == 'failure') && !cancelled() - working-directory: aws-application-signals-test-framework/ - run: ./gradlew validator:run --args='-c python/eks/metric-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --metric-namespace ${{ env.METRIC_NAMESPACE }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-python-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-name python-remote-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - - name: Call endpoints and validate generated traces - id: trace-validation - if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() - working-directory: aws-application-signals-test-framework/ - run: ./gradlew validator:run --args='-c python/eks/trace-validation.yml - --testing-id ${{ env.TESTING_ID }} - --endpoint http://${{ env.APP_ENDPOINT }} - --region ${{ env.AWS_DEFAULT_REGION }} - --account-id ${{ env.TEST_ACCOUNT }} - --log-group ${{ env.LOG_GROUP }} - --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} - --platform-info ${{ inputs.test-python-cluster-name }} - --service-name python-application-${{ env.TESTING_ID }} - --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} - --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }} - --rollup' - - # Clean up Procedures - - name: Remove log group deletion command - if: always() - working-directory: enablement-script/scripts/eks/appsignals - run: | - delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP }}' --region \$REGION" - sed -i "s#$delete_log_group##g" clean-app-signals.sh - - - name: Clean Up App Signals - if: always() - continue-on-error: true - working-directory: enablement-script/scripts/eks/appsignals - run: | - ./clean-app-signals.sh \ - ${{ inputs.test-python-cluster-name }} \ - ${{ env.AWS_DEFAULT_REGION }} \ - ${{ env.SAMPLE_APP_NAMESPACE }} - - # This step also deletes lingering resources from previous test runs - - name: Delete all sample app resources - if: always() - continue-on-error: true - timeout-minutes: 10 - run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }} - - - name: Terraform destroy - if: always() - continue-on-error: true - working-directory: aws-application-signals-test-framework/terraform/python/eks - run: | - terraform destroy -auto-approve \ - -var="test_id=${{ env.TESTING_ID }}" \ - -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ - -var="kube_directory_path=${{ github.workspace }}/.kube" \ - -var="eks_cluster_name=${{ inputs.test-python-cluster-name }}" \ - -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ - -var="service_account_aws_access=srvc-acc-${{ env.TESTING_ID }}" \ - -var="python_app_image=${{ env.TEST_ACCOUNT }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ env.APP_SIGNALS_PYTHON_E2E_FE_SA_IMG }}:latest" \ - -var="python_remote_app_image=${{ env.TEST_ACCOUNT }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ env.APP_SIGNALS_PYTHON_E2E_RE_SA_IMG }}:latest" - - - name: Remove aws access service account - if: always() - continue-on-error: true - run: | - eksctl delete iamserviceaccount \ - --name srvc-acc-${{ env.TESTING_ID }} \ - --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ - --cluster ${{ inputs.test-python-cluster-name }} \ - --region ${{ env.AWS_DEFAULT_REGION }} \ No newline at end of file diff --git a/.github/workflows/build-and-upload-release.yml b/.github/workflows/build-and-upload-release.yml index 5cf1cdfa..f31e6137 100644 --- a/.github/workflows/build-and-upload-release.yml +++ b/.github/workflows/build-and-upload-release.yml @@ -82,12 +82,14 @@ jobs: platforms: linux/amd64, linux/arm64 e2e-test: + name: "Application Signals E2E Test" needs: MakeBinary uses: ./.github/workflows/application-signals-e2e-test.yml secrets: inherit + permissions: + id-token: write + contents: read with: - test-java-cluster-name: 'e2e-cw-agent-operator-test' - test-python-cluster-name: 'e2e-cw-agent-operator-python-test' tag: ${{ inputs.tag }} push-release-ecr: diff --git a/.github/workflows/test-2.yml b/.github/workflows/test-2.yml index 7b51ad1e..5779262e 100644 --- a/.github/workflows/test-2.yml +++ b/.github/workflows/test-2.yml @@ -12,55 +12,55 @@ env: ECR_OPERATOR_RELEASE_IMAGE: ${{ secrets.ECR_OPERATOR_RELEASE_IMAGE }} jobs: -# MakeBinary: -# name: 'MakeContainerImage' -# runs-on: ubuntu-latest -# permissions: -# id-token: write -# contents: read -# steps: -# - uses: actions/checkout@v3 -# with: -# fetch-depth: 0 -# -# - name: Set up Go 1.x -# uses: actions/setup-go@v4 -# with: -# go-version: ~1.19.6 -# cache: false -# -# - name: Configure AWS Credentials -# uses: aws-actions/configure-aws-credentials@v2 -# with: -# role-to-assume: ${{ env.AWS_ASSUME_ROLE }} -# aws-region: us-west-2 -# -# - name: Login to ECR -# if: steps.cached_binaries.outputs.cache-hit == false -# id: login-ecr -# uses: aws-actions/amazon-ecr-login@v1 -# -# - name: Set up Docker Buildx -# if: steps.cached_binaries.outputs.cache-hit == false -# uses: docker/setup-buildx-action@v1 -# -# - name: Set up QEMU -# if: steps.cached_binaries.outputs.cache-hit == false -# uses: docker/setup-qemu-action@v1 -# -# - name: Build Cloudwatch Agent Operator Image and push to ECR -# uses: docker/build-push-action@v4 -# if: steps.cached_binaries.outputs.cache-hit == false -# with: -# file: ./Dockerfile -# context: . -# push: true -# tags: ${{ env.ECR_OPERATOR_STAGING_REPO }}:staging -# platforms: linux/amd64, linux/arm64 + MakeBinary: + name: 'MakeContainerImage' + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Go 1.x + uses: actions/setup-go@v4 + with: + go-version: ~1.19.6 + cache: false + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ env.AWS_ASSUME_ROLE }} + aws-region: us-west-2 + + - name: Login to ECR + if: steps.cached_binaries.outputs.cache-hit == false + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Set up Docker Buildx + if: steps.cached_binaries.outputs.cache-hit == false + uses: docker/setup-buildx-action@v1 + + - name: Set up QEMU + if: steps.cached_binaries.outputs.cache-hit == false + uses: docker/setup-qemu-action@v1 + + - name: Build Cloudwatch Agent Operator Image and push to ECR + uses: docker/build-push-action@v4 + if: steps.cached_binaries.outputs.cache-hit == false + with: + file: ./Dockerfile + context: . + push: true + tags: ${{ env.ECR_OPERATOR_STAGING_REPO }}:staging + platforms: linux/amd64, linux/arm64 ApplicationSignalsEndToEndTest: name: "Application Signals E2E Test" -# needs: MakeBinary + needs: MakeBinary uses: ./.github/workflows/application-signals-e2e-test.yml secrets: inherit permissions: