From be21a5d6af803026361650e33cd05ff44d76afde Mon Sep 17 00:00:00 2001 From: Natalia Marukovich Date: Sat, 11 Feb 2023 13:32:11 +0400 Subject: [PATCH 1/6] add new tags --- cloud/jenkins/psmdb_operator_eks_latest.groovy | 1 + cloud/jenkins/psmdb_operator_eks_version.groovy | 1 + cloud/jenkins/pxc_operator_eks_latest.groovy | 1 + cloud/jenkins/pxc_operator_eks_version.groovy | 1 + 4 files changed, 4 insertions(+) diff --git a/cloud/jenkins/psmdb_operator_eks_latest.groovy b/cloud/jenkins/psmdb_operator_eks_latest.groovy index aa583af752..3a819fef28 100644 --- a/cloud/jenkins/psmdb_operator_eks_latest.groovy +++ b/cloud/jenkins/psmdb_operator_eks_latest.groovy @@ -41,6 +41,7 @@ nodeGroups: tags: 'iit-billing-tag': 'jenkins-eks' 'delete-cluster-after-hours': '10' + 'team': 'cloud' EOF """ diff --git a/cloud/jenkins/psmdb_operator_eks_version.groovy b/cloud/jenkins/psmdb_operator_eks_version.groovy index 0dc6637a23..7dd1975b61 100644 --- a/cloud/jenkins/psmdb_operator_eks_version.groovy +++ b/cloud/jenkins/psmdb_operator_eks_version.groovy @@ -41,6 +41,7 @@ nodeGroups: tags: 'iit-billing-tag': 'jenkins-eks' 'delete-cluster-after-hours': '10' + 'team': 'cloud' EOF """ diff --git a/cloud/jenkins/pxc_operator_eks_latest.groovy b/cloud/jenkins/pxc_operator_eks_latest.groovy index 292acabbf1..17027098d4 100644 --- a/cloud/jenkins/pxc_operator_eks_latest.groovy +++ b/cloud/jenkins/pxc_operator_eks_latest.groovy @@ -41,6 +41,7 @@ nodeGroups: tags: 'iit-billing-tag': 'jenkins-eks' 'delete-cluster-after-hours': '10' + 'team': 'cloud' EOF """ diff --git a/cloud/jenkins/pxc_operator_eks_version.groovy b/cloud/jenkins/pxc_operator_eks_version.groovy index ca96f78e5d..4b1675674c 100644 --- a/cloud/jenkins/pxc_operator_eks_version.groovy +++ b/cloud/jenkins/pxc_operator_eks_version.groovy @@ -41,6 +41,7 @@ nodeGroups: tags: 'iit-billing-tag': 'jenkins-eks' 'delete-cluster-after-hours': '10' + 'team': 'cloud' EOF """ From 5962c64b736484b5e6f538f3ba4a8f1767a9f39d Mon Sep 17 00:00:00 
2001 From: Natalia Marukovich Date: Wed, 14 Aug 2024 16:35:00 +0200 Subject: [PATCH 2/6] test fix --- cloud/jenkins/pxc_operator_aks_latest.groovy | 6 +++--- cloud/jenkins/pxc_operator_aks_version.groovy | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/jenkins/pxc_operator_aks_latest.groovy b/cloud/jenkins/pxc_operator_aks_latest.groovy index 2a8b98ec63..ba9c8c3d4c 100644 --- a/cloud/jenkins/pxc_operator_aks_latest.groovy +++ b/cloud/jenkins/pxc_operator_aks_latest.groovy @@ -33,7 +33,8 @@ void prepareNode() { } if ("$PLATFORM_VER" == "latest") { - USED_PLATFORM_VER = sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , returnStdout: true).trim() + USED_PLATFORM_VER = "1.30" +// sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , returnStdout: true).trim() } else { USED_PLATFORM_VER="$PLATFORM_VER" } @@ -176,8 +177,7 @@ void createCluster(String CLUSTER_SUFFIX) { --generate-ssh-keys \ --enable-cluster-autoscaler \ --outbound-type loadbalancer \ - --kubernetes-version $USED_PLATFORM_VER \ - -l $location + --kubernetes-version $USED_PLATFORM_VER az aks get-credentials --subscription eng-cloud-dev --resource-group percona-operators --name $CLUSTER_NAME-$CLUSTER_SUFFIX --overwrite-existing """ } diff --git a/cloud/jenkins/pxc_operator_aks_version.groovy b/cloud/jenkins/pxc_operator_aks_version.groovy index 6e7fc0fa13..6f0922dcc1 100644 --- a/cloud/jenkins/pxc_operator_aks_version.groovy +++ b/cloud/jenkins/pxc_operator_aks_version.groovy @@ -33,7 +33,8 @@ void prepareNode() { } if ("$PLATFORM_VER" == "latest") { - USED_PLATFORM_VER = sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , 
returnStdout: true).trim() + USED_PLATFORM_VER = "1.30" +// sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , returnStdout: true).trim() } else { USED_PLATFORM_VER="$PLATFORM_VER" } @@ -176,8 +177,7 @@ void createCluster(String CLUSTER_SUFFIX) { --generate-ssh-keys \ --enable-cluster-autoscaler \ --outbound-type loadbalancer \ - --kubernetes-version $USED_PLATFORM_VER \ - -l $location + --kubernetes-version $USED_PLATFORM_VER az aks get-credentials --subscription eng-cloud-dev --resource-group percona-operators --name $CLUSTER_NAME-$CLUSTER_SUFFIX --overwrite-existing """ } From c6619ffedaff88d129626f67476d9e138ae29c68 Mon Sep 17 00:00:00 2001 From: Natalia Marukovich Date: Wed, 14 Aug 2024 18:01:46 +0200 Subject: [PATCH 3/6] update fix --- cloud/jenkins/pxc_operator_aks_latest.groovy | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/jenkins/pxc_operator_aks_latest.groovy b/cloud/jenkins/pxc_operator_aks_latest.groovy index ba9c8c3d4c..b797c636be 100644 --- a/cloud/jenkins/pxc_operator_aks_latest.groovy +++ b/cloud/jenkins/pxc_operator_aks_latest.groovy @@ -1,4 +1,4 @@ -location='westeurope' +location='useast' tests=[] clusters=[] @@ -33,8 +33,7 @@ void prepareNode() { } if ("$PLATFORM_VER" == "latest") { - USED_PLATFORM_VER = "1.30" -// sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , returnStdout: true).trim() + USED_PLATFORM_VER = sh(script: "az aks get-versions --location $location --output json | jq -r '.values | max_by(.patchVersions) | .patchVersions | keys[]' | sort --version-sort | tail -1", , returnStdout: true).trim() } else { USED_PLATFORM_VER="$PLATFORM_VER" } @@ -177,7 +176,8 @@ void createCluster(String CLUSTER_SUFFIX) { --generate-ssh-keys \ --enable-cluster-autoscaler \ 
--outbound-type loadbalancer \ - --kubernetes-version $USED_PLATFORM_VER + --kubernetes-version $USED_PLATFORM_VER \ + -l $location az aks get-credentials --subscription eng-cloud-dev --resource-group percona-operators --name $CLUSTER_NAME-$CLUSTER_SUFFIX --overwrite-existing """ } From 775b0e9e5c934d8b506ddf8a0aacaad31e95ab76 Mon Sep 17 00:00:00 2001 From: Natalia Marukovich Date: Wed, 14 Aug 2024 18:50:00 +0200 Subject: [PATCH 4/6] update fix --- cloud/jenkins/pxc_operator_aks_latest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/jenkins/pxc_operator_aks_latest.groovy b/cloud/jenkins/pxc_operator_aks_latest.groovy index b797c636be..df26e01c8f 100644 --- a/cloud/jenkins/pxc_operator_aks_latest.groovy +++ b/cloud/jenkins/pxc_operator_aks_latest.groovy @@ -1,4 +1,4 @@ -location='useast' +location='eastus' tests=[] clusters=[] From a805333fcac70d069d207eacb250e4fc6ebff14c Mon Sep 17 00:00:00 2001 From: Natalia Marukovich Date: Wed, 14 Aug 2024 19:00:56 +0200 Subject: [PATCH 5/6] update fix --- cloud/jenkins/pxc_operator_aks_latest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/jenkins/pxc_operator_aks_latest.groovy b/cloud/jenkins/pxc_operator_aks_latest.groovy index df26e01c8f..57e82e9fe5 100644 --- a/cloud/jenkins/pxc_operator_aks_latest.groovy +++ b/cloud/jenkins/pxc_operator_aks_latest.groovy @@ -1,4 +1,4 @@ -location='eastus' +location='norwayeast' tests=[] clusters=[] From 412a33d612fb7f9821a9b724f9884f866fa48560 Mon Sep 17 00:00:00 2001 From: Natalia Marukovich Date: Wed, 21 Aug 2024 17:45:49 +0300 Subject: [PATCH 6/6] fix stacks deletion --- .../aws-functions/orphaned_cloudformation.py | 99 ++++++++++++++++--- .../orphaned_openshift_instances.py | 5 +- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/cloud/aws-functions/orphaned_cloudformation.py b/cloud/aws-functions/orphaned_cloudformation.py index fbc3f02775..a657f61818 100644 --- a/cloud/aws-functions/orphaned_cloudformation.py 
+++ b/cloud/aws-functions/orphaned_cloudformation.py
@@ -3,10 +3,21 @@ import datetime
 
 import boto3
 from boto3.exceptions import Boto3Error
+from botocore.exceptions import ClientError, WaiterError
 from utils import get_regions_list
+from time import sleep
+
+def is_stack_to_terminate(stack, aws_region):
+    """Tell whether stack `stack` has outlived its 'delete-cluster-after-hours' tag by over an hour (False if it cannot be described)."""
+    cf_client = boto3.client('cloudformation', region_name=aws_region)
+
+    try:
+        stack_desc = cf_client.describe_stacks(StackName=stack)['Stacks'][0]
+        tags = stack_desc['Tags']
+    except ClientError as e:
+        print(e)
+        return False
 
-def is_stack_to_terminate(stack):
-    tags = stack.tags
     tags_dict = {item['Key']: item['Value'] for item in tags}
 
     if 'team' not in tags_dict.keys() or ('team' in tags_dict.keys() and tags_dict['team'] != 'cloud'):
@@ -16,37 +27,88 @@ def is_stack_to_terminate(stack):
 
     stack_lifetime = float(tags_dict['delete-cluster-after-hours'])
     current_time = datetime.datetime.now().timestamp()
-    creation_time = int(stack.creation_time.timestamp())
+
+    # Age is measured from the CreationTime reported by describe_stacks.
+    creation_time = int(stack_desc['CreationTime'].timestamp())
 
     if (current_time - creation_time) / 3600 > stack_lifetime + 1:
         return True
     return False
 
 def get_cloudformation_to_terminate(aws_region):
-    cf_client = boto3.resource('cloudformation')
-    statuses = ['ROLLBACK_COMPLETE', 'CREATE_COMPLETE', 'UPDATE_COMPLETE', 'DELETE_FAILED']
+    """Return the names of the stacks in `aws_region` that is_stack_to_terminate() selects for deletion."""
+    cf_client = boto3.client('cloudformation', region_name=aws_region)
     stacks_for_deletion = []
-    cloudformation_stacks = [stack for stack in cf_client.stacks.all() if stack.stack_status in statuses]
+
+    cloudformation_stacks = [stack['StackName'] for stack in cf_client.list_stacks(StackStatusFilter=['ROLLBACK_COMPLETE', 'CREATE_COMPLETE', 'UPDATE_COMPLETE', 'DELETE_FAILED'])['StackSummaries']]
+
     if not cloudformation_stacks:
         logging.info(f"There are no cloudformation_stacks in cloud")
 
     for stack in cloudformation_stacks:
-        if is_stack_to_terminate(stack):
-            stacks_for_deletion.append(stack.name)
+        if is_stack_to_terminate(stack, aws_region):
+            stacks_for_deletion.append(stack)
 
     if not stacks_for_deletion:
         logging.info(f"There are no stacks for deletion")
     return stacks_for_deletion
 
-def delete_cloudformation_stacks(cloudformation_stack):
-    cf_client = boto3.client('cloudformation')
+def delete_stack(stack_name, aws_region):
+    """Delete stack `stack_name` in `aws_region` and wait for it to finish; API errors and waiter timeouts are logged, not raised."""
+    cf_client = boto3.client('cloudformation', region_name=aws_region)
     try:
-        logging.info(f"Removing cloudformation stack: {cloudformation_stack}")
-        cf_client.delete_stack(StackName=cloudformation_stack)
-    except Boto3Error as e:
-        logging.error(f"Delete of stack failed with error: {e}")
+        logging.info(f"Removing cloudformation stack: {stack_name}")
+        waiter_config = {
+            'Delay': 30,  # Time (in seconds) to wait between attempts
+            'MaxAttempts': 10  # Maximum number of attempts (30s * 10 = 300s or 5 minutes)
+        }
+        waiter = cf_client.get_waiter('stack_delete_complete')
+        print(f"Waiting for stack {stack_name} to be deleted...")
+        cf_client.delete_stack(StackName=stack_name)
+        waiter.wait(StackName=stack_name, WaiterConfig=waiter_config)
+    except (ClientError, WaiterError) as e:
+        logging.error(f"Error deleting stack: {e}")
+
+def delete_stack_resources(stack_name, aws_region):
+    """Best-effort deletion of the IAM roles, policies and instance profiles of stack `stack_name`; ClientError failures are printed, not raised."""
+    cf_client = boto3.client('cloudformation', region_name=aws_region)
+    iam_client = boto3.client('iam')
+    try:
+        resources = cf_client.describe_stack_resources(StackName=stack_name)
+        for resource in resources['StackResources']:
+            resource_id = resource['PhysicalResourceId']
+            resource_type = resource['ResourceType']
+            print(f'resource_type {resource_type} stack_name {stack_name}')
+            try:
+                print(f"Attempting to delete resource: {resource_id} of type: {resource_type}")
+                if resource_type == 'AWS::IAM::Role':
+                    iam_client.delete_role(RoleName=resource_id)
+                elif resource_type == 'AWS::IAM::Policy':
+                    iam_client.delete_policy(PolicyArn=resource_id)
+                elif resource_type == 'AWS::IAM::InstanceProfile':
+                    try:
+                        response = iam_client.get_instance_profile(InstanceProfileName=resource_id)
+                        roles = response['InstanceProfile']['Roles']
+                        if roles:
+                            for role in roles:
+                                print(f"Role attached to instance profile {resource_id}: {role['RoleName']}")
+                                iam_client.remove_role_from_instance_profile(InstanceProfileName=resource_id, RoleName=role['RoleName'])
+                        else:
+                            print(f"No roles are attached to instance profile {resource_id}.")
+                    except iam_client.exceptions.NoSuchEntityException:
+                        print(f"Instance profile {resource_id} does not exist.")
+                    except Exception as e:
+                        print(f"An error occurred: {e}")
+                    iam_client.delete_instance_profile(InstanceProfileName=resource_id)
+
+                sleep(2)  # Sleep to avoid hitting rate limits
+            except ClientError as e:
+                print(f"Failed to delete resource: {resource_id}. Error: {e}")
+    except ClientError as e:
+        print(f"Error describing stack resources: {e}")
 
 def lambda_handler(event, context):
+    """Lambda entry point: for each region from get_regions_list(), delete the stacks picked by get_cloudformation_to_terminate()."""
     aws_regions = get_regions_list()
 
     for aws_region in aws_regions:
@@ -54,5 +116,10 @@ def lambda_handler(event, context):
 
         cloudformation_stacks = get_cloudformation_to_terminate(aws_region)
         for cloudformation_stack in cloudformation_stacks:
-            logging.info(f"Deleting cloudformation stacks.")
-            delete_cloudformation_stacks(cloudformation_stack)
+            try:
+                logging.info(f"Deleting cloudformation stacks.")
+                delete_stack_resources(cloudformation_stack, aws_region)
+                delete_stack(cloudformation_stack, aws_region)
+            except (ClientError, WaiterError) as e:
+                logging.error(f"Failed to delete stack: {cloudformation_stack}. Error: {e}")
+                continue
diff --git a/cloud/aws-functions/orphaned_openshift_instances.py b/cloud/aws-functions/orphaned_openshift_instances.py
index 9ecdf68609..7f23a02afc 100644
--- a/cloud/aws-functions/orphaned_openshift_instances.py
+++ b/cloud/aws-functions/orphaned_openshift_instances.py
@@ -21,7 +21,10 @@ def is_instance_to_terminate(instance):
 
     instance_lifetime = float(tags_dict['delete-cluster-after-hours'])
     current_time = datetime.datetime.now().timestamp()
-    creation_time = instance.launch_time.timestamp()
+    try:
+        creation_time = int(tags_dict['creation-time'])
+    except (KeyError, ValueError):
+        return False
 
     if (current_time - creation_time) / 3600 > instance_lifetime:
         return True