"""
Script to drop tables from an RDS MySQL database while handling foreign key dependencies.

Usage:
    python drop_db_tables.py --db-host=my-db-host --db-name=my-db

Arguments:
    --db-host      The RDS database host.
    --db-name      The database name.
    --db-user      The database username (defaults to $DB_USERNAME).
    --db-password  The database password (defaults to $DB_PASSWORD).

Environment Variables:
    DB_USERNAME    The RDS database username.
    DB_PASSWORD    The RDS database password.

Functionality:
    - Drops the tables listed in TABLES_TO_DROP, but only those that have had
      no recorded activity in the last 12 months.
    - Drops the foreign keys listed in FK_DEPENDENCIES first, so that the
      referenced parent table can be dropped afterwards.
    - Exits with a non-zero status if any step fails.

Example:
    export DB_USERNAME=admin
    export DB_PASSWORD=securepass
    python drop_db_tables.py --db-host=mydb.amazonaws.com --db-name=mydatabase

Dependencies:
    requirements.txt in this directory is a symlink to
    ../jenkins/requirements.txt.

Provenance:
    Reconstructed from the two-commit series "chore: Added script to drop
    tables from DB" (b075a049f00e, 8406c804e111).
"""

from datetime import datetime, timedelta
import logging

import backoff
import boto3
from botocore.exceptions import ClientError
import click
import pymysql

MAX_TRIES = 5

# Tables older than this (by last recorded activity) are eligible for removal.
INACTIVITY_WINDOW = timedelta(days=365)

# Order matters: the two child tables come before ``oauth2_client`` so that
# the parent is dropped last.
TABLES_TO_DROP = [
    "oauth2_provider_trustedclient",  # FK reference to oauth2_client
    "third_party_auth_providerapipermissions",  # FK reference to oauth2_client
    "oauth2_client",
    "oauth2_grant",
    "oauth2_accesstoken",
    "oauth2_refreshtoken",
    "oauth_provider_consumer",
    "oauth_provider_nonce",
    "oauth_provider_scope",
    "oauth_provider_token",
]

# child table -> parent table whose foreign keys must be removed first.
FK_DEPENDENCIES = {
    "third_party_auth_providerapipermissions": "oauth2_client",
    "oauth2_provider_trustedclient": "oauth2_client",
}

# Configure logging
LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class EC2BotoWrapper:
    """Thin EC2 client wrapper that retries ``ClientError`` with exponential backoff.

    Not used by the ``drop_tables`` command in this file.
    """

    def __init__(self):
        self.client = boto3.client("ec2")

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_regions(self):
        """Return the raw ``describe_regions`` response."""
        return self.client.describe_regions()


class RDSBotoWrapper:
    """Thin RDS client wrapper that retries ``ClientError`` with exponential backoff.

    Keyword arguments are forwarded to ``boto3.client("rds", ...)``.
    Not used by the ``drop_tables`` command in this file.
    """

    def __init__(self, **kwargs):
        self.client = boto3.client("rds", **kwargs)

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_db_instances(self):
        """Return the raw ``describe_db_instances`` response."""
        return self.client.describe_db_instances()


def connect_to_db(db_host, db_user, db_password, db_name):
    """Establish a connection to the RDS MySQL database.

    Rows are returned as dicts (``DictCursor``); the other functions in this
    module rely on that to read columns by name.
    """
    LOGGER.info("Connecting to the database...")
    return pymysql.connect(
        host=db_host,
        user=db_user,
        password=db_password,
        database=db_name,
        cursorclass=pymysql.cursors.DictCursor,
    )


def _quote_identifier(name):
    """Return *name* as a backtick-quoted MySQL identifier.

    Identifiers cannot be sent as query parameters, so they are quoted
    explicitly; embedded backticks are doubled.
    """
    return "`" + str(name).replace("`", "``") + "`"


def _as_datetime(value):
    """Coerce a MySQL timestamp result to ``datetime`` (``None`` passes through).

    NOTE(review): the driver should already return ``datetime`` for DATETIME
    columns; the string branch is a safety net in case the server reports the
    aggregated expression as a character type -- confirm against the target
    server version.
    """
    if value is None or isinstance(value, datetime):
        return value
    if isinstance(value, bytes):
        value = value.decode("ascii")
    # Trim optional fractional seconds before parsing.
    return datetime.strptime(str(value)[:19], "%Y-%m-%d %H:%M:%S")


def get_last_activity_date(connection, table_name):
    """Return the most recent of CREATE_TIME / UPDATE_TIME for *table_name*.

    Only the schema the connection is attached to is inspected. A missing
    timestamp is treated as the Unix epoch. Returns ``None`` when the table
    does not exist in the current schema.

    NOTE(review): for InnoDB, UPDATE_TIME is not persisted across server
    restarts per the MySQL manual, so "no activity" can be a false signal --
    confirm this heuristic is acceptable for the target database.
    """
    query = """
        SELECT MAX(GREATEST(
            COALESCE(UPDATE_TIME, CAST('1970-01-01 00:00:00' AS DATETIME)),
            COALESCE(CREATE_TIME, CAST('1970-01-01 00:00:00' AS DATETIME))
        )) AS last_activity
        FROM information_schema.tables
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = %s
    """
    with connection.cursor() as cursor:
        cursor.execute(query, (table_name,))
        row = cursor.fetchone()
    return _as_datetime(row["last_activity"]) if row else None


def _is_recently_active(connection, table_name):
    """Return True (and log the skip) if *table_name* changed within INACTIVITY_WINDOW."""
    last_activity = get_last_activity_date(connection, table_name)
    if last_activity is None:
        return False
    # Naive local time, matching the naive datetimes the driver returns.
    cutoff = datetime.now() - INACTIVITY_WINDOW
    if last_activity > cutoff:
        LOGGER.info("Skipping %s: Last activity was on %s", table_name, last_activity)
        return True
    return False


def drop_foreign_key(connection, table_name, referenced_table):
    """Drop every foreign key on *table_name* that references *referenced_table*.

    Nothing is done when *table_name* has been active within the last year
    or when no matching constraint exists.
    """
    if _is_recently_active(connection, table_name):
        return

    # DISTINCT: a composite key yields one KEY_COLUMN_USAGE row per column.
    query = """
        SELECT DISTINCT CONSTRAINT_NAME
        FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = %s
          AND REFERENCED_TABLE_NAME = %s
    """
    with connection.cursor() as cursor:
        cursor.execute(query, (table_name, referenced_table))
        constraint_names = [row["CONSTRAINT_NAME"] for row in cursor.fetchall()]
        for constraint_name in constraint_names:
            cursor.execute(
                f"ALTER TABLE {_quote_identifier(table_name)} "
                f"DROP FOREIGN KEY {_quote_identifier(constraint_name)}"
            )
            LOGGER.info("Dropped foreign key %s from %s.", constraint_name, table_name)
    if constraint_names:
        connection.commit()


def drop_table(connection, table_name):
    """Drop *table_name* unless it has been active within the last year.

    Uses ``DROP TABLE IF EXISTS``, so a table that is already gone is not an
    error.
    """
    if _is_recently_active(connection, table_name):
        return

    LOGGER.info("Dropping table %s...", table_name)
    with connection.cursor() as cursor:
        cursor.execute(f"DROP TABLE IF EXISTS {_quote_identifier(table_name)}")
    connection.commit()
    LOGGER.info("Table %s dropped.", table_name)


@click.command()
@click.option('--db-host', required=True, help="RDS DB host")
@click.option('--db-user', envvar='DB_USERNAME', required=True,
              help="RDS DB user (can be set via environment variable DB_USERNAME)")
@click.option('--db-password', envvar='DB_PASSWORD', required=True,
              help="RDS DB password (can be set via environment variable DB_PASSWORD)")
@click.option('--db-name', required=True, help="RDS DB name")
def drop_tables(db_host, db_user, db_password, db_name):
    """
    A script to drop tables from an RDS database while handling foreign key dependencies.
    The tables and foreign keys to remove are defined in TABLES_TO_DROP and FK_DEPENDENCIES.
    """
    try:
        connection = connect_to_db(db_host, db_user, db_password, db_name)
        try:
            # Remove child -> parent constraints first so the parent can go.
            for table, referenced_table in FK_DEPENDENCIES.items():
                drop_foreign_key(connection, table, referenced_table)

            for table in TABLES_TO_DROP:
                drop_table(connection, table)
        finally:
            # Always release the connection, including on failure.
            connection.close()
        LOGGER.info("Database cleanup completed successfully.")
    except Exception as exc:
        # Top-level boundary: record the traceback and signal failure to the
        # caller (e.g. a CI job) instead of exiting 0.
        LOGGER.exception("An error occurred: %s", exc)
        raise SystemExit(1) from exc


if __name__ == '__main__':
    drop_tables()