diff --git a/config/default.py b/config/default.py
index 2e1e75557..78ab6430f 100644
--- a/config/default.py
+++ b/config/default.py
@@ -883,3 +883,9 @@ def check_engine_admin_permission(request, *args, **kwargs):
 if "BKAPP_SOPS_BROKER_URL" in os.environ:
     BROKER_URL = os.getenv("BKAPP_SOPS_BROKER_URL")
     print(f"BROKER_URL: {BROKER_URL}")
+
+# Expired-statistics cleanup configuration (raw values parsed in env.py)
+ENABLE_CLEAN_EXPIRED_STATISTICS = env.ENABLE_CLEAN_EXPIRED_STATISTICS
+STATISTICS_VALIDITY_DAY = env.STATISTICS_VALIDITY_DAY
+CLEAN_EXPIRED_STATISTICS_BATCH_NUM = env.CLEAN_EXPIRED_STATISTICS_BATCH_NUM
+CLEAN_EXPIRED_STATISTICS_CRON = env.CLEAN_EXPIRED_STATISTICS_CRON
diff --git a/env.py b/env.py
index 59c9045d1..3fb100697 100644
--- a/env.py
+++ b/env.py
@@ -153,3 +153,11 @@
 # bk_audit
 BK_AUDIT_ENDPOINT = os.getenv("BK_AUDIT_ENDPOINT", None)
 BK_AUDIT_DATA_TOKEN = os.getenv("BK_AUDIT_DATA_TOKEN", None)
+
+# Expired-statistics cleanup configuration
+# NOTE: environment variables are strings, so bool(os.getenv(...)) would be True
+# for "false"/"0"/"no" as well -- parse the flag explicitly instead.
+ENABLE_CLEAN_EXPIRED_STATISTICS = os.getenv("BKAPP_ENABLE_CLEAN_EXPIRED_STATISTICS", "false").lower() in ("true", "1", "yes")
+STATISTICS_VALIDITY_DAY = int(os.getenv("BKAPP_STATISTICS_VALIDITY_DAY", 730))
+CLEAN_EXPIRED_STATISTICS_BATCH_NUM = int(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_BATCH_NUM", 100))
+CLEAN_EXPIRED_STATISTICS_CRON = tuple(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_CRON", "30 0 * * *").split())
diff --git a/gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py b/gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py
index b288e1ca0..f4f125341 100644
--- a/gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py
+++ b/gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py
@@ -15,6 +15,7 @@
 from django.db.models import QuerySet
 
 from gcloud.taskflow3.models import AutoRetryNodeStrategy, TimeoutNodeConfig
+from gcloud.analysis_statistics.models import TaskflowStatistics, TaskflowExecutedNodeStatistics
 from pipeline.contrib.periodic_task.models import PeriodicTaskHistory
 from pipeline.eri.models import (
     ContextValue,
@@ -84,3 +85,17 @@ def get_clean_pipeline_instance_data(instance_ids: List[str]) -> Dict[str, QuerySet]:
         "periodic_task_history": periodic_task_history,
         "pipeline_instances": pipeline_instances,
     }
+
+
+def get_clean_statistics_data(expire_time):
+    """
+    Collect expired statistics records to be cleaned, keyed by model.
+
+    :param expire_time: records created before this datetime are considered expired
+    :return: mapping of {model class: lazy queryset of expired record ids}
+    """
+    taskflow_ids = TaskflowStatistics.objects.filter(create_time__lt=expire_time).values_list("id", flat=True)
+    taskflow_node_ids = TaskflowExecutedNodeStatistics.objects.filter(instance_create_time__lt=expire_time).values_list(
+        "id", flat=True
+    )
+    return {TaskflowStatistics: taskflow_ids, TaskflowExecutedNodeStatistics: taskflow_node_ids}
diff --git a/gcloud/contrib/cleaner/tasks.py b/gcloud/contrib/cleaner/tasks.py
index 0d5c9bf76..774769dc3 100644
--- a/gcloud/contrib/cleaner/tasks.py
+++ b/gcloud/contrib/cleaner/tasks.py
@@ -18,7 +18,10 @@
 from django.db import transaction
 from django.utils import timezone
 
-from gcloud.contrib.cleaner.pipeline.bamboo_engine_tasks import get_clean_pipeline_instance_data
+from gcloud.contrib.cleaner.pipeline.bamboo_engine_tasks import (
+    get_clean_pipeline_instance_data,
+    get_clean_statistics_data,
+)
 from gcloud.contrib.cleaner.signals import pre_delete_pipeline_instance_data
 from gcloud.taskflow3.models import TaskFlowInstance
 from gcloud.utils.decorators import time_record
@@ -69,3 +72,39 @@ def clean_expired_v2_task_data():
         logger.info(f"[clean_expired_v2_task_data] success clean tasks: {task_ids}")
     except Exception as e:
         logger.exception(f"[clean_expired_v2_task_data] error: {e}")
+
+
+@periodic_task(
+    # NOTE(review): celery's crontab(*args) positional order is (minute, hour,
+    # day_of_week, day_of_month, month_of_year), which differs from standard cron
+    # field order -- fields 3-5 of BKAPP_CLEAN_EXPIRED_STATISTICS_CRON are shifted.
+    # Harmless for the all-wildcard default; confirm before using a custom schedule.
+    run_every=(crontab(*settings.CLEAN_EXPIRED_STATISTICS_CRON)), ignore_result=True, queue="task_data_clean"
+)
+@time_record(logger)
+def clear_statistics_info():
+    """
+    Periodically delete expired statistics records, one batch per model per run.
+    """
+    if not settings.ENABLE_CLEAN_EXPIRED_STATISTICS:
+        logger.info("Skip clean expired statistics data")
+        return
+
+    logger.info("Start clean expired statistics data...")
+    try:
+        validity_day = settings.STATISTICS_VALIDITY_DAY
+        expire_time = timezone.now() - timezone.timedelta(days=validity_day)
+        batch_num = settings.CLEAN_EXPIRED_STATISTICS_BATCH_NUM
+
+        data_to_clean = get_clean_statistics_data(expire_time)
+
+        for model, ids in data_to_clean.items():
+            # slicing the lazy values_list queryset issues a LIMIT query
+            ids_to_delete = ids[:batch_num]
+            if ids_to_delete:
+                model.objects.filter(id__in=ids_to_delete).delete()
+                logger.info(f"[clear_statistics_info] clean model: {model}, deleted ids: {list(ids_to_delete)}")
+        logger.info("[clear_statistics_info] success clean statistics")
+    except Exception as e:
+        # logger.exception keeps the traceback, consistent with clean_expired_v2_task_data
+        logger.exception(f"Failed to clear expired statistics data: {e}")