From 1ac5d156aaba466e8d0d84eb502d32f418f675e9 Mon Sep 17 00:00:00 2001 From: guohelu <19503896967@163.com> Date: Wed, 15 Jan 2025 11:17:55 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B8=85=E7=90=86=E8=BF=87=E6=9C=9F?= =?UTF-8?q?=E7=9A=84=E4=BB=BB=E5=8A=A1=E7=BB=9F=E8=AE=A1=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=20#7668?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/default.py | 6 ++++ env.py | 6 ++++ .../cleaner/pipeline/bamboo_engine_tasks.py | 15 ++++++++ gcloud/contrib/cleaner/tasks.py | 35 ++++++++++++++++++- 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/config/default.py b/config/default.py index 2e1e75557e..78ab6430fa 100644 --- a/config/default.py +++ b/config/default.py @@ -883,3 +883,9 @@ def check_engine_admin_permission(request, *args, **kwargs): if "BKAPP_SOPS_BROKER_URL" in os.environ: BROKER_URL = os.getenv("BKAPP_SOPS_BROKER_URL") print(f"BROKER_URL: {BROKER_URL}") + +# 统计信息清理配置 +ENABLE_CLEAN_EXPIRED_STATISTICS = env.ENABLE_CLEAN_EXPIRED_STATISTICS +STATISTICS_VALIDITY_DAY = env.STATISTICS_VALIDITY_DAY +CLEAN_EXPIRED_STATISTICS_BATCH_NUM = env.CLEAN_EXPIRED_STATISTICS_BATCH_NUM +CLEAN_EXPIRED_STATISTICS_CRON = env.CLEAN_EXPIRED_STATISTICS_CRON diff --git a/env.py b/env.py index 59c9045d14..3fb1006972 100644 --- a/env.py +++ b/env.py @@ -153,3 +153,9 @@ # bk_audit BK_AUDIT_ENDPOINT = os.getenv("BK_AUDIT_ENDPOINT", None) BK_AUDIT_DATA_TOKEN = os.getenv("BK_AUDIT_DATA_TOKEN", None) + +# 统计信息清理配置 +ENABLE_CLEAN_EXPIRED_STATISTICS = bool(os.getenv("BKAPP_ENABLE_CLEAN_EXPIRED_STATISTICS", False)) +STATISTICS_VALIDITY_DAY = int(os.getenv("BKAPP_STATISTICS_VALIDITY_DAY", 730)) +CLEAN_EXPIRED_STATISTICS_BATCH_NUM = int(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_BATCH_NUM", 100)) +CLEAN_EXPIRED_STATISTICS_CRON = tuple(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_CRON", "30 0 * * *").split()) diff --git a/gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py 
def get_clean_statistics_data(expire_time):
    """
    Collect expired statistics records, grouped by model.

    :param expire_time: records created before this datetime are considered expired
    :return: mapping of {model class -> queryset of expired record ids}
    """
    expired_task_ids = TaskflowStatistics.objects.filter(create_time__lt=expire_time).values_list("id", flat=True)
    expired_node_ids = TaskflowExecutedNodeStatistics.objects.filter(
        instance_create_time__lt=expire_time
    ).values_list("id", flat=True)

    return {
        TaskflowStatistics: expired_task_ids,
        TaskflowExecutedNodeStatistics: expired_node_ids,
    }
@periodic_task(
    run_every=(crontab(*settings.CLEAN_EXPIRED_STATISTICS_CRON)), ignore_result=True, queue="task_data_clean"
)
@time_record(logger)
def clear_statistics_info():
    """
    Periodically delete expired task statistics records.

    Controlled by settings:
      - ENABLE_CLEAN_EXPIRED_STATISTICS: feature switch for this task
      - STATISTICS_VALIDITY_DAY: retention window in days
      - CLEAN_EXPIRED_STATISTICS_BATCH_NUM: max rows deleted per model per run
        (remaining expired rows are picked up by subsequent runs)
    """
    if not settings.ENABLE_CLEAN_EXPIRED_STATISTICS:
        logger.info("Skip clean expired statistics data")
        return

    logger.info("Start clean expired statistics data...")
    try:
        validity_day = settings.STATISTICS_VALIDITY_DAY
        expire_time = timezone.now() - timezone.timedelta(days=validity_day)
        batch_num = settings.CLEAN_EXPIRED_STATISTICS_BATCH_NUM

        data_to_clean = get_clean_statistics_data(expire_time)

        for model, ids in data_to_clean.items():
            # Materialize the sliced queryset before using it in an __in lookup:
            # a sliced queryset would be rendered as a LIMIT inside an IN
            # subquery, which MySQL does not support.
            ids_to_delete = list(ids[:batch_num])
            if ids_to_delete:
                model.objects.filter(id__in=ids_to_delete).delete()
                logger.info(f"[clear_statistics_info] clean model: {model}, deleted ids: {ids_to_delete}")
        logger.info("[clear_statistics_info] success clean statistics")
    except Exception as e:
        # logger.exception records the traceback, consistent with
        # clean_expired_v2_task_data's error handling.
        logger.exception(f"Failed to clear expired statistics data: {e}")