# dbm-ui/backend/db_monitor/exceptions.py
class DutyRuleSaveException(DBMonitorBaseException):
    """Raised when a duty (on-call rotation) rule cannot be saved."""

    # Error code within the db_monitor exception family.
    ERROR_CODE = "204"
    # Default message, and the template used when a detail message is supplied.
    MESSAGE = _("轮值策略保存失败")
    MESSAGE_TPL = _("轮值策略保存失败: {message}")


# dbm-ui/backend/db_monitor/migrations/0021_dutyrule_biz_config.py
class Migration(migrations.Migration):
    """Add the ``biz_config`` JSON column to the ``dutyrule`` model."""

    dependencies = [("db_monitor", "0020_auto_20240621_1216")]

    operations = [
        # Existing rows receive an empty dict as their business configuration.
        migrations.AddField(
            model_name="dutyrule",
            name="biz_config",
            field=models.JSONField(
                default=dict,
                verbose_name="业务设置(包含业务include/排除业务exclude)",
            ),
        ),
    ]
self.monitor_duty_rule_id = resp["data"]["id"] - monitor_duty_rule_ids = ( - DutyRule.objects.filter(db_type=self.db_type) - .exclude(monitor_duty_rule_id=0) - .order_by("-priority") - .values_list("monitor_duty_rule_id", flat=True) - ) + duty_rules = DutyRule.get_biz_db_duty_rules(self.bk_biz_id, self.db_type) + monitor_duty_rule_ids = [rule.monitor_duty_rule_id for rule in duty_rules] save_monitor_group_params["need_duty"] = True save_monitor_group_params["duty_rules"] = list(monitor_duty_rule_ids) + [self.monitor_duty_rule_id] else: @@ -275,11 +272,15 @@ class DutyRule(AuditedModel): category = models.CharField(verbose_name=_("轮值类型"), choices=DutyRuleCategory.get_choices(), max_length=LEN_SHORT) db_type = models.CharField(_("数据库类型"), choices=DBType.get_choices(), max_length=LEN_SHORT) duty_arranges = models.JSONField(_("轮值人员设置")) + biz_config = models.JSONField(_("业务设置(包含业务include/排除业务exclude)"), default=dict) def save(self, *args, **kwargs): """ 保存轮值 """ + # 0. (前置校验)不允许同时存在包含业务和排除业务两个设置 + if self.biz_config.get("include") and self.biz_config.get("exclude"): + raise DutyRuleSaveException(_("不允许通知存在包含业务和排除业务配置")) # 1. 新建监控轮值 params = { "name": f"{self.db_type}_{self.name}", @@ -343,26 +344,43 @@ def save(self, *args, **kwargs): # 3. 判断是否需要变更用户组 # 3.1 非老规则(即新建的规则) need_update_user_group = not is_old_rule - # 3.2 调整了优先级的规则 + # 3.2 调整了优先级的规则,或者调整了业务配置 if self.pk: old_rule = DutyRule.objects.get(pk=self.pk) - if old_rule.priority != self.priority: + if old_rule.priority != self.priority or old_rule.biz_config != self.biz_config: need_update_user_group = True # 4. 保存本地轮值规则 super().save(*args, **kwargs) - # 5. 变更告警组 + # 5. 
# Methods of DutyRule (the class header lies outside this span).
def delete(self, using=None, keep_parents=False):
    """Delete the duty rule locally, then schedule its cleanup on the monitor side.

    Args:
        using: database alias forwarded to ``Model.delete``.
        keep_parents: forwarded to ``Model.delete``.

    Returns:
        Whatever ``Model.delete`` returns (number of deleted objects and a
        per-model breakdown).
    """
    from django.db import transaction

    # Snapshot the values the deferred callback needs.
    db_type, monitor_duty_rule_id = self.db_type, self.monitor_duty_rule_id

    deleted = super().delete(using=using, keep_parents=keep_parents)

    # The async task expects the rule to be gone from the local DB already, so
    # only enqueue it once the surrounding transaction (if any) has committed.
    # Outside a transaction the callback runs immediately.
    transaction.on_commit(
        lambda: delete_monitor_duty_rule.delay(db_type, monitor_duty_rule_id),
        using=using,
    )
    return deleted


@classmethod
def priority_distinct(cls) -> list:
    """Return the distinct priorities in use, highest first."""
    priorities = cls.objects.values_list("priority", flat=True).distinct().order_by("-priority")
    return list(priorities)


@classmethod
def get_biz_db_duty_rules(cls, bk_biz_id: int, db_type: str) -> list:
    """Return the duty rules of ``db_type`` that apply to business ``bk_biz_id``.

    Only rules with a non-zero ``monitor_duty_rule_id`` are considered, ordered
    by descending priority. A rule is skipped when its ``biz_config`` has a
    non-empty ``include`` list that does not contain the business, or a
    non-empty ``exclude`` list that does contain it.
    """
    candidates = cls.objects.filter(db_type=db_type).exclude(monitor_duty_rule_id=0).order_by("-priority")

    active_biz_duty_rules = []
    for rule in candidates:
        biz_config = rule.biz_config or {}
        include, exclude = biz_config.get("include"), biz_config.get("exclude")
        if include and bk_biz_id not in include:
            continue
        if exclude and bk_biz_id in exclude:
            continue
        active_biz_duty_rules.append(rule)

    return active_biz_duty_rules
class DutyRuleSerializer(AuditedSerializer, serializers.ModelSerializer):
    """Serializer for DutyRule that also exposes a display form of ``biz_config``."""

    biz_config_display = serializers.SerializerMethodField(help_text=_("业务配置信息"))

    @property
    def biz_name_map(self):
        """App-cache mapping, loaded once per serializer instance.

        Used below as ``{str(bk_biz_id): {"bk_biz_name": ...}}``.
        """
        if not hasattr(self, "_biz_name_map"):
            self._biz_name_map = AppCache.get_appcache(key="appcache_dict")
        return self._biz_name_map

    class Meta:
        model = DutyRule
        fields = "__all__"

    def get_biz_config_display(self, obj):
        """Expand each business id in ``obj.biz_config`` into ``{bk_biz_id, bk_biz_name}``.

        A business missing from the app cache gets an empty name instead of
        raising ``KeyError``, so one stale id cannot break the whole response.
        """
        biz_name_map = self.biz_name_map or {}
        return {
            key: [
                {
                    "bk_biz_id": biz,
                    "bk_biz_name": biz_name_map.get(str(biz), {}).get("bk_biz_name", ""),
                }
                for biz in bizs or []
            ]
            for key, bizs in (obj.biz_config or {}).items()
        }


class DutyRuleCreateSerializer(DutyRuleSerializer):
    """Create-request serializer; differs only in its swagger example."""

    class Meta:
        model = DutyRule
        fields = "__all__"
        swagger_schema_fields = {"example": mock_data.CREATE_HANDOFF_DUTY_RULE}


class DutyRuleUpdateSerializer(DutyRuleSerializer):
    """Update-request serializer; differs only in its swagger example."""

    class Meta:
        model = DutyRule
        fields = "__all__"
        swagger_schema_fields = {"example": mock_data.CREATE_CUSTOM_DUTY_RULE}
@shared_task
def update_db_notice_group(db_type: str):
    """Re-save every built-in notice group of the given DB type."""
    # Imported here rather than at module level, as in the original code.
    from backend.db_monitor.models import NoticeGroup

    for notice_group in NoticeGroup.objects.filter(is_built_in=True, db_type=db_type):
        logger.info("[local_notice_group] update notice group: %s", notice_group.name)
        notice_group.save()


@shared_task
def delete_monitor_duty_rule(db_type: str, monitor_duty_rule_id):
    """Unbind the duty rule from its notice groups, then delete it in the monitor platform.

    The caller must already have removed the rule from the local DBM database.
    """
    # Called directly (not via .delay) so the refresh runs inside this task
    # before the delete request below.
    update_db_notice_group(db_type)

    # Duty-rule queries elsewhere exclude monitor_duty_rule_id == 0, so a falsy
    # id presumably means the rule was never registered and there is nothing to delete.
    if not monitor_duty_rule_id:
        logger.info("[duty_rule] skip deleting duty rule without monitor id: %s", monitor_duty_rule_id)
        return

    logger.info("[duty_rule] delete duty rule: %s", monitor_duty_rule_id)

    try:
        BKMonitorV3Api.delete_duty_rules({"ids": [monitor_duty_rule_id], "bk_biz_ids": [env.DBA_APP_BK_BIZ_ID]})
    except ApiResultError as e:
        # Deliberately best-effort: the rule is already gone from DBM, so a
        # failure here only leaves a stale record in the monitor platform.
        logger.error("[duty_rule] error in deleting duty: %s", e)
    except Exception as e:
        # Same best-effort policy, but keep the traceback for unexpected errors.
        logger.exception("[duty_rule] error in deleting duty: %s", e)