From 4571bbbc2594e9e5f9d35d6690d5bde2c27a3aca Mon Sep 17 00:00:00 2001
From: Mahendra
Date: Wed, 30 May 2018 14:15:43 -0700
Subject: [PATCH 1/3] Support for escalations

---
 cabot/cabotapp/alert.py  | 37 +++++++++++++++++++++++++++----------
 cabot/cabotapp/models.py | 20 +++++++++++++++++---
 cabot/cabotapp/views.py  |  2 ++
 3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/cabot/cabotapp/alert.py b/cabot/cabotapp/alert.py
index 2f3b4a7fd..9f2a3b87c 100644
--- a/cabot/cabotapp/alert.py
+++ b/cabot/cabotapp/alert.py
@@ -1,6 +1,8 @@
 import logging
+from datetime import timedelta
 
 from django.db import models
+from django.utils import timezone
 from polymorphic import PolymorphicModel
 
 logger = logging.getLogger(__name__)
@@ -35,18 +37,33 @@ def __unicode__(self):
         return u'%s' % (self.title)
 
 
-def send_alert(service, duty_officers=[], fallback_officers=[]):
+def send_alert(service, duty_officers=None,
+               escalation_officers=None,
+               fallback_officers=None):
+
+    duty_officers = duty_officers or []
+    escalation_officers = escalation_officers or []
+    fallback_officers = fallback_officers or []
+
+    escalation_cutoff = timezone.now() - timedelta(
+        minutes=service.escalate_after)
+
     users = service.users_to_notify.filter(is_active=True)
+
     for alert in service.alerts.all():
-        try:
-            alert.send_alert(service, users, duty_officers)
-        except Exception:
-            logging.exception('Could not sent {} alert'.format(alert.name))
-            if fallback_officers:
-                try:
-                    alert.send_alert(service, users, fallback_officers)
-                except Exception:
-                    logging.exception('Could not send {} alert to fallback officer'.format(alert.name))
+        for user_list in [duty_officers, escalation_officers, fallback_officers]:
+            if not user_list:
+                continue
+            try:
+                alert.send_alert(service, users, user_list)
+                break
+            except Exception:
+                logging.exception('Could not sent {} alert'.format(alert.name))
+
+            if escalation_cutoff < service.last_alert_sent:
+                logging.info('Service {}: Not escalating {}'.format(
+                    service.name, alert.name))
+                break
 
 
 def update_alert_plugins():
diff --git a/cabot/cabotapp/models.py b/cabot/cabotapp/models.py
index 336221a02..bd7f22b76 100644
--- a/cabot/cabotapp/models.py
+++ b/cabot/cabotapp/models.py
@@ -96,6 +96,18 @@ class Meta:
         null=True,
         help_text='Oncall schedule to be alerted.'
     )
+    escalation_schedules = models.ManyToManyField(
+        'Escalation Schedules',
+        blank=True,
+        null=True,
+        help_text='Oncall schedule to be alerted in case of missed alerts.'
+    )
+    escalate_after = models.IntegerField(
+        'Escalation timeout (minutes)',
+        blank=True,
+        null=True,
+        help_text='The time, in minutes, after which the alert is escalated',
+    )
     alerts_enabled = models.BooleanField(
         default=True,
         help_text='Alert when this service is not healthy.',
@@ -190,13 +202,15 @@ def alert(self):
         self.snapshot.did_send_alert = True
         self.snapshot.save()
 
-        schedules = self.schedules.all()
+        schedules = self.schedules.all() or []
 
-        if not schedules:
-            send_alert(self)
+        escalation_officers = []
+        for escalation in self.escalation_schedules.all():
+            escalation_officers.extend(get_duty_officers(escalation))
 
         for schedule in schedules:
             send_alert(self, duty_officers=get_duty_officers(schedule),
+                       escalation_officers=escalation_officers,
                        fallback_officers=get_fallback_officers(schedule))
 
     @property
diff --git a/cabot/cabotapp/views.py b/cabot/cabotapp/views.py
index 1f34f3eb5..d1b6e20f2 100644
--- a/cabot/cabotapp/views.py
+++ b/cabot/cabotapp/views.py
@@ -225,6 +225,8 @@ class Meta:
             'url',
             'users_to_notify',
             'schedules',
+            'escalation_schedules',
+            'escalate_after',
             'status_checks',
             'alerts',
             'alerts_enabled',

From c7361da909b577bebafa48b1a7e3148cf5f4967d Mon Sep 17 00:00:00 2001
From: Mahendra
Date: Wed, 30 May 2018 14:16:51 -0700
Subject: [PATCH 2/3] Set default time for escalations

---
 cabot/cabotapp/models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cabot/cabotapp/models.py b/cabot/cabotapp/models.py
index bd7f22b76..6a87f49df 100644
--- a/cabot/cabotapp/models.py
+++ b/cabot/cabotapp/models.py
@@ -106,6 +106,7 @@ class Meta:
         'Escalation timeout (minutes)',
         blank=True,
         null=True,
+        default=5,
         help_text='The time, in minutes, after which the alert is escalated',
     )
     alerts_enabled = models.BooleanField(

From a626319c51b79ea64c91fd3911fb7744c7b65c9b Mon Sep 17 00:00:00 2001
From: Mahendra
Date: Wed, 30 May 2018 14:18:46 -0700
Subject: [PATCH 3/3] Update help_text

---
 cabot/cabotapp/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cabot/cabotapp/models.py b/cabot/cabotapp/models.py
index 6a87f49df..099c1dca2 100644
--- a/cabot/cabotapp/models.py
+++ b/cabot/cabotapp/models.py
@@ -107,7 +107,8 @@ class Meta:
         blank=True,
         null=True,
         default=5,
-        help_text='The time, in minutes, after which the alert is escalated',
+        help_text='The time, in minutes, after which the alert is escalated. '
+                  'If set to 0, the alert is escalated immediately',
     )
     alerts_enabled = models.BooleanField(
         default=True,