diff --git a/cabot/cabotapp/alert.py b/cabot/cabotapp/alert.py index 2f3b4a7fd..9f2a3b87c 100644 --- a/cabot/cabotapp/alert.py +++ b/cabot/cabotapp/alert.py @@ -1,6 +1,8 @@ import logging +from datetime import timedelta from django.db import models +from django.utils import timezone from polymorphic import PolymorphicModel logger = logging.getLogger(__name__) @@ -35,18 +37,33 @@ def __unicode__(self): return u'%s' % (self.title) -def send_alert(service, duty_officers=[], fallback_officers=[]): +def send_alert(service, duty_officers=None, + escalation_officers=None, + fallback_officers=None): + + duty_officers = duty_officers or [] + escalation_officers = escalation_officers or [] + fallback_officers = fallback_officers or [] + + escalation_cutoff = timezone.now() - timedelta( + minutes=service.escalate_after) + users = service.users_to_notify.filter(is_active=True) + for alert in service.alerts.all(): - try: - alert.send_alert(service, users, duty_officers) - except Exception: - logging.exception('Could not sent {} alert'.format(alert.name)) - if fallback_officers: - try: - alert.send_alert(service, users, fallback_officers) - except Exception: - logging.exception('Could not send {} alert to fallback officer'.format(alert.name)) + for user_list in [duty_officers, escalation_officers, fallback_officers]: + if not user_list: + continue + try: + alert.send_alert(service, users, user_list) + break + except Exception: + logging.exception('Could not sent {} alert'.format(alert.name)) + + if escalation_cutoff < service.last_alert_sent: + logging.info('Service {}: Not escalating {}'.format( + service.name, alert.name)) + break def update_alert_plugins(): diff --git a/cabot/cabotapp/models.py b/cabot/cabotapp/models.py index 336221a02..099c1dca2 100644 --- a/cabot/cabotapp/models.py +++ b/cabot/cabotapp/models.py @@ -96,6 +96,20 @@ class Meta: null=True, help_text='Oncall schedule to be alerted.' ) + escalation_schedules = models.ManyToManyField( + 'Escalation Schedules', + blank=True, + null=True, + help_text='Oncall schedule to be alerted in case of missed alerts.' + ) + escalate_after = models.IntegerField( + 'Escalation timeout (minutes)', + blank=True, + null=True, + default=5, + help_text='The time, in minutes, after which the alert is escalated. ' + 'If set to 0, the alert is escalated immediately', + ) alerts_enabled = models.BooleanField( default=True, help_text='Alert when this service is not healthy.', @@ -190,13 +204,15 @@ def alert(self): self.snapshot.did_send_alert = True self.snapshot.save() - schedules = self.schedules.all() + schedules = self.schedules.all() or [] - if not schedules: - send_alert(self) + escalation_officers = [] + for escalation in self.escalation_schedules.all(): + escalation_officers.extend(get_duty_officers(escalation)) for schedule in schedules: send_alert(self, duty_officers=get_duty_officers(schedule), + escalation_officers=escalation_officers, fallback_officers=get_fallback_officers(schedule)) @property diff --git a/cabot/cabotapp/views.py b/cabot/cabotapp/views.py index 1f34f3eb5..d1b6e20f2 100644 --- a/cabot/cabotapp/views.py +++ b/cabot/cabotapp/views.py @@ -225,6 +225,8 @@ class Meta: 'url', 'users_to_notify', 'schedules', + 'escalation_schedules', + 'escalate_after', 'status_checks', 'alerts', 'alerts_enabled',