Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Escalations #112

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions cabot/cabotapp/alert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging

from datetime import timedelta
from django.db import models
from django.utils import timezone
from polymorphic import PolymorphicModel

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -35,18 +37,33 @@ def __unicode__(self):
return u'%s' % (self.title)


def send_alert(service, duty_officers=[], fallback_officers=[]):
def send_alert(service, duty_officers=None,
escalation_officers=None,
fallback_officers=None):

duty_officers = duty_officers or []
escalation_officers = escalation_officers or []
fallback_officers = fallback_officers or []

escalation_cutoff = timezone.now() - timedelta(
minutes=service.escalate_after)

users = service.users_to_notify.filter(is_active=True)

for alert in service.alerts.all():
try:
alert.send_alert(service, users, duty_officers)
except Exception:
logging.exception('Could not sent {} alert'.format(alert.name))
if fallback_officers:
try:
alert.send_alert(service, users, fallback_officers)
except Exception:
logging.exception('Could not send {} alert to fallback officer'.format(alert.name))
for user_list in [duty_officers, escalation_officers, fallback_officers]:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand the logic here. Seems like your new code:

  1. Sends alerts to the first non-empty officers list.
  2. If that succeeds, quits.
  3. If an exception is thrown, checks whether we should escalate. If we are scheduled to escalate, it loops and alerts the next set of officers; otherwise, it quits.

It seems that if alerts are delivered successfully, we'll never escalate. Or am I missing something?

if not user_list:
continue
try:
alert.send_alert(service, users, user_list)
break
except Exception:
logging.exception('Could not sent {} alert'.format(alert.name))

if escalation_cutoff < service.last_alert_sent:
logging.info('Service {}: Not escalating {}'.format(
service.name, alert.name))
break


def update_alert_plugins():
Expand Down
22 changes: 19 additions & 3 deletions cabot/cabotapp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ class Meta:
null=True,
help_text='Oncall schedule to be alerted.'
)
escalation_schedules = models.ManyToManyField(
'Escalation Schedules',
blank=True,
null=True,
help_text='Oncall schedule to be alerted in case of missed alerts.'
)
escalate_after = models.IntegerField(
'Escalation timeout (minutes)',
blank=True,
null=True,
default=5,
help_text='The time, in minutes, after which the alert is escalated. '
'If set to 0, the alert is escalated immediately',
)
alerts_enabled = models.BooleanField(
default=True,
help_text='Alert when this service is not healthy.',
Expand Down Expand Up @@ -190,13 +204,15 @@ def alert(self):
self.snapshot.did_send_alert = True
self.snapshot.save()

schedules = self.schedules.all()
schedules = self.schedules.all() or []

if not schedules:
send_alert(self)
escalation_officers = []
for escalation in self.escalation_schedules.all():
escalation_officers.extend(get_duty_officers(escalation))

for schedule in schedules:
send_alert(self, duty_officers=get_duty_officers(schedule),
escalation_officers=escalation_officers,
fallback_officers=get_fallback_officers(schedule))

@property
Expand Down
2 changes: 2 additions & 0 deletions cabot/cabotapp/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ class Meta:
'url',
'users_to_notify',
'schedules',
'escalation_schedules',
'escalate_after',
'status_checks',
'alerts',
'alerts_enabled',
Expand Down