diff --git a/operations/template/alert.tf b/operations/template/alert.tf
index 642b75089..109e27d02 100644
--- a/operations/template/alert.tf
+++ b/operations/template/alert.tf
@@ -28,6 +28,47 @@ resource "azurerm_monitor_action_group" "notify_slack_email" {
   }
 }
 
+resource "azurerm_monitor_activity_log_alert" "azure_service_health_alert" { # Activity-log alert for Azure Service Health incidents, routed to the notify_slack_email action group.
+  count               = local.non_pr_environment ? 1 : 0 # one instance when local.non_pr_environment is true, none otherwise
+  name                = "cdcti-${var.environment}-azure-status-alert"
+  location            = data.azurerm_resource_group.group.location # NOTE(review): reuses the resource group's region - confirm the provider accepts it for activity log alerts
+  resource_group_name = data.azurerm_resource_group.group.name
+  scopes              = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"] # scope is the subscription of the current client config
+
+  criteria {
+    category = "ServiceHealth"
+    levels   = ["Error"] # only Error-level events are matched
+    service_health {
+      locations = ["global"] # NOTE(review): presumably Azure's "global" service-health location - confirm regional incidents are not needed
+      events    = ["Incident"] # only the Incident event type is listed
+    }
+  }
+
+  action {
+    action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id # NOTE(review): assumes the action group exists at the same count index - verify its count condition
+  }
+
+  description = "Alert service(s) appear to be down"
+  enabled     = true
+
+  lifecycle { # NOTE(review): presumably these tags are applied outside Terraform - confirm
+    ignore_changes = [ # changes to the tag keys listed below do not produce a plan diff
+      tags["business_steward"],
+      tags["center"],
+      tags["environment"],
+      tags["escid"],
+      tags["funding_source"],
+      tags["pii_data"],
+      tags["security_compliance"],
+      tags["security_steward"],
+      tags["support_group"],
+      tags["system"],
+      tags["technical_steward"],
+      tags["zone"]
+    ]
+  }
+}
+
 resource "azurerm_monitor_scheduled_query_rules_alert" "database_token_expired_alert" {
   count = local.non_pr_environment ? 1 : 0
   name = "cdcti-${var.environment}-api-log-token-alert"