diff --git a/operations/template/alert.tf b/operations/template/alert.tf index ffedb85c0..4d7211cf9 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -160,3 +160,58 @@ resource "azurerm_monitor_metric_alert" "low_instance_count_alert" { ] } } + +resource "azurerm_monitor_scheduled_query_rules_alert" "ti-log-errors-alert" { + count = local.non_pr_environment ? 1 : 0 + name = "cdcti-${var.environment}-log-errors-alert" + location = data.azurerm_resource_group.group.location + resource_group_name = data.azurerm_resource_group.group.name + + action { + action_group = [azurerm_monitor_action_group.notify_slack_email[count.index].id] + email_subject = "${var.environment}: TI log errors detected!" + } + + data_source_id = azurerm_linux_web_app.api.id + description = "Alert when total errors cross threshold" + enabled = true + + query = <<-QUERY + AppServiceConsoleLogs + | where TimeGenerated >= ago(00.001m) + and TimeGenerated <= now() + | project columnifexists("ResultDescription", 'default_value') + | project JsonResult = parse_json(ResultDescription) + | evaluate bag_unpack(JsonResult) : (level:string) + | where level in ( 'ERROR' ) + | summarize count() + QUERY + + severity = 3 + frequency = 10 + time_window = 30 + auto_mitigation_enabled = true + + trigger { + operator = "GreaterThan" + threshold = 1 + } + + # below tags are managed by CDC + lifecycle { + ignore_changes = [ + tags["business_steward"], + tags["center"], + tags["environment"], + tags["escid"], + tags["funding_source"], + tags["pii_data"], + tags["security_compliance"], + tags["security_steward"], + tags["support_group"], + tags["system"], + tags["technical_steward"], + tags["zone"] + ] + } +}