Skip to content

Commit

Permalink
Merge pull request #1519 from CDCgov/4xx-alert-ignore-rs
Browse files Browse the repository at this point in the history
Update 4XX Alert to Ignore RS Receiver Status Check
  • Loading branch information
halprin authored Oct 30, 2024
2 parents b2ce274 + 3d4082b commit 21fedf5
Showing 1 changed file with 27 additions and 14 deletions.
41 changes: 27 additions & 14 deletions operations/template/alert.tf
Original file line number Diff line number Diff line change
Expand Up @@ -124,30 +124,43 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "database_token_expired_a
}
}

resource "azurerm_monitor_metric_alert" "azure_4XX_alert" {
resource "azurerm_monitor_scheduled_query_rules_alert" "azure_4XX_alert" {
count = local.non_pr_environment ? 1 : 0
name = "cdcti-${var.environment}-azure-http-4XX-alert"
location = data.azurerm_resource_group.group.location
resource_group_name = data.azurerm_resource_group.group.name
scopes = [azurerm_linux_web_app.api.id]
description = "Action will be triggered when Http Status Code 4XX is greater than or equal to 3"
frequency = "PT1M" // Checks every 1 minute
window_size = "PT1H" // Every Check looks back 1 hour for 4xx errors

criteria {
metric_namespace = "Microsoft.Web/sites"
metric_name = "Http4xx"
aggregation = "Total"
operator = "GreaterThanOrEqual"
threshold = 3
action {
action_group = [azurerm_monitor_action_group.notify_slack_email[count.index].id]
email_subject = "${var.environment}: TI 4xx errors detected!"
}

action {
action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id
data_source_id = azurerm_linux_web_app.api.id
description = "Alert when 4xx errors cross threshold"
enabled = true

query = <<-QUERY
AppServiceHTTPLogs |
where CsHost !contains "pre-live" |
where UserAgent != "AlwaysOn" |
where not(CsMethod == "GET" and CsUriStem startswith "/v1/etor/") | // ignore RS receiver status check that uses GET
where not(CsMethod == "GET" and CsUriStem startswith "/admin/") | // ignore Microsoft hitting non-existent URLs
where ScStatus >= 400 and ScStatus < 500
QUERY

severity = 3
frequency = 5
time_window = 60
auto_mitigation_enabled = true

trigger {
operator = "GreaterThanOrEqual"
threshold = 3
}

lifecycle {
# Ignore changes to tags because the CDC sets these automagically
ignore_changes = [
# below tags are managed by CDC
tags["business_steward"],
tags["center"],
tags["environment"],
Expand Down

0 comments on commit 21fedf5

Please sign in to comment.