Skip to content

Commit

Permalink
Add TI app CPU alert
Browse files Browse the repository at this point in the history
Co-Authored-By: Samuel Aquino <[email protected]>
Co-Authored-By: jherrflexion <[email protected]>
Co-Authored-By: jcrichlake <[email protected]>
  • Loading branch information
4 people committed Nov 5, 2024
1 parent 2b1d4eb commit b3f1731
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions operations/template/alert.tf
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,47 @@ resource "azurerm_monitor_metric_alert" "memory_alert" {
}
}

# Metric alert on average CPU usage of the service plan.
# Only instantiated when local.non_pr_environment is true (count is 1, otherwise 0).
resource "azurerm_monitor_metric_alert" "cpu_alert" {
  count = local.non_pr_environment ? 1 : 0

  name                = "cdcti-${var.environment}-cpu-alert"
  description         = "Alerts when the average CPU usage across the service plan is high"
  resource_group_name = data.azurerm_resource_group.group.name
  scopes              = [azurerm_service_plan.plan.id]
  severity            = 2

  # ISO 8601 durations: evaluated every minute against a ten-minute window
  frequency   = "PT1M"
  window_size = "PT10M"

  criteria {
    metric_namespace = "microsoft.web/serverfarms"
    metric_name      = "CpuPercentage"
    aggregation      = "Average"
    operator         = "GreaterThan"

    # We autoscale at 75%, so CPU over that means either we've hit the
    # scaling limit or something is wrong with scaling
    threshold = 80
  }

  action {
    # NOTE(review): indexing by count.index presumes the action group is created
    # under the same count condition as this alert - confirm
    action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id
  }

  lifecycle {
    # The CDC sets these tags automatically, so changes to them are ignored
    ignore_changes = [
      tags["business_steward"],
      tags["center"],
      tags["environment"],
      tags["escid"],
      tags["funding_source"],
      tags["pii_data"],
      tags["security_compliance"],
      tags["security_steward"],
      tags["support_group"],
      tags["system"],
      tags["technical_steward"],
      tags["zone"]
    ]
  }
}

resource "azurerm_monitor_metric_alert" "low_instance_count_alert" {
count = local.non_pr_environment ? 1 : 0
name = "cdcti-${var.environment}-azure-low-instance-count-alert"
Expand Down

0 comments on commit b3f1731

Please sign in to comment.