Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Azure Memory Usage Alert #1546

Merged
merged 16 commits into from
Nov 5, 2024
Merged
82 changes: 82 additions & 0 deletions operations/template/alert.tf
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,88 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" {
}
}

# Memory alert for the API web app that uses dynamic criteria (no fixed
# threshold is configured in this block). It is created only when
# local.non_pr_environment is true; otherwise count is 0 and nothing is deployed.
resource "azurerm_monitor_metric_alert" "ti_dynamic_memory_alert" {
  count = local.non_pr_environment ? 1 : 0

  # Identity and placement
  name                = "cdcti-${var.environment}-dynamic-memory-alert"
  description         = "Alert when memory usage is high on CDC TI."
  resource_group_name = data.azurerm_resource_group.group.name
  scopes              = [azurerm_linux_web_app.api.id]

  # Evaluation settings (ISO 8601 durations: 5 minutes and 15 minutes)
  severity    = 2
  frequency   = "PT5M"
  window_size = "PT15M"

  # Watches the average of the MemoryWorkingSet metric on the web app and
  # fires on "GreaterThan" with medium sensitivity.
  dynamic_criteria {
    metric_namespace  = "Microsoft.Web/sites"
    metric_name       = "MemoryWorkingSet"
    aggregation       = "Average"
    operator          = "GreaterThan"
    alert_sensitivity = "Medium"
  }

  # Route notifications to the action group instance matching this alert's index.
  action {
    action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id
  }

  lifecycle {
    # The CDC sets these tags automatically, so changes to them are ignored
    # to keep them from showing up as drift.
    ignore_changes = [
      tags["business_steward"],
      tags["center"],
      tags["environment"],
      tags["escid"],
      tags["funding_source"],
      tags["pii_data"],
      tags["security_compliance"],
      tags["security_steward"],
      tags["support_group"],
      tags["system"],
      tags["technical_steward"],
      tags["zone"]
    ]
  }
}

# Static-threshold memory alert for the API web app. It is created only when
# local.non_pr_environment is true; otherwise count is 0 and nothing is deployed.
resource "azurerm_monitor_metric_alert" "ti_memory_alert" {
  count = local.non_pr_environment ? 1 : 0

  # Identity and placement
  name                = "cdcti-${var.environment}-memory-alert"
  description         = "Alert when memory usage is high on CDC TI."
  resource_group_name = data.azurerm_resource_group.group.name
  scopes              = [azurerm_linux_web_app.api.id]

  # Evaluation settings (ISO 8601 durations: 5 minutes and 15 minutes)
  severity    = 2
  frequency   = "PT5M"
  window_size = "PT15M"

  # Fires when the average MemoryWorkingSet exceeds a fixed byte threshold.
  criteria {
    metric_namespace = "Microsoft.Web/sites"
    metric_name      = "MemoryWorkingSet"
    aggregation      = "Average"
    operator         = "GreaterThan"

    # Threshold is in bytes: 4 GB when local.higher_environment_level is true,
    # 2 GB otherwise. Per the original author's note, this is half of what the
    # service plan allows.
    threshold = local.higher_environment_level ? 4000000000 : 2000000000
  }

  # Route notifications to the action group instance matching this alert's index.
  action {
    action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id
  }

  lifecycle {
    # The CDC sets these tags automatically, so changes to them are ignored
    # to keep them from showing up as drift.
    ignore_changes = [
      tags["business_steward"],
      tags["center"],
      tags["environment"],
      tags["escid"],
      tags["funding_source"],
      tags["pii_data"],
      tags["security_compliance"],
      tags["security_steward"],
      tags["support_group"],
      tags["system"],
      tags["technical_steward"],
      tags["zone"]
    ]
  }
}

resource "azurerm_monitor_metric_alert" "low_instance_count_alert" {
count = local.non_pr_environment ? 1 : 0
name = "cdcti-${var.environment}-azure-low-instance-count-alert"
Expand Down
Loading