From 1f774d0b1ead537c1f33319300d03bf3186d7348 Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 09:32:02 -0600 Subject: [PATCH 01/13] WIP Initial start to memory alert --- operations/template/alert.tf | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 40297a534..2181be721 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -217,6 +217,43 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" { } } +resource "azurerm_monitor_metric_alertrule" "ti-memory_alert" { + count = local.non_pr_environment ? 1 : 0 + name = "cdcti-${var.environment}-memory-alert" + resource_group_name = data.azurerm_resource_group.group.name + location = data.azurerm_resource_group.group.location + description = "Alert when memory usage is high on CDC TI." + period = "PT5M" + + metric_name = "PercentageMemory" + metric_namespace = "Microsoft.Compute/virtualMachines" + operator = "GreaterThan" + threshold = 80 + + + action { + action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id + } + + lifecycle { + # Ignore changes to tags because the CDC sets these automagically + ignore_changes = [ + tags["business_steward"], + tags["center"], + tags["environment"], + tags["escid"], + tags["funding_source"], + tags["pii_data"], + tags["security_compliance"], + tags["security_steward"], + tags["support_group"], + tags["system"], + tags["technical_steward"], + tags["zone"] + ] + } +} + resource "azurerm_monitor_metric_alert" "low_instance_count_alert" { count = local.non_pr_environment ? 
1 : 0 name = "cdcti-${var.environment}-azure-low-instance-count-alert" From d9ff64a0610e665ddbc03803c5ee89585a38d48e Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 09:43:42 -0600 Subject: [PATCH 02/13] Removed deprecated type --- operations/template/alert.tf | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 2181be721..81b38982e 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -217,19 +217,24 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" { } } -resource "azurerm_monitor_metric_alertrule" "ti-memory_alert" { +resource "azurerm_monitor_metric_alert" "ti_memory_alert" { count = local.non_pr_environment ? 1 : 0 name = "cdcti-${var.environment}-memory-alert" resource_group_name = data.azurerm_resource_group.group.name - location = data.azurerm_resource_group.group.location description = "Alert when memory usage is high on CDC TI." 
- period = "PT5M" - - metric_name = "PercentageMemory" - metric_namespace = "Microsoft.Compute/virtualMachines" - operator = "GreaterThan" - threshold = 80 + severity = 2 + enabled = true + frequency = "PT5M" + window_size = "PT15M" + scopes = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"] + criteria { + metric_name = "Percentage Memory" + metric_namespace = "Microsoft.Compute/virtualMachines" + aggregation = "Average" + operator = "GreaterThan" + threshold = 80 + } action { action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id From 0df9453954475ca0e18108a159ad96915e0ce7e7 Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 11:58:53 -0600 Subject: [PATCH 03/13] Updated memory alert config based on ClickOps Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> --- operations/template/alert.tf | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 81b38982e..0db233a6e 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -223,17 +223,16 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { resource_group_name = data.azurerm_resource_group.group.name description = "Alert when memory usage is high on CDC TI." 
severity = 2 - enabled = true frequency = "PT5M" window_size = "PT15M" scopes = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"] - criteria { - metric_name = "Percentage Memory" - metric_namespace = "Microsoft.Compute/virtualMachines" - aggregation = "Average" - operator = "GreaterThan" - threshold = 80 + dynamic_criteria { + metric_name = "MemoryWorkingSet" + metric_namespace = "Microsoft.Web/sites" + aggregation = "Average" + operator = "GreaterThan" + alert_sensitivity = "Medium" } action { From 2aea3f9d2a0adde7744c546a68e5d3784b8b6a6d Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 12:05:48 -0600 Subject: [PATCH 04/13] Added resource_type Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> --- operations/template/alert.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 0db233a6e..feb9b7822 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -226,6 +226,7 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { frequency = "PT5M" window_size = "PT15M" scopes = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"] + target_resource_type= "Microsoft.Web/sites" dynamic_criteria { metric_name = "MemoryWorkingSet" From 19485c5872db174fe8ea2f22b422bbd09d236e7a Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 12:11:24 -0600 Subject: [PATCH 05/13] Changed scope for memory alert Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> --- operations/template/alert.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index feb9b7822..7dcad64df 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -225,8 +225,7 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { severity = 2 frequency = "PT5M" 
window_size = "PT15M" - scopes = ["/subscriptions/${data.azurerm_client_config.current.subscription_id}"] - target_resource_type= "Microsoft.Web/sites" + scopes = [azurerm_linux_web_app.api.id] dynamic_criteria { metric_name = "MemoryWorkingSet" From f19513710b4cb4c08b9f0c0a4c4ebf7742eba759 Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 12:11:40 -0600 Subject: [PATCH 06/13] Formatting Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> --- operations/template/alert.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 7dcad64df..7a6bdbc66 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -221,11 +221,11 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { count = local.non_pr_environment ? 1 : 0 name = "cdcti-${var.environment}-memory-alert" resource_group_name = data.azurerm_resource_group.group.name + scopes = [azurerm_linux_web_app.api.id] description = "Alert when memory usage is high on CDC TI." 
severity = 2 frequency = "PT5M" window_size = "PT15M" - scopes = [azurerm_linux_web_app.api.id] dynamic_criteria { metric_name = "MemoryWorkingSet" From 3f7d32744576257e4aa7e70a4315f7fa99b83763 Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 12:54:49 -0600 Subject: [PATCH 07/13] Changed criteria type for memory alert Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 7a6bdbc66..d3249b18d 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -227,12 +227,12 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { frequency = "PT5M" window_size = "PT15M" - dynamic_criteria { + criteria { metric_name = "MemoryWorkingSet" metric_namespace = "Microsoft.Web/sites" aggregation = "Average" operator = "GreaterThan" - alert_sensitivity = "Medium" + threshold = local.higher_environment_level ? "4GB" : "2GB" } action { From accd929ff02bebf45878b896609a798086f515dc Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 12:58:34 -0600 Subject: [PATCH 08/13] Changed threshold to bytes Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index d3249b18d..2761dc074 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -232,7 +232,7 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { metric_namespace = "Microsoft.Web/sites" aggregation = "Average" operator = "GreaterThan" - threshold = local.higher_environment_level ? 
"4GB" : "2GB" + threshold = local.higher_environment_level ? 4000000000 : 2000000000 #4gb and 2gb in bytes. This is half what the service plan allows } action { From 852c6e953a12b598d5916ec32ff0d8384118ed98 Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 14:54:16 -0600 Subject: [PATCH 09/13] Added a dynamic memory alert Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 2761dc074..f62507267 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -217,6 +217,47 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" { } } +resource "azurerm_monitor_metric_alert" "ti_dynamic_memory_alert" { + count = local.non_pr_environment ? 1 : 0 + name = "cdcti-${var.environment}-memory-alert" + resource_group_name = data.azurerm_resource_group.group.name + scopes = [azurerm_linux_web_app.api.id] + description = "Alert when memory usage is high on CDC TI." 
+ severity = 2 + frequency = "PT5M" + window_size = "PT15M" + + dynamic_criteria { + metric_name = "MemoryWorkingSet" + metric_namespace = "Microsoft.Web/sites" + aggregation = "Average" + operator = "GreaterThan" + alert_sensitivity = "Medium" + } + + action { + action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id + } + + lifecycle { + # Ignore changes to tags because the CDC sets these automagically + ignore_changes = [ + tags["business_steward"], + tags["center"], + tags["environment"], + tags["escid"], + tags["funding_source"], + tags["pii_data"], + tags["security_compliance"], + tags["security_steward"], + tags["support_group"], + tags["system"], + tags["technical_steward"], + tags["zone"] + ] + } +} + resource "azurerm_monitor_metric_alert" "ti_memory_alert" { count = local.non_pr_environment ? 1 : 0 name = "cdcti-${var.environment}-memory-alert" From 75726dd126bb4628299ab82c3591300fbfc416fa Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 14:57:21 -0600 Subject: [PATCH 10/13] Fixed dynamic name Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index f62507267..e9563a62e 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -219,7 +219,7 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" { resource "azurerm_monitor_metric_alert" "ti_dynamic_memory_alert" { count = local.non_pr_environment ? 1 : 0 - name = "cdcti-${var.environment}-memory-alert" + name = "cdcti-${var.environment}-dynamic-memory-alert" resource_group_name = data.azurerm_resource_group.group.name scopes = [azurerm_linux_web_app.api.id] description = "Alert when memory usage is high on CDC TI." 
From 1e4fab1dfaa1da8e0b6c406df8128b6704c0c89e Mon Sep 17 00:00:00 2001 From: James Herr Date: Mon, 4 Nov 2024 15:29:30 -0600 Subject: [PATCH 11/13] Added spaces Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index e9563a62e..52c046e91 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -269,11 +269,11 @@ resource "azurerm_monitor_metric_alert" "ti_memory_alert" { window_size = "PT15M" criteria { - metric_name = "MemoryWorkingSet" - metric_namespace = "Microsoft.Web/sites" - aggregation = "Average" - operator = "GreaterThan" - threshold = local.higher_environment_level ? 4000000000 : 2000000000 #4gb and 2gb in bytes. This is half what the service plan allows + metric_name = "MemoryWorkingSet" + metric_namespace = "Microsoft.Web/sites" + aggregation = "Average" + operator = "GreaterThan" + threshold = local.higher_environment_level ? 4000000000 : 2000000000 #4gb and 2gb in bytes. 
This is half what the service plan allows } action { From 1cb3ff4aae9ffebb6dfc9c7d9be75614dc11565a Mon Sep 17 00:00:00 2001 From: James Herr Date: Tue, 5 Nov 2024 09:29:15 -0600 Subject: [PATCH 12/13] Updated descriptions Co-Authored-By: Samuel Aquino Co-Authored-By: Sylvie <38440028+somesylvie@users.noreply.github.com> Co-Authored-By: jcrichlake <145698165+jcrichlake@users.noreply.github.com> --- operations/template/alert.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 52c046e91..2abff99fc 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -217,12 +217,12 @@ resource "azurerm_monitor_metric_alert" "azure_5XX_alert" { } } -resource "azurerm_monitor_metric_alert" "ti_dynamic_memory_alert" { +resource "azurerm_monitor_metric_alert" "dynamic_memory_alert" { count = local.non_pr_environment ? 1 : 0 name = "cdcti-${var.environment}-dynamic-memory-alert" resource_group_name = data.azurerm_resource_group.group.name scopes = [azurerm_linux_web_app.api.id] - description = "Alert when memory usage is high on CDC TI." + description = "Monitors memory usage patterns dynamically to identify when usage exceeds acceptable thresholds." severity = 2 frequency = "PT5M" window_size = "PT15M" @@ -258,12 +258,12 @@ resource "azurerm_monitor_metric_alert" "ti_dynamic_memory_alert" { } } -resource "azurerm_monitor_metric_alert" "ti_memory_alert" { +resource "azurerm_monitor_metric_alert" "memory_alert" { count = local.non_pr_environment ? 1 : 0 name = "cdcti-${var.environment}-memory-alert" resource_group_name = data.azurerm_resource_group.group.name scopes = [azurerm_linux_web_app.api.id] - description = "Alert when memory usage is high on CDC TI." + description = "Alerts when memory consumption surpasses configured thresholds, indicating high resource utilization." 
severity = 2 frequency = "PT5M" window_size = "PT15M" From c6ea51f62beaa08ab9603884d7fc618bf07e35fb Mon Sep 17 00:00:00 2001 From: James Herr Date: Tue, 5 Nov 2024 09:50:38 -0600 Subject: [PATCH 13/13] Updated descriptions --- operations/template/alert.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/template/alert.tf b/operations/template/alert.tf index 2abff99fc..ac55cd289 100644 --- a/operations/template/alert.tf +++ b/operations/template/alert.tf @@ -222,7 +222,7 @@ resource "azurerm_monitor_metric_alert" "dynamic_memory_alert" { name = "cdcti-${var.environment}-dynamic-memory-alert" resource_group_name = data.azurerm_resource_group.group.name scopes = [azurerm_linux_web_app.api.id] - description = "Monitors memory usage patterns dynamically to identify when usage exceeds acceptable thresholds." + description = "This alert checks if the backpack is starting to get heavy but does it in a way that keeps watching how much stuff is added. If it gets too full, it lets you know so you can take action before it becomes a problem." severity = 2 frequency = "PT5M" window_size = "PT15M" @@ -263,7 +263,7 @@ resource "azurerm_monitor_metric_alert" "memory_alert" { name = "cdcti-${var.environment}-memory-alert" resource_group_name = data.azurerm_resource_group.group.name scopes = [azurerm_linux_web_app.api.id] - description = "Alerts when memory consumption surpasses configured thresholds, indicating high resource utilization." + description = "This alert is like a rule that says, “If the backpack gets more than half full, send a warning!” It’s more straightforward and uses a set amount to decide when to tell you. If the memory being used goes over a certain limit (like half the backpack space), it shouts, “Hey, you’re running out of room!”" severity = 2 frequency = "PT5M" window_size = "PT15M"