From ccd525ef7664e4a58f55bf0984612eb29ee09e50 Mon Sep 17 00:00:00 2001
From: MancunianSam
Date: Tue, 11 Jun 2024 16:05:28 +0100
Subject: [PATCH 1/3] DR2-1668 Use RATE for DLQ alarms.

The issue is that if we get a message in a DLQ where there is already a message in the DLQ, we don't get another notification.

Repeating messages for alarms that don't change state is a bit complicated so we'll go with this as a solution now.

The period is 60 seconds so we shouldn't get massively spammed by a huge number of messages entering the DLQ at once.
---
 sqs/main.tf    | 46 ++++++++++++++++++++++++++++++++++------------
 sqs/outputs.tf |  2 +-
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/sqs/main.tf b/sqs/main.tf
index effa12e..a2d02c1 100644
--- a/sqs/main.tf
+++ b/sqs/main.tf
@@ -64,20 +64,42 @@ resource "aws_sqs_queue" "dlq_with_sse" {
   sqs_managed_sse_enabled = true
 }
 
-module "dlq_cloudwatch_alarm" {
-  source              = "../cloudwatch_alarms"
-  metric_name         = "ApproximateNumberOfMessagesVisible"
-  namespace           = "AWS/SQS"
-  name                = "${var.queue_name}-messages-visible--dlq-alarm"
-  threshold           = var.dlq_cloudwatch_alarm_visible_messages_threshold
+resource "aws_cloudwatch_metric_alarm" "dlq_metric_alarm" {
+  alarm_name          = "${var.queue_name}-messages-visible--dlq-alarm"
   comparison_operator = "GreaterThanThreshold"
-  statistic           = "Sum"
-  treat_missing_data  = "ignore"
-  datapoints_to_alarm = 1
-  dimensions = {
-    QueueName = local.sqs_dlq.name
+  evaluation_periods  = 60
+  metric_query {
+    id = "m1"
+    metric {
+      dimensions = {
+        QueueName = "${var.queue_name}-dlq"
+      }
+      metric_name = "ApproximateNumberOfMessagesVisible"
+      period      = 60
+      stat        = "Maximum"
+      namespace   = "AWS/SQS"
+    }
+  }
+  metric_query {
+    id = "m2"
+    metric {
+      dimensions = {
+        QueueName = "${var.queue_name}-dlq"
+      }
+      metric_name = "ApproximateNumberOfMessagesVisible"
+      period      = 60
+      stat        = "Maximum"
+      namespace   = "AWS/SQS"
+    }
   }
-  notification_topic = var.dlq_notification_topic
+  metric_query {
+    expression  = "RATE(m1+m2)"
+    id          = "e1"
+    label       = "AllMessagesInQueue"
+    period      = 60
+    return_data = true
+  }
+
 }
 
 module "queue_cloudwatch_alarm" {
diff --git a/sqs/outputs.tf b/sqs/outputs.tf
index 2427edf..dd9f698 100644
--- a/sqs/outputs.tf
+++ b/sqs/outputs.tf
@@ -15,5 +15,5 @@ output "dlq_sqs_url" {
 }
 
 output "dlq_cloudwatch_alarm_arn" {
-  value = var.dlq_notification_topic == "" ? "" : module.dlq_cloudwatch_alarm.*.cloudwatch_alarm_arn[0]
+  value = aws_cloudwatch_metric_alarm.dlq_metric_alarm.arn
 }

From ff985abde43e4795e1d86b6ef423c328598c33b6 Mon Sep 17 00:00:00 2001
From: MancunianSam
Date: Thu, 13 Jun 2024 15:01:23 +0100
Subject: [PATCH 2/3] Change the other metric

---
 sqs/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqs/main.tf b/sqs/main.tf
index a2d02c1..5408cc3 100644
--- a/sqs/main.tf
+++ b/sqs/main.tf
@@ -86,7 +86,7 @@ resource "aws_cloudwatch_metric_alarm" "dlq_metric_alarm" {
       dimensions = {
         QueueName = "${var.queue_name}-dlq"
       }
-      metric_name = "ApproximateNumberOfMessagesVisible"
+      metric_name = "ApproximateNumberOfMessagesNotVisible"
       period      = 60
       stat        = "Maximum"
       namespace   = "AWS/SQS"

From 499e3699dbd33780966eefef0593bc684098834b Mon Sep 17 00:00:00 2001
From: MancunianSam
Date: Thu, 13 Jun 2024 15:06:01 +0100
Subject: [PATCH 3/3] Add evaluation period variable

---
 sqs/main.tf      | 6 +++---
 sqs/variables.tf | 6 ++++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sqs/main.tf b/sqs/main.tf
index 5408cc3..ff5b75a 100644
--- a/sqs/main.tf
+++ b/sqs/main.tf
@@ -67,7 +67,7 @@ resource "aws_sqs_queue" "dlq_with_sse" {
 resource "aws_cloudwatch_metric_alarm" "dlq_metric_alarm" {
   alarm_name          = "${var.queue_name}-messages-visible--dlq-alarm"
   comparison_operator = "GreaterThanThreshold"
-  evaluation_periods  = 60
+  evaluation_periods  = var.dlq_alarm_evaluation_period
   metric_query {
     id = "m1"
     metric {
@@ -75,7 +75,7 @@ resource "aws_cloudwatch_metric_alarm" "dlq_metric_alarm" {
         QueueName = "${var.queue_name}-dlq"
       }
       metric_name = "ApproximateNumberOfMessagesVisible"
-      period      = 60
+      period      = var.dlq_alarm_evaluation_period
       stat        = "Maximum"
       namespace   = "AWS/SQS"
     }
@@ -87,7 +87,7 @@ resource "aws_cloudwatch_metric_alarm" "dlq_metric_alarm" {
         QueueName = "${var.queue_name}-dlq"
       }
       metric_name = "ApproximateNumberOfMessagesNotVisible"
-      period      = 60
+      period      = var.dlq_alarm_evaluation_period
       stat        = "Maximum"
       namespace   = "AWS/SQS"
     }
diff --git a/sqs/variables.tf b/sqs/variables.tf
index a37a903..6e93554 100644
--- a/sqs/variables.tf
+++ b/sqs/variables.tf
@@ -73,3 +73,9 @@ variable "queue_visibility_alarm_notification_topic" {
   description = "A topic arn which will be used to send ALARM events if the alarm for max number of messages in the queue is triggered."
   default     = null
 }
+
+variable "dlq_alarm_evaluation_period" {
+  type        = number
+  description = "The evaluation period for the metrics for the DLQ alarm"
+  default     = 60
+}
\ No newline at end of file