Skip to content

Commit

Permalink
DEVOPS-5857 autoscaling by time calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
wesleung-appzen committed Aug 27, 2020
1 parent e72cd91 commit d950766
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 7 deletions.
100 changes: 97 additions & 3 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ resource "aws_appautoscaling_policy" "scale_up" {
}
}
}

resource "aws_appautoscaling_policy" "scale_big_up" {
count = "${
var.high_big_threshold > 0
Expand All @@ -125,6 +126,32 @@ resource "aws_appautoscaling_policy" "scale_big_up" {
}
}

# Step-scaling policy that raises the ECS service's desired task count.
# It is only created when var.queue_time_threshold is greater than zero,
# i.e. when queue-time based scaling has been switched on by the caller.
resource "aws_appautoscaling_policy" "scale_queuetime_up" {
  # 1 instance when a positive threshold is configured, otherwise none.
  count = "${var.queue_time_threshold > 0 ? 1 : 0}"

  name        = "${module.label.id}-queue_time-up"
  policy_type = "StepScaling"

  # Scaling target: the desired count of the ECS service.
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${var.cluster_name}/${var.service_name}"

  # The scalable target must be registered before a policy can attach to it.
  depends_on = ["aws_appautoscaling_target.target"]

  step_scaling_policy_configuration = {
    adjustment_type          = "${var.adjustment_type_up}"
    min_adjustment_magnitude = "${var.scale_up_min_adjustment_magnitude}"
    metric_aggregation_type  = "Average"
    cooldown                 = "${var.scale_up_cooldown}"

    # Single step: apply scale_up_count while the metric is inside the
    # configured [lower, upper) interval relative to the alarm threshold.
    step_adjustment {
      scaling_adjustment          = "${var.scale_up_count}"
      metric_interval_lower_bound = "${var.scale_up_lower_bound}"
      metric_interval_upper_bound = "${var.scale_up_upper_bound}"
    }
  }
}

resource "aws_appautoscaling_policy" "scale_down" {
depends_on = ["aws_appautoscaling_target.target"]
name = "${module.label.id}-sqs-down"
Expand Down Expand Up @@ -165,9 +192,10 @@ resource "aws_cloudwatch_metric_alarm" "service_max_stuck" {
ok_actions = ["${var.sns_stuck_alarm_arn}"]
insufficient_data_actions = []
treat_missing_data = "ignore"
dimensions = {
ClusterName = "${var.cluster_name}"
ServiceName = "${var.service_name}"

dimensions = {
ClusterName = "${var.cluster_name}"
ServiceName = "${var.service_name}"
}
}

Expand Down Expand Up @@ -296,6 +324,7 @@ resource "aws_cloudwatch_metric_alarm" "service_queue_low" {
label = "Sum_Visible+NonVisible"
return_data = "true"
}

metric_query {
id = "visible"

Expand All @@ -312,6 +341,7 @@ resource "aws_cloudwatch_metric_alarm" "service_queue_low" {
}
}
}

metric_query {
id = "notvisible"

Expand All @@ -329,3 +359,67 @@ resource "aws_cloudwatch_metric_alarm" "service_queue_low" {
}
}
}

# CloudWatch alarm that triggers the queue-time scale-up policy.
#
# The alarm evaluates a metric-math expression:
#
#   QueueTime = (QueueSize * WorkerTiming) / (RunningTasks * WorkersPerTask)
#
# where QueueSize is the sum of visible and not-visible SQS messages.
# It is only created when var.queue_time_threshold is greater than zero.
#
# Requires ECS ContainerInsights to be enabled on the cluster, because that is
# what publishes RunningTaskCount:
#   aws ecs update-cluster-settings --cluster <cluster name> --settings name=containerInsights,value=enabled
resource "aws_cloudwatch_metric_alarm" "queue_time" {
  count = "${var.queue_time_threshold > 0 ? 1 : 0}"

  # Named after the policy it drives. The earlier "-sqs-big-up" suffix looked
  # copy-pasted and would presumably collide with the big-up alarm's name
  # (NOTE(review): confirm against the scale_big_up alarm, not shown here).
  alarm_name          = "${module.label.id}-queue_time-up"
  alarm_description   = "Alarm monitors ${var.queue_name} QueueTime = ((Queue Size * Worker Timing) / (number of current tasks * Number Of workers per task))"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "1"
  threshold           = "${var.queue_time_threshold}"

  # The policy is declared as "scale_queuetime_up" and uses count, so it has to
  # be addressed by index. It shares this alarm's count condition, so index 0
  # exists whenever this alarm does.
  alarm_actions = ["${aws_appautoscaling_policy.scale_queuetime_up.0.arn}"]

  # Metric-math result the alarm compares against the threshold.
  # NOTE(review): when taskcount is 0 this divides by zero; confirm the desired
  # alarm behaviour for a service that is scaled down to zero tasks.
  metric_query {
    id          = "queuetime"
    expression  = "((visible+notvisible) * ${var.queue_worker_timing}) / (taskcount * ${var.queue_task_worker_count})"
    label       = "WaitTime"
    return_data = "true"
  }

  # Messages waiting in the queue.
  metric_query {
    id = "visible"

    metric {
      metric_name = "ApproximateNumberOfMessagesVisible"
      namespace   = "AWS/SQS"
      period      = "60"
      stat        = "Maximum"

      dimensions {
        QueueName = "${var.queue_name}"
      }
    }
  }

  # Messages currently in flight (received but not yet deleted).
  metric_query {
    id = "notvisible"

    metric {
      metric_name = "ApproximateNumberOfMessagesNotVisible"
      namespace   = "AWS/SQS"
      period      = "60"
      stat        = "Maximum"

      dimensions {
        QueueName = "${var.queue_name}"
      }
    }
  }

  # Running tasks of the ECS service. RunningTaskCount is published by
  # Container Insights under ECS/ContainerInsights (not AWS/SQS) and is keyed
  # by both ClusterName and ServiceName.
  metric_query {
    id = "taskcount"

    metric {
      metric_name = "RunningTaskCount"
      namespace   = "ECS/ContainerInsights"
      period      = "60"
      stat        = "Maximum"

      dimensions {
        ClusterName = "${var.cluster_name}"
        ServiceName = "${var.service_name}"
      }
    }
  }
}
23 changes: 19 additions & 4 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ variable "cluster_name" {
description = "Name of ECS cluster that service is in"
}

# SQS queue whose metrics feed the scaling alarms (required, no default).
variable "queue_name" {
  description = "Name of SQS queue to monitor"
}

# ECS service that the scaling policies act on (required, no default).
variable "service_name" {
  description = "Name of ECS service to autoscale"
}
Expand Down Expand Up @@ -80,6 +76,25 @@ variable "min_capacity" {
default = "0"
}

# SQS queue whose metrics feed the scaling alarms (required, no default).
variable "queue_name" {
  description = "Name of SQS queue to monitor"
}

# Threshold for the queue-time alarm. The module gates the queue-time policy
# and alarm on `var.queue_time_threshold > 0`, so the default must be numeric:
# an empty string cannot be compared with 0. "0" keeps the feature disabled
# unless the caller opts in with a positive value.
variable "queue_time_threshold" {
  description = "Queue time above which the service scales up, where queue time = ((Queue Size * Worker Timing) / (number of Current Tasks * number of Workers per Task)). Set to 0 to disable queue-time scaling"
  default     = "0"
}

# "Worker Timing" factor of the queue-time expression.
# NOTE(review): presumably the average time one worker needs per message, in
# the same unit as queue_time_threshold; confirm with the module owner.
variable "queue_worker_timing" {
  description = "Worker Timing term used in the queue time calculation ((Queue Size * Worker Timing) / (number of current tasks * Number Of workers per task))"
  default     = "1"
}

# "Number of workers per task" factor of the queue-time expression; it is
# multiplied by the running task count in the denominator.
variable "queue_task_worker_count" {
  description = "Number of workers per task used in the queue time calculation ((Queue Size * Worker Timing) / (number of current tasks * Number Of workers per task))"
  default     = "1"
}

variable "scale_down_cooldown" {
description = "The amount of time, in seconds, after a scaling down completes and before the next scaling activity can start"
default = "60"
Expand Down

0 comments on commit d950766

Please sign in to comment.