Skip to content

Commit

Permalink
Move calico and tigera to be handled by argo (#831)
Browse files Browse the repository at this point in the history
* Move calico and tigera to be handled by argo

Signed-off-by: Stavros Foteinopoulos <[email protected]>

* Add calico support and max pods customization

Signed-off-by: Stavros Foteinopoulos <[email protected]>

---------

Signed-off-by: Stavros Foteinopoulos <[email protected]>
  • Loading branch information
stafot authored Feb 26, 2025
1 parent 345e49e commit 35ed4fa
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 76 deletions.
5 changes: 1 addition & 4 deletions aws/cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

| Name | Source | Version |
|------|--------|---------|
| <a name="module_managed_node_group"></a> [managed\_node\_group](#module\_managed\_node\_group) | github.com/mattermost/mattermost-cloud-monitoring.git//aws/eks-managed-node-groups | v1.8.44 |
| <a name="module_managed_node_group"></a> [managed\_node\_group](#module\_managed\_node\_group) | github.com/mattermost/mattermost-cloud-monitoring.git//aws/eks-managed-node-groups | v1.8.45 |

## Resources

Expand Down Expand Up @@ -74,8 +74,6 @@
| [kubernetes_config_map.aws_auth_configmap](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource |
| [kubernetes_storage_class_v1.gp3](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/storage_class_v1) | resource |
| [local_file.kubeconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
| [null_resource.calico_operator_configuration](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [null_resource.install_calico_operator](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [null_resource.patch_aws_node](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
| [aws_eks_cluster.cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source |
Expand All @@ -102,7 +100,6 @@
| <a name="input_calico_desired_size"></a> [calico\_desired\_size](#input\_calico\_desired\_size) | Desired size for the Calico node group | `number` | `3` | no |
| <a name="input_calico_max_size"></a> [calico\_max\_size](#input\_calico\_max\_size) | Maximum size for the Calico node group | `number` | `5` | no |
| <a name="input_calico_min_size"></a> [calico\_min\_size](#input\_calico\_min\_size) | Minimum size for the Calico node group | `number` | `2` | no |
| <a name="input_calico_operator_version"></a> [calico\_operator\_version](#input\_calico\_operator\_version) | n/a | `string` | `"v3.29.2"` | no |
| <a name="input_cidr_blocks"></a> [cidr\_blocks](#input\_cidr\_blocks) | n/a | `list(string)` | n/a | yes |
| <a name="input_cluster_short_name"></a> [cluster\_short\_name](#input\_cluster\_short\_name) | n/a | `string` | n/a | yes |
| <a name="input_coredns_addon_version"></a> [coredns\_addon\_version](#input\_coredns\_addon\_version) | The version of the EKS CoreDNS addon | `string` | n/a | yes |
Expand Down
81 changes: 38 additions & 43 deletions aws/cluster/calico.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,56 +14,51 @@ resource "aws_secretsmanager_secret_version" "kubeconfig_secret_version" {
depends_on = [local.kubeconfig]
}

# Install the Calico (Tigera) operator only if it is not already installed.
#
# - count: the resource exists only when var.is_calico_enabled is true.
# - The local-exec script runs kubectl against the kubeconfig file at
#   <path.root>/kubeconfig-<cluster name>. It probes for the
#   tigera-operator Deployment in the tigera-operator namespace; when the
#   probe fails it creates the operator from the upstream manifest pinned to
#   var.calico_operator_version and touches <path.root>/calico_installed.
#   That marker file is what null_resource.calico_operator_configuration
#   checks before applying the Calico configuration.
# - When the probe succeeds, the script only prints a message and does not
#   create the marker file.
# - The provisioner has no `when` argument and the resource has no `triggers`,
#   so the script runs only when this null_resource is first created.
resource "null_resource" "install_calico_operator" {
count = var.is_calico_enabled ? 1 : 0

provisioner "local-exec" {
command = <<EOF
if ! KUBECONFIG=${path.root}/kubeconfig-${aws_eks_cluster.cluster.name} \
kubectl get deployment -n tigera-operator tigera-operator >/dev/null 2>&1; then
echo "Installing Calico Operator..."
KUBECONFIG=${path.root}/kubeconfig-${aws_eks_cluster.cluster.name} \
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/${var.calico_operator_version}/manifests/tigera-operator.yaml
touch ${path.root}/calico_installed
else
echo "Calico Operator already installed. Skipping."
fi
EOF
}

# Wait for the cluster and for local_file.kubeconfig (presumably the resource
# that writes the kubeconfig file used above; confirm) before running kubectl.
depends_on = [aws_eks_cluster.cluster, resource.local_file.kubeconfig]
}

# Apply the Calico configuration if the operator was newly installed.
#
# - count: the resource exists only when var.is_calico_enabled is true.
# - The script checks for the marker file <path.root>/calico_installed, which
#   null_resource.install_calico_operator touches after installing the operator.
#   When the marker exists it runs `kubectl apply` on every manifest under
#   <path.module>/manifests and touches <path.root>/calico_config_applied.
# - When the marker is absent it only prints "Calico already configured.
#   Skipping."; nothing in this script verifies that a configuration is
#   actually present in the cluster.
# - Both marker files live on the machine running Terraform, so the outcome
#   depends on local filesystem state, not only on Terraform state.
resource "null_resource" "calico_operator_configuration" {
count = var.is_calico_enabled ? 1 : 0

provisioner "local-exec" {
command = <<EOF
if [ -f ${path.root}/calico_installed ]; then
echo "Applying Calico configuration..."
KUBECONFIG=${path.root}/kubeconfig-${aws_eks_cluster.cluster.name} kubectl apply -f ${path.module}/manifests
touch ${path.root}/calico_config_applied
else
echo "Calico already configured. Skipping."
fi
EOF
}

# Run only after the operator install step has finished.
depends_on = [null_resource.install_calico_operator]
}

# Patches the aws-node DaemonSet in kube-system (via `kubectl patch` with a
# JSON patch) so that its pods are kept off nodes reserved for Calico.
#
# - count: the resource exists only when var.is_calico_enabled is true.
# - The affinity payload requires kubernetes.io/os in [linux],
#   kubernetes.io/arch in [amd64, arm64], eks.amazonaws.com/compute-type not in
#   [fargate], and the node label `calico` not in [true].
# - The affinity operation is "replace" on /spec/template/spec/affinity; a JSON
#   patch "replace" fails when the target path does not exist, so this assumes
#   the DaemonSet already defines an affinity (TODO confirm for the target
#   EKS version).
# - The provisioner has no `when` argument and the resource has no `triggers`,
#   so the patch runs only when this null_resource is first created.
resource "null_resource" "patch_aws_node" {
count = var.is_calico_enabled ? 1 : 0

provisioner "local-exec" {
command = <<EOF
KUBECONFIG=${path.root}/kubeconfig-${aws_eks_cluster.cluster.name} kubectl patch daemonset aws-node -n kube-system --type='json' -p='[
{ "op": "add", "path": "/spec/template/spec/nodeSelector", "value": { "calico": "false" } }
KUBECONFIG=${path.root}/kubeconfig-${aws_eks_cluster.cluster.name} kubectl patch daemonset aws-node -n kube-system --type=json -p='[
{
"op": "replace",
"path": "/spec/template/spec/affinity",
"value": {
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "kubernetes.io/os",
"operator": "In",
"values": ["linux"]
},
{
"key": "kubernetes.io/arch",
"operator": "In",
"values": ["amd64", "arm64"]
},
{
"key": "eks.amazonaws.com/compute-type",
"operator": "NotIn",
"values": ["fargate"]
},
{
"key": "calico",
"operator": "NotIn",
"values": ["true"]
}
]
}
]
}
}
}
}
]'
EOF
}

# NOTE(review): the command reads <path.root>/kubeconfig-<cluster name>, but
# only the cluster is listed here. If that file is written by
# local_file.kubeconfig, it probably belongs in depends_on as well so the
# patch cannot run before the kubeconfig exists. Confirm.
depends_on = [aws_eks_cluster.cluster]
}

16 changes: 0 additions & 16 deletions aws/cluster/manifests/calico_installation.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions aws/cluster/manifests/felix_config.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions aws/cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,6 @@ variable "is_calico_enabled" {
default = false
}

# Calico release tag used to build the URL of the upstream
# tigera-operator.yaml manifest that the operator install step applies.
variable "calico_operator_version" {
  type    = string
  default = "v3.29.2"
}

variable "calico_desired_size" {
description = "Desired size for the Calico node group"
type = number
Expand Down
2 changes: 1 addition & 1 deletion aws/cluster/worker_asg.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module "managed_node_group" {
source = "github.com/mattermost/mattermost-cloud-monitoring.git//aws/eks-managed-node-groups?ref=v1.8.44"
source = "github.com/mattermost/mattermost-cloud-monitoring.git//aws/eks-managed-node-groups?ref=v1.8.45"
vpc_security_group_ids = [aws_security_group.worker-sg.id]
volume_size = var.node_volume_size
volume_type = var.node_volume_type
Expand Down
2 changes: 2 additions & 0 deletions aws/eks-managed-node-groups/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ No modules.
| <a name="input_arm_min_size"></a> [arm\_min\_size](#input\_arm\_min\_size) | The minimum number of arm nodes in the node group | `string` | n/a | yes |
| <a name="input_availability_zones"></a> [availability\_zones](#input\_availability\_zones) | List of availability zones to place the instances | `list(string)` | n/a | yes |
| <a name="input_calico_desired_size"></a> [calico\_desired\_size](#input\_calico\_desired\_size) | Desired size for the Calico node group | `number` | `3` | no |
| <a name="input_calico_max_pods"></a> [calico\_max\_pods](#input\_calico\_max\_pods) | Maximum number of pods when Calico CNI is enabled | `number` | `110` | no |
| <a name="input_calico_max_size"></a> [calico\_max\_size](#input\_calico\_max\_size) | Maximum size for the Calico node group | `number` | `5` | no |
| <a name="input_calico_min_size"></a> [calico\_min\_size](#input\_calico\_min\_size) | Minimum size for the Calico node group | `number` | `2` | no |
| <a name="input_certificate_authority"></a> [certificate\_authority](#input\_certificate\_authority) | The certificate authority data for the EKS cluster | `string` | n/a | yes |
Expand All @@ -54,6 +55,7 @@ No modules.
| <a name="input_enable_spot_nodes"></a> [enable\_spot\_nodes](#input\_enable\_spot\_nodes) | If true, spot nodes will be created | `bool` | `false` | no |
| <a name="input_image_id"></a> [image\_id](#input\_image\_id) | The AMI ID used for the nodes in the node group | `string` | n/a | yes |
| <a name="input_instance_type"></a> [instance\_type](#input\_instance\_type) | The instance type used for the nodes in the node group | `string` | n/a | yes |
| <a name="input_instance_type_max_pods_map"></a> [instance\_type\_max\_pods\_map](#input\_instance\_type\_max\_pods\_map) | Map of instance types to their maximum pod limits for AWS VPC CNI | `map(number)` | <pre>{<br/> "c6g.12xlarge": 234,<br/> "c6g.16xlarge": 737,<br/> "c6g.2xlarge": 58,<br/> "c6g.4xlarge": 234,<br/> "c6g.8xlarge": 234,<br/> "c6g.large": 35,<br/> "c6g.medium": 17,<br/> "c6g.metal": 737,<br/> "c6g.xlarge": 58,<br/> "c6gd.12xlarge": 234,<br/> "c6gd.16xlarge": 737,<br/> "c6gd.2xlarge": 58,<br/> "c6gd.4xlarge": 234,<br/> "c6gd.8xlarge": 234,<br/> "c6gd.large": 35,<br/> "c6gd.medium": 17,<br/> "c6gd.metal": 737,<br/> "c6gd.xlarge": 58,<br/> "m6g.12xlarge": 234,<br/> "m6g.16xlarge": 737,<br/> "m6g.2xlarge": 58,<br/> "m6g.4xlarge": 234,<br/> "m6g.8xlarge": 234,<br/> "m6g.large": 35,<br/> "m6g.medium": 17,<br/> "m6g.metal": 737,<br/> "m6g.xlarge": 58,<br/> "m6gd.12xlarge": 234,<br/> "m6gd.16xlarge": 737,<br/> "m6gd.2xlarge": 58,<br/> "m6gd.4xlarge": 234,<br/> "m6gd.8xlarge": 234,<br/> "m6gd.large": 35,<br/> "m6gd.medium": 17,<br/> "m6gd.metal": 737,<br/> "m6gd.xlarge": 58,<br/> "r6g.12xlarge": 234,<br/> "r6g.16xlarge": 737,<br/> "r6g.2xlarge": 58,<br/> "r6g.4xlarge": 234,<br/> "r6g.8xlarge": 234,<br/> "r6g.large": 35,<br/> "r6g.medium": 17,<br/> "r6g.metal": 737,<br/> "r6g.xlarge": 58,<br/> "r6gd.12xlarge": 234,<br/> "r6gd.16xlarge": 737,<br/> "r6gd.2xlarge": 58,<br/> "r6gd.4xlarge": 234,<br/> "r6gd.8xlarge": 234,<br/> "r6gd.large": 35,<br/> "r6gd.medium": 17,<br/> "r6gd.metal": 737,<br/> "r6gd.xlarge": 58,<br/> "t4g.2xlarge": 58,<br/> "t4g.large": 35,<br/> "t4g.medium": 17,<br/> "t4g.micro": 4,<br/> "t4g.nano": 4,<br/> "t4g.small": 11,<br/> "t4g.xlarge": 58<br/>}</pre> | no |
| <a name="input_is_calico_enabled"></a> [is\_calico\_enabled](#input\_is\_calico\_enabled) | n/a | `bool` | `false` | no |
| <a name="input_max_size"></a> [max\_size](#input\_max\_size) | The maximum number of nodes in the node group | `string` | n/a | yes |
| <a name="input_min_size"></a> [min\_size](#input\_min\_size) | The minimum number of nodes in the node group | `string` | n/a | yes |
Expand Down
5 changes: 4 additions & 1 deletion aws/eks-managed-node-groups/graviton_node_groups.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,16 @@ spec:
certificateAuthority: |
${var.certificate_authority}
cidr: ${var.service_ipv4_cidr}
kubelet:
maxPods: ${var.is_calico_enabled ? var.calico_max_pods : lookup(var.instance_type_max_pods_map, var.arm_instance_type, 17)}
EOF
/usr/local/bin/nodeadm init -c file:///etc/eks/nodeadm-config.yaml
USERDATA
) : base64encode(<<USERDATA
#!/bin/bash
/etc/eks/bootstrap.sh --apiserver-endpoint '${var.api_server_endpoint}' --b64-cluster-ca '${var.certificate_authority}' '${var.cluster_name}' --kubelet-extra-args "--kube-reserved cpu=250m,memory=1Gi,ephemeral-storage=1Gi --system-reserved cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi --eviction-hard memory.available<0.2Gi,nodefs.available<10%"
/etc/eks/bootstrap.sh --apiserver-endpoint '${var.api_server_endpoint}' --b64-cluster-ca '${var.certificate_authority}' '${var.cluster_name}' \
--kubelet-extra-args "--max-pods=${var.is_calico_enabled ? var.calico_max_pods : lookup(var.instance_type_max_pods_map, var.arm_instance_type, 17)} --kube-reserved cpu=250m,memory=1Gi,ephemeral-storage=1Gi --system-reserved cpu=250m,memory=0.2Gi,ephemeral-storage=1Gi --eviction-hard memory.available<0.2Gi,nodefs.available<10%"
USERDATA
)

Expand Down
87 changes: 87 additions & 0 deletions aws/eks-managed-node-groups/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,90 @@ variable "calico_max_size" {
type = number
default = 5
}

# Pod limit passed to kubelet (maxPods / --max-pods) when is_calico_enabled is
# true; it is used instead of the per-instance-type lookup in
# instance_type_max_pods_map.
variable "calico_max_pods" {
  type        = number
  default     = 110
  description = "Maximum number of pods when Calico CNI is enabled"
}

# Per-instance-type kubelet pod limit used when the AWS VPC CNI assigns pod IPs
# (that is, when is_calico_enabled is false). The node group user data looks up
# the instance type in this map and falls back to a fixed default when the
# type is missing.
#
# Values follow the VPC CNI formula without prefix delegation:
#   max pods = ENIs * (IPv4 addresses per ENI - 1) + 2
# and match the eni-max-pods.txt list published with the Amazon EKS AMI.
#
# The .medium and .large sizes of the m6g/c6g/r6g families (and their "d"
# variants) have fewer ENIs and addresses per ENI than t4g.medium/t4g.large:
#   *.medium: 2 ENIs * (4 - 1) + 2 = 8
#   *.large:  3 ENIs * (10 - 1) + 2 = 29
# Advertising a higher limit lets the scheduler place pods that the CNI cannot
# give an IP address, leaving them stuck in ContainerCreating.
variable "instance_type_max_pods_map" {
  description = "Map of instance types to their maximum pod limits for AWS VPC CNI"
  type        = map(number)
  default = {
    # T4g instances
    "t4g.nano"    = 4
    "t4g.micro"   = 4
    "t4g.small"   = 11
    "t4g.medium"  = 17
    "t4g.large"   = 35
    "t4g.xlarge"  = 58
    "t4g.2xlarge" = 58

    # M6g instances
    "m6g.medium"   = 8
    "m6g.large"    = 29
    "m6g.xlarge"   = 58
    "m6g.2xlarge"  = 58
    "m6g.4xlarge"  = 234
    "m6g.8xlarge"  = 234
    "m6g.12xlarge" = 234
    "m6g.16xlarge" = 737
    "m6g.metal"    = 737

    # M6gd instances
    "m6gd.medium"   = 8
    "m6gd.large"    = 29
    "m6gd.xlarge"   = 58
    "m6gd.2xlarge"  = 58
    "m6gd.4xlarge"  = 234
    "m6gd.8xlarge"  = 234
    "m6gd.12xlarge" = 234
    "m6gd.16xlarge" = 737
    "m6gd.metal"    = 737

    # C6g instances
    "c6g.medium"   = 8
    "c6g.large"    = 29
    "c6g.xlarge"   = 58
    "c6g.2xlarge"  = 58
    "c6g.4xlarge"  = 234
    "c6g.8xlarge"  = 234
    "c6g.12xlarge" = 234
    "c6g.16xlarge" = 737
    "c6g.metal"    = 737

    # C6gd instances
    "c6gd.medium"   = 8
    "c6gd.large"    = 29
    "c6gd.xlarge"   = 58
    "c6gd.2xlarge"  = 58
    "c6gd.4xlarge"  = 234
    "c6gd.8xlarge"  = 234
    "c6gd.12xlarge" = 234
    "c6gd.16xlarge" = 737
    "c6gd.metal"    = 737

    # R6g instances
    "r6g.medium"   = 8
    "r6g.large"    = 29
    "r6g.xlarge"   = 58
    "r6g.2xlarge"  = 58
    "r6g.4xlarge"  = 234
    "r6g.8xlarge"  = 234
    "r6g.12xlarge" = 234
    "r6g.16xlarge" = 737
    "r6g.metal"    = 737

    # R6gd instances
    "r6gd.medium"   = 8
    "r6gd.large"    = 29
    "r6gd.xlarge"   = 58
    "r6gd.2xlarge"  = 58
    "r6gd.4xlarge"  = 234
    "r6gd.8xlarge"  = 234
    "r6gd.12xlarge" = 234
    "r6gd.16xlarge" = 737
    "r6gd.metal"    = 737
  }
}

0 comments on commit 35ed4fa

Please sign in to comment.