From 47b88f8cc04fa79aae35b2e15840fee89a30e32c Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Thu, 8 Feb 2024 07:15:48 -0800 Subject: [PATCH 01/13] Add terraform for eks windows 1. Added terraform templates for fluentbit and CW agent on windows. 2. Fix fluentbit tests for Windows --- environment/metadata.go | 8 + generator/test_case_generator.go | 17 +- .../default_resources/cwagent-windows.yaml | 62 ++ .../fluenbit-windows-configmap.yaml | 147 +++++ .../fluenbit-windows-daemonset.yaml | 76 +++ .../test-sample-windows.yaml | 28 + terraform/eks/daemon/fluent/common/main.tf | 10 +- terraform/eks/daemon/fluent/windows/main.tf | 220 +++++++ .../eks/daemon/fluent/windows/providers.tf | 17 + .../eks/daemon/fluent/windows/variables.tf | 42 ++ terraform/eks/daemon/windows/main.tf | 560 ++++++++++++++++++ terraform/eks/daemon/windows/providers.tf | 17 + terraform/eks/daemon/windows/variables.tf | 47 ++ test/fluent/fluent_test.go | 17 + 14 files changed, 1251 insertions(+), 17 deletions(-) create mode 100644 terraform/eks/daemon/default_resources/cwagent-windows.yaml create mode 100644 terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml create mode 100644 terraform/eks/daemon/default_resources/fluenbit-windows-daemonset.yaml create mode 100644 terraform/eks/daemon/default_resources/test-sample-windows.yaml create mode 100644 terraform/eks/daemon/fluent/windows/main.tf create mode 100644 terraform/eks/daemon/fluent/windows/providers.tf create mode 100644 terraform/eks/daemon/fluent/windows/variables.tf create mode 100644 terraform/eks/daemon/windows/main.tf create mode 100644 terraform/eks/daemon/windows/providers.tf create mode 100644 terraform/eks/daemon/windows/variables.tf diff --git a/environment/metadata.go b/environment/metadata.go index 4929b2ae3..c9c0162ab 100644 --- a/environment/metadata.go +++ b/environment/metadata.go @@ -41,6 +41,7 @@ type MetaData struct { ProxyUrl string AssumeRoleArn string InstanceId string + 
InstancePlatform string AgentStartCommand string } @@ -62,6 +63,7 @@ type MetaDataStrings struct { ProxyUrl string AssumeRoleArn string InstanceId string + InstancePlatform string AgentStartCommand string } @@ -122,6 +124,10 @@ func registerInstanceId(dataString *MetaDataStrings) { flag.StringVar(&(dataString.InstanceId), "instanceId", "", "ec2 instance ID that is being used by a test") } +func registerInstancePlatform(dataString *MetaDataStrings) { + flag.StringVar(&(dataString.InstancePlatform), "instancePlatform", "linux", "ec2 instance OS that is being used for a test") +} + func registerAgentStartCommand(dataString *MetaDataStrings) { flag.StringVar(&(dataString.AgentStartCommand), "agentStartCommand", DefaultEC2AgentStartCommand, @@ -218,6 +224,7 @@ func RegisterEnvironmentMetaDataFlags() *MetaDataStrings { registerProxyUrl(registeredMetaDataStrings) registerAssumeRoleArn(registeredMetaDataStrings) registerInstanceId(registeredMetaDataStrings) + registerInstancePlatform(registeredMetaDataStrings) registerAgentStartCommand(registeredMetaDataStrings) return registeredMetaDataStrings @@ -241,6 +248,7 @@ func GetEnvironmentMetaData() *MetaData { metaDataStorage.ProxyUrl = registeredMetaDataStrings.ProxyUrl metaDataStorage.AssumeRoleArn = registeredMetaDataStrings.AssumeRoleArn metaDataStorage.InstanceId = registeredMetaDataStrings.InstanceId + metaDataStorage.InstancePlatform = registeredMetaDataStrings.InstancePlatform metaDataStorage.AgentStartCommand = registeredMetaDataStrings.AgentStartCommand return metaDataStorage diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index af46a6f44..f26b6f4c9 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -182,22 +182,7 @@ var testTypeToTestConfig = map[string][]testConfig{ testDir: "./test/metric_value_benchmark", targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, }, - { - testDir: "./test/statsd", terraformDir: "terraform/eks/daemon/statsd", 
- targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - { - testDir: "./test/emf", terraformDir: "terraform/eks/daemon/emf", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - { - testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/d", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, - {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/bit"}, - {testDir: "./test/app_signals", terraformDir: "terraform/eks/daemon/app_signals", - targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, - }, + {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/windows"}, }, "eks_deployment": { {testDir: "./test/metric_value_benchmark"}, diff --git a/terraform/eks/daemon/default_resources/cwagent-windows.yaml b/terraform/eks/daemon/default_resources/cwagent-windows.yaml new file mode 100644 index 000000000..04116b291 --- /dev/null +++ b/terraform/eks/daemon/default_resources/cwagent-windows.yaml @@ -0,0 +1,62 @@ +# CloudWatch agent DaemonSet for Windows nodes; the amazon-cloudwatch namespace must already exist +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudwatch-agent-windows + namespace: amazon-cloudwatch +spec: + selector: + matchLabels: + app: cloudwatch-agent + template: + metadata: + labels: + app: cloudwatch-agent + spec: + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\SYSTEM" + hostNetwork: true + containers: + - name: cloudwatch-agent + image: CW_TEST_IMAGE + volumeMounts: + - name: cwagentconfig + mountPath: C:\Program Files\Amazon\AmazonCloudWatchAgent\cwagentconfig + resources: + limits: + cpu: 400m + memory: 400Mi + requests: + cpu: 400m + memory: 400Mi + imagePullPolicy: Always + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: "status.hostIP" + - name: HOST_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CI_VERSION + value: "k8s/1.3.20" + - name: 
CWAGENT_LOG_LEVEL + value: DEBUG + - name: RUN_IN_CONTAINER + value: "True" + - name: RUN_AS_HOST_PROCESS_CONTAINER + value: "True" + nodeSelector: + kubernetes.io/os: windows + serviceAccountName: cloudwatch-agent + volumes: + - name: cwagentconfig + configMap: + name: cwagentconfig \ No newline at end of file diff --git a/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml new file mode 100644 index 000000000..0a0ebbd29 --- /dev/null +++ b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml @@ -0,0 +1,147 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluent-bit-windows-config + namespace: amazon-cloudwatch + labels: + k8s-app: fluent-bit +data: + fluent-bit.conf: | + [SERVICE] + Flush 5 + Log_Level info + Daemon off + net.dns.resolver LEGACY + Parsers_File parsers.conf + + @INCLUDE application-log.conf + @INCLUDE dataplane-log.conf + @INCLUDE host-log.conf + + application-log.conf: | + [INPUT] + Name tail + Tag application.* + Exclude_Path C:\\var\\log\\containers\\fluent-bit*, C:\\var\\log\\containers\\cloudwatch-agent* + Path C:\\var\\log\\containers\\*.log + Parser docker + DB C:\\var\\fluent-bit\\state\\flb_container.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + Rotate_Wait 30 + Read_from_Head ${READ_FROM_HEAD} + + [INPUT] + Name tail + Tag application.* + Path C:\\var\\log\\containers\\fluent-bit* + Parser docker + DB C:\\var\\fluent-bit\\state\\flb_log.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + Rotate_Wait 30 + Refresh_Interval 10 + Read_from_Head ${READ_FROM_HEAD} + + [INPUT] + Name tail + Tag application.* + Path C:\\var\\log\\containers\\cloudwatch-agent* + Parser docker + DB C:\\var\\fluent-bit\\state\\flb_cwagent.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + Rotate_Wait 30 + Refresh_Interval 10 + Read_from_Head ${READ_FROM_HEAD} + + [OUTPUT] + Name cloudwatch_logs + Match application.* + region ${AWS_REGION} + log_group_name 
/aws/containerinsights/${CLUSTER_NAME}/application + log_stream_prefix ${HOST_NAME}- + auto_create_group true + extra_user_agent container-insights + + dataplane-log.conf: | + [INPUT] + Name tail + Tag dataplane.tail.* + Path C:\\ProgramData\\containerd\\root\\*.log, C:\\ProgramData\\Amazon\\EKS\\logs\\*.log + Parser dataplane_firstline + DB C:\\var\\fluent-bit\\state\\flb_dataplane_tail.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + Rotate_Wait 30 + Refresh_Interval 10 + Read_from_Head ${READ_FROM_HEAD} + + [INPUT] + Name tail + Tag dataplane.tail.C.ProgramData.Amazon.EKS.logs.vpc-bridge + Path C:\\ProgramData\\Amazon\\EKS\\logs\\*.log.* + Path_Key file_name + Parser dataplane_firstline + DB C:\\var\\fluent-bit\\state\\flb_dataplane_cni_tail.db + Mem_Buf_Limit 5MB + Skip_Long_Lines On + Rotate_Wait 30 + Refresh_Interval 10 + Read_from_Head ${READ_FROM_HEAD} + + [FILTER] + Name aws + Match dataplane.* + imds_version v2 + + [OUTPUT] + Name cloudwatch_logs + Match dataplane.* + region ${AWS_REGION} + log_group_name /aws/containerinsights/${CLUSTER_NAME}/dataplane + log_stream_prefix ${HOST_NAME}- + auto_create_group true + extra_user_agent container-insights + + host-log.conf: | + [INPUT] + Name winlog + Channels EKS, System + DB C:\\var\\fluent-bit\\state\\flb_system_winlog.db + Interval_Sec 60 + + [FILTER] + Name aws + Match winlog.* + imds_version v2 + + [OUTPUT] + Name cloudwatch_logs + Match winlog.* + region ${AWS_REGION} + log_group_name /aws/containerinsights/${CLUSTER_NAME}/host + log_stream_prefix ${HOST_NAME}. 
+ auto_create_group true + extra_user_agent container-insights + + parsers.conf: | + [PARSER] + Name docker + Format json + Time_Key time + Time_Format %b %d %H:%M:%S + + [PARSER] + Name container_firstline + Format regex + Regex (?<log>(?<="log":")\S(?!\.).*?)(?<!\\)".*(?<stream>(?<="stream":").*?)".*(?<time>\d{4}-\d{1,2}-\d{1,2}T\d{2}:\d{2}:\d{2}\.\w*).*(?=}) + Time_Key time + Time_Format %Y-%m-%dT%H:%M:%S.%LZ + + [PARSER] + Name dataplane_firstline + Format regex + Regex (?<log>(?<="log":")\S(?!\.).*?)(?<!\\)".*(?<stream>(?<="stream":").*?)".*(?<time>\d{4}-\d{1,2}-\d{1,2}T\d{2}:\d{2}:\d{2}\.\w*).*(?=}) + Time_Key time + Time_Format %Y-%m-%dT%H:%M:%S.%LZ \ No newline at end of file diff --git a/terraform/eks/daemon/default_resources/fluenbit-windows-daemonset.yaml b/terraform/eks/daemon/default_resources/fluenbit-windows-daemonset.yaml new file mode 100644 index 000000000..a94303449 --- /dev/null +++ b/terraform/eks/daemon/default_resources/fluenbit-windows-daemonset.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: fluent-bit-windows + namespace: amazon-cloudwatch + labels: + k8s-app: fluent-bit + version: v1 + kubernetes.io/cluster-service: "true" +spec: + selector: + matchLabels: + k8s-app: fluent-bit + template: + metadata: + labels: + k8s-app: fluent-bit + version: v1 + kubernetes.io/cluster-service: "true" + spec: + securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\System" + hostNetwork: true + nodeSelector: + kubernetes.io/os: windows + containers: + - name: fluent-bit + image: public.ecr.aws/aws-observability/aws-for-fluent-bit:windowsservercore-stable + imagePullPolicy: Always + command: ["powershell.exe", "-Command", "New-Item -ItemType Directory -Path C:\\var\\fluent-bit\\state -Force;", "%CONTAINER_SANDBOX_MOUNT_POINT%/fluent-bit/bin/fluent-bit.exe", "-e", "%CONTAINER_SANDBOX_MOUNT_POINT%/fluent-bit/kinesis.dll", "-e", "%CONTAINER_SANDBOX_MOUNT_POINT%/fluent-bit/firehose.dll", "-e", 
"%CONTAINER_SANDBOX_MOUNT_POINT%/fluent-bit/cloudwatch.dll", "-c", "%CONTAINER_SANDBOX_MOUNT_POINT%/fluent-bit/configuration/fluent-bit.conf"] + env: + - name: AWS_REGION + valueFrom: + configMapKeyRef: + name: "fluent-bit-cluster-info" + key: "logs.region" + - name: CLUSTER_NAME + valueFrom: + configMapKeyRef: + name: "fluent-bit-cluster-info" + key: "cluster.name" + - name: READ_FROM_HEAD + valueFrom: + configMapKeyRef: + name: "fluent-bit-cluster-info" + key: "read.head" + - name: HOST_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: HOSTNAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: CI_VERSION + value: "k8s/1.3.17" + resources: + limits: + cpu: 500m + memory: 600Mi + requests: + cpu: 500m + memory: 600Mi + volumeMounts: + - name: fluent-bit-config + mountPath: fluent-bit\configuration\ + volumes: + - name: fluent-bit-config + configMap: + name: fluent-bit-windows-config + terminationGracePeriodSeconds: 10 + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: cloudwatch-agent diff --git a/terraform/eks/daemon/default_resources/test-sample-windows.yaml b/terraform/eks/daemon/default_resources/test-sample-windows.yaml new file mode 100644 index 000000000..843ff7d7b --- /dev/null +++ b/terraform/eks/daemon/default_resources/test-sample-windows.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: windows-test-deployment +spec: + selector: + matchLabels: + app: windows-test-deployment + tier: backend + track: stable + replicas: 1 + template: + metadata: + labels: + app: windows-test-deployment + tier: backend + track: stable + spec: + containers: + - name: windows-test-deployment + image: mcr.microsoft.com/windows/servercore/iis:windowsservercore-ltsc2022 + command: + - powershell.exe + - -command + - " ping -t google.com " + nodeSelector: + kubernetes.io/os: windows + diff --git a/terraform/eks/daemon/fluent/common/main.tf b/terraform/eks/daemon/fluent/common/main.tf index 
78496e13c..93edbfb9c 100644 --- a/terraform/eks/daemon/fluent/common/main.tf +++ b/terraform/eks/daemon/fluent/common/main.tf @@ -200,7 +200,7 @@ resource "kubernetes_cluster_role" "clusterrole" { api_groups = ["get"] } rule { - verbs = ["create"] + verbs = ["create", "get"] resources = ["nodes/stats", "configmaps", "events"] api_groups = [""] } @@ -421,3 +421,11 @@ output "cluster_endpoint" { output "cluster_cert" { value = aws_eks_cluster.cluster.certificate_authority.0.data } + +output "node_role_name" { + value = aws_iam_role.node_role.name +} + +output "node_role_arn" { + value = aws_iam_role.node_role.arn +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/main.tf b/terraform/eks/daemon/fluent/windows/main.tf new file mode 100644 index 000000000..fa86c47c6 --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/main.tf @@ -0,0 +1,220 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +module "fluent_common" { + source = "../common" + ami_type = var.ami_type + instance_type = var.instance_type +} + +module "basic_components" { + source = "../../../../basic_components" + + region = var.region +} + +locals { + aws_eks = "aws eks --region ${var.region}" + cluster_name = module.fluent_common.cluster_name +} + +data "aws_caller_identity" "account_id" {} + +data "aws_eks_cluster" "eks_windows_cluster_ca" { + name = module.fluent_common.cluster_name +} + +output "account_id" { + value = data.aws_caller_identity.account_id.account_id +} + +data "aws_eks_cluster_auth" "this" { + name = module.fluent_common.cluster_name +} + +## EKS Cluster Addon + +resource "aws_eks_addon" "eks_windows_addon" { + cluster_name = module.fluent_common.cluster_name + addon_name = "vpc-cni" +} + +## Enable VPC CNI Windows Support + +resource "kubernetes_config_map_v1_data" "amazon_vpc_cni_windows" { + depends_on = [ + module.fluent_common, + aws_eks_addon.eks_windows_addon + ] + metadata { + name = 
"amazon-vpc-cni" + namespace = "kube-system" + } + + force = true + + data = { + enable-windows-ipam : "true" + } +} + +## AWS CONFIGMAP + +resource "kubernetes_config_map" "configmap" { + data = { + "mapRoles" = <<EOT +- groups: + - system:bootstrappers + - system:nodes + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${module.fluent_common.node_role_name} + username: system:node:{{EC2PrivateDNSName}} +- groups: + - eks:kube-proxy-windows + - system:bootstrappers + - system:nodes + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${module.fluent_common.node_role_name} + username: system:node:{{EC2PrivateDNSName}} +- groups: + - system:masters + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/Admin-Windows + +EOT + } + + metadata { + name = "aws-auth" + namespace = "kube-system" + } +} + +# EKS Windows Node Groups +resource "aws_eks_node_group" "node_group_windows" { + cluster_name = module.fluent_common.cluster_name + node_group_name = "${local.cluster_name}-windows-node" + node_role_arn = module.fluent_common.node_role_arn + subnet_ids = module.basic_components.public_subnet_ids + + scaling_config { + desired_size = 1 + max_size = 1 + min_size = 1 + } + + ami_type = var.windows_os_version + capacity_type = "ON_DEMAND" + disk_size = 50 + instance_types = ["t3.large"] + + depends_on = [ + module.fluent_common + ] +} + +resource "null_resource" "kubectl" { + depends_on = [ + aws_eks_node_group.node_group_windows + ] + provisioner "local-exec" { + command = <<-EOT + ${local.aws_eks} update-kubeconfig --name ${module.fluent_common.cluster_name} + ${local.aws_eks} list-clusters --output text + ${local.aws_eks} describe-cluster --name ${module.fluent_common.cluster_name} --output text + EOT + } +} + +resource "kubernetes_config_map" "cluster_info" { + depends_on = [ + module.fluent_common + ] + metadata { + name = "fluent-bit-cluster-info" + namespace = "amazon-cloudwatch" + } + data 
= { + "cluster.name" = module.fluent_common.cluster_name + "logs.region" = var.region + "http.server" = "On" + "http.port" = "2020" + "read.head" = "Off" + "read.tail" = "On" + } +} + +resource "kubernetes_service_account" "fluentbit_service" { + metadata { + name = "fluent-bit" + namespace = "amazon-cloudwatch" + } +} + +resource "kubernetes_cluster_role" "fluentbit_clusterrole" { + metadata { + name = "fluent-bit-role" + } + rule { + non_resource_urls = ["/metrics"] + verbs = ["get"] + } + rule { + verbs = ["get", "list", "watch"] + resources = ["namespaces", "pods", "pods/logs", "nodes", "nodes/proxy"] + api_groups = [""] + } +} + +resource "kubernetes_cluster_role_binding" "fluentbit_rolebinding" { + depends_on = [ + kubernetes_service_account.fluentbit_service, + kubernetes_cluster_role.fluentbit_clusterrole, + ] + metadata { + name = "fluent-bit-role-binding" + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "fluent-bit-role" + } + subject { + kind = "ServiceAccount" + name = "fluent-bit" + namespace = "amazon-cloudwatch" + } +} + +resource "null_resource" "fluentbit-windows" { + depends_on = [ + module.fluent_common, + aws_eks_node_group.node_group_windows, + null_resource.kubectl + ] + + provisioner "local-exec" { + command = <<-EOT + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + ./kubectl apply -f ./../../default_resources/fluenbit-windows-configmap.yaml + ./kubectl apply -f ./../../default_resources/fluenbit-windows-daemonset.yaml + ./kubectl rollout status daemonset fluent-bit-windows -n amazon-cloudwatch --timeout 600s + ./kubectl apply -f ./../../default_resources/test-sample-windows.yaml + ./kubectl rollout status deployment windows-test-deployment --timeout 600s + sleep 120 + EOT + } +} + +resource "null_resource" "validator" { + depends_on = [ + module.fluent_common, + null_resource.fluentbit-windows, + 
kubernetes_cluster_role_binding.fluentbit_rolebinding + ] + provisioner "local-exec" { + command = <<-EOT + echo "Validating EKS fluentbit logs" + cd ../../../../.. + go test ${var.test_dir} -eksClusterName=${module.fluent_common.cluster_name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -instancePlatform=windows + EOT + } +} diff --git a/terraform/eks/daemon/fluent/windows/providers.tf b/terraform/eks/daemon/fluent/windows/providers.tf new file mode 100644 index 000000000..bf99b8f3d --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/providers.tf @@ -0,0 +1,17 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +provider "aws" { + region = var.region +} + +provider "kubernetes" { + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = ["eks", "get-token", "--cluster-name", module.fluent_common.cluster_name] + } + host = module.fluent_common.cluster_endpoint + cluster_ca_certificate = base64decode(module.fluent_common.cluster_cert) + token = module.fluent_common.cluster_auth_token +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/variables.tf b/terraform/eks/daemon/fluent/windows/variables.tf new file mode 100644 index 000000000..81d839cf3 --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/variables.tf @@ -0,0 +1,42 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_os_version" { + type = string + default = "WINDOWS_CORE_2022_x86_64" +} diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf new file mode 100644 index 000000000..6f563fa52 --- /dev/null +++ b/terraform/eks/daemon/windows/main.tf @@ -0,0 +1,560 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +module "common" { + source = "../../../common" + cwagent_image_repo = var.cwagent_image_repo + cwagent_image_tag = var.cwagent_image_tag +} + +module "basic_components" { + source = "../../../basic_components" + + region = var.region +} + +locals { + aws_eks = "aws eks --region ${var.region}" + cluster_name = "cwagent-eks-integ-${module.common.testing_id}" +} + +data "aws_caller_identity" "account_id" {} + +data "aws_eks_cluster" "eks_windows_cluster_ca" { + name = aws_eks_cluster.this.name +} + +output "account_id" { + value = data.aws_caller_identity.account_id.account_id +} + +data "aws_eks_cluster_auth" "this" { + name = aws_eks_cluster.this.name +} + +resource "aws_eks_cluster" "this" { + name = "cwagent-eks-integ-${module.common.testing_id}" + role_arn = module.basic_components.role_arn + version = var.k8s_version + enabled_cluster_log_types = [ + "api", + "audit", + "authenticator", + "controllerManager", + "scheduler" + ] + vpc_config { + subnet_ids = 
module.basic_components.public_subnet_ids + security_group_ids = [module.basic_components.security_group] + } +} + +## EKS Cluster Addon + +resource "aws_eks_addon" "eks_windows_addon" { + cluster_name = aws_eks_cluster.this.name + addon_name = "vpc-cni" +} + +## Enable VPC CNI Windows Support + +resource "kubernetes_config_map_v1_data" "amazon_vpc_cni_windows" { + depends_on = [ + aws_eks_cluster.this, + aws_eks_addon.eks_windows_addon + ] + metadata { + name = "amazon-vpc-cni" + namespace = "kube-system" + } + + force = true + + data = { + enable-windows-ipam : "true" + } +} + +## AWS CONFIGMAP + +resource "kubernetes_config_map" "configmap" { + data = { + "mapRoles" = <<EOT +- groups: + - system:bootstrappers + - system:nodes + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/cwagent-eks-Worker-Role-${module.common.testing_id} + username: system:node:{{EC2PrivateDNSName}} +- groups: + - eks:kube-proxy-windows + - system:bootstrappers + - system:nodes + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/cwagent-eks-Worker-Role-${module.common.testing_id} + username: system:node:{{EC2PrivateDNSName}} +- groups: + - system:masters + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/Admin-Windows + +EOT + } + + metadata { + name = "aws-auth" + namespace = "kube-system" + } +} + +# EKS Node Groups +resource "aws_eks_node_group" "this" { + cluster_name = aws_eks_cluster.this.name + node_group_name = "cwagent-eks-integ-node" + node_role_arn = aws_iam_role.node_role.arn + subnet_ids = module.basic_components.public_subnet_ids + + scaling_config { + desired_size = 1 + max_size = 1 + min_size = 1 + } + + ami_type = "AL2_x86_64" + capacity_type = "ON_DEMAND" + disk_size = 20 + instance_types = ["t3.medium"] + + depends_on = [ + aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly, + aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy, + 
aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy, + aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy, + ] +} + +# EKS Node IAM Role +resource "aws_iam_role" "node_role" { + name = "cwagent-eks-Worker-Role-${module.common.testing_id}" + assume_role_policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Principal = { + Service = "ec2.amazonaws.com" + }, + Action = "sts:AssumeRole" + } + ] + }) +} + +# EKS Windows Node Groups +resource "aws_eks_node_group" "node_group_windows" { + cluster_name = aws_eks_cluster.this.name + node_group_name = "${local.cluster_name}-windows-node" + node_role_arn = aws_iam_role.node_role.arn + subnet_ids = module.basic_components.public_subnet_ids + + scaling_config { + desired_size = 1 + max_size = 1 + min_size = 1 + } + + ami_type = var.windows_os_version + capacity_type = "ON_DEMAND" + disk_size = 50 + instance_types = ["t3.large"] + + depends_on = [ + aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy, + aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly, + aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy, + aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy + ] +} + + +resource "aws_iam_role_policy_attachment" "node_AmazonEKSWorkerNodePolicy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" "node_AmazonEKS_CNI_Policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" "node_AmazonEC2ContainerRegistryReadOnly" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" "node_CloudWatchAgentServerPolicy" { + policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" + role = aws_iam_role.node_role.name +} + +# 
TODO: these security groups should be created once and then reused +# EKS Cluster Security Group +resource "aws_security_group" "eks_cluster_sg" { + name = "cwagent-eks-cluster-sg-${module.common.testing_id}" + description = "Cluster communication with worker nodes" + vpc_id = module.basic_components.vpc_id +} + +resource "aws_security_group_rule" "cluster_inbound" { + description = "Allow worker nodes to communicate with the cluster API Server" + from_port = 443 + protocol = "tcp" + security_group_id = aws_security_group.eks_cluster_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 443 + type = "ingress" +} + +resource "aws_security_group_rule" "cluster_outbound" { + description = "Allow cluster API Server to communicate with the worker nodes" + from_port = 1024 + protocol = "tcp" + security_group_id = aws_security_group.eks_cluster_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 65535 + type = "egress" +} + + +# EKS Node Security Group +resource "aws_security_group" "eks_nodes_sg" { + name = "cwagent-eks-node-sg-${module.common.testing_id}" + description = "Security group for all nodes in the cluster" + vpc_id = module.basic_components.vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_security_group_rule" "nodes_internal" { + description = "Allow nodes to communicate with each other" + from_port = 0 + protocol = "-1" + security_group_id = aws_security_group.eks_nodes_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 65535 + type = "ingress" +} + +resource "aws_security_group_rule" "nodes_cluster_inbound" { + description = "Allow worker Kubelets and pods to receive communication from the cluster control plane" + from_port = 1025 + protocol = "tcp" + security_group_id = aws_security_group.eks_nodes_sg.id + source_security_group_id = aws_security_group.eks_cluster_sg.id + to_port = 65535 + type = "ingress" 
+} + +resource "kubernetes_namespace" "namespace" { + metadata { + name = "amazon-cloudwatch" + } +} + +# TODO: how do we support different deployment types? Should they be in separate terraform +# files, and spawn separate tests? +resource "kubernetes_daemonset" "service" { + depends_on = [ + kubernetes_namespace.namespace, + kubernetes_config_map.cwagentconfig, + kubernetes_service_account.cwagentservice, + aws_eks_node_group.this + ] + metadata { + name = "cloudwatch-agent" + namespace = "amazon-cloudwatch" + } + spec { + selector { + match_labels = { + "name" : "cloudwatch-agent" + } + } + template { + metadata { + labels = { + "name" : "cloudwatch-agent" + } + } + spec { + node_selector = { + "kubernetes.io/os" : "linux" + } + container { + name = "cwagent" + image = "${var.cwagent_image_repo}:${var.cwagent_image_tag}" + image_pull_policy = "Always" + resources { + limits = { + "cpu" : "200m", + "memory" : "200Mi" + } + requests = { + "cpu" : "200m", + "memory" : "200Mi" + } + } + env { + name = "HOST_IP" + value_from { + field_ref { + field_path = "status.hostIP" + } + } + } + env { + name = "HOST_NAME" + value_from { + field_ref { + field_path = "spec.nodeName" + } + } + } + env { + name = "K8S_NAMESPACE" + value_from { + field_ref { + field_path = "metadata.namespace" + } + } + } + volume_mount { + mount_path = "/etc/cwagentconfig" + name = "cwagentconfig" + } + volume_mount { + mount_path = "/rootfs" + name = "rootfs" + read_only = true + } + volume_mount { + mount_path = "/var/run/docker.sock" + name = "dockersock" + read_only = true + } + volume_mount { + mount_path = "/var/lib/docker" + name = "varlibdocker" + read_only = true + } + volume_mount { + mount_path = "/run/containerd/containerd.sock" + name = "containerdsock" + read_only = true + } + volume_mount { + mount_path = "/sys" + name = "sys" + read_only = true + } + volume_mount { + mount_path = "/dev/disk" + name = "devdisk" + read_only = true + } + } + volume { + name = "cwagentconfig" + 
config_map { + name = "cwagentconfig" + } + } + volume { + name = "rootfs" + host_path { + path = "/" + } + } + volume { + name = "dockersock" + host_path { + path = "/var/run/docker.sock" + } + } + volume { + name = "varlibdocker" + host_path { + path = "/var/lib/docker" + } + } + volume { + name = "containerdsock" + host_path { + path = "/run/containerd/containerd.sock" + } + } + volume { + name = "sys" + host_path { + path = "/sys" + } + } + volume { + name = "devdisk" + host_path { + path = "/dev/disk" + } + } + service_account_name = "cloudwatch-agent" + termination_grace_period_seconds = 60 + } + } + } +} + +resource "null_resource" "kubectl" { + depends_on = [ + aws_eks_cluster.this, + aws_eks_node_group.this, + aws_eks_node_group.node_group_windows + ] + provisioner "local-exec" { + command = <<-EOT + ${local.aws_eks} update-kubeconfig --name ${aws_eks_cluster.this.name} + ${local.aws_eks} list-clusters --output text + ${local.aws_eks} describe-cluster --name ${aws_eks_cluster.this.name} --output text + EOT + } +} + + +resource "null_resource" "windows-cwagent" { + depends_on = [ + kubernetes_namespace.namespace, + kubernetes_config_map.cwagentconfig, + kubernetes_service_account.cwagentservice, + aws_eks_node_group.node_group_windows, + null_resource.kubectl + ] + + provisioner "local-exec" { + command = <<-EOT + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + sed 's+CW_TEST_IMAGE+506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal:latest+' ./../default_resources/cwagent-windows.yaml | ./kubectl apply -f - + ./kubectl apply -f ./../default_resources/test-sample-windows.yaml + ./kubectl rollout status daemonset cloudwatch-agent-windows -n amazon-cloudwatch --timeout 600s + ./kubectl rollout status deployment windows-test-deployment --timeout 600s + EOT + } + +} + +########################################## +# Template Files 
+########################################## +locals { + cwagent_config = fileexists("../../../${var.test_dir}/resources/config.json") ? "../../../${var.test_dir}/resources/config.json" : "./../default_resources/default_amazon_cloudwatch_agent.json" +} + +data "template_file" "cwagent_config" { + template = file(local.cwagent_config) + vars = { + } +} + +resource "kubernetes_config_map" "cwagentconfig" { + depends_on = [ + kubernetes_namespace.namespace, + kubernetes_service_account.cwagentservice + ] + metadata { + name = "cwagentconfig" + namespace = "amazon-cloudwatch" + } + data = { + "cwagentconfig.json" : data.template_file.cwagent_config.rendered + } +} + +resource "kubernetes_service_account" "cwagentservice" { + depends_on = [kubernetes_namespace.namespace] + metadata { + name = "cloudwatch-agent" + namespace = "amazon-cloudwatch" + } +} + +resource "kubernetes_cluster_role" "clusterrole" { + depends_on = [kubernetes_namespace.namespace] + metadata { + name = "cloudwatch-agent-role" + } + rule { + verbs = ["list", "watch"] + resources = ["pods", "nodes", "endpoints"] + api_groups = [""] + } + rule { + verbs = ["list", "watch"] + resources = ["replicasets"] + api_groups = ["apps"] + } + rule { + verbs = ["list", "watch"] + resources = ["jobs"] + api_groups = ["batch"] + } + rule { + verbs = ["get"] + resources = ["nodes/proxy"] + api_groups = [""] + } + rule { + verbs = ["create", "get"] + resources = ["nodes/stats", "configmaps", "events"] + api_groups = [""] + } + rule { + verbs = ["get", "update"] + resource_names = ["cwagent-clusterleader"] + resources = ["configmaps"] + api_groups = [""] + } +} + +resource "kubernetes_cluster_role_binding" "rolebinding" { + depends_on = [kubernetes_namespace.namespace] + metadata { + name = "cloudwatch-agent-role-binding" + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "cloudwatch-agent-role" + } + subject { + kind = "ServiceAccount" + name = "cloudwatch-agent" + namespace = 
"amazon-cloudwatch" + } +} + +resource "null_resource" "validator" { + depends_on = [ + aws_eks_node_group.this, + kubernetes_daemonset.service, + kubernetes_cluster_role_binding.rolebinding, + kubernetes_service_account.cwagentservice, + null_resource.windows-cwagent + ] + provisioner "local-exec" { + command = <<-EOT + echo "Validating EKS metrics/logs" + cd ../../../.. + go test ${var.test_dir} -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON + EOT + } +} diff --git a/terraform/eks/daemon/windows/providers.tf b/terraform/eks/daemon/windows/providers.tf new file mode 100644 index 000000000..9bd2885f5 --- /dev/null +++ b/terraform/eks/daemon/windows/providers.tf @@ -0,0 +1,17 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +provider "aws" { + region = var.region +} + +provider "kubernetes" { + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = ["eks", "get-token", "--cluster-name", aws_eks_cluster.this.name] + } + host = aws_eks_cluster.this.endpoint + cluster_ca_certificate = base64decode(aws_eks_cluster.this.certificate_authority.0.data) + token = data.aws_eks_cluster_auth.this.token +} \ No newline at end of file diff --git a/terraform/eks/daemon/windows/variables.tf b/terraform/eks/daemon/windows/variables.tf new file mode 100644 index 000000000..a7509c12a --- /dev/null +++ b/terraform/eks/daemon/windows/variables.tf @@ -0,0 +1,47 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_windows_image_repo" { + type = string + default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_os_version" { + type = string + default = "WINDOWS_CORE_2022_x86_64" +} diff --git a/test/fluent/fluent_test.go b/test/fluent/fluent_test.go index ab42d2c13..86e8969ab 100644 --- a/test/fluent/fluent_test.go +++ b/test/fluent/fluent_test.go @@ -35,6 +35,19 @@ var logGroupToKey = map[string][][]string{ }, } +// fluent log group with expected log message fields on Windows node. 
+var logGroupToKeyWindows = map[string][][]string{ + "dataplane": { + {"log", "file_name"}, + }, + "host": { + {"SourceName", "Message", "ComputerName"}, + }, + "application": { + {"log"}, + }, +} + func init() { environment.RegisterEnvironmentMetaDataFlags() } @@ -43,6 +56,10 @@ func TestFluentLogs(t *testing.T) { t.Log("starting EKS fluent log validation...") env := environment.GetEnvironmentMetaData() + if env.InstancePlatform == "windows" { + logGroupToKey = logGroupToKeyWindows + } + now := time.Now() for group, fieldsArr := range logGroupToKey { group = fmt.Sprintf("/aws/containerinsights/%s/%s", env.EKSClusterName, group) From 47338684409768f1c50d09eec31b8091eb7e0c63 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Tue, 20 Feb 2024 11:43:11 -0800 Subject: [PATCH 02/13] Fix CW agent image and test generator --- generator/test_case_generator.go | 21 +++++++++++++++++++++ terraform/eks/daemon/windows/main.tf | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index f26b6f4c9..5e4eff296 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -182,6 +182,27 @@ var testTypeToTestConfig = map[string][]testConfig{ testDir: "./test/metric_value_benchmark", targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, }, + { + testDir: "./test/metric_value_benchmark", + terraformDir: "terraform/eks/daemon/windows", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/statsd", terraformDir: "terraform/eks/daemon/statsd", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/emf", terraformDir: "terraform/eks/daemon/emf", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/d", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + {testDir: 
"./test/fluent", terraformDir: "terraform/eks/daemon/fluent/bit"}, + {testDir: "./test/app_signals", terraformDir: "terraform/eks/daemon/app_signals", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/windows"}, }, "eks_deployment": { diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf index 6f563fa52..045d5d34c 100644 --- a/terraform/eks/daemon/windows/main.tf +++ b/terraform/eks/daemon/windows/main.tf @@ -443,7 +443,7 @@ resource "null_resource" "windows-cwagent" { command = <<-EOT curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" chmod +x kubectl - sed 's+CW_TEST_IMAGE+506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal:latest+' ./../default_resources/cwagent-windows.yaml | ./kubectl apply -f - + sed 's+CW_TEST_IMAGE+${var.cwagent_image_repo}:${var.cwagent_image_tag}+' ./../default_resources/cwagent-windows.yaml | ./kubectl apply -f - ./kubectl apply -f ./../default_resources/test-sample-windows.yaml ./kubectl rollout status daemonset cloudwatch-agent-windows -n amazon-cloudwatch --timeout 600s ./kubectl rollout status deployment windows-test-deployment --timeout 600s From 78c6d0d790c77c88e8f5268dc2d64455a4355bba Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Fri, 23 Feb 2024 10:15:52 -0800 Subject: [PATCH 03/13] Fix formatting in terraform templates --- terraform/eks/daemon/fluent/windows/main.tf | 2 +- terraform/eks/daemon/windows/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/eks/daemon/fluent/windows/main.tf b/terraform/eks/daemon/fluent/windows/main.tf index fa86c47c6..32fd56f9d 100644 --- a/terraform/eks/daemon/fluent/windows/main.tf +++ b/terraform/eks/daemon/fluent/windows/main.tf @@ -14,7 +14,7 @@ module "basic_components" { } locals { - aws_eks = "aws eks --region ${var.region}" + 
aws_eks = "aws eks --region ${var.region}" cluster_name = module.fluent_common.cluster_name } diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf index 045d5d34c..15426f824 100644 --- a/terraform/eks/daemon/windows/main.tf +++ b/terraform/eks/daemon/windows/main.tf @@ -14,7 +14,7 @@ module "basic_components" { } locals { - aws_eks = "aws eks --region ${var.region}" + aws_eks = "aws eks --region ${var.region}" cluster_name = "cwagent-eks-integ-${module.common.testing_id}" } From 550ddad5b1caed5d5bdf4287d6612ef21876da8c Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Fri, 23 Feb 2024 12:31:36 -0800 Subject: [PATCH 04/13] Removed temporary link to cw agent container image --- terraform/eks/daemon/windows/variables.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/terraform/eks/daemon/windows/variables.tf b/terraform/eks/daemon/windows/variables.tf index a7509c12a..81d839cf3 100644 --- a/terraform/eks/daemon/windows/variables.tf +++ b/terraform/eks/daemon/windows/variables.tf @@ -16,11 +16,6 @@ variable "cwagent_image_repo" { default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" } -variable "cwagent_windows_image_repo" { - type = string - default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" -} - variable "cwagent_image_tag" { type = string default = "latest" From bdc185fdfe00d3187bd9de1194f4783afe719ada Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Wed, 28 Feb 2024 12:53:15 -0800 Subject: [PATCH 05/13] Add terraform templates for both 2019 and 2022 --- generator/test_case_generator.go | 9 ++- .../fluenbit-windows-configmap.yaml | 5 +- .../test-sample-windows.yaml | 6 +- .../eks/daemon/fluent/windows/2019/main.tf | 12 ++++ .../daemon/fluent/windows/2019/variables.tf | 47 ++++++++++++++++ .../eks/daemon/fluent/windows/2022/main.tf | 12 ++++ .../daemon/fluent/windows/2022/variables.tf | 47 ++++++++++++++++ 
terraform/eks/daemon/fluent/windows/main.tf | 12 ++-- .../eks/daemon/fluent/windows/variables.tf | 7 ++- terraform/eks/daemon/windows/2019/main.tf | 12 ++++ .../eks/daemon/windows/2019/variables.tf | 55 +++++++++++++++++++ terraform/eks/daemon/windows/2022/main.tf | 12 ++++ .../eks/daemon/windows/2022/variables.tf | 55 +++++++++++++++++++ terraform/eks/daemon/windows/main.tf | 8 +-- terraform/eks/daemon/windows/variables.tf | 7 ++- 15 files changed, 287 insertions(+), 19 deletions(-) create mode 100644 terraform/eks/daemon/fluent/windows/2019/main.tf create mode 100644 terraform/eks/daemon/fluent/windows/2019/variables.tf create mode 100644 terraform/eks/daemon/fluent/windows/2022/main.tf create mode 100644 terraform/eks/daemon/fluent/windows/2022/variables.tf create mode 100644 terraform/eks/daemon/windows/2019/main.tf create mode 100644 terraform/eks/daemon/windows/2019/variables.tf create mode 100644 terraform/eks/daemon/windows/2022/main.tf create mode 100644 terraform/eks/daemon/windows/2022/variables.tf diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index 5e4eff296..0431f2996 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -184,7 +184,12 @@ var testTypeToTestConfig = map[string][]testConfig{ }, { testDir: "./test/metric_value_benchmark", - terraformDir: "terraform/eks/daemon/windows", + terraformDir: "terraform/eks/daemon/windows/2019", + targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, + }, + { + testDir: "./test/metric_value_benchmark", + terraformDir: "terraform/eks/daemon/windows/2022", targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, }, { @@ -203,7 +208,7 @@ var testTypeToTestConfig = map[string][]testConfig{ {testDir: "./test/app_signals", terraformDir: "terraform/eks/daemon/app_signals", targets: map[string]map[string]struct{}{"arc": {"amd64": {}}}, }, - {testDir: "./test/fluent", terraformDir: "terraform/eks/daemon/fluent/windows"}, + {testDir: 
"./test/fluent", terraformDir: "terraform/eks/daemon/fluent/windows/2022"}, }, "eks_deployment": { {testDir: "./test/metric_value_benchmark"}, diff --git a/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml index 0a0ebbd29..012da153f 100644 --- a/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml +++ b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml @@ -9,7 +9,7 @@ data: fluent-bit.conf: | [SERVICE] Flush 5 - Log_Level info + Log_Level error Daemon off net.dns.resolver LEGACY Parsers_File parsers.conf @@ -26,9 +26,10 @@ data: Path C:\\var\\log\\containers\\*.log Parser docker DB C:\\var\\fluent-bit\\state\\flb_container.db - Mem_Buf_Limit 5MB + Mem_Buf_Limit 50MB Skip_Long_Lines On Rotate_Wait 30 + Refresh_Interval 10 Read_from_Head ${READ_FROM_HEAD} [INPUT] diff --git a/terraform/eks/daemon/default_resources/test-sample-windows.yaml b/terraform/eks/daemon/default_resources/test-sample-windows.yaml index 843ff7d7b..e685173fe 100644 --- a/terraform/eks/daemon/default_resources/test-sample-windows.yaml +++ b/terraform/eks/daemon/default_resources/test-sample-windows.yaml @@ -8,7 +8,7 @@ spec: app: windows-test-deployment tier: backend track: stable - replicas: 1 + replicas: REPLICAS template: metadata: labels: @@ -18,11 +18,11 @@ spec: spec: containers: - name: windows-test-deployment - image: mcr.microsoft.com/windows/servercore/iis:windowsservercore-ltsc2022 + image: mcr.microsoft.com/windows/servercore/iis:windowsservercore-ltscWINDOWS_SERVER_VERSION command: - powershell.exe - -command - - " ping -t google.com " + - "Add-WindowsFeature Web-Server; Invoke-WebRequest -UseBasicParsing -Uri 'https://dotnetbinaries.blob.core.windows.net/servicemonitor/2.0.1.6/ServiceMonitor.exe' -OutFile 'C:\\ServiceMonitor.exe'; echo '<html><body><br/><br/><H1>Windows Container Workshop - Windows LTSC2019!!!<H1></body><html>' > 
C:\\inetpub\\wwwroot\\iisstart.htm; C:\\ServiceMonitor.exe 'w3svc'; " nodeSelector: kubernetes.io/os: windows diff --git a/terraform/eks/daemon/fluent/windows/2019/main.tf b/terraform/eks/daemon/fluent/windows/2019/main.tf new file mode 100644 index 000000000..df3870f36 --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/2019/main.tf @@ -0,0 +1,12 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +module "windows" { + source = "../" + windows_ami_type = var.windows_ami_type + windows_os_version = var.windows_os_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/2019/variables.tf b/terraform/eks/daemon/fluent/windows/2019/variables.tf new file mode 100644 index 000000000..e4625b7aa --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/2019/variables.tf @@ -0,0 +1,47 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_ami_type" { + type = string + default = "WINDOWS_CORE_2019_x86_64" +} + +variable "windows_os_version" { + type = string + default = "2019" +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/2022/main.tf b/terraform/eks/daemon/fluent/windows/2022/main.tf new file mode 100644 index 000000000..df3870f36 --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/2022/main.tf @@ -0,0 +1,12 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +module "windows" { + source = "../" + windows_ami_type = var.windows_ami_type + windows_os_version = var.windows_os_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/2022/variables.tf b/terraform/eks/daemon/fluent/windows/2022/variables.tf new file mode 100644 index 000000000..46ecb09b4 --- /dev/null +++ b/terraform/eks/daemon/fluent/windows/2022/variables.tf @@ -0,0 +1,47 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_ami_type" { + type = string + default = "WINDOWS_CORE_2022_x86_64" +} + +variable "windows_os_version" { + type = string + default = "2022" +} \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/main.tf b/terraform/eks/daemon/fluent/windows/main.tf index 32fd56f9d..9c48c2580 100644 --- a/terraform/eks/daemon/fluent/windows/main.tf +++ b/terraform/eks/daemon/fluent/windows/main.tf @@ -100,7 +100,7 @@ resource "aws_eks_node_group" "node_group_windows" { min_size = 1 } - ami_type = var.windows_os_version + ami_type = var.windows_ami_type capacity_type = "ON_DEMAND" disk_size = 50 instance_types = ["t3.large"] @@ -134,7 +134,7 @@ resource "kubernetes_config_map" "cluster_info" { data = { "cluster.name" = module.fluent_common.cluster_name "logs.region" = var.region - "http.server" = "On" + "http.server" = "Off" "http.port" = "2020" "read.head" = "Off" "read.tail" = "On" @@ -194,10 +194,10 @@ resource "null_resource" "fluentbit-windows" { command = <<-EOT curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" chmod +x kubectl - ./kubectl apply -f ./../../default_resources/fluenbit-windows-configmap.yaml - ./kubectl apply -f ./../../default_resources/fluenbit-windows-daemonset.yaml + ./kubectl apply -f ./../../../default_resources/fluenbit-windows-configmap.yaml + ./kubectl apply -f 
./../../../default_resources/fluenbit-windows-daemonset.yaml ./kubectl rollout status daemonset fluent-bit-windows -n amazon-cloudwatch --timeout 600s - ./kubectl apply -f ./../../default_resources/test-sample-windows.yaml + sed -e 's+WINDOWS_SERVER_VERSION+${var.windows_os_version}+' -e 's+REPLICAS+1+' ./../../../default_resources/test-sample-windows.yaml | ./kubectl apply -f - ./kubectl rollout status deployment windows-test-deployment --timeout 600s sleep 120 EOT @@ -213,7 +213,7 @@ resource "null_resource" "validator" { provisioner "local-exec" { command = <<-EOT echo "Validating EKS fluentbit logs" - cd ../../../../.. + cd ../../../../../.. go test ${var.test_dir} -eksClusterName=${module.fluent_common.cluster_name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -instancePlatform=windows EOT } diff --git a/terraform/eks/daemon/fluent/windows/variables.tf b/terraform/eks/daemon/fluent/windows/variables.tf index 81d839cf3..f5d2d1cfc 100644 --- a/terraform/eks/daemon/fluent/windows/variables.tf +++ b/terraform/eks/daemon/fluent/windows/variables.tf @@ -36,7 +36,12 @@ variable "instance_type" { default = "t3a.medium" } -variable "windows_os_version" { +variable "windows_ami_type" { type = string default = "WINDOWS_CORE_2022_x86_64" } + +variable "windows_os_version" { + type = string + default = "2022" +} diff --git a/terraform/eks/daemon/windows/2019/main.tf b/terraform/eks/daemon/windows/2019/main.tf new file mode 100644 index 000000000..df3870f36 --- /dev/null +++ b/terraform/eks/daemon/windows/2019/main.tf @@ -0,0 +1,12 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +module "windows" { + source = "../" + windows_ami_type = var.windows_ami_type + windows_os_version = var.windows_os_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version +} \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2019/variables.tf b/terraform/eks/daemon/windows/2019/variables.tf new file mode 100644 index 000000000..3f5d7fb1b --- /dev/null +++ b/terraform/eks/daemon/windows/2019/variables.tf @@ -0,0 +1,55 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_windows_image_repo" { + type = string + default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_ami_type" { + type = string + default = "WINDOWS_CORE_2019_x86_64" +} + +variable "windows_os_version" { + type = string + default = "2019" +} diff --git a/terraform/eks/daemon/windows/2022/main.tf b/terraform/eks/daemon/windows/2022/main.tf new file mode 100644 index 000000000..df3870f36 --- /dev/null +++ b/terraform/eks/daemon/windows/2022/main.tf @@ -0,0 +1,12 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT + +module "windows" { + source = "../" + windows_ami_type = var.windows_ami_type + windows_os_version = var.windows_os_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version +} \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2022/variables.tf b/terraform/eks/daemon/windows/2022/variables.tf new file mode 100644 index 000000000..8dd73da1e --- /dev/null +++ b/terraform/eks/daemon/windows/2022/variables.tf @@ -0,0 +1,55 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "test_dir" { + type = string + default = "./test/metric_value_benchmark" +} + +variable "cwagent_image_repo" { + type = string + default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" +} + +variable "cwagent_windows_image_repo" { + type = string + default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" +} + +variable "cwagent_image_tag" { + type = string + default = "latest" +} + +variable "k8s_version" { + type = string + default = "1.24" +} + +variable "ami_type" { + type = string + default = "AL2_x86_64" +} + +variable "instance_type" { + type = string + default = "t3a.medium" +} + +variable "windows_ami_type" { + type = string + default = "WINDOWS_CORE_2022_x86_64" +} + +variable "windows_os_version" { + type = string + default = "2022" +} diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf index 15426f824..4c6211b01 100644 --- a/terraform/eks/daemon/windows/main.tf +++ b/terraform/eks/daemon/windows/main.tf @@ -160,7 +160,7 @@ resource "aws_eks_node_group" "node_group_windows" { min_size = 1 } - ami_type = var.windows_os_version + ami_type = var.windows_ami_type 
capacity_type = "ON_DEMAND" disk_size = 50 instance_types = ["t3.large"] @@ -444,7 +444,7 @@ resource "null_resource" "windows-cwagent" { curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" chmod +x kubectl sed 's+CW_TEST_IMAGE+${var.cwagent_image_repo}:${var.cwagent_image_tag}+' ./../default_resources/cwagent-windows.yaml | ./kubectl apply -f - - ./kubectl apply -f ./../default_resources/test-sample-windows.yaml + sed -e 's+WINDOWS_SERVER_VERSION+${var.windows_os_version}+' -e 's+REPLICAS+1+' ./../../default_resources/test-sample-windows.yaml | ./kubectl apply -f - ./kubectl rollout status daemonset cloudwatch-agent-windows -n amazon-cloudwatch --timeout 600s ./kubectl rollout status deployment windows-test-deployment --timeout 600s EOT @@ -456,7 +456,7 @@ resource "null_resource" "windows-cwagent" { # Template Files ########################################## locals { - cwagent_config = fileexists("../../../${var.test_dir}/resources/config.json") ? "../../../${var.test_dir}/resources/config.json" : "./../default_resources/default_amazon_cloudwatch_agent.json" + cwagent_config = fileexists("../../../${var.test_dir}/resources/config.json") ? "../../../${var.test_dir}/resources/config.json" : "./../../default_resources/default_amazon_cloudwatch_agent.json" } data "template_file" "cwagent_config" { @@ -553,7 +553,7 @@ resource "null_resource" "validator" { provisioner "local-exec" { command = <<-EOT echo "Validating EKS metrics/logs" - cd ../../../.. + cd ../../../../.. 
go test ${var.test_dir} -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON EOT } diff --git a/terraform/eks/daemon/windows/variables.tf b/terraform/eks/daemon/windows/variables.tf index 81d839cf3..f5d2d1cfc 100644 --- a/terraform/eks/daemon/windows/variables.tf +++ b/terraform/eks/daemon/windows/variables.tf @@ -36,7 +36,12 @@ variable "instance_type" { default = "t3a.medium" } -variable "windows_os_version" { +variable "windows_ami_type" { type = string default = "WINDOWS_CORE_2022_x86_64" } + +variable "windows_os_version" { + type = string + default = "2022" +} From bcc13b3004df0983c16c9a13d4183afd35e2dd9c Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Wed, 28 Feb 2024 13:18:33 -0800 Subject: [PATCH 06/13] Lint formatting --- terraform/eks/daemon/fluent/windows/2019/main.tf | 12 ++++++------ terraform/eks/daemon/fluent/windows/2022/main.tf | 12 ++++++------ terraform/eks/daemon/windows/2019/main.tf | 12 ++++++------ terraform/eks/daemon/windows/2022/main.tf | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/terraform/eks/daemon/fluent/windows/2019/main.tf b/terraform/eks/daemon/fluent/windows/2019/main.tf index df3870f36..85422989c 100644 --- a/terraform/eks/daemon/fluent/windows/2019/main.tf +++ b/terraform/eks/daemon/fluent/windows/2019/main.tf @@ -2,11 +2,11 @@ // SPDX-License-Identifier: MIT module "windows" { - source = "../" - windows_ami_type = var.windows_ami_type + source = "../" + windows_ami_type = var.windows_ami_type windows_os_version = var.windows_os_version - test_dir = var.test_dir - ami_type = var.ami_type - instance_type = var.instance_type - k8s_version = var.k8s_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version } \ No newline at end of file diff --git a/terraform/eks/daemon/fluent/windows/2022/main.tf b/terraform/eks/daemon/fluent/windows/2022/main.tf index 
df3870f36..85422989c 100644 --- a/terraform/eks/daemon/fluent/windows/2022/main.tf +++ b/terraform/eks/daemon/fluent/windows/2022/main.tf @@ -2,11 +2,11 @@ // SPDX-License-Identifier: MIT module "windows" { - source = "../" - windows_ami_type = var.windows_ami_type + source = "../" + windows_ami_type = var.windows_ami_type windows_os_version = var.windows_os_version - test_dir = var.test_dir - ami_type = var.ami_type - instance_type = var.instance_type - k8s_version = var.k8s_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version } \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2019/main.tf b/terraform/eks/daemon/windows/2019/main.tf index df3870f36..85422989c 100644 --- a/terraform/eks/daemon/windows/2019/main.tf +++ b/terraform/eks/daemon/windows/2019/main.tf @@ -2,11 +2,11 @@ // SPDX-License-Identifier: MIT module "windows" { - source = "../" - windows_ami_type = var.windows_ami_type + source = "../" + windows_ami_type = var.windows_ami_type windows_os_version = var.windows_os_version - test_dir = var.test_dir - ami_type = var.ami_type - instance_type = var.instance_type - k8s_version = var.k8s_version + test_dir = var.test_dir + ami_type = var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version } \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2022/main.tf b/terraform/eks/daemon/windows/2022/main.tf index df3870f36..85422989c 100644 --- a/terraform/eks/daemon/windows/2022/main.tf +++ b/terraform/eks/daemon/windows/2022/main.tf @@ -2,11 +2,11 @@ // SPDX-License-Identifier: MIT module "windows" { - source = "../" - windows_ami_type = var.windows_ami_type + source = "../" + windows_ami_type = var.windows_ami_type windows_os_version = var.windows_os_version - test_dir = var.test_dir - ami_type = var.ami_type - instance_type = var.instance_type - k8s_version = var.k8s_version + test_dir = var.test_dir + ami_type = 
var.ami_type + instance_type = var.instance_type + k8s_version = var.k8s_version } \ No newline at end of file From 4f78f52b3d87889befac75d04f76b30b7407bc85 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Sun, 3 Mar 2024 14:08:21 -0800 Subject: [PATCH 07/13] Remove test cw repo 1. Add CW agent repo arg to terraform --- terraform/eks/daemon/windows/2019/main.tf | 2 ++ terraform/eks/daemon/windows/2019/variables.tf | 5 ----- terraform/eks/daemon/windows/2022/main.tf | 2 ++ terraform/eks/daemon/windows/2022/variables.tf | 5 ----- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/terraform/eks/daemon/windows/2019/main.tf b/terraform/eks/daemon/windows/2019/main.tf index 85422989c..f3cefa6ff 100644 --- a/terraform/eks/daemon/windows/2019/main.tf +++ b/terraform/eks/daemon/windows/2019/main.tf @@ -9,4 +9,6 @@ module "windows" { ami_type = var.ami_type instance_type = var.instance_type k8s_version = var.k8s_version + cwagent_image_repo = var.cwagent_image_repo + cwagent_image_tag = var.cwagent_image_tag } \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2019/variables.tf b/terraform/eks/daemon/windows/2019/variables.tf index 3f5d7fb1b..e2326185f 100644 --- a/terraform/eks/daemon/windows/2019/variables.tf +++ b/terraform/eks/daemon/windows/2019/variables.tf @@ -19,11 +19,6 @@ variable "cwagent_image_repo" { default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" } -variable "cwagent_windows_image_repo" { - type = string - default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" -} - variable "cwagent_image_tag" { type = string default = "latest" diff --git a/terraform/eks/daemon/windows/2022/main.tf b/terraform/eks/daemon/windows/2022/main.tf index 85422989c..f3cefa6ff 100644 --- a/terraform/eks/daemon/windows/2022/main.tf +++ b/terraform/eks/daemon/windows/2022/main.tf @@ -9,4 +9,6 @@ module "windows" { ami_type = var.ami_type instance_type = var.instance_type k8s_version = 
var.k8s_version + cwagent_image_repo = var.cwagent_image_repo + cwagent_image_tag = var.cwagent_image_tag } \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2022/variables.tf b/terraform/eks/daemon/windows/2022/variables.tf index 8dd73da1e..d6531d3b7 100644 --- a/terraform/eks/daemon/windows/2022/variables.tf +++ b/terraform/eks/daemon/windows/2022/variables.tf @@ -19,11 +19,6 @@ variable "cwagent_image_repo" { default = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent" } -variable "cwagent_windows_image_repo" { - type = string - default = "506463145083.dkr.ecr.us-west-2.amazonaws.com/windows-container-internal" -} - variable "cwagent_image_tag" { type = string default = "latest" From 0b9a30d59e6ef90422fc03caed0f7857bf1c4128 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Sun, 3 Mar 2024 16:35:17 -0800 Subject: [PATCH 08/13] Fix lint issues --- terraform/eks/daemon/windows/2019/main.tf | 2 +- terraform/eks/daemon/windows/2022/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/eks/daemon/windows/2019/main.tf b/terraform/eks/daemon/windows/2019/main.tf index f3cefa6ff..38ee4dcb3 100644 --- a/terraform/eks/daemon/windows/2019/main.tf +++ b/terraform/eks/daemon/windows/2019/main.tf @@ -10,5 +10,5 @@ module "windows" { instance_type = var.instance_type k8s_version = var.k8s_version cwagent_image_repo = var.cwagent_image_repo - cwagent_image_tag = var.cwagent_image_tag + cwagent_image_tag = var.cwagent_image_tag } \ No newline at end of file diff --git a/terraform/eks/daemon/windows/2022/main.tf b/terraform/eks/daemon/windows/2022/main.tf index f3cefa6ff..38ee4dcb3 100644 --- a/terraform/eks/daemon/windows/2022/main.tf +++ b/terraform/eks/daemon/windows/2022/main.tf @@ -10,5 +10,5 @@ module "windows" { instance_type = var.instance_type k8s_version = var.k8s_version cwagent_image_repo = var.cwagent_image_repo - cwagent_image_tag = var.cwagent_image_tag + cwagent_image_tag = 
var.cwagent_image_tag } \ No newline at end of file From 0b2721018bcf14e94491531404e1c4884162e0a9 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Wed, 6 Mar 2024 04:48:08 -0800 Subject: [PATCH 09/13] fix path to CW agent on Windows --- terraform/eks/daemon/windows/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf index 4c6211b01..f2f215333 100644 --- a/terraform/eks/daemon/windows/main.tf +++ b/terraform/eks/daemon/windows/main.tf @@ -443,7 +443,7 @@ resource "null_resource" "windows-cwagent" { command = <<-EOT curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" chmod +x kubectl - sed 's+CW_TEST_IMAGE+${var.cwagent_image_repo}:${var.cwagent_image_tag}+' ./../default_resources/cwagent-windows.yaml | ./kubectl apply -f - + sed 's+CW_TEST_IMAGE+${var.cwagent_image_repo}:${var.cwagent_image_tag}+' ./../../default_resources/cwagent-windows.yaml | ./kubectl apply -f - sed -e 's+WINDOWS_SERVER_VERSION+${var.windows_os_version}+' -e 's+REPLICAS+1+' ./../../default_resources/test-sample-windows.yaml | ./kubectl apply -f - ./kubectl rollout status daemonset cloudwatch-agent-windows -n amazon-cloudwatch --timeout 600s ./kubectl rollout status deployment windows-test-deployment --timeout 600s From 6c94a72edfd7609a42bd551fef95da64408f80da Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Wed, 6 Mar 2024 07:45:43 -0800 Subject: [PATCH 10/13] Fix CW agent for validating individual metric in dimensions --- terraform/eks/daemon/windows/main.tf | 2 +- .../eks_resources/util.go | 394 +++++++++--------- 2 files changed, 203 insertions(+), 193 deletions(-) diff --git a/terraform/eks/daemon/windows/main.tf b/terraform/eks/daemon/windows/main.tf index f2f215333..1b9a4ab8d 100644 --- a/terraform/eks/daemon/windows/main.tf +++ b/terraform/eks/daemon/windows/main.tf @@ -554,7 +554,7 @@ 
resource "null_resource" "validator" { command = <<-EOT echo "Validating EKS metrics/logs" cd ../../../../.. - go test ${var.test_dir} -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON + go test ${var.test_dir} -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -instancePlatform=windows EOT } } diff --git a/test/metric_value_benchmark/eks_resources/util.go b/test/metric_value_benchmark/eks_resources/util.go index f1a480665..990080817 100644 --- a/test/metric_value_benchmark/eks_resources/util.go +++ b/test/metric_value_benchmark/eks_resources/util.go @@ -5,6 +5,7 @@ package eks_resources import ( _ "embed" + "github.com/aws/amazon-cloudwatch-agent-test/environment" ) var ( @@ -55,198 +56,207 @@ var ( } ) -// Hard coded map which lists the expected metrics in each dimension set -var ExpectedDimsToMetrics = map[string][]string{ - "ClusterName": { - "pod_number_of_containers", - "node_status_allocatable_pods", - "pod_number_of_container_restarts", - "node_status_condition_unknown", - "node_number_of_running_pods", - "pod_container_status_running", - "node_status_condition_ready", - "pod_status_running", - "node_filesystem_utilization", - "pod_container_status_terminated", - "pod_status_pending", - "pod_cpu_utilization", - "node_filesystem_inodes", - "node_diskio_io_service_bytes_total", - "node_status_condition_memory_pressure", - "container_cpu_utilization", - "service_number_of_running_pods", - "pod_memory_utilization_over_pod_limit", - "node_memory_limit", - "pod_cpu_request", - "pod_interface_network_tx_dropped", - "pod_status_succeeded", - "namespace_number_of_running_pods", - "pod_memory_reserved_capacity", - "node_diskio_io_serviced_total", - "pod_network_rx_bytes", - "node_status_capacity_pods", - "pod_status_unknown", - "cluster_failed_node_count", - "container_memory_utilization", - "node_memory_utilization", - "node_filesystem_inodes_free", - "container_memory_request", 
- "container_cpu_limit", - "node_memory_reserved_capacity", - "node_interface_network_tx_dropped", - "pod_cpu_utilization_over_pod_limit", - "container_memory_failures_total", - "pod_status_ready", - "pod_number_of_running_containers", - "cluster_node_count", - "pod_memory_request", - "node_cpu_utilization", - "cluster_number_of_running_pods", - "node_memory_working_set", - "pod_status_failed", - "node_status_condition_pid_pressure", - "pod_status_scheduled", - "node_number_of_running_containers", - "node_cpu_limit", - "node_status_condition_disk_pressure", - "pod_cpu_limit", - "pod_memory_limit", - "node_cpu_usage_total", - "pod_cpu_reserved_capacity", - "pod_network_tx_bytes", - "container_memory_limit", - "pod_memory_utilization", - "node_interface_network_rx_dropped", - "node_network_total_bytes", - "container_cpu_utilization_over_container_limit", - "pod_interface_network_rx_dropped", - "pod_container_status_waiting", - "node_cpu_reserved_capacity", - "container_memory_utilization_over_container_limit", - "container_cpu_request", - }, - "ClusterName-FullPodName-Namespace-PodName": { - "pod_network_tx_bytes", - "pod_interface_network_rx_dropped", - "pod_cpu_limit", - "pod_status_succeeded", - "pod_container_status_waiting", - "pod_number_of_running_containers", - "pod_number_of_container_restarts", - "pod_status_pending", - "pod_status_running", - "pod_container_status_running", - "pod_memory_limit", - "pod_status_unknown", - "pod_memory_utilization_over_pod_limit", - "pod_cpu_request", - "pod_status_scheduled", - "pod_memory_utilization", - "pod_status_failed", - "pod_network_rx_bytes", - "pod_number_of_containers", - "pod_cpu_utilization", - "pod_memory_reserved_capacity", - "pod_status_ready", - "pod_container_status_terminated", - "pod_interface_network_tx_dropped", - "pod_memory_request", - "pod_cpu_reserved_capacity", - "pod_cpu_utilization_over_pod_limit", - }, - "ClusterName-Namespace-PodName": { - "pod_interface_network_rx_dropped", - 
"pod_status_succeeded", - "pod_container_status_running", - "pod_network_rx_bytes", - "pod_cpu_utilization", - "pod_memory_utilization", - "pod_interface_network_tx_dropped", - "pod_status_ready", - "pod_container_status_terminated", - "pod_cpu_reserved_capacity", - "pod_memory_request", - "pod_status_running", - "pod_status_pending", - "pod_number_of_containers", - "pod_memory_utilization_over_pod_limit", - "pod_status_unknown", - "pod_cpu_limit", - "pod_container_status_waiting", - "pod_memory_reserved_capacity", - "pod_network_tx_bytes", - "pod_status_failed", - "pod_number_of_running_containers", - "pod_number_of_container_restarts", - "pod_cpu_request", - "pod_cpu_utilization_over_pod_limit", - "pod_status_scheduled", - "pod_memory_limit", - }, +func GetExpectedDimsToMetrics(env *environment.MetaData) map[string][]string { + // Hard coded map which lists the expected metrics in each dimension set + var ExpectedDimsToMetrics = map[string][]string{ + "ClusterName": { + "pod_number_of_containers", + "node_status_allocatable_pods", + "pod_number_of_container_restarts", + "node_status_condition_unknown", + "node_number_of_running_pods", + "pod_container_status_running", + "node_status_condition_ready", + "pod_status_running", + "node_filesystem_utilization", + "pod_container_status_terminated", + "pod_status_pending", + "pod_cpu_utilization", + "node_filesystem_inodes", + "node_diskio_io_service_bytes_total", + "node_status_condition_memory_pressure", + "container_cpu_utilization", + "service_number_of_running_pods", + "pod_memory_utilization_over_pod_limit", + "node_memory_limit", + "pod_cpu_request", + "pod_interface_network_tx_dropped", + "pod_status_succeeded", + "namespace_number_of_running_pods", + "pod_memory_reserved_capacity", + "node_diskio_io_serviced_total", + "pod_network_rx_bytes", + "node_status_capacity_pods", + "pod_status_unknown", + "cluster_failed_node_count", + "container_memory_utilization", + "node_memory_utilization", + 
"node_filesystem_inodes_free", + "container_memory_request", + "container_cpu_limit", + "node_memory_reserved_capacity", + "node_interface_network_tx_dropped", + "pod_cpu_utilization_over_pod_limit", + "container_memory_failures_total", + "pod_status_ready", + "pod_number_of_running_containers", + "cluster_node_count", + "pod_memory_request", + "node_cpu_utilization", + "cluster_number_of_running_pods", + "node_memory_working_set", + "pod_status_failed", + "node_status_condition_pid_pressure", + "pod_status_scheduled", + "node_number_of_running_containers", + "node_cpu_limit", + "node_status_condition_disk_pressure", + "pod_cpu_limit", + "pod_memory_limit", + "node_cpu_usage_total", + "pod_cpu_reserved_capacity", + "pod_network_tx_bytes", + "container_memory_limit", + "pod_memory_utilization", + "node_interface_network_rx_dropped", + "node_network_total_bytes", + "container_cpu_utilization_over_container_limit", + "pod_interface_network_rx_dropped", + "pod_container_status_waiting", + "node_cpu_reserved_capacity", + "container_memory_utilization_over_container_limit", + "container_cpu_request", + }, + "ClusterName-FullPodName-Namespace-PodName": { + "pod_network_tx_bytes", + "pod_interface_network_rx_dropped", + "pod_cpu_limit", + "pod_status_succeeded", + "pod_container_status_waiting", + "pod_number_of_running_containers", + "pod_number_of_container_restarts", + "pod_status_pending", + "pod_status_running", + "pod_container_status_running", + "pod_memory_limit", + "pod_status_unknown", + "pod_memory_utilization_over_pod_limit", + "pod_cpu_request", + "pod_status_scheduled", + "pod_memory_utilization", + "pod_status_failed", + "pod_network_rx_bytes", + "pod_number_of_containers", + "pod_cpu_utilization", + "pod_memory_reserved_capacity", + "pod_status_ready", + "pod_container_status_terminated", + "pod_interface_network_tx_dropped", + "pod_memory_request", + "pod_cpu_reserved_capacity", + "pod_cpu_utilization_over_pod_limit", + }, + 
"ClusterName-Namespace-PodName": { + "pod_interface_network_rx_dropped", + "pod_status_succeeded", + "pod_container_status_running", + "pod_network_rx_bytes", + "pod_cpu_utilization", + "pod_memory_utilization", + "pod_interface_network_tx_dropped", + "pod_status_ready", + "pod_container_status_terminated", + "pod_cpu_reserved_capacity", + "pod_memory_request", + "pod_status_running", + "pod_status_pending", + "pod_number_of_containers", + "pod_memory_utilization_over_pod_limit", + "pod_status_unknown", + "pod_cpu_limit", + "pod_container_status_waiting", + "pod_memory_reserved_capacity", + "pod_network_tx_bytes", + "pod_status_failed", + "pod_number_of_running_containers", + "pod_number_of_container_restarts", + "pod_cpu_request", + "pod_cpu_utilization_over_pod_limit", + "pod_status_scheduled", + "pod_memory_limit", + }, - "ClusterName-InstanceId-NodeName": { - "node_status_allocatable_pods", - "node_network_total_bytes", - "node_status_condition_unknown", - "node_interface_network_rx_dropped", - "node_number_of_running_containers", - "node_interface_network_tx_dropped", - "node_memory_utilization", - "node_cpu_limit", - "node_status_condition_disk_pressure", - "node_memory_working_set", - "node_cpu_reserved_capacity", - "node_status_condition_ready", - "node_filesystem_utilization", - "node_status_condition_memory_pressure", - "node_memory_limit", - "node_memory_reserved_capacity", - "node_diskio_io_serviced_total", - "node_status_condition_pid_pressure", - "node_filesystem_inodes", - "node_cpu_usage_total", - "node_number_of_running_pods", - "node_diskio_io_service_bytes_total", - "node_status_capacity_pods", - "node_filesystem_inodes_free", - "node_cpu_utilization", - }, + "ClusterName-InstanceId-NodeName": { + "node_status_allocatable_pods", + "node_network_total_bytes", + "node_status_condition_unknown", + "node_interface_network_rx_dropped", + "node_number_of_running_containers", + "node_interface_network_tx_dropped", + "node_memory_utilization", + 
"node_cpu_limit", + "node_status_condition_disk_pressure", + "node_memory_working_set", + "node_cpu_reserved_capacity", + "node_status_condition_ready", + "node_filesystem_utilization", + "node_status_condition_memory_pressure", + "node_memory_limit", + "node_memory_reserved_capacity", + "node_diskio_io_serviced_total", + "node_status_condition_pid_pressure", + "node_filesystem_inodes", + "node_cpu_usage_total", + "node_number_of_running_pods", + "node_diskio_io_service_bytes_total", + "node_status_capacity_pods", + "node_filesystem_inodes_free", + "node_cpu_utilization", + }, - "ClusterName-Namespace-Service": { - "pod_status_unknown", - "pod_memory_limit", - "pod_container_status_terminated", - "pod_status_ready", - "pod_number_of_container_restarts", - "pod_status_pending", - "pod_status_succeeded", - "pod_network_rx_bytes", - "pod_status_failed", - "pod_number_of_containers", - "pod_cpu_request", - "service_number_of_running_pods", - "pod_memory_reserved_capacity", - "pod_network_tx_bytes", - "pod_container_status_waiting", - "pod_memory_request", - "pod_status_running", - "pod_container_status_running", - "pod_cpu_reserved_capacity", - "pod_memory_utilization_over_pod_limit", - "pod_cpu_utilization", - "pod_memory_utilization", - "pod_number_of_running_containers", - "pod_status_scheduled", - }, - "ClusterName-Namespace": { - "pod_interface_network_rx_dropped", - "pod_network_rx_bytes", - "pod_cpu_utilization_over_pod_limit", - "pod_memory_utilization_over_pod_limit", - "namespace_number_of_running_pods", - "pod_memory_utilization", - "pod_interface_network_tx_dropped", - "pod_cpu_utilization", - "pod_network_tx_bytes", - }, + "ClusterName-Namespace-Service": { + "pod_status_unknown", + "pod_memory_limit", + "pod_container_status_terminated", + "pod_status_ready", + "pod_number_of_container_restarts", + "pod_status_pending", + "pod_status_succeeded", + "pod_network_rx_bytes", + "pod_status_failed", + "pod_number_of_containers", + "pod_cpu_request", + 
"service_number_of_running_pods", + "pod_memory_reserved_capacity", + "pod_network_tx_bytes", + "pod_container_status_waiting", + "pod_memory_request", + "pod_status_running", + "pod_container_status_running", + "pod_cpu_reserved_capacity", + "pod_memory_utilization_over_pod_limit", + "pod_cpu_utilization", + "pod_memory_utilization", + "pod_number_of_running_containers", + "pod_status_scheduled", + }, + "ClusterName-Namespace": { + "pod_interface_network_rx_dropped", + "pod_network_rx_bytes", + "pod_cpu_utilization_over_pod_limit", + "pod_memory_utilization_over_pod_limit", + "namespace_number_of_running_pods", + "pod_memory_utilization", + "pod_interface_network_tx_dropped", + "pod_cpu_utilization", + "pod_network_tx_bytes", + }, + } + + if env.InstancePlatform == "windows" { + ExpectedDimsToMetrics["ClusterName"] = append(ExpectedDimsToMetrics["ClusterName"], + "container_filesystem_usage", "container_filesystem_available", "container_filesystem_utilization") + } + + return ExpectedDimsToMetrics } From 9ed55036bd594a92bb27ffcd5d2626f587006e52 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Wed, 6 Mar 2024 08:25:37 -0800 Subject: [PATCH 11/13] fix syntax error --- test/metric_value_benchmark/eks_daemonset_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/metric_value_benchmark/eks_daemonset_test.go b/test/metric_value_benchmark/eks_daemonset_test.go index ca01673c6..9572bd8bd 100644 --- a/test/metric_value_benchmark/eks_daemonset_test.go +++ b/test/metric_value_benchmark/eks_daemonset_test.go @@ -37,7 +37,7 @@ type EKSDaemonTestRunner struct { func (e *EKSDaemonTestRunner) Validate() status.TestGroupResult { var testResults []status.TestResult - testResults = append(testResults, validateMetrics(e.env, gpuMetricIndicator, eks_resources.ExpectedDimsToMetrics)...) + testResults = append(testResults, validateMetrics(e.env, gpuMetricIndicator, eks_resources.GetExpectedDimsToMetrics(e.env))...) 
testResults = append(testResults, e.validateLogs(e.env)) return status.TestGroupResult{ Name: e.GetTestName(), From 3372dcd2840e7471850f8615c2d5cf0945c2b027 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Thu, 7 Mar 2024 07:15:56 -0800 Subject: [PATCH 12/13] Fix lint issues --- test/metric_value_benchmark/eks_resources/util.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/metric_value_benchmark/eks_resources/util.go b/test/metric_value_benchmark/eks_resources/util.go index 990080817..96e832aac 100644 --- a/test/metric_value_benchmark/eks_resources/util.go +++ b/test/metric_value_benchmark/eks_resources/util.go @@ -5,6 +5,7 @@ package eks_resources import ( _ "embed" + "github.com/aws/amazon-cloudwatch-agent-test/environment" ) From 9656063b1a663227b3a33c0c84460ef513b70616 Mon Sep 17 00:00:00 2001 From: Kulwant Singh <ikulwant@amazon.com> Date: Thu, 7 Mar 2024 07:21:32 -0800 Subject: [PATCH 13/13] Remove extra license header from terraform templates --- terraform/eks/daemon/windows/2019/variables.tf | 3 --- terraform/eks/daemon/windows/2022/variables.tf | 3 --- 2 files changed, 6 deletions(-) diff --git a/terraform/eks/daemon/windows/2019/variables.tf b/terraform/eks/daemon/windows/2019/variables.tf index e2326185f..87bbf9349 100644 --- a/terraform/eks/daemon/windows/2019/variables.tf +++ b/terraform/eks/daemon/windows/2019/variables.tf @@ -1,9 +1,6 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - variable "region" { type = string default = "us-west-2" diff --git a/terraform/eks/daemon/windows/2022/variables.tf b/terraform/eks/daemon/windows/2022/variables.tf index d6531d3b7..f5d2d1cfc 100644 --- a/terraform/eks/daemon/windows/2022/variables.tf +++ b/terraform/eks/daemon/windows/2022/variables.tf @@ -1,9 +1,6 @@ // Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. // SPDX-License-Identifier: MIT -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: MIT - variable "region" { type = string default = "us-west-2"