diff --git a/aws-efa-k8s-device-plugin.tf b/aws-efa-k8s-device-plugin.tf
index ff9d716..85527a1 100644
--- a/aws-efa-k8s-device-plugin.tf
+++ b/aws-efa-k8s-device-plugin.tf
@@ -1,12 +1,25 @@
+locals {
+  # Chart defaults as raw YAML. Passed to Helm as the first (lowest-precedence)
+  # entry of `values`, so any user-supplied values document can override them.
+  aws_efa_k8s_device_plugin_default_values = <<-EOT
+tolerations:
+  - operator: Exists # DaemonSet is tolerant of any taints, regardless of the key or value of the taint.
+  - key: CriticalAddonsOnly
+    operator: Exists
+EOT
+}
+
 resource "helm_release" "aws_efa_k8s_device_plugin" {
   count = var.enable_aws_efa_k8s_device_plugin ? 1 : 0
 
   name = try(var.aws_efa_k8s_device_plugin_helm_config["name"], "aws-efa-k8s-device-plugin")
-  repository = try(var.aws_efa_k8s_device_plugin_helm_config["repository"], null)
-  chart = try(var.aws_efa_k8s_device_plugin_helm_config["chart"], "${path.module}/helm-charts/aws-efa-k8s-device-plugin")
-  version = try(var.aws_efa_k8s_device_plugin_helm_config["version"], "0.1.0")
+  repository = try(var.aws_efa_k8s_device_plugin_helm_config["repository"], "https://aws.github.io/eks-charts")
+  chart = try(var.aws_efa_k8s_device_plugin_helm_config["chart"], "aws-efa-k8s-device-plugin")
+  version = try(var.aws_efa_k8s_device_plugin_helm_config["version"], "v0.4.4")
   timeout = try(var.aws_efa_k8s_device_plugin_helm_config["timeout"], 300)
-  values = try(var.aws_efa_k8s_device_plugin_helm_config["values"], null)
+  # Defaults first, then every user-supplied values document: Helm merges the
+  # list in order, so later entries win and no user entry is dropped.
+  values = concat([local.aws_efa_k8s_device_plugin_default_values], try(var.aws_efa_k8s_device_plugin_helm_config["values"], []))
   create_namespace = try(var.aws_efa_k8s_device_plugin_helm_config["create_namespace"], false)
   namespace = try(var.aws_efa_k8s_device_plugin_helm_config["namespace"], "kube-system")
   lint = try(var.aws_efa_k8s_device_plugin_helm_config["lint"], false)
diff --git a/helm-charts/aws-efa-k8s-device-plugin/.helmignore b/helm-charts/aws-efa-k8s-device-plugin/.helmignore
deleted file
mode 100644 index 0e8a0eb..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/helm-charts/aws-efa-k8s-device-plugin/Chart.yaml b/helm-charts/aws-efa-k8s-device-plugin/Chart.yaml deleted file mode 100644 index 52cfea1..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/Chart.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v2 -name: aws-efa-k8s-device-plugin -description: A Helm chart for EFA device plugin. -type: application -version: 0.1.0 -appVersion: "v0.3.3" diff --git a/helm-charts/aws-efa-k8s-device-plugin/templates/NOTES.txt b/helm-charts/aws-efa-k8s-device-plugin/templates/NOTES.txt deleted file mode 100644 index 76da59c..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/templates/NOTES.txt +++ /dev/null @@ -1 +0,0 @@ -EFA device plugin is installed, it can be requested as `vpc.amazonaws.com/efa` resource. diff --git a/helm-charts/aws-efa-k8s-device-plugin/templates/_helpers.tpl b/helm-charts/aws-efa-k8s-device-plugin/templates/_helpers.tpl deleted file mode 100644 index a454828..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/templates/_helpers.tpl +++ /dev/null @@ -1,62 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "aws-efa-k8s-device-plugin.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. 
-*/}} -{{- define "aws-efa-k8s-device-plugin.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "aws-efa-k8s-device-plugin.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "aws-efa-k8s-device-plugin.labels" -}} -helm.sh/chart: {{ include "aws-efa-k8s-device-plugin.chart" . }} -{{ include "aws-efa-k8s-device-plugin.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "aws-efa-k8s-device-plugin.selectorLabels" -}} -app.kubernetes.io/name: {{ include "aws-efa-k8s-device-plugin.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "aws-efa-k8s-device-plugin.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "aws-efa-k8s-device-plugin.fullname" .) 
.Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} diff --git a/helm-charts/aws-efa-k8s-device-plugin/templates/daemonset.yaml b/helm-charts/aws-efa-k8s-device-plugin/templates/daemonset.yaml deleted file mode 100644 index d18fd60..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/templates/daemonset.yaml +++ /dev/null @@ -1,73 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: {{ include "aws-efa-k8s-device-plugin.fullname" . }} - labels: - {{- include "aws-efa-k8s-device-plugin.labels" . | nindent 4 }} -spec: - selector: - matchLabels: - name: {{ include "aws-efa-k8s-device-plugin.fullname" . }} - updateStrategy: - type: RollingUpdate - template: - metadata: - # This annotation is deprecated. Kept here for backward compatibility - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - annotations: - scheduler.alpha.kubernetes.io/critical-pod: "" - {{- with .Values.additionalPodAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - name: {{ include "aws-efa-k8s-device-plugin.fullname" . }} - {{- with .Values.additionalPodLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - # Mark this pod as a critical add-on; when enabled, the critical add-on - # scheduler reserves resources for critical add-on pods so that they can - # be rescheduled after a failure. - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - priorityClassName: "system-node-critical" - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 8 }} - {{- end }} - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - {{- range .Values.supportedInstanceLabels.keys }} - - matchExpressions: - - key: {{ . }} - operator: In - values: - {{- toYaml $.Values.supportedInstanceLabels.values | nindent 20 }} - {{- end }} - hostNetwork: true - containers: - - image: {{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }} - imagePullPolicy: Always - name: aws-efa-k8s-device-plugin - securityContext: - {{- toYaml .Values.securityContext | nindent 12}} - {{- with .Values.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins diff --git a/helm-charts/aws-efa-k8s-device-plugin/templates/test/test-efa.yaml b/helm-charts/aws-efa-k8s-device-plugin/templates/test/test-efa.yaml deleted file mode 100644 index d800509..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/templates/test/test-efa.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: "{{ include "aws-efa-k8s-device-plugin.fullname" . 
}}-test-efa" - annotations: - helm.sh/hook: test -spec: - template: - spec: - restartPolicy: Never - containers: - - name: efa-test - image: public.ecr.aws/w6p6i9i7/aws-efa-nccl-rdma:base-cudnn8-cuda11-ubuntu20.04 - command: ["/bin/bash"] - args: ["-c", "fi_info -p efa && echo 'test:' && if [[ $(fi_info -p efa -t FI_EP_RDM | grep 'provider: efa' | wc -l) == 4 ]]; then echo 'passed' && exit 0; else echo 'failed' && exit 1; fi"] - imagePullPolicy: IfNotPresent - resources: - requests: - hugepages-2Mi: 20000Mi - vpc.amazonaws.com/efa: 4 - memory: 1070000Mi - limits: - hugepages-2Mi: 20000Mi - vpc.amazonaws.com/efa: 4 - memory: 1070000Mi - backoffLimit: 1 diff --git a/helm-charts/aws-efa-k8s-device-plugin/values.yaml b/helm-charts/aws-efa-k8s-device-plugin/values.yaml deleted file mode 100644 index 915eb48..0000000 --- a/helm-charts/aws-efa-k8s-device-plugin/values.yaml +++ /dev/null @@ -1,98 +0,0 @@ -image: - repository: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. 
- tag: "v0.3.3" -securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - -supportedInstanceLabels: # EFA supported instances: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types - keys: - - "beta.kubernetes.io/instance-type" - - "node.kubernetes.io/instance-type" - values: - - m5dn.24xlarge - - m5dn.metal - - m5n.24xlarge - - m5n.metal - - m5zn.12xlarge - - m5zn.metal - - m6a.32xlarge - - m6a.48xlarge - - m6a.metal - - m6i.32xlarge - - m6i.metal - - m6id.32xlarge - - m6id.metal - - c5n.18xlarge - - c5n.9xlarge - - c5n.metal - - c6a.32xlarge - - c6a.48xlarge - - c6a.metal - - c6gn.16xlarge - - c6i.32xlarge - - c6i.metal - - c6id.32xlarge - - c6id.metal - - hpc6a.48xlarge - - r5dn.24xlarge - - r5dn.metal - - r5n.24xlarge - - r5n.metal - - r6i.32xlarge - - r6i.metal - - r6id.32xlarge - - r6id.metal - - x2d.32xlarge - - x2d.metal - - x2ed.32xlarge - - x2ed.metal - - x2iezn.12xlarge - - x2iezn.metal - - x2idn.32xlarge - - x2iedn.32xlarge - - i3en.24xlarge - - i3en.12xlarge - - i3en.metal - - i4i.32xlarge - - i4i.metal - - im4gn.16xlarge - - dl1.24xlarge - - g4dn.8xlarge - - g4dn.12xlarge - - g4dn.metal - - g5.48xlarge - - inf1.24xlarge - - p3dn.24xlarge - - p4d.24xlarge - - p4de.24xlarge - - trn1.32xlarge - - trn1n.32xlarge - -resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi -nodeSelector: {} -# efa: present - -additionalPodAnnotations: {} -additionalPodLabels: {} -nameOverride: "" -fullnameOverride: "" -imagePullSecrets: [] - -tolerations: - - operator: Exists # DaemonSet is tolerant of any taints, regardless of the key or value of the taint. - - key: CriticalAddonsOnly - operator: Exists