From 583b9d786ec82955d375bafdae09c5653e81207c Mon Sep 17 00:00:00 2001 From: Apoorva Kulkarni Date: Tue, 29 Oct 2024 14:43:21 -0700 Subject: [PATCH 1/3] chore: Use official neuron helm chart --- aws-neuron-device-plugin.tf | 4 +- helm-charts/neuron-device-plugin/.helmignore | 23 ----- helm-charts/neuron-device-plugin/Chart.yaml | 6 -- .../templates/_helpers.tpl | 68 -------------- .../templates/clusterrole.yaml | 19 ---- .../templates/clusterrolebinding.yaml | 14 --- .../templates/daemonset.yaml | 77 ---------------- .../templates/serviceaccount.yaml | 13 --- helm-charts/neuron-device-plugin/values.yaml | 91 ------------------- 9 files changed, 2 insertions(+), 313 deletions(-) delete mode 100644 helm-charts/neuron-device-plugin/.helmignore delete mode 100644 helm-charts/neuron-device-plugin/Chart.yaml delete mode 100644 helm-charts/neuron-device-plugin/templates/_helpers.tpl delete mode 100644 helm-charts/neuron-device-plugin/templates/clusterrole.yaml delete mode 100644 helm-charts/neuron-device-plugin/templates/clusterrolebinding.yaml delete mode 100644 helm-charts/neuron-device-plugin/templates/daemonset.yaml delete mode 100644 helm-charts/neuron-device-plugin/templates/serviceaccount.yaml delete mode 100644 helm-charts/neuron-device-plugin/values.yaml diff --git a/aws-neuron-device-plugin.tf b/aws-neuron-device-plugin.tf index 6d1f7ed..249e1d9 100644 --- a/aws-neuron-device-plugin.tf +++ b/aws-neuron-device-plugin.tf @@ -3,8 +3,8 @@ resource "helm_release" "aws_neuron_device_plugin" { name = try(var.aws_neuron_device_plugin_helm_config["name"], "neuron-device-plugin") repository = try(var.aws_neuron_device_plugin_helm_config["repository"], null) - chart = try(var.aws_neuron_device_plugin_helm_config["chart"], "${path.module}/helm-charts/neuron-device-plugin") - version = try(var.aws_neuron_device_plugin_helm_config["version"], "0.1.0") + chart = try(var.aws_neuron_device_plugin_helm_config["chart"], "oci://public.ecr.aws/neuron/neuron-helm-chart") + version = try(var.aws_neuron_device_plugin_helm_config["version"], "1.0.0") timeout = try(var.aws_neuron_device_plugin_helm_config["timeout"], 300) values = try(var.aws_neuron_device_plugin_helm_config["values"], null) create_namespace = try(var.aws_neuron_device_plugin_helm_config["create_namespace"], false) diff --git a/helm-charts/neuron-device-plugin/.helmignore b/helm-charts/neuron-device-plugin/.helmignore deleted file mode 100644 index 0e8a0eb..0000000 --- a/helm-charts/neuron-device-plugin/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/helm-charts/neuron-device-plugin/Chart.yaml b/helm-charts/neuron-device-plugin/Chart.yaml deleted file mode 100644 index bce6c0c..0000000 --- a/helm-charts/neuron-device-plugin/Chart.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v2 -name: neuron-device-plugin -description: A Helm chart for Nueron Device Plugin -type: application -version: 0.2.0 -appVersion: "2.21.14.0" # Neuron SDK Device Plugin version diff --git a/helm-charts/neuron-device-plugin/templates/_helpers.tpl b/helm-charts/neuron-device-plugin/templates/_helpers.tpl deleted file mode 100644 index 71f1dfa..0000000 --- a/helm-charts/neuron-device-plugin/templates/_helpers.tpl +++ /dev/null @@ -1,68 +0,0 @@ -{{/* vim: set filetype=mustache: */}} -{{/* -Expand the name of the chart. -*/}} -{{- define "neuron-device-plugin.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "neuron-device-plugin.fullname" -}} -{{- if .Values.fullnameOverride -}} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- $name := default .Chart.Name .Values.nameOverride -}} -{{- if contains $name .Release.Name -}} -{{- .Release.Name | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} -{{- end -}} -{{- end -}} -{{- end -}} - - -{{/* -Create the name of the service account to use -*/}} -{{- define "neuron-device-plugin.serviceAccountName" -}} -{{- if .Values.serviceAccount.create -}} - {{ default (include "neuron-device-plugin.fullname" .) .Values.serviceAccount.name }} -{{- else -}} - {{ default "default" .Values.serviceAccount.name }} -{{- end -}} -{{- end -}} - -{{/* -Image tag -*/}} -{{- define "neuron-device-plugin.imageTag" -}} -{{- if .Values.image.tag -}} -{{- .Values.image.tag -}} -{{- else -}} -{{- .Chart.AppVersion -}} -{{- end -}} -{{- end -}} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "neuron-device-plugin.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Common labels -*/}} -{{- define "neuron-device-plugin.labels" -}} -app.kubernetes.io/name: {{ include "neuron-device-plugin.name" . }} -helm.sh/chart: {{ include "neuron-device-plugin.chart" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end -}} diff --git a/helm-charts/neuron-device-plugin/templates/clusterrole.yaml b/helm-charts/neuron-device-plugin/templates/clusterrole.yaml deleted file mode 100644 index 5334846..0000000 --- a/helm-charts/neuron-device-plugin/templates/clusterrole.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: {{ include "neuron-device-plugin.fullname" . }} - labels: -{{ include "neuron-device-plugin.labels" . | indent 4 }} -rules: -- apiGroups: [""] - resources: ["nodes"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["events"] - verbs: ["create", "patch"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["update", "patch", "get", "list", "watch"] -- apiGroups: [""] - resources: ["nodes/status"] - verbs: ["patch", "update"] diff --git a/helm-charts/neuron-device-plugin/templates/clusterrolebinding.yaml b/helm-charts/neuron-device-plugin/templates/clusterrolebinding.yaml deleted file mode 100644 index 19ce06e..0000000 --- a/helm-charts/neuron-device-plugin/templates/clusterrolebinding.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: {{ include "neuron-device-plugin.fullname" . }} - labels: -{{ include "neuron-device-plugin.labels" . | indent 4 }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: {{ include "neuron-device-plugin.fullname" . }} -subjects: - - kind: ServiceAccount - name: {{ template "neuron-device-plugin.serviceAccountName" . }} - namespace: {{ .Values.namespace }} diff --git a/helm-charts/neuron-device-plugin/templates/daemonset.yaml b/helm-charts/neuron-device-plugin/templates/daemonset.yaml deleted file mode 100644 index e0601f2..0000000 --- a/helm-charts/neuron-device-plugin/templates/daemonset.yaml +++ /dev/null @@ -1,77 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: {{ include "neuron-device-plugin.fullname" . }} - namespace: {{ .Release.Namespace }} - labels: -{{ include "neuron-device-plugin.labels" . | indent 4 }} -spec: - updateStrategy: -{{ toYaml .Values.updateStrategy | indent 4 }} - selector: - matchLabels: - name: {{ include "neuron-device-plugin.fullname" . }}-ds - template: - metadata: - {{- if .Values.podAnnotations }} - annotations: - {{- range $key, $value := .Values.podAnnotations }} - {{ $key }}: {{ $value | quote }} - {{- end }} - {{- end }} - labels: - name: {{ include "neuron-device-plugin.fullname" . }}-ds - {{- if .Values.podLabels }} -{{ toYaml .Values.podLabels | indent 8 }} - {{- end }} - spec: - priorityClassName: "{{ .Values.priorityClassName }}" - serviceAccountName: {{ template "neuron-device-plugin.serviceAccountName" . }} - hostNetwork: true - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - image: {{ .Values.image.repository }}:{{ include "neuron-device-plugin.imageTag" . }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - name: neuron-device-plugin - env: - - name: KUBECONFIG - value: /etc/kubernetes/kubelet.conf - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - - name: infa-map - mountPath: /run - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins - - name: infa-map - hostPath: - path: /run diff --git a/helm-charts/neuron-device-plugin/templates/serviceaccount.yaml b/helm-charts/neuron-device-plugin/templates/serviceaccount.yaml deleted file mode 100644 index 14b2fd0..0000000 --- a/helm-charts/neuron-device-plugin/templates/serviceaccount.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ template "neuron-device-plugin.serviceAccountName" . }} - namespace: {{ .Values.namespace }} -{{- with .Values.serviceAccount.annotations }} - annotations: -{{ toYaml . | indent 4 }} -{{- end }} - labels: -{{ include "neuron-device-plugin.labels" . | indent 4 }} -{{- end -}} diff --git a/helm-charts/neuron-device-plugin/values.yaml b/helm-charts/neuron-device-plugin/values.yaml deleted file mode 100644 index 9e346c8..0000000 --- a/helm-charts/neuron-device-plugin/values.yaml +++ /dev/null @@ -1,91 +0,0 @@ -# Override various naming aspects of this chart -# Only edit these if you know what you're doing -nameOverride: neuron-device-plugin - -fullnameOverride: neuron-device-plugin -namespace: kube-system - -imagePullSecrets: [] - -priorityClassName: system-node-critical - -podSecurityContext: {} - -podAnnotations: {} - -podLabels: {} -serviceAccount: - # Specifies whether a service account should be created - create: true - # Annotations to add to the service account - annotations: {} - labels: {} - # The name of the service account to use. - # If not set and create is true, a name is generated using the fullname template - name: - -# resources: -# requests: -# cpu: 25m - -updateStrategy: - type: RollingUpdate - rollingUpdate: - maxUnavailable: "10%" - -nodeSelector: {} - -image: - repository: public.ecr.aws/neuron/neuron-device-plugin - pullPolicy: IfNotPresent - tag: "2.21.14.0" - -tolerations: - - operator: Exists # DaemonSet is tolerant of any taints, regardless of the key or value of the taint. - - key: CriticalAddonsOnly - operator: Exists - - key: aws.amazon.com/neuron - operator: Exists - effect: NoSchedule - - key: aws.amazon.com/neuroncore - operator: Exists - effect: NoSchedule - - key: aws.amazon.com/neurondevice - operator: Exists - effect: NoSchedule -affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: "beta.kubernetes.io/instance-type" - operator: In - values: - - inf1.xlarge - - inf1.2xlarge - - inf1.6xlarge - - inf1.24xlarge - - inf2.xlarge - - inf2.4xlarge - - inf2.8xlarge - - inf2.24xlarge - - inf2.48xlarge - - trn1.2xlarge - - trn1.32xlarge - - trn1n.32xlarge - - matchExpressions: - - key: "node.kubernetes.io/instance-type" - operator: In - values: - - inf1.xlarge - - inf1.2xlarge - - inf1.6xlarge - - inf1.24xlarge - - inf2.xlarge - - inf2.4xlarge - - inf2.8xlarge - - inf2.24xlarge - - inf2.48xlarge - - trn1.2xlarge - - trn1.32xlarge - - trn1n.32xlarge From 23cdab2b65ebd35e611eacb1b9b196bd7798e219 Mon Sep 17 00:00:00 2001 From: Apoorva Kulkarni Date: Tue, 29 Oct 2024 14:52:28 -0700 Subject: [PATCH 2/3] fix precommit errors --- helm-charts/karpenter-resources/templates/node-pool.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm-charts/karpenter-resources/templates/node-pool.yaml b/helm-charts/karpenter-resources/templates/node-pool.yaml index a8427f7..60c80aa 100644 --- a/helm-charts/karpenter-resources/templates/node-pool.yaml +++ b/helm-charts/karpenter-resources/templates/node-pool.yaml @@ -35,7 +35,7 @@ spec: {{- with .Values.nodePool.disruption.budgets }} budgets: {{- toYaml . | nindent 4 }} - {{- end }} + {{- end }} limits: cpu: {{ .Values.nodePool.limits.cpu }} memory: {{ .Values.nodePool.limits.memory }} From e6f4e878e022bdb75db4c0b56fc20976bd6f91c3 Mon Sep 17 00:00:00 2001 From: Apoorva Kulkarni Date: Tue, 29 Oct 2024 15:00:52 -0700 Subject: [PATCH 3/3] fix name of the helm chart --- aws-neuron-device-plugin.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-neuron-device-plugin.tf b/aws-neuron-device-plugin.tf index 249e1d9..a511a19 100644 --- a/aws-neuron-device-plugin.tf +++ b/aws-neuron-device-plugin.tf @@ -1,7 +1,7 @@ resource "helm_release" "aws_neuron_device_plugin" { count = var.enable_aws_neuron_device_plugin ? 1 : 0 - name = try(var.aws_neuron_device_plugin_helm_config["name"], "neuron-device-plugin") + name = try(var.aws_neuron_device_plugin_helm_config["name"], "neuron-helm-chart") repository = try(var.aws_neuron_device_plugin_helm_config["repository"], null) chart = try(var.aws_neuron_device_plugin_helm_config["chart"], "oci://public.ecr.aws/neuron/neuron-helm-chart") version = try(var.aws_neuron_device_plugin_helm_config["version"], "1.0.0")