From 05d12e5d974e8c47865c92c4ed031f85f01540f8 Mon Sep 17 00:00:00 2001 From: Tarun Gupta Akirala Date: Wed, 18 Dec 2024 14:48:47 -0800 Subject: [PATCH] feat: upgrade to kubecost v2 Signed-off-by: Tarun Gupta Akirala Signed-off-by: Tarun Gupta Akirala --- common/helm-repositories/kubecost.yaml | 10 + common/helm-repositories/kustomization.yaml | 1 + hack/list-images.sh | 1 + licenses.d2iq.yaml | 24 +- .../0.37.8/defaults/cm.yaml | 153 --------- .../0.37.8/release/release.yaml | 156 --------- .../2.5.0/cosi-storage.yaml | 26 ++ .../2.5.0/cosi-storage/cosi-bucket.yaml | 35 +++ .../2.5.0/cosi-storage}/kustomization.yaml | 2 +- .../2.5.0/defaults/cm.yaml | 295 ++++++++++++++++++ .../defaults/kustomization.yaml | 0 .../2.5.0/kustomization.yaml | 8 + .../2.5.0/move-to-konvoy.yaml | 20 ++ .../2.5.0/move-to-konvoy/kustomization.yaml | 4 + .../move-to-konvoy-cosi-hr.yaml | 40 +++ .../post-install.yaml} | 5 +- .../post-install}/post-install-jobs.yaml | 0 .../2.5.0/pre-install.yaml | 24 ++ .../2.5.0/pre-install/pre-install-jobs.yaml | 121 +++++++ .../{0.37.8 => 2.5.0}/release.yaml | 5 +- .../2.5.0/release/release.yaml | 105 +++++++ services/kommander/0.14.0/defaults/cm.yaml | 3 - services/kubecost/0.37.9/defaults/cm.yaml | 130 -------- services/kubecost/0.37.9/kubecost.yaml | 84 ----- services/kubecost/2.5.0/defaults/cm.yaml | 170 ++++++++++ .../defaults/kustomization.yaml | 0 .../2.5.0}/kustomization.yaml | 2 +- services/kubecost/2.5.0/prerequisites.yaml | 24 ++ .../2.5.0/prerequisites/prerequisites.yaml | 53 ++++ services/kubecost/2.5.0/release.yaml | 23 ++ services/kubecost/2.5.0/release/release.yaml | 31 ++ services/kubecost/metadata.yaml | 2 +- 32 files changed, 1018 insertions(+), 539 deletions(-) create mode 100644 common/helm-repositories/kubecost.yaml delete mode 100644 services/centralized-kubecost/0.37.8/defaults/cm.yaml delete mode 100644 services/centralized-kubecost/0.37.8/release/release.yaml create mode 100644 
services/centralized-kubecost/2.5.0/cosi-storage.yaml create mode 100644 services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml rename services/{kubecost/0.37.9 => centralized-kubecost/2.5.0/cosi-storage}/kustomization.yaml (79%) create mode 100644 services/centralized-kubecost/2.5.0/defaults/cm.yaml rename services/centralized-kubecost/{0.37.8 => 2.5.0}/defaults/kustomization.yaml (100%) create mode 100644 services/centralized-kubecost/2.5.0/kustomization.yaml create mode 100644 services/centralized-kubecost/2.5.0/move-to-konvoy.yaml create mode 100644 services/centralized-kubecost/2.5.0/move-to-konvoy/kustomization.yaml create mode 100644 services/centralized-kubecost/2.5.0/move-to-konvoy/move-to-konvoy-cosi-hr.yaml rename services/centralized-kubecost/{0.37.8/post-install-jobs.yaml => 2.5.0/post-install.yaml} (75%) rename services/centralized-kubecost/{0.37.8/post-install-jobs => 2.5.0/post-install}/post-install-jobs.yaml (100%) create mode 100644 services/centralized-kubecost/2.5.0/pre-install.yaml create mode 100644 services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml rename services/centralized-kubecost/{0.37.8 => 2.5.0}/release.yaml (73%) create mode 100644 services/centralized-kubecost/2.5.0/release/release.yaml delete mode 100644 services/kubecost/0.37.9/defaults/cm.yaml delete mode 100644 services/kubecost/0.37.9/kubecost.yaml create mode 100644 services/kubecost/2.5.0/defaults/cm.yaml rename services/kubecost/{0.37.9 => 2.5.0}/defaults/kustomization.yaml (100%) rename services/{centralized-kubecost/0.37.8 => kubecost/2.5.0}/kustomization.yaml (77%) create mode 100644 services/kubecost/2.5.0/prerequisites.yaml create mode 100644 services/kubecost/2.5.0/prerequisites/prerequisites.yaml create mode 100644 services/kubecost/2.5.0/release.yaml create mode 100644 services/kubecost/2.5.0/release/release.yaml diff --git a/common/helm-repositories/kubecost.yaml b/common/helm-repositories/kubecost.yaml new file mode 100644 index 
0000000000..6d3c9ad372 --- /dev/null +++ b/common/helm-repositories/kubecost.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: kubecost + namespace: kommander-flux +spec: + interval: 10m + timeout: 1m + url: "${helmMirrorURL:=https://kubecost.github.io/cost-analyzer/}" diff --git a/common/helm-repositories/kustomization.yaml b/common/helm-repositories/kustomization.yaml index 2d6415c620..38694642e5 100644 --- a/common/helm-repositories/kustomization.yaml +++ b/common/helm-repositories/kustomization.yaml @@ -13,6 +13,7 @@ resources: - kommander-ui.yaml - kommander.yaml - kube-logging.yaml + - kubecost.yaml - kubefed.yaml - kubetunnel.yaml - mesosphere-repos.yaml diff --git a/hack/list-images.sh b/hack/list-images.sh index d90f06d624..0e61398847 100755 --- a/hack/list-images.sh +++ b/hack/list-images.sh @@ -78,6 +78,7 @@ declare -rx releaseNamespace=unused \ tfaName=unused \ notPopulatedAnywhereAsThisIsOnlyForAirgappedBundle=unused \ caIssuerName=unused \ + CLUSTER_ID=unused \ kommanderChartVersion="${kommanderChartVersion:-}" IMAGES_FILE="$(realpath "$(mktemp .helm-list-images-XXXXXX)")" diff --git a/licenses.d2iq.yaml b/licenses.d2iq.yaml index 2922115f77..c54df4caab 100644 --- a/licenses.d2iq.yaml +++ b/licenses.d2iq.yaml @@ -20,11 +20,11 @@ resources: - license_path: LICENSE ref: v${image_tag%-debian-12-r0} url: https://github.com/thanos-io/thanos - - container_image: ghcr.io/mesosphere/dkp-container-images/docker.io/grafana/grafana:10.3.3-d2iq.0 + - container_image: docker.io/grafana/grafana:11.3.1 sources: - license_path: LICENSE notice_path: NOTICE.md - ref: v${image_tag%-d2iq.0} + ref: v${image_tag} url: https://github.com/grafana/grafana - container_image: docker.io/grafana/grafana:11.2.2-security-01 sources: @@ -478,7 +478,7 @@ resources: notice_path: NOTICE ref: ${image_tag} url: https://github.com/prometheus/node_exporter - - container_image: quay.io/prometheus/prometheus:v2.55.0 + - 
container_image: quay.io/prometheus/prometheus:v2.55.1 sources: - license_path: LICENSE notice_path: NOTICE @@ -572,18 +572,28 @@ resources: - url: https://github.com/mirror/busybox ref: master license_path: LICENSE - - container_image: gcr.io/kubecost1/cost-model:prod-1.108.1 + - container_image: gcr.io/kubecost1/cost-model:prod-2.5.0 + sources: + - url: https://github.com/opencost/opencost + ref: v1.112.1 + license_path: LICENSE + - container_image: gcr.io/kubecost1/kubecost-modeling:v0.1.18 sources: - url: https://github.com/opencost/opencost - ref: v${image_tag#prod-} + ref: v1.112.1 license_path: LICENSE - - container_image: ghcr.io/mesosphere/dkp-container-images/gcr.io/kubecost1/frontend:prod-1.108.1-d2iq.0 + - container_image: gcr.io/kubecost1/frontend:prod-2.5.0 sources: - url: https://github.com/opencost/opencost - ref: v1.108.1 + ref: v1.112.1 license_path: LICENSE - container_image: registry.k8s.io/pause:3.10 sources: - url: https://github.com/kubernetes/kubernetes ref: master license_path: LICENSE + - container_image: gcr.io/k8s-staging-sig-storage/objectstorage-controller:v20221027-v0.1.1-8-g300019f # TODO(takirala): drop this entry after merging https://github.com/mesosphere/konvoy2/pull/3474 + sources: + - url: https://github.com/kubernetes-sigs/container-object-storage-interface + ref: main + license_path: LICENSE diff --git a/services/centralized-kubecost/0.37.8/defaults/cm.yaml b/services/centralized-kubecost/0.37.8/defaults/cm.yaml deleted file mode 100644 index e1ba333175..0000000000 --- a/services/centralized-kubecost/0.37.8/defaults/cm.yaml +++ /dev/null @@ -1,153 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: centralized-kubecost-0.37.8-d2iq-defaults - namespace: ${releaseNamespace} -data: - values.yaml: | - --- - hooks: - clusterID: - kubectlImage: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" - priorityClassName: dkp-high-priority - - cost-analyzer: - fullnameOverride: 
"kommander-kubecost-cost-analyzer" - kubecostFrontend: - fullImageName: ghcr.io/mesosphere/dkp-container-images/gcr.io/kubecost1/frontend:prod-1.108.1-d2iq.0 - priority: - enabled: true - name: dkp-high-priority - diagnostics: - enabled: false - global: - prometheus: - fqdn: http://kubecost-prometheus-server.kommander.svc.cluster.local - enabled: false - - thanos: - enabled: true - queryService: http://kommander-kubecost-thanos-query-http.kubecost.svc.cluster.local:10902 - # The wait time before Kommander begins querying cost data for all attached clusters - queryOffset: 5m - query: - deploymentAnnotations: - secret.reloader.stakater.com/reload: kommander-kubecost-thanos-client-tls - - grafana: - enabled: false - # Use kommander monitoring Grafana instance - domainName: centralized-grafana.${releaseNamespace}.svc.cluster.local - - # For Thanos Installs, Allow Higher Concurrency from Cost-Model - # Still may require tweaking for some installs, but the thanos-query-frontend - # will greatly assist in reduction memory bloat in query. - kubecostModel: - maxQueryConcurrency: 5 - # This configuration is applied to thanos only. Expresses the resolution to - # use for longer query ranges. Options: raw, 5m, 1h - Default: raw - maxSourceResolution: 5m - - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: kommander-traefik - ingress.kubernetes.io/auth-response-headers: X-Forwarded-User - traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: "${releaseNamespace}-stripprefixes@kubernetescrd,${releaseNamespace}-forwardauth@kubernetescrd" - paths: - - "/dkp/kommander/kubecost/frontend/" - hosts: - - "" - tls: [] - - kubecostDeployment: - labels: - vendor.kubecost.io/partner: d2iq - - podSecurityPolicy: - enabled: false - - grafana: - # These values are set so that kubecost grafana dashboards are installed. - # Grafana itself is not installed. 
- sidecar: - image: - repository: docker.io/kiwigrid/k8s-sidecar - tag: 1.28.0 - dashboards: - enabled: true - label: grafana_dashboard_kommander - datasources: - enabled: true - defaultDatasourceEnabled: false - label: grafana_datasource_kommander - - prometheus: - fullnameOverride: "kommander-kubecost-prometheus" - server: - fullnameOverride: "kommander-kubecost-prometheus-server" - priorityClassName: dkp-high-priority - alertmanager: - fullnameOverride: "kommander-kubecost-prometheus-alertmanager" - priorityClassName: dkp-high-priority - kube-state-metrics: - fullnameOverride: "kommander-kubecost-prometheus-kube-state-metrics" - priorityClassName: dkp-high-priority - - thanos: - image: - repository: quay.io/thanos/thanos - tag: v0.37.1 - fullnameOverride: "kommander-kubecost-thanos" - nameOverride: "kubecost-thanos" - priorityClassName: dkp-high-priority - query: - enabled: true - timeout: 3m - maxConcurrent: 10 - # Name of HTTP request header used for dynamic prefixing of UI links and redirects. - webPrefixHeader: "X-Forwarded-Prefix" - resources: - limits: - cpu: 2000m - memory: 16Gi - requests: - cpu: 1000m - memory: 4Gi - http: - service: - labels: - servicemonitor.kommander.mesosphere.io/path: "metrics" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: kommander-traefik - traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: "${releaseNamespace}-stripprefixes@kubernetescrd,${releaseNamespace}-forwardauth@kubernetescrd" - path: "/dkp/kommander/kubecost/query" - hosts: - - "" - tls: [] - # Enable DNS discovery for stores - storeDNSDiscovery: false - # Enable DNS discovery for sidecars (this is for the chart built-in sidecar service) - sidecarDNSDiscovery: false - # Names of configmaps that contain addresses of store API servers, used for file service discovery. - serviceDiscoveryFileConfigMaps: - - kubecost-thanos-query-stores - # Refresh interval to re-read file SD files. 
It is used as a resync fallback. - serviceDiscoveryInterval: 5m - extraArgs: - - "--log.format=json" - - "--grpc-client-tls-secure" - - "--grpc-client-tls-cert=/etc/certs/tls.crt" - - "--grpc-client-tls-key=/etc/certs/tls.key" - - "--grpc-client-tls-ca=/etc/certs/ca.crt" - - "--grpc-client-server-name=server.thanos.kubecost.localhost.localdomain" - certSecretName: kommander-kubecost-thanos-client-tls - - kubecostProductConfigs: - grafanaURL: "/dkp/kommander/monitoring/grafana" - # used for display in Kubecost UI - clusterName: "Kommander Host" diff --git a/services/centralized-kubecost/0.37.8/release/release.yaml b/services/centralized-kubecost/0.37.8/release/release.yaml deleted file mode 100644 index 88df95b7f6..0000000000 --- a/services/centralized-kubecost/0.37.8/release/release.yaml +++ /dev/null @@ -1,156 +0,0 @@ -apiVersion: helm.toolkit.fluxcd.io/v2beta2 -kind: HelmRelease -metadata: - name: centralized-kubecost - namespace: ${releaseNamespace} -spec: - chart: - spec: - chart: kubecost - sourceRef: - kind: HelmRepository - name: mesosphere.github.io-charts-stable - namespace: kommander-flux - version: 0.37.4 - interval: 15s - install: - crds: CreateReplace - remediation: - retries: 30 - createNamespace: true - upgrade: - crds: CreateReplace - remediation: - retries: 30 - releaseName: centralized-kubecost - valuesFrom: - - kind: ConfigMap - name: centralized-kubecost-0.37.8-d2iq-defaults - targetNamespace: kubecost ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kubecost-thanos-configmap-edit - namespace: kubecost ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: kubecost-thanos-configmap-edit - namespace: kubecost -rules: - - apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "create", "update", "patch", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: kubecost-thanos-configmap-edit - namespace: kubecost -roleRef: - apiGroup: rbac.authorization.k8s.io - 
kind: Role - name: kubecost-thanos-configmap-edit -subjects: - - kind: ServiceAccount - name: kubecost-thanos-configmap-edit - namespace: kubecost ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: create-kubecost-thanos-query-stores-configmap - namespace: kubecost -spec: - template: - metadata: - name: create-kubecost-thanos-query-stores-configmap - spec: - serviceAccountName: kubecost-thanos-configmap-edit - restartPolicy: OnFailure - priorityClassName: dkp-high-priority - containers: - - name: kubectl - image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" - command: - - sh - - "-c" - - |- - /bin/bash <<'EOF' - set -o nounset - set -o errexit - set -o pipefail - - echo "checking if kubecost-thanos-query-stores configmap exists" - - RES=$(set -o errexit; kubectl get configmap --ignore-not-found kubecost-thanos-query-stores) - if [[ $RES == "" ]]; then - echo "kubecost-thanos-query-stores configmap does not exist - creating" - printf '%s\n' "apiVersion: v1" "kind: ConfigMap" "metadata:" " name: kubecost-thanos-query-stores" "data:" " stores.yaml: |-" " - targets: []" > /tmp/kubecost-thanos-query-stores.yaml - kubectl apply -f /tmp/kubecost-thanos-query-stores.yaml - exit 0 - fi - - echo "kubecost-thanos-query-stores configmap already exists - no need to create" - EOF ---- -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: kommander-kubecost-thanos-client-cert - namespace: kubecost -spec: - commonName: client.thanos.kubecost.localhost.localdomain - dnsNames: - - client.thanos.kubecost.localhost.localdomain - duration: 87600h - subject: - organizations: - - Nutanix - secretName: kommander-kubecost-thanos-client-tls - issuerRef: - name: kommander-ca - kind: ClusterIssuer ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-centralized-kubecost-view -rules: - - nonResourceURLs: - - /dkp/kommander/kubecost - - /dkp/kommander/kubecost/* - verbs: - - get - - head ---- -apiVersion: 
rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-centralized-kubecost-edit -rules: - - nonResourceURLs: - - /dkp/kommander/kubecost - - /dkp/kommander/kubecost/* - verbs: - - get - - head - - post - - put ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-centralized-kubecost-admin -rules: - - nonResourceURLs: - - /dkp/kommander/kubecost - - /dkp/kommander/kubecost/* - verbs: - - get - - head - - post - - put - - delete diff --git a/services/centralized-kubecost/2.5.0/cosi-storage.yaml b/services/centralized-kubecost/2.5.0/cosi-storage.yaml new file mode 100644 index 0000000000..d3106999fa --- /dev/null +++ b/services/centralized-kubecost/2.5.0/cosi-storage.yaml @@ -0,0 +1,26 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: kubecost-cosi-storage + namespace: ${releaseNamespace} +spec: + force: true + prune: true + wait: true + interval: 6h + retryInterval: 1m + path: ./services/centralized-kubecost/2.5.0/cosi-storage + sourceRef: + kind: GitRepository + name: management + namespace: kommander-flux + timeout: 1m + dependsOn: + - name: todo-move-to-konvoy + namespace: ${releaseNamespace} + postBuild: + substitute: + releaseNamespace: ${releaseNamespace} + substituteFrom: + - kind: ConfigMap + name: substitution-vars diff --git a/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml b/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml new file mode 100644 index 0000000000..389f527f77 --- /dev/null +++ b/services/centralized-kubecost/2.5.0/cosi-storage/cosi-bucket.yaml @@ -0,0 +1,35 @@ +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: kubecost-cosi-storage + namespace: ${releaseNamespace} +spec: + chart: + spec: + chart: cosi-bucket-kit + sourceRef: + kind: HelmRepository + name: takirala + #name: mesosphere.github.io-charts-stable + namespace: kommander-flux + version: 0.0.1-alpha.0 + interval: 15s + install: + 
crds: CreateReplace + remediation: + retries: 30 + createNamespace: true + upgrade: + crds: CreateReplace + remediation: + retries: 30 + releaseName: kubecost-cosi-storage + targetNamespace: ${releaseNamespace} + valuesFrom: + - kind: ConfigMap + name: centralized-kubecost-2.5.0-d2iq-defaults + valuesKey: ${kubecostClusterMode:=single-cluster}-values.yaml + - kind: ConfigMap + name: centralized-kubecost-overrides + optional: true +--- diff --git a/services/kubecost/0.37.9/kustomization.yaml b/services/centralized-kubecost/2.5.0/cosi-storage/kustomization.yaml similarity index 79% rename from services/kubecost/0.37.9/kustomization.yaml rename to services/centralized-kubecost/2.5.0/cosi-storage/kustomization.yaml index f867bff569..c80c842070 100644 --- a/services/kubecost/0.37.9/kustomization.yaml +++ b/services/centralized-kubecost/2.5.0/cosi-storage/kustomization.yaml @@ -1,4 +1,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - kubecost.yaml +- cosi-bucket.yaml diff --git a/services/centralized-kubecost/2.5.0/defaults/cm.yaml b/services/centralized-kubecost/2.5.0/defaults/cm.yaml new file mode 100644 index 0000000000..7188038ea2 --- /dev/null +++ b/services/centralized-kubecost/2.5.0/defaults/cm.yaml @@ -0,0 +1,295 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: centralized-kubecost-2.5.0-d2iq-defaults + namespace: ${releaseNamespace} +data: + values.yaml: | + --- + global: + prometheus: + enabled: true + + grafana: + enabled: false + # Use kommander monitoring Grafana instance + domainName: centralized-grafana.${releaseNamespace}.svc.cluster.local + + notifications: + alertmanager: + # If true, allow kubecost to write to alertmanager + enabled: true + + kubecostAggregator: + persistentConfigsStorage: + storageClass: "" # default storage class + storageRequest: 1Gi + aggregatorDbStorage: + storageClass: "" # default storage class + storageRequest: 128Gi + cloudCost: + # The cloudCost component of Aggregator depends on + # 
kubecostAggregator.deployMethod: + # kA.dM = "singlepod" -> cloudCost is run as container inside cost-analyzer + # kA.dM = "statefulset" -> cloudCost is run as single-replica Deployment + enabled: false + # Log level for the aggregator container. Options are "trace", "debug", "info", "warn", "error", "fatal", "panic" + logLevel: info + resources: + requests: + cpu: 1000m + memory: 1Gi + jaeger: + # Enable this to use jaeger for tracing, useful for debugging + enabled: false + image: jaegertracing/all-in-one + imageVersion: 1.64.0 # Pin the image here to avoid pulling in latest as that would affect CVE scans + + kubecostFrontend: + enabled: true + fullImageName: gcr.io/kubecost1/frontend:prod-2.5.0 + deployMethod: singlepod # Other possible value is `haMode` that is supported only with enterprise license. + ipv6: + enabled: false + + priority: + enabled: true + name: dkp-high-priority + + forecasting: + # Enable this to use kubecost's cost forecasting model + enabled: false + + # Define persistence volume for cost-analyzer, more information at https://github.com/kubecost/docs/blob/master/storage.md + persistentVolume: + # Upgrades from original default 0.2Gi may break if automatic disk resize is not supported + # https://github.com/kubecost/cost-analyzer-helm-chart/issues/507 + size: 32Gi + # Note that setting this to false means configurations will be wiped out on pod restart. 
+ enabled: true + # storageClass: "-" + + ingress: + enabled: true + annotations: + kubernetes.io/ingress.class: kommander-traefik + ingress.kubernetes.io/auth-response-headers: X-Forwarded-User + traefik.ingress.kubernetes.io/router.tls: "true" + traefik.ingress.kubernetes.io/router.middlewares: "${releaseNamespace}-stripprefixes@kubernetescrd,${releaseNamespace}-forwardauth@kubernetescrd" + paths: + - "/dkp/kommander/kubecost/frontend/" + hosts: + - "" + tls: [] + + prometheus: + kubeStateMetrics: + enabled: false + kube-state-metrics: + disabled: true + + extraScrapeConfigs: | + - job_name: kubecost + honor_labels: true + scrape_interval: 1m + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + dns_sd_configs: + - names: + - {{ .Release.Name }}-cost-analyzer + type: 'A' + port: 9003 + - job_name: kubecost-networking + kubernetes_sd_configs: + - role: pod + relabel_configs: + # Scrape only the targets matching the following metadata + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: {{ .Release.Name }}-network-costs + + server: + priorityClassName: dkp-high-priority + retention: 14d + image: + repository: quay.io/prometheus/prometheus + tag: v2.55.0 + # If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID + # to use as unique cluster ID in kubecost cost-analyzer deployment. + # This overrides the cluster_id set in prometheus.server.global.external_labels. + # NOTE: This does not affect the external_labels set in prometheus config. 
+ clusterIDConfigmap: kubecost-cluster-info-configmap + extraFlags: + - web.enable-admin-api + - web.enable-lifecycle + - storage.tsdb.wal-compression + resources: + limits: + cpu: 1000m + memory: 2500Mi + requests: + cpu: 300m + memory: 1500Mi + global: + scrape_interval: 1m + scrape_timeout: 10s + evaluation_interval: 1m + external_labels: + cluster_id: $CLUSTER_ID + persistentVolume: + size: 32Gi + enabled: true + extraArgs: + log.level: info + log.format: json + storage.tsdb.min-block-duration: 2h + storage.tsdb.max-block-duration: 2h + query.max-concurrency: 1 + query.max-samples: 100000000 + enableAdminApi: true + service: + gRPC: + enabled: true + configmapReload: + prometheus: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + priorityClassName: dkp-high-priority + enabled: true + image: + repository: quay.io/prometheus/alertmanager + tag: v0.27.0 + resources: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 50Mi + persistentVolume: + enabled: true + pushgateway: + enabled: false + persistentVolume: + enabled: false + serverFiles: + alerts: + groups: + - name: Kubecost + rules: + - alert: kubecostDown + expr: up{job="kubecost"} == 0 + annotations: + message: 'Kubecost metrics endpoint is not being scraped successfully.' + for: 10m + labels: + severity: warning + - alert: kubecostMetricsUnavailable + expr: sum(sum_over_time(node_cpu_hourly_cost[5m])) == 0 + annotations: + message: 'Kubecost metrics are not available in Prometheus.' + for: 10m + labels: + severity: warning + - alert: kubecostRecordingRulesNotEvaluated + expr: avg_over_time(kubecost_cluster_memory_working_set_bytes[5m]) == 0 + annotations: + message: 'Kubecost recording rules are not being successfully evaluated.' 
+ for: 10m + labels: + severity: warning + + grafana: + sidecar: + image: + repository: docker.io/kiwigrid/k8s-sidecar + tag: 1.28.0 + dashboards: + enabled: true + label: grafana_dashboard_kommander + datasources: + enabled: true + defaultDatasourceEnabled: false + label: grafana_datasource_kommander + + kubecostProductConfigs: + grafanaURL: "/dkp/kommander/monitoring/grafana" + clusterName: "" + clusterProfile: production + cloudIntegrationSecret: "" + currencyCode: USD + productKey: + enabled: false + #key: YOUR_KEY + single-cluster-values.yaml: | + --- + kubecostAggregator: + # deployMethod determines how Aggregator is deployed. Current options are + # "singlepod" (within cost-analyzer Pod) "statefulset" (separate + # StatefulSet), and "disabled". + deployMethod: singlepod + multi-cluster-values.yaml: | + --- + kubecostAggregator: + # deployMethod determines how Aggregator is deployed. Current options are + deployMethod: statefulset + federatedETL: + federatedCluster: true + kubecostModel: + federatedStorageConfigSecret: "federated-store" # Secret should have a key named "federated-store.yaml" with the federated storage credentials + # COSI related resources + bucketClasses: # Cluster scoped resource + - name: kubecost-cosi-storage + driverName: rook-ceph.ceph.objectstorage.k8s.io + deletionPolicy: Delete + parameters: + objectStoreUserSecretName: rook-ceph-object-user-dkp-object-store-cosi-admin + objectStoreUserSecretNamespace: kommander + bucketAccessClasses: # Cluster scoped resource + - name: kubecost-cosi-storage + driverName: rook-ceph.ceph.objectstorage.k8s.io + authenticationType: KEY + parameters: + # This secret (backed by a ceph user) is created below in the driver config. 
+ objectStoreUserSecretName: rook-ceph-object-user-dkp-object-store-cosi-admin + objectStoreUserSecretNamespace: kommander + bucketClaims: # Namespace scoped resource + - name: kubecost-cosi-storage + namespace: kubecost + bucketClassName: kubecost-cosi-storage + protocols: + - s3 + bucketAccesses: # Namespace scoped resource + - name: kubecost-cosi-storage + namespace: kubecost + bucketAccessClassName: kubecost-cosi-storage + bucketClaimName: kubecost-cosi-storage + protocol: s3 + credentialsSecretName: federated-store + cosiProviders: + ceph: + driver: + enabled: true + name: ceph-cosi-driver + namespace: kommander + spec: + deploymentStrategy: Auto + adminuser: + enabled: true + name: cosi-admin + namespace: kommander + spec: + displayName: "ceph cosi admin" + store: dkp-object-store # name of the CephObjectStore + capabilities: + bucket: "*" + user: "*" diff --git a/services/centralized-kubecost/0.37.8/defaults/kustomization.yaml b/services/centralized-kubecost/2.5.0/defaults/kustomization.yaml similarity index 100% rename from services/centralized-kubecost/0.37.8/defaults/kustomization.yaml rename to services/centralized-kubecost/2.5.0/defaults/kustomization.yaml diff --git a/services/centralized-kubecost/2.5.0/kustomization.yaml b/services/centralized-kubecost/2.5.0/kustomization.yaml new file mode 100644 index 0000000000..0e370440ce --- /dev/null +++ b/services/centralized-kubecost/2.5.0/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - move-to-konvoy.yaml + - cosi-storage.yaml + - pre-install.yaml + - release.yaml + - post-install.yaml diff --git a/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml b/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml new file mode 100644 index 0000000000..93a9fd184a --- /dev/null +++ b/services/centralized-kubecost/2.5.0/move-to-konvoy.yaml @@ -0,0 +1,20 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: 
todo-move-to-konvoy + namespace: ${releaseNamespace} +spec: + force: true + prune: false + wait: true + interval: 6h + retryInterval: 1m + path: ./services/centralized-kubecost/2.5.0/move-to-konvoy + sourceRef: + kind: GitRepository + name: management + namespace: kommander-flux + timeout: 1m + postBuild: + substitute: + releaseNamespace: ${releaseNamespace} diff --git a/services/centralized-kubecost/2.5.0/move-to-konvoy/kustomization.yaml b/services/centralized-kubecost/2.5.0/move-to-konvoy/kustomization.yaml new file mode 100644 index 0000000000..25813daecf --- /dev/null +++ b/services/centralized-kubecost/2.5.0/move-to-konvoy/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- move-to-konvoy-cosi-hr.yaml diff --git a/services/centralized-kubecost/2.5.0/move-to-konvoy/move-to-konvoy-cosi-hr.yaml b/services/centralized-kubecost/2.5.0/move-to-konvoy/move-to-konvoy-cosi-hr.yaml new file mode 100644 index 0000000000..e83878265c --- /dev/null +++ b/services/centralized-kubecost/2.5.0/move-to-konvoy/move-to-konvoy-cosi-hr.yaml @@ -0,0 +1,40 @@ +--- +# TODO: delete after merging the cosi-bucket-kit chart +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: takirala + namespace: kommander-flux +spec: + interval: 10m + timeout: 1m + url: https://takirala.github.io/charts/stable +--- +# TODO: https://jira.nutanix.com/browse/NCN-104793 and https://jira.nutanix.com/browse/NCN-104743 +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: cosi-move-to-konvoy-todo + namespace: kommander +spec: + chart: + spec: + chart: cosi + sourceRef: + kind: HelmRepository + name: takirala + # name: mesosphere.github.io-charts-stable + namespace: kommander-flux + version: 0.0.1-alpha.2 + interval: 15s + install: + crds: CreateReplace + remediation: + retries: 30 + createNamespace: true + upgrade: + crds: CreateReplace + remediation: + retries: 30 + releaseName: 
does-not-matter + targetNamespace: container-object-storage-system diff --git a/services/centralized-kubecost/0.37.8/post-install-jobs.yaml b/services/centralized-kubecost/2.5.0/post-install.yaml similarity index 75% rename from services/centralized-kubecost/0.37.8/post-install-jobs.yaml rename to services/centralized-kubecost/2.5.0/post-install.yaml index 3a33acf9c4..855c8f8a62 100644 --- a/services/centralized-kubecost/0.37.8/post-install-jobs.yaml +++ b/services/centralized-kubecost/2.5.0/post-install.yaml @@ -1,7 +1,7 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: centralized-kubecost-post-install-jobs + name: centralized-kubecost-post-install namespace: ${releaseNamespace} spec: force: true @@ -9,9 +9,10 @@ spec: wait: true interval: 6h retryInterval: 1m - path: ./services/centralized-kubecost/0.37.8/post-install-jobs + path: ./services/centralized-kubecost/2.5.0/post-install dependsOn: - name: centralized-kubecost-release + namespace: ${releaseNamespace} sourceRef: kind: GitRepository name: management diff --git a/services/centralized-kubecost/0.37.8/post-install-jobs/post-install-jobs.yaml b/services/centralized-kubecost/2.5.0/post-install/post-install-jobs.yaml similarity index 100% rename from services/centralized-kubecost/0.37.8/post-install-jobs/post-install-jobs.yaml rename to services/centralized-kubecost/2.5.0/post-install/post-install-jobs.yaml diff --git a/services/centralized-kubecost/2.5.0/pre-install.yaml b/services/centralized-kubecost/2.5.0/pre-install.yaml new file mode 100644 index 0000000000..03b00e11c8 --- /dev/null +++ b/services/centralized-kubecost/2.5.0/pre-install.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: centralized-kubecost-pre-install + namespace: ${releaseNamespace} +spec: + force: true + prune: true + wait: true + interval: 6h + retryInterval: 1m + path: ./services/centralized-kubecost/2.5.0/pre-install + dependsOn: + - name: 
kubecost-cosi-storage + namespace: ${releaseNamespace} + sourceRef: + kind: GitRepository + name: management + namespace: kommander-flux + timeout: 1m + postBuild: + substituteFrom: + - kind: ConfigMap + name: substitution-vars diff --git a/services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml b/services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml new file mode 100644 index 0000000000..9517679ce5 --- /dev/null +++ b/services/centralized-kubecost/2.5.0/pre-install/pre-install-jobs.yaml @@ -0,0 +1,127 @@ +# Pre-install job: creates the kubecost-cluster-info-configmap and converts the COSI federated-store secret into the federated-store.yaml layout. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: centralized-kubecost-pre-install + namespace: kubecost +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: centralized-kubecost-pre-install +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "create"] + # The job only reads the kube-system namespace UID, so namespaces need no list/create. + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get"] + - apiGroups: [ "" ] + resources: [ "secrets" ] + verbs: [ "get", "list", "patch" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: centralized-kubecost-pre-install +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: centralized-kubecost-pre-install +subjects: + - kind: ServiceAccount + name: centralized-kubecost-pre-install + namespace: kubecost +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: centralized-kubecost-pre-install + namespace: kubecost +spec: + template: + metadata: + name: centralized-kubecost-pre-install + spec: + serviceAccountName: centralized-kubecost-pre-install + restartPolicy: OnFailure + priorityClassName: dkp-high-priority + containers: + - name: create-kubecost-cluster-info-configmap + image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" + command: + - sh + - -c + - | + /bin/bash <<'EOF' + set -o nounset + set -o errexit + set -o pipefail + # Skip if the configmap already exists. 
+ if kubectl get configmap -n kubecost kubecost-cluster-info-configmap; then + echo "Configmap kubecost-cluster-info-configmap already exists. Skipping the step." + exit 0 + fi + # Resolve the ID in its own assignment so errexit aborts the job when the lookup fails. + cluster_id=$(kubectl get namespace kube-system -o jsonpath="{.metadata.uid}") + kubectl create configmap kubecost-cluster-info-configmap -n kubecost -oyaml --dry-run=client --save-config --from-literal=CLUSTER_ID="$cluster_id" | kubectl apply -f - + echo "Created kubecost-cluster-info-configmap with CLUSTER_ID=$cluster_id" + EOF + - name: transform-cosi-secret-to-kubecost-secret + image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" + command: + - sh + - -c + - | + set -o nounset + set -o errexit + set +x + + # check the value of kubecostClusterMode and exit early if it equals to single-cluster. + if [ "${kubecostClusterMode:=single-cluster}" = "single-cluster" ]; then + echo "kubecostClusterMode is single-cluster. Skipping the step." + exit 0 + fi + + # Wait until federated-store secret is found. + while ! kubectl get secret -n kubecost federated-store; do + echo "federated-store secret not found. Waiting for it to be created." + sleep 5 + done + + echo "federated-store secret found. Fetching bucketInfo..." + bucketInfo=$(kubectl get secret -n kubecost federated-store -o go-template='{{ .data.BucketInfo | base64decode }}') + tmpfile=$(mktemp /tmp/federated-store.XXXXXX) + + echo "Fetched bucketInfo from federated-store secret. Processing it..." + echo "$bucketInfo" | yq eval ' + { + "type": "S3", + "config": { + "bucket": .spec.bucketName, + "endpoint": .spec.secretS3.endpoint | sub(":\\d+$", "") | sub("^http://", "") | sub("^https://", ""), # Remove port and protocol (if any). + "region": .spec.secretS3.region, + "access_key": .spec.secretS3.accessKeyID, + "secret_key": .spec.secretS3.accessSecretKey, + "insecure": .spec.secretS3.endpoint | test("^http://"), # Use insecure if endpoint is http (e.g.: cluster internal endpoint). + "signature_version2": false, # Use signature version 4. 
+ "put_user_metadata": { + "X-Amz-Acl": "bucket-owner-full-control" + }, + "http_config": { + "idle_conn_timeout": "90s", + "response_header_timeout": "2m", + "insecure_skip_verify": false + }, + "trace": { + "enable": false # Enable to debug errors (if any) + }, + "part_size": 10240 # TODO(takirala): Deduce this value logically. + } + }' > "$tmpfile" + echo "Transformed bucketInfo to federated-store.yaml. Updating federated-store secret..." + kubectl create secret generic federated-store -n kubecost --from-file=federated-store.yaml="$tmpfile" --dry-run=client -o yaml | kubectl apply -f - + kubectl label secret federated-store -n kubecost app.kubernetes.io/processed-by-kommander-centralized-kubecost=true --overwrite + rm "$tmpfile" + # TODO(takirala): Test and support nutanix cosi secrets. diff --git a/services/centralized-kubecost/0.37.8/release.yaml b/services/centralized-kubecost/2.5.0/release.yaml similarity index 73% rename from services/centralized-kubecost/0.37.8/release.yaml rename to services/centralized-kubecost/2.5.0/release.yaml index d745445c6c..0085adb0c1 100644 --- a/services/centralized-kubecost/0.37.8/release.yaml +++ b/services/centralized-kubecost/2.5.0/release.yaml @@ -9,7 +9,7 @@ spec: wait: true interval: 6h retryInterval: 1m - path: ./services/centralized-kubecost/0.37.8/release + path: ./services/centralized-kubecost/2.5.0/release sourceRef: kind: GitRepository name: management @@ -19,3 +19,6 @@ spec: substituteFrom: - kind: ConfigMap name: substitution-vars + dependsOn: + - name: centralized-kubecost-pre-install + namespace: ${releaseNamespace} diff --git a/services/centralized-kubecost/2.5.0/release/release.yaml b/services/centralized-kubecost/2.5.0/release/release.yaml new file mode 100644 index 0000000000..6b79f3cc65 --- /dev/null +++ b/services/centralized-kubecost/2.5.0/release/release.yaml @@ -0,0 +1,105 @@ +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: centralized-kubecost + namespace: 
${releaseNamespace} +spec: + chart: + spec: + chart: cost-analyzer + sourceRef: + kind: HelmRepository + name: kubecost + namespace: kommander-flux + version: 2.5.0 + interval: 15s + install: + crds: CreateReplace + remediation: + retries: 30 + createNamespace: true + upgrade: + crds: CreateReplace + remediation: + retries: 30 + driftDetection: + mode: enabled + releaseName: centralized-kubecost + valuesFrom: + - kind: ConfigMap + name: centralized-kubecost-2.5.0-d2iq-defaults + - kind: ConfigMap + name: centralized-kubecost-2.5.0-d2iq-defaults + valuesKey: ${kubecostClusterMode:=single-cluster}-values.yaml + targetNamespace: kubecost +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubecost-thanos-configmap-edit + namespace: kubecost +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kubecost-thanos-configmap-edit + namespace: kubecost +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "create", "update", "patch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kubecost-thanos-configmap-edit + namespace: kubecost +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kubecost-thanos-configmap-edit +subjects: + - kind: ServiceAccount + name: kubecost-thanos-configmap-edit + namespace: kubecost +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: dkp-centralized-kubecost-view +rules: + - nonResourceURLs: + - /dkp/kommander/kubecost + - /dkp/kommander/kubecost/* + verbs: + - get + - head +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: dkp-centralized-kubecost-edit +rules: + - nonResourceURLs: + - /dkp/kommander/kubecost + - /dkp/kommander/kubecost/* + verbs: + - get + - head + - post + - put +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: dkp-centralized-kubecost-admin +rules: + - nonResourceURLs: + - /dkp/kommander/kubecost + - 
/dkp/kommander/kubecost/* + verbs: + - get + - head + - post + - put + - delete diff --git a/services/kommander/0.14.0/defaults/cm.yaml b/services/kommander/0.14.0/defaults/cm.yaml index 5bd7c0560e..eeb6ddb922 100644 --- a/services/kommander/0.14.0/defaults/cm.yaml +++ b/services/kommander/0.14.0/defaults/cm.yaml @@ -68,8 +68,6 @@ data: repository: ${kommanderLicensingControllerWebhookImageRepository} defaultEnterpriseApps: - "centralized-kubecost" - - "kubecost" - - "kubecost-thanos-traefik" - "centralized-grafana" - "karma" - "karma-traefik" @@ -122,7 +120,6 @@ data: - "kube-prometheus-stack" - "prometheus-adapter" - "prometheus-thanos-traefik" - - "kubecost-thanos-traefik" - "cert-manager" - "karma-traefik" - "gatekeeper" diff --git a/services/kubecost/0.37.9/defaults/cm.yaml b/services/kubecost/0.37.9/defaults/cm.yaml deleted file mode 100644 index dbe45c556d..0000000000 --- a/services/kubecost/0.37.9/defaults/cm.yaml +++ /dev/null @@ -1,130 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: kubecost-0.37.9-d2iq-defaults - namespace: ${releaseNamespace} -data: - values.yaml: | - --- - hooks: - clusterID: - kubectlImage: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" - priorityClassName: dkp-high-priority - - cost-analyzer: - kubecostFrontend: - fullImageName: ghcr.io/mesosphere/dkp-container-images/gcr.io/kubecost1/frontend:prod-1.108.1-d2iq.0 - priority: - enabled: true - name: dkp-high-priority - diagnostics: - enabled: false - global: - prometheus: - enabled: true - grafana: - enabled: true - - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: kommander-traefik - ingress.kubernetes.io/auth-response-headers: X-Forwarded-User - traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: "${workspaceNamespace}-stripprefixes@kubernetescrd,${workspaceNamespace}-forwardauth@kubernetescrd" - paths: - - "/dkp/kubecost/frontend/" - hosts: - - "" - tls: [] - - 
podSecurityPolicy: - enabled: false - - prometheus: - kubeStateMetrics: - enabled: false - kube-state-metrics: - disabled: true - server: - priorityClassName: dkp-high-priority - image: - tag: v2.55.0 - sidecarContainers: - - name: thanos-sidecar - image: quay.io/thanos/thanos:v0.37.1 - args: - - sidecar - - --log.level=debug - - --tsdb.path=/data/ - - --prometheus.url=http://127.0.0.1:9090 - - --reloader.config-file=/etc/config/prometheus.yml - # Start of time range limit to serve. Thanos sidecar will serve only metrics, which happened - # later than this value. Option can be a constant time in RFC3339 format or time duration - # relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y. - - --min-time=-3h - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - ports: - - name: sidecar-http - containerPort: 10902 - - name: grpc - containerPort: 10901 - - name: cluster - containerPort: 10900 - volumeMounts: - - name: config-volume - mountPath: /etc/config - - name: storage-volume - mountPath: /data - subPath: "" - alertmanager: - priorityClassName: dkp-high-priority - image: - repository: quay.io/prometheus/alertmanager - tag: v0.27.0 - grafana: - priorityClassName: dkp-high-priority - image: - repository: ghcr.io/mesosphere/dkp-container-images/docker.io/grafana/grafana - tag: 10.3.3-d2iq.0 - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: kommander-traefik - ingress.kubernetes.io/auth-response-headers: X-Forwarded-User - traefik.ingress.kubernetes.io/router.tls: "true" - traefik.ingress.kubernetes.io/router.middlewares: "${workspaceNamespace}-stripprefixes@kubernetescrd,${workspaceNamespace}-forwardauth@kubernetescrd" - hosts: [""] - path: "/dkp/kubecost/grafana" - grafana.ini: - log: - level: warn - server: - protocol: http - enable_gzip: true - root_url: "%(protocol)s://%(domain)s:%(http_port)s/dkp/kubecost/grafana" - serve_from_sub_path: false # Set to false on Grafana v10+ - 
auth.proxy: - enabled: true - header_name: X-Forwarded-User - auto-sign-up: true - auth.basic: - enabled: false - users: - auto_assign_org_role: Admin - analytics: - reporting_enabled: false - check_for_updates: false - sidecar: - image: - repository: docker.io/kiwigrid/k8s-sidecar - tag: 1.28.0 - - kubecostProductConfigs: - grafanaURL: "/dkp/kubecost/grafana" - # used for display in Kubecost UI - clusterName: "Kommander Managed Cluster" diff --git a/services/kubecost/0.37.9/kubecost.yaml b/services/kubecost/0.37.9/kubecost.yaml deleted file mode 100644 index 8385fb2214..0000000000 --- a/services/kubecost/0.37.9/kubecost.yaml +++ /dev/null @@ -1,84 +0,0 @@ -apiVersion: helm.toolkit.fluxcd.io/v2beta2 -kind: HelmRelease -metadata: - name: kubecost - namespace: ${releaseNamespace} -spec: - chart: - spec: - chart: kubecost - sourceRef: - kind: HelmRepository - name: mesosphere.github.io-charts-stable - namespace: kommander-flux - version: 0.37.4 - interval: 15s - install: - crds: CreateReplace - remediation: - retries: 30 - createNamespace: true - upgrade: - crds: CreateReplace - remediation: - retries: 30 - releaseName: kubecost - valuesFrom: - - kind: ConfigMap - name: kubecost-0.37.9-d2iq-defaults - targetNamespace: ${releaseNamespace} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: kubecost-app-dashboard-info - namespace: ${releaseNamespace} - labels: - "kommander.d2iq.io/application": "kubecost" -data: - name: "Kubecost" - dashboardLink: "/dkp/kubecost/frontend/overview.html" - docsLink: "http://docs.kubecost.com/" - # From: https://github.com/mesosphere/charts/blob/master/stable/kubecost/Chart.yaml#L2 - version: "1.104.0" ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-kubecost-view -rules: - - nonResourceURLs: - - /dkp/kubecost/frontend - - /dkp/kubecost/frontend/* - verbs: - - get - - head ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-kubecost-edit -rules: - - 
nonResourceURLs: - - /dkp/kubecost/frontend - - /dkp/kubecost/frontend/* - verbs: - - get - - head - - post - - put ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: dkp-kubecost-admin -rules: - - nonResourceURLs: - - /dkp/kubecost/frontend - - /dkp/kubecost/frontend/* - verbs: - - get - - head - - post - - put - - delete diff --git a/services/kubecost/2.5.0/defaults/cm.yaml b/services/kubecost/2.5.0/defaults/cm.yaml new file mode 100644 index 0000000000..1e6136d8cb --- /dev/null +++ b/services/kubecost/2.5.0/defaults/cm.yaml @@ -0,0 +1,170 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubecost-2.5.0-d2iq-defaults + namespace: ${releaseNamespace} +data: + values.yaml: | + --- + global: + prometheus: + enabled: true + + grafana: + enabled: false # Cannot use grafana when federatedETL.agentOnly is true. + + forecasting: + # Enable this to use kubecost's cost forecasting model + enabled: false + upgrade: + toV2: false # TODO(takirala): Handle upgrades. 
+ + federatedETL: + federatedCluster: true + agentOnly: true # disables UI + + ingress: + enabled: false + + kubecostModel: + federatedStorageConfigSecret: "federated-store" # Secret should have a key named "federated-store.yaml" with the federated storage credentials + + kubecostAggregator: + deployMethod: disabled + + priority: + enabled: true + name: dkp-high-priority + + prometheus: + kubeStateMetrics: + enabled: false + kube-state-metrics: + disabled: true + + extraScrapeConfigs: | + - job_name: kubecost + honor_labels: true + scrape_interval: 1m + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + dns_sd_configs: + - names: + - {{ .Release.Name }}-cost-analyzer + type: 'A' + port: 9003 + - job_name: kubecost-networking + kubernetes_sd_configs: + - role: pod + relabel_configs: + # Scrape only the targets matching the following metadata + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: {{ .Release.Name }}-network-costs + + server: + priorityClassName: dkp-high-priority + retention: 14d + fullnameOverride: "kubecost-prometheus-server" + image: + repository: quay.io/prometheus/prometheus + tag: v2.55.1 + # If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID + # to use as unique cluster ID in kubecost cost-analyzer deployment. + # This overrides the cluster_id set in prometheus.server.global.external_labels. + # NOTE: This does not affect the external_labels set in prometheus config. 
+ clusterIDConfigmap: kubecost-cluster-info-configmap + extraFlags: + - web.enable-admin-api + - web.enable-lifecycle + - storage.tsdb.wal-compression + resources: + limits: + cpu: 1000m + memory: 2500Mi + requests: + cpu: 300m + memory: 1500Mi + global: + scrape_interval: 1m + scrape_timeout: 10s + evaluation_interval: 1m + external_labels: + cluster_id: $CLUSTER_ID + persistentVolume: + size: 32Gi + enabled: true + extraArgs: + log.level: info + log.format: json + storage.tsdb.min-block-duration: 2h + storage.tsdb.max-block-duration: 2h + query.max-concurrency: 1 + query.max-samples: 100000000 + enableAdminApi: true + service: + gRPC: + enabled: true + configmapReload: + prometheus: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + enabled: true + #image: + #repository: ghcr.io/jimmidyson/configmap-reload + #tag: v0.14.0 + alertmanager: + fullnameOverride: "kubecost-prometheus-alertmanager" + priorityClassName: dkp-high-priority + enabled: true + image: + repository: quay.io/prometheus/alertmanager + tag: v0.27.0 + resources: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 50Mi + persistentVolume: + enabled: true + pushgateway: + enabled: false + persistentVolume: + enabled: false + serverFiles: + alerts: + groups: + - name: Kubecost + rules: + - alert: kubecostDown + expr: up{job="kubecost"} == 0 + annotations: + message: 'Kubecost metrics endpoint is not being scraped successfully.' + for: 10m + labels: + severity: warning + - alert: kubecostMetricsUnavailable + expr: sum(sum_over_time(node_cpu_hourly_cost[5m])) == 0 + annotations: + message: 'Kubecost metrics are not available in Prometheus.' + for: 10m + labels: + severity: warning + - alert: kubecostRecordingRulesNotEvaluated + expr: avg_over_time(kubecost_cluster_memory_working_set_bytes[5m]) == 0 + annotations: + message: 'Kubecost recording rules are not being successfully evaluated.' 
+ for: 10m + labels: + severity: warning + + kubecostProductConfigs: + # used for display in Kubecost UI + clusterName: "" + clusterProfile: production diff --git a/services/kubecost/0.37.9/defaults/kustomization.yaml b/services/kubecost/2.5.0/defaults/kustomization.yaml similarity index 100% rename from services/kubecost/0.37.9/defaults/kustomization.yaml rename to services/kubecost/2.5.0/defaults/kustomization.yaml diff --git a/services/centralized-kubecost/0.37.8/kustomization.yaml b/services/kubecost/2.5.0/kustomization.yaml similarity index 77% rename from services/centralized-kubecost/0.37.8/kustomization.yaml rename to services/kubecost/2.5.0/kustomization.yaml index 1b10bcdf3c..840f4608e8 100644 --- a/services/centralized-kubecost/0.37.8/kustomization.yaml +++ b/services/kubecost/2.5.0/kustomization.yaml @@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - release.yaml - - post-install-jobs.yaml + - prerequisites.yaml diff --git a/services/kubecost/2.5.0/prerequisites.yaml b/services/kubecost/2.5.0/prerequisites.yaml new file mode 100644 index 0000000000..a963879246 --- /dev/null +++ b/services/kubecost/2.5.0/prerequisites.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: kubecost-prerequisites + namespace: ${releaseNamespace} +spec: + force: true + prune: true + wait: true + interval: 6h + retryInterval: 1m + path: ./services/kubecost/2.5.0/prerequisites + sourceRef: + kind: GitRepository + name: management + namespace: kommander-flux + timeout: 1m + postBuild: + # The var values which are specified in-line with substitute take precedence over the ones in substituteFrom. 
+ substitute: + releaseNamespace: ${releaseNamespace} + substituteFrom: + - kind: ConfigMap + name: substitution-vars diff --git a/services/kubecost/2.5.0/prerequisites/prerequisites.yaml b/services/kubecost/2.5.0/prerequisites/prerequisites.yaml new file mode 100644 index 0000000000..95876336e6 --- /dev/null +++ b/services/kubecost/2.5.0/prerequisites/prerequisites.yaml @@ -0,0 +1,57 @@ +# Creates the kubecost-cluster-info-configmap (CLUSTER_ID = kube-system namespace UID) that the kubecost release depends on. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubecost-prerequisites + namespace: ${releaseNamespace} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kubecost-prerequisites +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "create", "patch"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubecost-prerequisites +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubecost-prerequisites +subjects: + - kind: ServiceAccount + name: kubecost-prerequisites + namespace: ${releaseNamespace} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: create-kubecost-cluster-info-configmap + namespace: ${releaseNamespace} +spec: + template: + metadata: + name: create-kubecost-cluster-info-configmap + spec: + serviceAccountName: kubecost-prerequisites + restartPolicy: OnFailure + priorityClassName: dkp-high-priority + containers: + - name: kubectl + image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}" + command: + - sh + - -c + - | + set -o errexit + set -o nounset + # Resolve the ID in its own assignment so a failed lookup fails the job instead of storing an empty CLUSTER_ID. + cluster_id=$(kubectl get namespace kube-system -o jsonpath="{.metadata.uid}") + kubectl create configmap kubecost-cluster-info-configmap -n ${releaseNamespace} -oyaml --dry-run=client --save-config --from-literal=CLUSTER_ID="$cluster_id" | kubectl apply -f - diff --git a/services/kubecost/2.5.0/release.yaml b/services/kubecost/2.5.0/release.yaml new file mode 100644 index 
0000000000..3743e76aef --- /dev/null +++ b/services/kubecost/2.5.0/release.yaml @@ -0,0 +1,23 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: kubecost-release + namespace: ${releaseNamespace} +spec: + force: true + prune: true + wait: true + interval: 6h + retryInterval: 1m + dependsOn: + - name: kubecost-prerequisites + namespace: ${releaseNamespace} + path: ./services/kubecost/2.5.0/release + sourceRef: + kind: GitRepository + name: management + namespace: kommander-flux + timeout: 1m + postBuild: + substitute: + releaseNamespace: ${releaseNamespace} diff --git a/services/kubecost/2.5.0/release/release.yaml b/services/kubecost/2.5.0/release/release.yaml new file mode 100644 index 0000000000..13d1902c3f --- /dev/null +++ b/services/kubecost/2.5.0/release/release.yaml @@ -0,0 +1,31 @@ +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: kubecost + namespace: ${releaseNamespace} +spec: + chart: + spec: + chart: cost-analyzer + sourceRef: + kind: HelmRepository + name: kubecost + namespace: kommander-flux + version: 2.5.0 + interval: 15s + install: + crds: CreateReplace + remediation: + retries: 30 + createNamespace: true + upgrade: + crds: CreateReplace + remediation: + retries: 30 + driftDetection: + mode: enabled + releaseName: kubecost + valuesFrom: + - kind: ConfigMap + name: kubecost-2.5.0-d2iq-defaults + targetNamespace: ${releaseNamespace} diff --git a/services/kubecost/metadata.yaml b/services/kubecost/metadata.yaml index 3e0349d54a..a1b200b859 100644 --- a/services/kubecost/metadata.yaml +++ b/services/kubecost/metadata.yaml @@ -2,7 +2,7 @@ displayName: Kubecost description: Provides real-time cost visibility and insights for teams using Kubernetes, helping organizations continuously reduce cloud costs. category: - monitoring -type: partner +type: platform scope: - workspace licensing: