fastmachinelearning · kondratyevd · Feb 13, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025
diff --git a/.gitattributes b/.gitattributes
@@ -1,3 +1,4 @@
 *.json linguist-detectable
 *.yml linguist-detectable
-*.yaml linguist-detectable
+*.yaml linguist-detectable
+*.tpl linguist-language=Go
diff --git a/.github/workflows/ci-github-cms.yaml b/.github/workflows/ci-github-cms.yaml
@@ -51,6 +51,9 @@ jobs:
 
       - name: Deploy Helm chart
         run: |
+          helm repo add grafana https://grafana.github.io/helm-charts
+          helm repo update
+          helm dependency build ./helm/supersonic
           helm upgrade --install supersonic ./helm/supersonic \
             --values values/values-cms-ci.yaml -n cms
 
@@ -64,12 +67,12 @@ jobs:
 
       - name: Prometheus ready
         run: |
-          kubectl wait --for condition=Ready pod -l app.kubernetes.io/component=prometheus --timeout 120s -n cms
-          kubectl get svc,pod -l app.kubernetes.io/component=prometheus -n cms
+          kubectl wait --for condition=Ready pod -l app.kubernetes.io/name=prometheus --timeout 120s -n cms
+          kubectl get svc,pod -l app.kubernetes.io/name=prometheus -n cms
 
       - name: Grafana ready
         run: |
-          kubectl wait --for condition=Ready pod -l app.kubernetes.io/component=grafana --timeout 120s -n cms
+          kubectl wait --for condition=Ready pod -l app.kubernetes.io/name=grafana --timeout 120s -n cms
 
       - name: Triton server ready
         run: |  

diff --git a/.github/workflows/ci-local.sh b/.github/workflows/ci-local.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+#
+# Local CI run for the supersonic Helm chart: creates a throwaway Kind cluster,
+# installs the Prometheus Operator chart, KEDA and the CVMFS CSI driver, deploys
+# ./helm/supersonic with values/values-cms-ci.yaml, waits for the components to
+# become ready, runs the perf-analyzer job and finally deletes the cluster.
+#
+# Requires kind, kubectl and helm on PATH.
+
+# Abort on the first failing command, unset variable or failed pipeline stage;
+# otherwise a broken step would still be followed by the final
+# "completed successfully" message.
+set -euo pipefail
+
+# The chart, values and manifest paths used below are relative to the
+# repository root, two levels above this script (.github/workflows/).
+cd "$(dirname "${BASH_SOURCE[0]}")/../.."
+
+echo "Starting deployment process..."
+
+# 1. Create a Kubernetes cluster with Kind
+echo "Creating Kind cluster..."
+kind create cluster --name gh-k8s-cluster
+
+# 2. (Assuming Helm is installed and at the proper version)
+
+# 3. Create CMS namespace
+echo "Creating CMS namespace..."
+kubectl create namespace cms
+
+# 4. Install Prometheus Operator CRDs
+# kube-prometheus-stack is installed with default rules, Alertmanager,
+# Prometheus and Grafana disabled via the --set flags below.
+echo "Installing Prometheus Operator CRDs..."
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+kubectl create namespace monitoring
+helm install prometheus-operator prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  --set prometheusOperator.createCustomResource=false \
+  --set defaultRules.create=false \
+  --set alertmanager.enabled=false \
+  --set prometheus.enabled=false \
+  --set grafana.enabled=false
+
+# 5. Install KEDA Autoscaler
+echo "Installing KEDA Autoscaler..."
+helm repo add kedacore https://kedacore.github.io/charts
+helm repo update
+kubectl create namespace keda
+helm install keda kedacore/keda --namespace keda
+
+# 6. Mount CVMFS
+echo "Mounting CVMFS..."
+kubectl create namespace cvmfs-csi
+helm install -n cvmfs-csi cvmfs-csi oci://registry.cern.ch/kubernetes/charts/cvmfs-csi \
+  --values ci/values-cvmfs-csi.yaml
+kubectl apply -f ci/cvmfs-storageclass.yaml -n cvmfs-csi
+
+# 7. Deploy the Helm chart for supersonic
+echo "Deploying Helm chart for supersonic..."
+helm repo add grafana https://grafana.github.io/helm-charts
+helm repo update
+# Fetch the chart's dependencies before installing it from the local path.
+helm dependency build ./helm/supersonic
+helm upgrade --install supersonic ./helm/supersonic --values values/values-cms-ci.yaml -n cms
+
+# 8. Wait for components to become ready
+
+echo "Waiting for CVMFS pods to be ready..."
+kubectl wait --for=condition=Ready pod --all -n cvmfs-csi --timeout 120s
+
+echo "Waiting for Envoy proxy pods to be ready..."
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/component=envoy --timeout 120s -n cms
+
+echo "Waiting for Prometheus pods to be ready..."
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus --timeout 120s -n cms
+kubectl get svc,pod -l app.kubernetes.io/name=prometheus -n cms
+
+echo "Waiting for Grafana pods to be ready..."
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana --timeout 120s -n cms
+
+echo "Waiting for Triton server pods to be ready..."
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/component=triton --timeout 300s -n cms
+
+echo "Waiting for KEDA Autoscaler to be ready..."
+kubectl wait --for=condition=AbleToScale hpa -l app.kubernetes.io/component=keda --timeout 120s -n cms
+kubectl wait --for=condition=Ready so -l app.kubernetes.io/component=keda --timeout 120s -n cms
+
+# 9. Validate the Deployment
+echo "Validating Deployment in 'cms' namespace..."
+kubectl get all -n cms
+
+# 10. Run Perf Analyzer Job
+echo "Running Perf Analyzer Job..."
+# NOTE(review): no -n flag here, so the manifest is presumably expected to set
+# namespace cms itself (the wait below looks in cms) -- confirm.
+kubectl apply -f ci/perf-analyzer-job.yaml
+# Print an explicit message when the job does not reach 'complete' within 180s.
+kubectl wait --for=condition=complete job/perf-analyzer-job -n cms --timeout=180s || {
+  echo "Perf-analyzer job did not complete in time or failed."
+  exit 1
+}
+
+# Retrieve and print the logs from the Perf Analyzer pod
+POD_NAME=$(kubectl get pods -n cms -l job-name=perf-analyzer-job -o jsonpath="{.items[0].metadata.name}")
+echo "========== Perf Analyzer Logs =========="
+kubectl logs -n cms "$POD_NAME"
+echo "========================================"
+
+# 11. Cleanup the Kind cluster
+# Only reached on success; after a failed step the cluster is left in place
+# for inspection and has to be deleted manually before the next run.
+echo "Cleaning up: Deleting Kind cluster..."
+kind delete cluster --name gh-k8s-cluster
+
+echo "Deployment process completed successfully!"
diff --git a/.github/workflows/helm-lint.yaml b/.github/workflows/helm-lint.yaml
@@ -55,6 +55,10 @@ jobs:
 
       - name: Lint values.yaml files in values/ directory
         run: |
+          helm repo add grafana https://grafana.github.io/helm-charts
+          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+          helm repo update
+          helm dependency build ./helm/supersonic
           CHART_PATH="helm/supersonic/"
           VALUES_DIR="values/"
 

diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 # Sphinx Documentation
-docs/_build
+docs/_build
+
+*.tgz
diff --git a/README.md b/README.md
@@ -26,6 +26,8 @@ The main components of SuperSONIC are:
 
 ```
 helm repo add fastml https://fastmachinelearning.org/SuperSONIC
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo add grafana https://grafana.github.io/helm-charts
 helm repo update
 helm install <release-name> fastml/supersonic --values <your-values.yaml> -n <namespace>
 ```

diff --git a/docs/.values-table.md b/docs/.values-table.md
@@ -3,6 +3,8 @@
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | nameOverride | string | `""` | Unique identifier of SuperSONIC instance (equal to release name by default) |
+| serverLoadMetric | string | `""` | A metric used by both KEDA autoscaler and Envoy's prometheus-based rate limiter. Default metric (inference queue latency) is defined in templates/_helpers.tpl |
+| serverLoadThreshold | int | `100` | Threshold for the metric |
 | triton.replicas | int | `1` | Number of Triton server instances (if autoscaling is disabled) |
 | triton.image | string | `"nvcr.io/nvidia/tritonserver:24.12-py3-min"` | Docker image for the Triton server |
 | triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container |
@@ -22,6 +24,7 @@
 | envoy.resources | object | `{"limits":{"cpu":2,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
 | envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. |
 | envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports |
+| envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy |
 | envoy.grpc_route_timeout | string | `"0s"` | Timeout for gRPC route in Envoy; disabled by default (0s), preventing Envoy from closing connections too early. |
 | envoy.rate_limiter.listener_level | object | `{"enabled":false,"fill_interval":"12s","max_tokens":5,"tokens_per_fill":1}` | This rate limiter explicitly controls the number of client connections to the Envoy Proxy. |
 | envoy.rate_limiter.listener_level.enabled | bool | `false` | Enable rate limiter |
@@ -47,22 +50,25 @@
 | autoscaler.scaleDown.window | int | `600` |  |
 | autoscaler.scaleDown.period | int | `120` |  |
 | autoscaler.scaleDown.stepsize | int | `1` |  |
-| prometheus | object | `{"external":true,"ingress":{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""},"port":443,"scheme":"https","serverLoadMetric":"","serverLoadThreshold":100,"url":""}` | Connection to a Prometheus server is required for KEDA autoscaler and Envoy's prometheus-based rate limiter |
-| prometheus.external | bool | `true` | Whether to use external Prometheus instance (true) or deploy internal one (false) |
-| prometheus.url | string | `""` | External Prometheus server url and port number (find in documentation of a given cluster or ask admins) Only used when external=true |
-| prometheus.scheme | string | `"https"` | Specify whether external Prometheus endpoint is exposed as http or https Only used when external=true |
-| prometheus.serverLoadMetric | string | `""` | A metric used by both KEDA autoscaler and Envoy's prometheus-based rate limiter. # Default metric (inference queue latency) is defined in templates/_helpers.tpl |
-| prometheus.serverLoadThreshold | int | `100` | Threshold for the metric |
-| prometheus.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for internal Prometheus web UI (only used when external=false) |
-| ingress.enabled | bool | `false` |  |
-| ingress.hostName | string | `""` |  |
-| ingress.ingressClassName | string | `""` |  |
-| ingress.annotations | object | `{}` |  |
 | nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) |
 | tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) |
-| grafana.enabled | bool | `false` | Enable or disable Grafana deployment |
-| grafana.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":"haproxy"}` | Ingress configuration for Grafana |
-| grafana.ingress.enabled | bool | `false` | Enable or disable ingress for Grafana |
-| grafana.ingress.hostName | string | `""` | Hostname for Grafana ingress |
-| grafana.ingress.ingressClassName | string | `"haproxy"` | Ingress class name (e.g. nginx, haproxy) |
-| grafana.ingress.annotations | object | `{}` | Additional annotations for Grafana ingress |
+| prometheus | object | `{"alertmanager":{"enabled":false},"configmapReload":{"prometheus":{"enabled":false}},"enabled":false,"external":{"enabled":false,"port":443,"scheme":"https","url":""},"kube-state-metrics":{"enabled":false},"prometheus-node-exporter":{"enabled":false},"prometheus-pushgateway":{"enabled":false},"pushgateway":{"enabled":false},"rbac":{"create":false},"server":{"configMapOverrideName":"prometheus-config","global":{"evaluation_interval":"5s","scrape_interval":"5s"},"ingress":{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","tls":[{"hosts":[]}]},"persistentVolume":{"enabled":false},"releaseNamespace":true,"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"500m","memory":"512Mi"}},"retention":"15d","service":{"enabled":true,"servicePort":9090},"useExistingClusterRoleName":"supersonic-prometheus-role"},"serviceAccounts":{"server":{"create":false,"name":"supersonic-prometheus-sa"}}}` | Connection to a Prometheus server is required for KEDA autoscaler and Envoy's prometheus-based rate limiter |
+| prometheus.external.enabled | bool | `false` | Enable external Prometheus instance |
+| prometheus.external.url | string | `""` | External Prometheus server url |
+| prometheus.external.port | int | `443` | External Prometheus server port number |
+| prometheus.external.scheme | string | `"https"` | Specify whether external Prometheus endpoint is exposed as http or https |
+| prometheus.enabled | bool | `false` | Enable or disable Prometheus subchart deployment |
+| prometheus.server | object | `{"configMapOverrideName":"prometheus-config","global":{"evaluation_interval":"5s","scrape_interval":"5s"},"ingress":{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","tls":[{"hosts":[]}]},"persistentVolume":{"enabled":false},"releaseNamespace":true,"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"500m","memory":"512Mi"}},"retention":"15d","service":{"enabled":true,"servicePort":9090},"useExistingClusterRoleName":"supersonic-prometheus-role"}` | Prometheus Helm chart configuration (https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus) |
+| grafana.enabled | bool | `false` | Enable or disable Grafana subchart deployment |
+| grafana.adminUser | string | `"admin"` |  |
+| grafana.adminPassword | string | `"admin"` |  |
+| grafana.persistence.enabled | bool | `false` |  |
+| grafana.rbac.create | bool | `false` |  |
+| grafana.serviceAccount.create | bool | `false` |  |
+| grafana.datasources | object | `{"datasources.yaml":{"apiVersion":1,"datasources":[{"access":"proxy","isDefault":true,"jsonData":{"timeInterval":"5s","tlsSkipVerify":true},"name":"prometheus","type":"prometheus","url":"http://supersonic-prometheus-server:9090"}]}}` | Grafana datasources configuration |
+| grafana.dashboardProviders | object | `{"dashboardproviders.yaml":{"apiVersion":1,"providers":[{"disableDeletion":false,"editable":true,"folder":"","name":"default","options":{"path":"/var/lib/grafana/dashboards/default"},"orgId":1,"type":"file"}]}}` | Grafana dashboard providers configuration |
+| grafana.dashboardsConfigMaps | object | `{"default":"supersonic-grafana-default-dashboard"}` | Grafana dashboard ConfigMaps |
+| grafana."grafana.ini" | object | `{"auth":{"disable_login_form":true},"auth.anonymous":{"enabled":true,"org_role":"Admin"},"dashboards":{"default_home_dashboard_path":"/var/lib/grafana/dashboards/default/default.json"}}` | Grafana.ini configuration |
+| grafana.resources | object | `{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}}` | Resource limits and requests for Grafana |
+| grafana.service | object | `{"port":80,"targetPort":3000,"type":"ClusterIP"}` | Service configuration |
+| grafana.ingress | object | `{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","path":"/","pathType":"ImplementationSpecific","tls":[]}` | Ingress configuration |