From 9824c6a73f25af3e2115c8463b9e6c24d2d105c1 Mon Sep 17 00:00:00 2001
From: "David H. Irving" <david.irving@noirlab.edu>
Date: Mon, 13 Jan 2025 12:49:45 -0700
Subject: [PATCH] Set up autoscaling for Butler server

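Enable pod auto-scaling for Butler server to allow for more concurrent requests.

- Enable the HorizontalPodAutoscaler by default, capped at 10 replicas, with
  a target CPU utilization of 25%.
- Run a minimum of 3 replicas on idfint and idfprod.
- Update the HPA template from autoscaling/v2beta1 to autoscaling/v2, which
  uses the `target.type` / `target.averageUtilization` metric syntax.
- Raise the CPU request from 15m to 1 (the HPA measures CPU utilization as a
  percentage of the container's CPU request).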
---
 applications/butler/README.md           |  6 +++---
 applications/butler/templates/hpa.yaml  | 10 +++++++---
 applications/butler/values-idfint.yaml  |  2 ++
 applications/butler/values-idfprod.yaml |  2 ++
 applications/butler/values.yaml         | 19 +++++++++++++++----
 5 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/applications/butler/README.md b/applications/butler/README.md
index 0f3f968ca2..743d3cccc4 100644
--- a/applications/butler/README.md
+++ b/applications/butler/README.md
@@ -11,10 +11,10 @@ Server for Butler data abstraction service
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | affinity | object | `{}` | Affinity rules for the butler deployment pod |
-| autoscaling.enabled | bool | `false` | Enable autoscaling of butler deployment |
-| autoscaling.maxReplicas | int | `100` | Maximum number of butler deployment pods |
+| autoscaling.enabled | bool | `true` | Enable autoscaling of butler deployment |
+| autoscaling.maxReplicas | int | `10` | Maximum number of butler deployment pods  Each replica can have 40 database connections, so we need to make sure the combined connections are under the postgres connection limit. (Which is configurable, but currently set to 400 at the IDF.) |
 | autoscaling.minReplicas | int | `1` | Minimum number of butler deployment pods |
-| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of butler deployment pods |
+| autoscaling.targetCPUUtilizationPercentage | int | `25` | Target CPU utilization of butler deployment pods  Butler CPU usage is very low in normal operation because most things are I/O bound.  CPU usage can start creeping up if we have many queries running simultaneously (due to serialization overhead and spatial postprocessing.) In this case the thread pool and database connection pool are probably oversubscribed long before we hit 100% cpu usage, so we want to get more replicas up at fairly low CPU usage. |
 | config.additionalS3EndpointUrls | object | No additional URLs | Endpoint URLs for additional S3 services used by the Butler, as a mapping from profile name to URL. |
 | config.dp02ClientServerIsDefault | bool | `false` | True if the 'dp02' Butler repository alias should use client/server Butler.  False if it should use DirectButler. |
 | config.dp02PostgresUri | string | No configuration file for DP02 will be generated. | Postgres connection string pointing to the registry database hosting Data Preview 0.2 data. |
diff --git a/applications/butler/templates/hpa.yaml b/applications/butler/templates/hpa.yaml
index 9eab162305..ddc3b91282 100644
--- a/applications/butler/templates/hpa.yaml
+++ b/applications/butler/templates/hpa.yaml
@@ -1,5 +1,5 @@
 {{- if .Values.autoscaling.enabled }}
-apiVersion: autoscaling/v2beta1
+apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
   name: "butler"
@@ -17,12 +17,16 @@ spec:
     - type: Resource
       resource:
         name: "cpu"
-        targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
     {{- end }}
     {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
     - type: Resource
       resource:
         name: "memory"
-        targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
     {{- end }}
 {{- end }}
diff --git a/applications/butler/values-idfint.yaml b/applications/butler/values-idfint.yaml
index fc3fcb6a8f..89ce224ec3 100644
--- a/applications/butler/values-idfint.yaml
+++ b/applications/butler/values-idfint.yaml
@@ -1,3 +1,5 @@
+autoscaling:
+  minReplicas: 3
 config:
   dp02ClientServerIsDefault: true
   dp02PostgresUri: postgresql://postgres@sqlproxy-butler-int.sqlproxy-cross-project:5432/dp02
diff --git a/applications/butler/values-idfprod.yaml b/applications/butler/values-idfprod.yaml
index 128891095e..908e0fcdc4 100644
--- a/applications/butler/values-idfprod.yaml
+++ b/applications/butler/values-idfprod.yaml
@@ -1,3 +1,5 @@
+autoscaling:
+  minReplicas: 3
 config:
   dp02ClientServerIsDefault: true
   dp02PostgresUri: postgresql://postgres@10.163.0.3/idfdp02
diff --git a/applications/butler/values.yaml b/applications/butler/values.yaml
index 3ea128cf97..bf72f47f5d 100644
--- a/applications/butler/values.yaml
+++ b/applications/butler/values.yaml
@@ -21,16 +21,27 @@ ingress:
 
 autoscaling:
   # -- Enable autoscaling of butler deployment
-  enabled: false
+  enabled: true
 
   # -- Minimum number of butler deployment pods
   minReplicas: 1
 
   # -- Maximum number of butler deployment pods
-  maxReplicas: 100
+  #
+  # Each replica can have 40 database connections, so we need to make sure the
+  # combined connections are under the postgres connection limit. (Which is
+  # configurable, but currently set to 400 at the IDF.)
+  maxReplicas: 10
 
   # -- Target CPU utilization of butler deployment pods
-  targetCPUUtilizationPercentage: 80
+  #
+  # Butler CPU usage is very low in normal operation because most things are
+  # I/O bound.  CPU usage can start creeping up if we have many queries running
+  # simultaneously (due to serialization overhead and spatial postprocessing.)
+  # In this case the thread pool and database connection pool are probably
+  # oversubscribed long before we hit 100% cpu usage, so we want to get more
+  # replicas up at fairly low CPU usage.
+  targetCPUUtilizationPercentage: 25
   # targetMemoryUtilizationPercentage: 80
 
 # -- Annotations for the butler deployment pod
@@ -45,7 +56,7 @@ resources:
     # 40 threads in the thread pool running large queries costing ~35MB each.
     memory: "1.5Gi"
   requests:
-    cpu: "15m"
+    cpu: "1"
     # Butler server uses around 200MB idle at startup, but under dynamic usage
     # Python seems to want to hold onto another couple hundred megabytes of
     # heap.