🌱 add benchmark pipeline #1651

Open · wants to merge 4 commits into main
295 changes: 295 additions & 0 deletions .github/workflows/benchmark.yaml
@@ -0,0 +1,295 @@
name: Benchmark Test

on:
  pull_request:
    branches:
      - main

jobs:
  run-benchmark:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Install dependencies
        run: |
          go mod download
          go mod tidy

      # - name: Debug via SSH
      #   uses: mxschmitt/action-tmate@v3

      - name: Run benchmark test
        # working-directory: test/e2e
        run: |
          mkdir -p /tmp/artifacts/
          ARTIFACT_PATH=/tmp/artifacts make test-benchmark
      - name: Convert Benchmark Output to Prometheus Metrics
        run: |
          mkdir -p /tmp/artifacts/prometheus/
          echo "RUN_ID=${{ github.run_id }}"
          export RUN_ID=${{ github.run_id }}
          cat << 'EOF' > benchmark_to_prometheus.py
          import os
          import re
          import sys

          def parse_benchmark_output(benchmark_output):
              metrics = []
              round_idx = 0
              run_id = os.getenv('RUN_ID')  # label metrics with the GitHub Actions run ID so runs do not overwrite each other
              for line in benchmark_output.split("\n"):
                  match = re.match(r"Benchmark([\w\d]+)-\d+\s+\d+\s+([\d]+)\s+ns/op\s+([\d]+)\s+B/op\s+([\d]+)\s+allocs/op", line)
                  if match:
                      benchmark_name = match.group(1).lower()
                      time_ns = match.group(2)
                      memory_bytes = match.group(3)
                      allocs = match.group(4)

                      metrics.append(f"benchmark_{benchmark_name}_ns{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {time_ns}")
                      metrics.append(f"benchmark_{benchmark_name}_allocs{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {allocs}")
                      metrics.append(f"benchmark_{benchmark_name}_mem_bytes{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {memory_bytes}")
                      round_idx += 1

              return "\n".join(metrics)

          if __name__ == "__main__":
              print(parse_benchmark_output(sys.stdin.read()))
          EOF

          python3 benchmark_to_prometheus.py < /tmp/artifacts/new.txt | tee /tmp/artifacts/prometheus/metrics.txt
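          # For illustration, a baseline line such as
          #   BenchmarkCreateClusterCatalog-10  1  1352852042 ns/op  404520 B/op  3914 allocs/op
          # becomes exposition-format lines like (run ID illustrative):
          #   benchmark_createclustercatalog_ns{run_id="13000000000", round="0"} 1352852042
          #   benchmark_createclustercatalog_allocs{run_id="13000000000", round="0"} 3914
          #   benchmark_createclustercatalog_mem_bytes{run_id="13000000000", round="0"} 404520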

      # - name: Compare with baseline
      #   run: |
      #     go install golang.org/x/perf/cmd/benchstat@latest
      #     benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output

      - name: Upload Benchmark Metrics
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-metrics
          path: /tmp/artifacts/prometheus/

  run-prometheus:
    needs: run-benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # TODO: use the GitHub REST API to download artifacts across repos
      - name: Download Prometheus Snapshot
        run: |
          echo "Available artifacts in this run:"
          gh run list --repo operator-framework/operator-controller --limit 5
          gh run download --repo operator-framework/operator-controller --name prometheus-snapshot --dir .
          ls -lh ./
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # # This step cannot fetch artifacts uploaded by a different workflow run
      # - name: Download Prometheus Snapshot2
      #   uses: actions/download-artifact@v4
      #   with:
      #     name: prometheus-snapshot
      #     path: ./
      #   env:
      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Download Benchmark Metrics
        uses: actions/download-artifact@v4
        with:
          name: benchmark-metrics
          path: ./

      - name: Get Host IP
        run: |
          echo "HOST_IP=$(ip route get 1 | awk '{print $7}')" | tee -a $GITHUB_ENV

      # localhost doesn't work, use host IP directly
      - name: Set Up Prometheus Config
        run: |
          echo "HOST_IP is $HOST_IP"
          cat << EOF > prometheus.yml
          global:
            scrape_interval: 5s
          scrape_configs:
            - job_name: 'benchmark_metrics'
              static_configs:
                - targets: ['$HOST_IP:9000']
          EOF
          mkdir -p ${{ github.workspace }}/prometheus-data
          sudo chown -R 65534:65534 ${{ github.workspace }}/prometheus-data
          sudo chmod -R 777 ${{ github.workspace }}/prometheus-data

      - name: Extract and Restore Prometheus Snapshot
        run: |
          SNAPSHOT_ZIP="${{ github.workspace }}/prometheus-snapshot.zip"
          SNAPSHOT_TAR="${{ github.workspace }}/prometheus_snapshot.tar.gz"
          SNAPSHOT_DIR="${{ github.workspace }}/prometheus-data/snapshots"

          mkdir -p "$SNAPSHOT_DIR"

          if [[ -f "$SNAPSHOT_ZIP" ]]; then
            echo "📦 Detected ZIP archive: $SNAPSHOT_ZIP"
            unzip -o "$SNAPSHOT_ZIP" -d "$SNAPSHOT_DIR"
            echo "✅ Successfully extracted ZIP snapshot."
          elif [[ -f "$SNAPSHOT_TAR" ]]; then
            echo "📦 Detected TAR archive: $SNAPSHOT_TAR"
            tar -xzf "$SNAPSHOT_TAR" -C "$SNAPSHOT_DIR"
            echo "✅ Successfully extracted TAR snapshot."
          else
            echo "⚠️ WARNING: No snapshot file found. Skipping extraction."
          fi

      - name: Run Prometheus
        run: |
          docker run -d --name prometheus -p 9090:9090 \
            --user=root \
            -v ${{ github.workspace }}/prometheus.yml:/etc/prometheus/prometheus.yml \
            -v ${{ github.workspace }}/prometheus-data:/prometheus \
            prom/prometheus --config.file=/etc/prometheus/prometheus.yml \
            --storage.tsdb.path=/prometheus \
            --storage.tsdb.retention.time=1h \
            --web.enable-admin-api

      - name: Wait for Prometheus to start
        run: sleep 10

      - name: Check Prometheus is running
        run: |
          set -e
          curl -s http://localhost:9090/-/ready || (docker logs prometheus && exit 1)

      - name: Start HTTP Server to Expose Metrics
        run: |
          cat << 'EOF' > server.py
          from http.server import SimpleHTTPRequestHandler, HTTPServer

          class MetricsHandler(SimpleHTTPRequestHandler):
              def do_GET(self):
                  if self.path == "/metrics":
                      self.send_response(200)
                      self.send_header("Content-type", "text/plain")
                      self.end_headers()
                      with open("metrics.txt", "r") as f:
                          self.wfile.write(f.read().encode())
                  else:
                      self.send_response(404)
                      self.end_headers()

          if __name__ == "__main__":
              server = HTTPServer(('0.0.0.0', 9000), MetricsHandler)
              print("Serving on port 9000...")
              server.serve_forever()
          EOF

          nohup python3 server.py &

      - name: Wait for Prometheus to Collect Data
        run: sleep 30

      - name: Check Prometheus targets page
        run: |
          http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets)
          if [ "$http_status" -eq 200 ]; then
            echo "Prometheus targets page is reachable."

            # Check the lastError field in the targets API
            error=$(curl -s http://localhost:9090/api/v1/targets | jq -r '.data.activeTargets[].lastError')
            if [ "$error" != "null" ] && [ -n "$error" ]; then
              echo "Error: Prometheus target has an error: $error"
              exit 1
            else
              echo "No errors found in Prometheus targets."
            fi
          else
            echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
            exit 1
          fi

      # - name: Debug via SSH
      #   uses: mxschmitt/action-tmate@v3

      - name: Check Benchmark Metrics Against Threshold
        run: |
          MAX_TIME_NS=1200000000 # 1.2s
          MAX_ALLOCS=4000
          MAX_MEM_BYTES=450000

          # Query Prometheus for the maximum observed value of each metric
          time_ns=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
          allocs=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_allocs)" | jq -r '.data.result[0].value[1]')
          mem_bytes=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_mem_bytes)" | jq -r '.data.result[0].value[1]')

          echo "⏳ Benchmark Execution Time: $time_ns ns"
          echo "🛠️ Memory Allocations: $allocs"
          echo "💾 Memory Usage: $mem_bytes bytes"

          # Threshold checking
          if (( $(echo "$time_ns > $MAX_TIME_NS" | bc -l) )); then
            echo "❌ ERROR: Execution time exceeds threshold!"
            exit 1
          fi

          if (( $(echo "$allocs > $MAX_ALLOCS" | bc -l) )); then
            echo "❌ ERROR: Too many memory allocations!"
            exit 1
          fi

          if (( $(echo "$mem_bytes > $MAX_MEM_BYTES" | bc -l) )); then
            echo "❌ ERROR: Memory usage exceeds threshold!"
            exit 1
          fi

          echo "✅ All benchmarks passed within threshold!"

      - name: Trigger Prometheus Snapshot
        run: |
          set -e
          curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot || (docker logs prometheus && exit 1)

      - name: Find and Upload Prometheus Snapshot
        run: |
          SNAPSHOT_PATH=$(ls -td ${{ github.workspace }}/prometheus-data/snapshots/* 2>/dev/null | head -1 || echo "")
          if [[ -z "$SNAPSHOT_PATH" ]]; then
            echo "❌ No Prometheus snapshot found!"
            docker logs prometheus
            exit 1
          fi

          echo "✅ Prometheus snapshot stored in: $SNAPSHOT_PATH"
          tar -czf $GITHUB_WORKSPACE/prometheus_snapshot.tar.gz -C "$SNAPSHOT_PATH" .

      - name: Stop Prometheus
        run: docker stop prometheus

      - name: Upload Prometheus Snapshot
        uses: actions/upload-artifact@v4
        with:
          name: prometheus-snapshot
          path: prometheus_snapshot.tar.gz

11 changes: 11 additions & 0 deletions Makefile
@@ -167,6 +167,11 @@ test: manifests generate fmt lint test-unit test-e2e #HELP Run all tests.
e2e: #EXHELP Run the e2e tests.
	go test -count=1 -v ./test/e2e/...

.PHONY: benchmark
benchmark: #EXHELP Run the benchmark tests.
	CATALOG_IMG=registry.redhat.io/redhat/redhat-operator-index:v4.18 \
		go test -run=^$$ -bench=. -benchmem -count=10 -v ./test/e2e/... | tee /tmp/artifacts/new.txt

E2E_REGISTRY_NAME := docker-registry
E2E_REGISTRY_NAMESPACE := operator-controller-e2e

@@ -256,6 +261,12 @@ catalogd-pre-upgrade-setup:
catalogd-image-registry: ## Setup in-cluster image registry
	./test/tools/imageregistry/registry.sh $(ISSUER_KIND) $(ISSUER_NAME)

.PHONY: test-benchmark
test-benchmark: KIND_CLUSTER_NAME := operator-controller-benchmark
test-benchmark: KUSTOMIZE_BUILD_DIR := config/overlays/e2e
test-benchmark: GO_BUILD_FLAGS := -cover
test-benchmark: run image-registry benchmark kind-clean #HELP Run benchmark test suite on local kind cluster

.PHONY: extension-developer-e2e
extension-developer-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/cert-manager
extension-developer-e2e: KIND_CLUSTER_NAME := operator-controller-ext-dev-e2e #EXHELP Run extension-developer e2e on local kind cluster
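For reference, this is the same target the workflow invokes; locally (assuming Docker and kind are available) the suite would run as:

    ARTIFACT_PATH=/tmp/artifacts make test-benchmark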
16 changes: 16 additions & 0 deletions benchmarks/baseline.txt
@@ -0,0 +1,16 @@
goos: darwin
goarch: arm64
pkg: github.com/operator-framework/operator-controller/test/e2e
cpu: Apple M1 Pro
BenchmarkCreateClusterCatalog-10 1 1352852042 ns/op 404520 B/op 3914 allocs/op
BenchmarkCreateClusterCatalog-10 13 86982353 ns/op 36907 B/op 394 allocs/op
BenchmarkCreateClusterCatalog-10 12 84962496 ns/op 34555 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 18 70375363 ns/op 34880 B/op 388 allocs/op
BenchmarkCreateClusterCatalog-10 15 71715708 ns/op 37654 B/op 399 allocs/op
BenchmarkCreateClusterCatalog-10 13 85251170 ns/op 36572 B/op 396 allocs/op
BenchmarkCreateClusterCatalog-10 13 83413260 ns/op 38435 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 13 93851487 ns/op 37249 B/op 395 allocs/op
BenchmarkCreateClusterCatalog-10 13 78722212 ns/op 36593 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 13 86393522 ns/op 37404 B/op 395 allocs/op
PASS
ok github.com/operator-framework/operator-controller/test/e2e 32.699s
40 changes: 40 additions & 0 deletions test/e2e/benchmark_test.go
@@ -0,0 +1,40 @@
package e2e

import (
	"context"
	"os"
	"testing"

	"k8s.io/apimachinery/pkg/util/rand"
)

func BenchmarkCreateClusterCatalog(b *testing.B) {
[Review comment — Member]

I'm not sure this benchmark tells us a whole lot, because it is mostly measuring:

  • client CPU/memory used to submit a CREATE request to the apiserver
  • client CPU/memory used to submit a DELETE request to the apiserver

But it notably isn't able to account for the resources or time spent by the controller to reconcile the ClusterCatalog, which I think is what we actually want to measure.

But regardless, in my mind the biggest open question is NOT "what machinery do we use to take measurements?" The real question is "where/how do we store/retrieve historical measurements that we can use as a baseline for comparison?"
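(For illustration only, not part of the PR: a minimal sketch of measuring reconcile latency rather than client overhead. It assumes the suite's client c, the createTestCatalog/deleteTestCatalog helpers, a "Serving" condition type on ClusterCatalog, and an assumed catalogd import path; benchmarkTimeToServing is a hypothetical name.)

    package e2e

    import (
        "context"
        "testing"
        "time"

        "k8s.io/apimachinery/pkg/api/meta"
        "k8s.io/apimachinery/pkg/util/rand"
        "k8s.io/apimachinery/pkg/util/wait"
        "sigs.k8s.io/controller-runtime/pkg/client"

        catalogd "github.com/operator-framework/catalogd/api/v1" // assumed: the alias the suite uses for the ClusterCatalog API
    )

    // benchmarkTimeToServing (hypothetical) times how long a ClusterCatalog takes
    // to report Serving=True, approximating controller reconcile latency rather
    // than client-side request overhead.
    func benchmarkTimeToServing(ctx context.Context, b *testing.B, imageRef string) {
        for i := 0; i < b.N; i++ {
            catalogObj, err := createTestCatalog(ctx, rand.String(6), imageRef)
            if err != nil {
                b.Fatalf("failed to create ClusterCatalog: %v", err)
            }
            start := time.Now()
            err = wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true,
                func(ctx context.Context) (bool, error) {
                    cat := &catalogd.ClusterCatalog{}
                    if err := c.Get(ctx, client.ObjectKeyFromObject(catalogObj), cat); err != nil {
                        return false, nil // tolerate transient errors and keep polling
                    }
                    return meta.IsStatusConditionTrue(cat.Status.Conditions, "Serving"), nil
                })
            if err != nil {
                b.Fatalf("catalog never reported Serving=True: %v", err)
            }
            // Report wall-clock time from creation to readiness as a custom metric.
            b.ReportMetric(float64(time.Since(start).Milliseconds()), "ms/serving")
            if err := deleteTestCatalog(ctx, catalogObj); err != nil {
                b.Logf("failed to remove ClusterCatalog: %v", err)
            }
        }
    }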

[Author @jianzhangbjz — Feb 5, 2025]

> The real question is "where/how do we store/retrieve historical measurements that we can use as a baseline for comparison?"

The test results will be stored in a GCS bucket if we run these test cases in Prow CI, and we can then use BigQuery to retrieve historical measurements, similar to Sippy. But I think that's a little complex. How about storing the baseline in benchmarks/baseline.txt, with the initial benchmark results as the baseline? Once a new version is released, update it.

[Author @jianzhangbjz]

> But it notably isn't able to account for the resources or time spent by the controller to reconcile the ClusterCatalog, which I think is what we actually want to measure.

Yes; however, I was trying to cover the "real world" case instead of a unit test. Do we need to benchmark individual functions, similar to the unit tests?

[Member @joelanford — Feb 7, 2025]

> How about storing the baseline in benchmarks/baseline.txt?

Maybe? But if, for example, we wanted to build a histogram of response times of the catalogd web server over multiple commits and/or CI runs in order to get a smoothed-out baseline, that seems hard with a file committed to the repo, because we'd have to update it often.

Maybe another option would be to make use of GitHub's upload-artifact/download-artifact actions. If an artifact can be uploaded/downloaded and shared between GH Actions runs, then maybe we could do something like the following (see the sketch after this list):

  1. download a snapshotted Prometheus DB
  2. run Prometheus with the DB, and configure scraping with a short interval to pull metrics from our components into the DB
  3. verify via PromQL that we haven't broken our thresholds; if we have, fail the run
  4. snapshot the updated Prometheus DB
  5. upload the snapshot
  6. success!
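(Illustration only: step 3 could compare the current run against a smoothed historical baseline taken straight from the restored TSDB. The 30-day window, the median, and the 20% tolerance below are placeholder choices, and %5B/%5D is just the URL-encoded range selector.)

    # Median of the per-run maxima retained in the snapshot, as a smoothed baseline.
    baseline=$(curl -s "http://localhost:9090/api/v1/query?query=quantile(0.5,%20max_over_time(benchmark_createclustercatalog_ns%5B30d%5D))" | jq -r '.data.result[0].value[1]')
    current=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
    # Fail if the current run is more than 20% slower than the historical median.
    if (( $(echo "$current > $baseline * 1.2" | bc -l) )); then
      echo "regression: $current ns vs baseline $baseline ns"
      exit 1
    fi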

[Author @jianzhangbjz — Feb 8, 2025]

Cool! If so, we don't need to use benchstat, right?

[Author @jianzhangbjz]

I'm trying to use Prometheus.

	catalogImageRef := os.Getenv(testCatalogRefEnvVar)
	if catalogImageRef == "" {
		b.Fatalf("environment variable %s is not set", testCatalogRefEnvVar)
	}
	ctx := context.Background()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			catalogObj, err := createTestCatalog(ctx, rand.String(6), catalogImageRef)
			if err != nil {
				b.Logf("failed to create ClusterCatalog: %v", err)
			}

			if err := deleteTestCatalog(ctx, catalogObj); err != nil {
				b.Logf("failed to remove ClusterCatalog: %v", err)
			}
		}
	})
	// for i := 0; i < b.N; i++ {
	// 	catalogObj, err := createTestCatalog(ctx, rand.String(8), catalogImageRef)
	// 	if err != nil {
	// 		b.Logf("failed to create ClusterCatalog: %v", err)
	// 	}

	// 	if err := deleteTestCatalog(ctx, catalogObj); err != nil {
	// 		b.Logf("failed to remove ClusterCatalog: %v", err)
	// 	}
	// }
}
4 changes: 4 additions & 0 deletions test/e2e/e2e_suite_test.go
@@ -65,6 +65,10 @@ func createTestCatalog(ctx context.Context, name string, imageRef string) (*cata
	return catalog, err
}

func deleteTestCatalog(ctx context.Context, catalog *catalogd.ClusterCatalog) error {
	return c.Delete(ctx, catalog)
}

// patchTestCatalog will patch the existing clusterCatalog on the test cluster, provided
// the context, catalog name, and the image reference. It returns an error
// if any errors occurred while updating the catalog.