🌱 add benchmark pipeline #1651

Open · wants to merge 4 commits into main
295 changes: 295 additions & 0 deletions .github/workflows/benchmark.yaml
@@ -0,0 +1,295 @@
name: Benchmark Test

on:
  pull_request:
    branches:
      - main

jobs:
  run-benchmark:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Install dependencies
        run: |
          go mod download
          go mod tidy

      # - name: Debug via SSH
      #   uses: mxschmitt/action-tmate@v3

      - name: Run benchmark test
        # working-directory: test/e2e
        run: |
          mkdir -p /tmp/artifacts/
          ARTIFACT_PATH=/tmp/artifacts make test-benchmark
      - name: Convert Benchmark Output to Prometheus Metrics
        run: |
          mkdir -p /tmp/artifacts/prometheus/
          echo "RUN_ID=${{ github.run_id }}"
          export RUN_ID=${{ github.run_id }}
          cat << 'EOF' > benchmark_to_prometheus.py
          import os
          import re
          import sys

          def parse_benchmark_output(benchmark_output):
              metrics = []
              round_idx = 0
              run_id = os.getenv('RUN_ID')  # label metrics with the GitHub Actions run ID so runs do not overwrite each other
              for line in benchmark_output.split("\n"):
                  match = re.match(r"Benchmark([\w\d]+)-\d+\s+\d+\s+([\d]+)\s+ns/op\s+([\d]+)\s+B/op\s+([\d]+)\s+allocs/op", line)
                  if match:
                      benchmark_name = match.group(1).lower()
                      time_ns = match.group(2)
                      memory_bytes = match.group(3)
                      allocs = match.group(4)

                      metrics.append(f"benchmark_{benchmark_name}_ns{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {time_ns}")
                      metrics.append(f"benchmark_{benchmark_name}_allocs{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {allocs}")
                      metrics.append(f"benchmark_{benchmark_name}_mem_bytes{{run_id=\"{run_id}\", round=\"{round_idx}\"}} {memory_bytes}")
                      round_idx += 1

              return "\n".join(metrics)

          if __name__ == "__main__":
              print(parse_benchmark_output(sys.stdin.read()))
          EOF

          python3 benchmark_to_prometheus.py < /tmp/artifacts/new.txt | tee /tmp/artifacts/prometheus/metrics.txt
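          # For illustration, a baseline line such as
          #   BenchmarkCreateClusterCatalog-10  1  1352852042 ns/op  404520 B/op  3914 allocs/op
          # becomes exposition-format lines like (run ID illustrative):
          #   benchmark_createclustercatalog_ns{run_id="13000000000", round="0"} 1352852042
          #   benchmark_createclustercatalog_allocs{run_id="13000000000", round="0"} 3914
          #   benchmark_createclustercatalog_mem_bytes{run_id="13000000000", round="0"} 404520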

      # - name: Compare with baseline
      #   run: |
      #     go install golang.org/x/perf/cmd/benchstat@latest
      #     benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output

      - name: Upload Benchmark Metrics
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-metrics
          path: /tmp/artifacts/prometheus/

  run-prometheus:
    needs: run-benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # TODO: use the GitHub REST API to download artifacts across repos
      - name: Download Prometheus Snapshot
        run: |
          echo "Available artifacts in this run:"
          gh run list --repo operator-framework/operator-controller --limit 5
          gh run download --repo operator-framework/operator-controller --name prometheus-snapshot --dir .
          ls -lh ./
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # # This step cannot fetch artifacts uploaded by a different workflow run
      # - name: Download Prometheus Snapshot2
      #   uses: actions/download-artifact@v4
      #   with:
      #     name: prometheus-snapshot
      #     path: ./
      #   env:
      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Download Benchmark Metrics
        uses: actions/download-artifact@v4
        with:
          name: benchmark-metrics
          path: ./

      - name: Get Host IP
        run: |
          echo "HOST_IP=$(ip route get 1 | awk '{print $7}')" | tee -a $GITHUB_ENV

      # localhost doesn't work, use host IP directly
      - name: Set Up Prometheus Config
        run: |
          echo "HOST_IP is $HOST_IP"
          cat << EOF > prometheus.yml
          global:
            scrape_interval: 5s
          scrape_configs:
            - job_name: 'benchmark_metrics'
              static_configs:
                - targets: ['$HOST_IP:9000']
          EOF
          mkdir -p ${{ github.workspace }}/prometheus-data
          sudo chown -R 65534:65534 ${{ github.workspace }}/prometheus-data
          sudo chmod -R 777 ${{ github.workspace }}/prometheus-data

      - name: Extract and Restore Prometheus Snapshot
        run: |
          SNAPSHOT_ZIP="${{ github.workspace }}/prometheus-snapshot.zip"
          SNAPSHOT_TAR="${{ github.workspace }}/prometheus_snapshot.tar.gz"
          SNAPSHOT_DIR="${{ github.workspace }}/prometheus-data/snapshots"

          mkdir -p "$SNAPSHOT_DIR"

          if [[ -f "$SNAPSHOT_ZIP" ]]; then
            echo "📦 Detected ZIP archive: $SNAPSHOT_ZIP"
            unzip -o "$SNAPSHOT_ZIP" -d "$SNAPSHOT_DIR"
            echo "✅ Successfully extracted ZIP snapshot."
          elif [[ -f "$SNAPSHOT_TAR" ]]; then
            echo "📦 Detected TAR archive: $SNAPSHOT_TAR"
            tar -xzf "$SNAPSHOT_TAR" -C "$SNAPSHOT_DIR"
            echo "✅ Successfully extracted TAR snapshot."
          else
            echo "⚠️ WARNING: No snapshot file found. Skipping extraction."
          fi

      - name: Run Prometheus
        run: |
          docker run -d --name prometheus -p 9090:9090 \
            --user=root \
            -v ${{ github.workspace }}/prometheus.yml:/etc/prometheus/prometheus.yml \
            -v ${{ github.workspace }}/prometheus-data:/prometheus \
            prom/prometheus --config.file=/etc/prometheus/prometheus.yml \
            --storage.tsdb.path=/prometheus \
            --storage.tsdb.retention.time=1h \
            --web.enable-admin-api

      - name: Wait for Prometheus to start
        run: sleep 10

      - name: Check Prometheus is running
        run: |
          set -e
          curl -s http://localhost:9090/-/ready || (docker logs prometheus && exit 1)

      - name: Start HTTP Server to Expose Metrics
        run: |
          cat << 'EOF' > server.py
          from http.server import SimpleHTTPRequestHandler, HTTPServer

          class MetricsHandler(SimpleHTTPRequestHandler):
              def do_GET(self):
                  if self.path == "/metrics":
                      self.send_response(200)
                      self.send_header("Content-type", "text/plain")
                      self.end_headers()
                      with open("metrics.txt", "r") as f:
                          self.wfile.write(f.read().encode())
                  else:
                      self.send_response(404)
                      self.end_headers()

          if __name__ == "__main__":
              server = HTTPServer(('0.0.0.0', 9000), MetricsHandler)
              print("Serving on port 9000...")
              server.serve_forever()
          EOF

          nohup python3 server.py &

      - name: Wait for Prometheus to Collect Data
        run: sleep 30

      - name: Check Prometheus targets page
        run: |
          http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets)
          if [ "$http_status" -eq 200 ]; then
            echo "Prometheus targets page is reachable."

            # Check the lastError field in the targets API
            error=$(curl -s http://localhost:9090/api/v1/targets | jq -r '.data.activeTargets[].lastError')
            if [ "$error" != "null" ] && [ -n "$error" ]; then
              echo "Error: Prometheus target has an error: $error"
              exit 1
            else
              echo "No errors found in Prometheus targets."
            fi
          else
            echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
            exit 1
          fi

      # - name: Debug via SSH
      #   uses: mxschmitt/action-tmate@v3

      - name: Check Benchmark Metrics Against Threshold
        run: |
          MAX_TIME_NS=1200000000 # 1.2s
          MAX_ALLOCS=4000
          MAX_MEM_BYTES=450000

          # Query Prometheus for the maximum observed value of each metric
          time_ns=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
          allocs=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_allocs)" | jq -r '.data.result[0].value[1]')
          mem_bytes=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_mem_bytes)" | jq -r '.data.result[0].value[1]')

          echo "⏳ Benchmark Execution Time: $time_ns ns"
          echo "🛠️ Memory Allocations: $allocs"
          echo "💾 Memory Usage: $mem_bytes bytes"

          # Threshold checking
          if (( $(echo "$time_ns > $MAX_TIME_NS" | bc -l) )); then
            echo "❌ ERROR: Execution time exceeds threshold!"
            exit 1
          fi

          if (( $(echo "$allocs > $MAX_ALLOCS" | bc -l) )); then
            echo "❌ ERROR: Too many memory allocations!"
            exit 1
          fi

          if (( $(echo "$mem_bytes > $MAX_MEM_BYTES" | bc -l) )); then
            echo "❌ ERROR: Memory usage exceeds threshold!"
            exit 1
          fi

          echo "✅ All benchmarks passed within threshold!"

      - name: Trigger Prometheus Snapshot
        run: |
          set -e
          curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot || (docker logs prometheus && exit 1)

      - name: Find and Upload Prometheus Snapshot
        run: |
          SNAPSHOT_PATH=$(ls -td ${{ github.workspace }}/prometheus-data/snapshots/* 2>/dev/null | head -1 || echo "")
          if [[ -z "$SNAPSHOT_PATH" ]]; then
            echo "❌ No Prometheus snapshot found!"
            docker logs prometheus
            exit 1
          fi

          echo "✅ Prometheus snapshot stored in: $SNAPSHOT_PATH"
          tar -czf $GITHUB_WORKSPACE/prometheus_snapshot.tar.gz -C "$SNAPSHOT_PATH" .

      - name: Stop Prometheus
        run: docker stop prometheus

      - name: Upload Prometheus Snapshot
        uses: actions/upload-artifact@v4
        with:
          name: prometheus-snapshot
          path: prometheus_snapshot.tar.gz

11 changes: 11 additions & 0 deletions Makefile
@@ -167,6 +167,11 @@ test: manifests generate fmt lint test-unit test-e2e #HELP Run all tests.
e2e: #EXHELP Run the e2e tests.
	go test -count=1 -v ./test/e2e/...

.PHONY: benchmark
benchmark: #EXHELP Run the benchmark tests.
	CATALOG_IMG=registry.redhat.io/redhat/redhat-operator-index:v4.18 \
		go test -run=^$$ -bench=. -benchmem -count=10 -v ./test/e2e/... | tee /tmp/artifacts/new.txt

E2E_REGISTRY_NAME := docker-registry
E2E_REGISTRY_NAMESPACE := operator-controller-e2e

@@ -256,6 +261,12 @@ catalogd-pre-upgrade-setup:
catalogd-image-registry: ## Setup in-cluster image registry
	./test/tools/imageregistry/registry.sh $(ISSUER_KIND) $(ISSUER_NAME)

.PHONY: test-benchmark
test-benchmark: KIND_CLUSTER_NAME := operator-controller-benchmark
test-benchmark: KUSTOMIZE_BUILD_DIR := config/overlays/e2e
test-benchmark: GO_BUILD_FLAGS := -cover
test-benchmark: run image-registry benchmark kind-clean #HELP Run benchmark test suite on local kind cluster

.PHONY: extension-developer-e2e
extension-developer-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/cert-manager
extension-developer-e2e: KIND_CLUSTER_NAME := operator-controller-ext-dev-e2e #EXHELP Run extension-developer e2e on local kind cluster
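For reference, this is the same target the workflow invokes; locally (assuming Docker and kind are available) the suite would run as:

    ARTIFACT_PATH=/tmp/artifacts make test-benchmark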
16 changes: 16 additions & 0 deletions benchmarks/baseline.txt
@@ -0,0 +1,16 @@
goos: darwin
goarch: arm64
pkg: github.com/operator-framework/operator-controller/test/e2e
cpu: Apple M1 Pro
BenchmarkCreateClusterCatalog-10 1 1352852042 ns/op 404520 B/op 3914 allocs/op
BenchmarkCreateClusterCatalog-10 13 86982353 ns/op 36907 B/op 394 allocs/op
BenchmarkCreateClusterCatalog-10 12 84962496 ns/op 34555 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 18 70375363 ns/op 34880 B/op 388 allocs/op
BenchmarkCreateClusterCatalog-10 15 71715708 ns/op 37654 B/op 399 allocs/op
BenchmarkCreateClusterCatalog-10 13 85251170 ns/op 36572 B/op 396 allocs/op
BenchmarkCreateClusterCatalog-10 13 83413260 ns/op 38435 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 13 93851487 ns/op 37249 B/op 395 allocs/op
BenchmarkCreateClusterCatalog-10 13 78722212 ns/op 36593 B/op 393 allocs/op
BenchmarkCreateClusterCatalog-10 13 86393522 ns/op 37404 B/op 395 allocs/op
PASS
ok github.com/operator-framework/operator-controller/test/e2e 32.699s
40 changes: 40 additions & 0 deletions test/e2e/benchmark_test.go
@@ -0,0 +1,40 @@
package e2e

import (
	"context"
	"os"
	"testing"

	"k8s.io/apimachinery/pkg/util/rand"
)

func BenchmarkCreateClusterCatalog(b *testing.B) {
[Review comment — Member]

I'm not sure this benchmark tells us a whole lot, because it is mostly measuring:

  • client CPU/memory used to submit a CREATE request to the apiserver
  • client CPU/memory used to submit a DELETE request to the apiserver

But it notably isn't able to account for the resources or time spent by the controller to reconcile the ClusterCatalog, which I think is what we actually want to measure.

But regardless, in my mind the biggest open question is NOT "what machinery do we use to take measurements?" The real question is "where/how do we store/retrieve historical measurements that we can use as a baseline for comparison?"
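(For illustration only, not part of the PR: a minimal sketch of measuring reconcile latency rather than client overhead. It assumes the suite's client c, the createTestCatalog/deleteTestCatalog helpers, a "Serving" condition type on ClusterCatalog, and an assumed catalogd import path; benchmarkTimeToServing is a hypothetical name.)

    package e2e

    import (
        "context"
        "testing"
        "time"

        "k8s.io/apimachinery/pkg/api/meta"
        "k8s.io/apimachinery/pkg/util/rand"
        "k8s.io/apimachinery/pkg/util/wait"
        "sigs.k8s.io/controller-runtime/pkg/client"

        catalogd "github.com/operator-framework/catalogd/api/v1" // assumed: the alias the suite uses for the ClusterCatalog API
    )

    // benchmarkTimeToServing (hypothetical) times how long a ClusterCatalog takes
    // to report Serving=True, approximating controller reconcile latency rather
    // than client-side request overhead.
    func benchmarkTimeToServing(ctx context.Context, b *testing.B, imageRef string) {
        for i := 0; i < b.N; i++ {
            catalogObj, err := createTestCatalog(ctx, rand.String(6), imageRef)
            if err != nil {
                b.Fatalf("failed to create ClusterCatalog: %v", err)
            }
            start := time.Now()
            err = wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true,
                func(ctx context.Context) (bool, error) {
                    cat := &catalogd.ClusterCatalog{}
                    if err := c.Get(ctx, client.ObjectKeyFromObject(catalogObj), cat); err != nil {
                        return false, nil // tolerate transient errors and keep polling
                    }
                    return meta.IsStatusConditionTrue(cat.Status.Conditions, "Serving"), nil
                })
            if err != nil {
                b.Fatalf("catalog never reported Serving=True: %v", err)
            }
            // Report wall-clock time from creation to readiness as a custom metric.
            b.ReportMetric(float64(time.Since(start).Milliseconds()), "ms/serving")
            if err := deleteTestCatalog(ctx, catalogObj); err != nil {
                b.Logf("failed to remove ClusterCatalog: %v", err)
            }
        }
    }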

[Author @jianzhangbjz — Feb 5, 2025]

> The real question is "where/how do we store/retrieve historical measurements that we can use as a baseline for comparison?"

The test results will be stored in a GCS bucket if we run these test cases in Prow CI, and we can then use BigQuery to retrieve historical measurements, similar to Sippy. But I think that's a little complex. How about storing the baseline in benchmarks/baseline.txt, with the initial benchmark results as the baseline? Once a new version is released, update it.

[Author @jianzhangbjz]

> But it notably isn't able to account for the resources or time spent by the controller to reconcile the ClusterCatalog, which I think is what we actually want to measure.

Yes; however, I was trying to cover the "real world" case instead of a unit test. Do we need to benchmark individual functions, similar to the unit tests?

[Member @joelanford — Feb 7, 2025]

> How about storing the baseline in benchmarks/baseline.txt?

Maybe? But if, for example, we wanted to build a histogram of response times of the catalogd web server over multiple commits and/or CI runs in order to get a smoothed-out baseline, that seems hard with a file committed to the repo, because we'd have to update it often.

Maybe another option would be to make use of GitHub's upload-artifact/download-artifact actions. If an artifact can be uploaded/downloaded and shared between GH Actions runs, then maybe we could do something like the following (see the sketch after this list):

  1. download a snapshotted Prometheus DB
  2. run Prometheus with the DB, and configure scraping with a short interval to pull metrics from our components into the DB
  3. verify via PromQL that we haven't broken our thresholds; if we have, fail the run
  4. snapshot the updated Prometheus DB
  5. upload the snapshot
  6. success!
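(Illustration only: step 3 could compare the current run against a smoothed historical baseline taken straight from the restored TSDB. The 30-day window, the median, and the 20% tolerance below are placeholder choices, and %5B/%5D is just the URL-encoded range selector.)

    # Median of the per-run maxima retained in the snapshot, as a smoothed baseline.
    baseline=$(curl -s "http://localhost:9090/api/v1/query?query=quantile(0.5,%20max_over_time(benchmark_createclustercatalog_ns%5B30d%5D))" | jq -r '.data.result[0].value[1]')
    current=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
    # Fail if the current run is more than 20% slower than the historical median.
    if (( $(echo "$current > $baseline * 1.2" | bc -l) )); then
      echo "regression: $current ns vs baseline $baseline ns"
      exit 1
    fi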

[Author @jianzhangbjz — Feb 8, 2025]

Cool! If so, we don't need to use benchstat, right?

[Author @jianzhangbjz]

I'm trying to use Prometheus.

	catalogImageRef := os.Getenv(testCatalogRefEnvVar)
	if catalogImageRef == "" {
		b.Fatalf("environment variable %s is not set", testCatalogRefEnvVar)
	}
	ctx := context.Background()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			catalogObj, err := createTestCatalog(ctx, rand.String(6), catalogImageRef)
			if err != nil {
				b.Logf("failed to create ClusterCatalog: %v", err)
			}

			if err := deleteTestCatalog(ctx, catalogObj); err != nil {
				b.Logf("failed to remove ClusterCatalog: %v", err)
			}
		}
	})
	// for i := 0; i < b.N; i++ {
	// 	catalogObj, err := createTestCatalog(ctx, rand.String(8), catalogImageRef)
	// 	if err != nil {
	// 		b.Logf("failed to create ClusterCatalog: %v", err)
	// 	}

	// 	if err := deleteTestCatalog(ctx, catalogObj); err != nil {
	// 		b.Logf("failed to remove ClusterCatalog: %v", err)
	// 	}
	// }
}
4 changes: 4 additions & 0 deletions test/e2e/e2e_suite_test.go
@@ -65,6 +65,10 @@ func createTestCatalog(ctx context.Context, name string, imageRef string) (*cata
	return catalog, err
}

func deleteTestCatalog(ctx context.Context, catalog *catalogd.ClusterCatalog) error {
	return c.Delete(ctx, catalog)
}

// patchTestCatalog will patch the existing clusterCatalog on the test cluster, provided
// the context, catalog name, and the image reference. It returns an error
// if any errors occurred while updating the catalog.