Run performance test on a Kubernetes cluster

Run performance test on a Kubernetes cluster #414

Workflow file for this run

.github/workflows/manual-perf-test.yml at 23930c3

	# Copyright (C) 2024 Intel Corporation
	# SPDX-License-Identifier: Apache-2.0

	name: Run performance test on a Kubernetes cluster

	# Manual trigger only: every parameter of a run is supplied as a
	# workflow_dispatch input from the "Run workflow" form or the API.
	on:
	  workflow_dispatch:
	    inputs:
	      # Label of the runner that executes the run-benchmark job.
	      runner:
	        default: "aise-perf"
	        description: "Runner label 'aise-perf'"
	        required: true
	        type: string
	      # Gates the "Uninstall Workload" and "K8s Unlabel Nodes" steps.
	      cleanup:
	        default: true
	        description: "Whether to clean up the pods and the labels"
	        required: true
	        type: boolean
	      # Comma-separated list; each entry becomes one matrix job.
	      node_nums:
	        default: "1"
	        description: "List of test node numbers, e.g., 1,2,4,8"
	        required: true
	        type: string
	      registry:
	        default: ""
	        description: "Registry to store images, empty string means docker.io, default is empty"
	        required: false
	        type: string
	      tag:
	        default: "latest"
	        description: "Tag to apply to images"
	        required: true
	        type: string
	      # Branches checked out from the three opea-project repositories.
	      example_branch:
	        default: "main"
	        description: "GenAIExamples branch to test manifests"
	        required: true
	        type: string
	      infra_branch:
	        default: "main"
	        description: "GenAIInfra branch to test manifests"
	        required: true
	        type: string
	      eval_branch:
	        default: "main"
	        description: "GenAIEval branch to test manifests"
	        required: true
	        type: string
	      mode:
	        default: "with_rerank:oob"
	        description: "The mode of the test, e.g., with_rerank:tuned"
	        required: true
	        type: string
	      test_cases:
	        default: "e2e"
	        description: "The test cases of chatqna, e.g., e2e, llmserve, embedserve, rerankserve"
	        required: false
	        type: string
	      # NOTE(review): the two inputs below are disabled, yet the run-benchmark
	      # job still reads `inputs.user_queries` and `inputs.load_config`. While
	      # they stay commented out, those expressions expand to an empty string.
	      # Presumably disabled to stay within the workflow_dispatch input limit -
	      # TODO confirm before re-enabling.
	      # user_queries:
	      #   default: ''
	      #   description: "The user query list, e.g., '4, 8, 16, 640', empty input means the default setting."
	      #   required: false
	      #   type: string
	      # load_config:
	      #   default: "constant:5"
	      #   description: "configuration for load test in format load_type:value, constant:5 or poisson:1.0"
	      #   required: false
	      #   type: string

	jobs:
	  # Converts the comma-separated `node_nums` input into a JSON array that the
	  # run-benchmark job consumes as its matrix.
	  get-build-matrix:
	    runs-on: ubuntu-latest
	    outputs:
	      node_nums: ${{ steps.get-services.outputs.node_nums }}
	    steps:
	      - name: Get test Services
	        id: get-services
	        env:
	          # Passed through the environment instead of being interpolated into
	          # the script text, so the input value is never parsed as shell code.
	          NODE_NUMS: ${{ inputs.node_nums }}
	        run: |
	          set -x
	          # Commas become spaces; word splitting then yields one array entry per number.
	          node_num_list=($(echo "${NODE_NUMS}" | tr ',' ' '))
	          # Numeric sort so that e.g. 16 is ordered after 8 rather than after 1.
	          node_nums=$(printf '%s\n' "${node_num_list[@]}" | sort -n | jq -R '.' | jq -sc '.')
	          echo "node_nums=$node_nums" >> $GITHUB_OUTPUT

	  # Runs once per requested node count on the selected runner: labels the
	  # nodes, installs the workload, runs the benchmark, publishes the results
	  # and finally (optionally) cleans up.
	  run-benchmark:
	    needs: [get-build-matrix]
	    runs-on: "${{ inputs.runner }}"
	    env:
	      conda_env_name: "OPEA_perf"
	    strategy:
	      matrix:
	        node_num: ${{ fromJSON(needs.get-build-matrix.outputs.node_nums) }}
	    steps:
	      # Empties the workspace and creates the conda environment if it is missing.
	      - name: Clean Up Working Directory
	        run: |
	          sudo rm -rf ${{github.workspace}}/*
	          export PATH=${HOME}/miniforge3/bin/:$PATH
	          if conda info --envs | grep -q "$conda_env_name"; then
	            echo "$conda_env_name exist!"
	          else
	            conda create -n ${conda_env_name} python=3.12 -y
	          fi

	      - name: Checkout out Validation
	        uses: actions/checkout@v4
	        with:
	          path: Validation

	      - name: Checkout out GenAIExamples
	        uses: actions/checkout@v4
	        with:
	          repository: opea-project/GenAIExamples
	          ref: ${{ inputs.example_branch }}
	          path: GenAIExamples

	      - name: Checkout out GenAIEval
	        uses: actions/checkout@v4
	        with:
	          repository: opea-project/GenAIEval
	          ref: ${{ inputs.eval_branch }}
	          path: GenAIEval

	      - name: Checkout out GenAIInfra
	        uses: actions/checkout@v4
	        with:
	          repository: opea-project/GenAIInfra
	          ref: ${{ inputs.infra_branch }}
	          path: GenAIInfra

	      # Installs yq and the Python dependencies, then exports the load-shape
	      # settings for later steps through GITHUB_ENV.
	      - name: Set up stress tool
	        env:
	          # NOTE(review): the `load_config` input is currently commented out in
	          # the trigger definition, so this expands to an empty string.
	          LOAD_CONFIG: ${{ inputs.load_config }}
	        run: |
	          export PATH=${HOME}/miniforge3/bin/:$PATH
	          source activate ${conda_env_name}
	          if ! command -v yq &> /dev/null; then
	            sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq
	            sudo chmod +x /usr/bin/yq
	          fi
	          pip install pandas # install python lib pandas for csv processing
	          cp Validation/.github/scripts/process_csv.py GenAIEval/evals/benchmark # copy process_csv.py to GenAIEval folder
	          cd GenAIEval
	          pip install -r requirements.txt

	          # config env values: load_config has the form "<load_type>:<value>"
	          LOAD_SHAPE=$(echo "${LOAD_CONFIG}" | cut -d':' -f1 | xargs)
	          second_part=$(echo "${LOAD_CONFIG}" | cut -d':' -f2 | xargs)
	          if [ -z "$LOAD_SHAPE" ]; then
	            # Nothing is exported in this case, exactly as before; only the
	            # message differs from the misleading "Unknown LOAD_SHAPE: ".
	            echo "load_config input is not set; LOAD_SHAPE is left unset"
	          elif [ "$LOAD_SHAPE" == "constant" ]; then
	            echo "LOAD_SHAPE=$LOAD_SHAPE" >> $GITHUB_ENV
	            echo "CONCURRENT_LEVEL=$second_part" >> $GITHUB_ENV
	          elif [ "$LOAD_SHAPE" == "poisson" ]; then
	            echo "LOAD_SHAPE=$LOAD_SHAPE" >> $GITHUB_ENV
	            echo "ARRIVAL_RATE=$second_part" >> $GITHUB_ENV
	          else
	            echo "Unknown LOAD_SHAPE: $LOAD_SHAPE"
	          fi

	      - name: K8s Label Nodes
	        working-directory: ./Validation
	        env:
	          NODE_NUM: ${{ matrix.node_num }}
	        run: |
	          # Recorded first so that "K8s Unlabel Nodes" runs even if labelling fails part-way.
	          echo "uncordon=true" >> $GITHUB_ENV
	          .github/scripts/perf_test.sh --label

	      - name: Prepare benchmark configuration
	        id: prepare_benchmark
	        working-directory: ./Validation
	        env:
	          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
	          TEST_CASES: ${{ inputs.test_cases }}
	          # NOTE(review): the `user_queries` input is currently commented out in
	          # the trigger definition, so this expands to an empty string.
	          USER_QUERIES: ${{ inputs.user_queries }}
	          NODE_NUM: ${{ matrix.node_num }}
	        run: |
	          rm -rf $TEST_OUTPUT_DIR
	          .github/scripts/perf_test.sh --generate_config ${{ matrix.node_num }}
	          echo "uninstall=false" >> $GITHUB_ENV
	          # Random suffix used in the uploaded artifact name.
	          echo "randomstr=$(echo $RANDOM)" >> $GITHUB_OUTPUT
	          echo "TEST_OUTPUT_DIR=$TEST_OUTPUT_DIR" >> $GITHUB_ENV

	      - name: Install Workload
	        working-directory: ./Validation
	        env:
	          IMAGE_REPO: ${{ inputs.registry }}
	          IMAGE_TAG: ${{ inputs.tag }}
	          HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
	          MODE: ${{ inputs.mode }}
	          NODE_NUM: ${{ matrix.node_num }}
	        run: |
	          export PATH=${HOME}/miniforge3/bin/:$PATH
	          source activate ${conda_env_name}
	          # Recorded before installing so that "Uninstall Workload" also cleans up a failed install.
	          echo "uninstall=true" >> $GITHUB_ENV
	          .github/scripts/perf_test.sh --installChatQnA

	      # Runs the benchmark, post-processes the results and stages them in the
	      # workspace for the print and upload steps.
	      - name: Stress Test
	        working-directory: ./GenAIEval
	        env:
	          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
	          TEST_CASES: ${{ inputs.test_cases }}
	          NODE_NUM: ${{ matrix.node_num }}
	        run: |
	          export PATH=${HOME}/miniforge3/bin/:$PATH
	          source activate ${conda_env_name}
	          cd evals/benchmark
	          python benchmark.py

	          ${{ github.workspace }}/Validation/.github/scripts/perf_test.sh --process_result_data
	          cp $TEST_OUTPUT_DIR/*_result.csv ${{ github.workspace }}
	          cp $TEST_OUTPUT_DIR/*_testspec.yaml ${{ github.workspace }}
	          # Archive the same files that were just copied (globs must match the cp lines above).
	          tar -cvf ${{ github.workspace }}/node_${{ matrix.node_num }}.tar $TEST_OUTPUT_DIR/*_result.csv $TEST_OUTPUT_DIR/*_testspec.yaml

	      - name: Print Test Result
	        run: |
	          for file in "${{ github.workspace }}"/*_result.csv; do
	            if [[ -f "$file" ]]; then
	              echo "Printing contents of: $file"
	              cat "$file"
	              echo "-----------------------------------"
	            fi
	          done
	          spec_file=$(find "${{ github.workspace }}" -maxdepth 1 -name '*_testspec.yaml' | head -n 1)
	          echo "Dump test specification: $spec_file"
	          cat "$spec_file"

	      # NOTE(review): the exact pinned version was unreadable in the source;
	      # the `overwrite` input needs the v4 line - confirm the intended pin.
	      - uses: actions/upload-artifact@v4
	        with:
	          name: ${{ matrix.node_num }}node_raw_data_tar_${{ steps.prepare_benchmark.outputs.randomstr }}
	          path: node_${{ matrix.node_num }}.tar
	          overwrite: true

	      # The cleanup steps use `if: always()` so they run even when an earlier
	      # step failed. Each one still checks the flag written to GITHUB_ENV by
	      # the matching setup step, plus the `cleanup` input.
	      - name: Uninstall Workload
	        if: always()
	        working-directory: ./Validation
	        env:
	          MODE: ${{ inputs.mode }}
	          NODE_NUM: ${{ matrix.node_num }}
	        run: |
	          if [[ "$uninstall" == true && "${{ inputs.cleanup }}" == true ]]; then
	            .github/scripts/perf_test.sh --uninstallChatQnA
	            sleep 200s
	          fi

	      - name: K8s Unlabel Nodes
	        if: always()
	        working-directory: ./Validation
	        run: |
	          if [[ "$uncordon" == true && "${{ inputs.cleanup }}" == true ]]; then
	            .github/scripts/perf_test.sh --unlabel
	            sleep 10s
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Run performance test on a Kubernetes cluster #414

Workflow file

Run performance test on a Kubernetes cluster #414

Jobs

Run details

Workflow file for this run