# Web-page header residue, preserved as comments so the file parses as YAML:
#   Skip to content
#   Run accuracy test on Gaudi #177
#   Run accuracy test on Gaudi

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Run accuracy test on Gaudi

# Manually dispatched workflow. Both inputs are comma-separated lists; the
# get-test-matrix job converts them into the JSON arrays used by the matrix.
on:
  workflow_dispatch:
    inputs:
      examples:
        default: "ChatQnA"
        description: 'List of examples to test [AudioQnA,ChatQnA,CodeGen,FaqGen]'
        required: true
        type: string
      datasets:
        default: "en"
        description: "List of datasets to test [en, zh]"
        required: true
        type: string
  # for debug test
  # pull_request:
  #   branches: [main]
  #   types: [opened, reopened, ready_for_review, synchronize]

# Runs of this workflow on the same ref share one group; starting a new run
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-on-push
  cancel-in-progress: true
jobs:
  # Converts the comma-separated workflow inputs into JSON arrays that the
  # downstream jobs feed to `fromJson` to build their matrices.
  get-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      datasets: ${{ steps.get-matrix.outputs.datasets }}
      examples: ${{ steps.get-matrix.outputs.examples }}
    steps:
      - name: Create Matrix
        id: get-matrix
        # Inputs are handed over through env instead of being pasted into the
        # script text, so their content is never parsed as shell code.
        env:
          DATASETS: ${{ inputs.datasets }}
          EXAMPLES: ${{ inputs.examples }}
        run: |
          # Split on commas and/or whitespace, drop empty items, print compact JSON.
          to_json_array() {
            jq -nc --arg v "$1" '[$v | splits("[,\\s]+")] | map(select(length > 0))'
          }
          echo "datasets=$(to_json_array "$DATASETS")" >> "$GITHUB_OUTPUT"
          echo "examples=$(to_json_array "$EXAMPLES")" >> "$GITHUB_OUTPUT"

  # Runs the accuracy pipeline (prepare, launch service, evaluate, clean up)
  # once per (dataset, example) pair on the `aise-acc` runner.
  setup-acc:
    runs-on: aise-acc
    needs: [get-test-matrix]
    strategy:
      matrix:
        dataset: ${{ fromJson(needs.get-test-matrix.outputs.datasets) }}
        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
        # The zh dataset is excluded for every example except ChatQnA.
        exclude:
          - example: AudioQnA
            dataset: zh
          - example: CodeGen
            dataset: zh
          - example: FaqGen
            dataset: zh
      fail-fast: false
    steps:
      - name: Clean up Working Directory
        run: |
          sudo rm -rf ${{github.workspace}}/* || true
          docker system prune -f
          docker rmi $(docker images --filter reference="100.83.111.229:5000/opea/*:latest" -q) || true

      - name: Checkout out Validation
        uses: actions/checkout@v4
        with:
          path: Validation

      - name: Checkout out GenAIEval
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          repository: opea-project/GenAIEval
          path: GenAIEval

      - name: Checkout out GenAIExamples
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIExamples
          path: GenAIExamples

      - name: Set up environment
        env:
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        run: |
          set -x
          conda_env_name="OPEA_acc"
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # Always start from a fresh environment. -y keeps the removal
          # non-interactive so the step cannot block on a confirmation prompt.
          if conda info --envs | grep -q "$conda_env_name"; then
            echo "$conda_env_name exist! Recreating..."
            conda env remove --name "$conda_env_name" -y
            conda create -n ${conda_env_name} python=3.12 -y
            echo "Created!"
          else
            conda create -n ${conda_env_name} python=3.12 -y
          fi
          # The activation command is chosen from the second component of the
          # version printed by `conda --version`.
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate ${conda_env_name}
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate ${conda_env_name}
          fi
          # Matrix values are read from env rather than interpolated into the script.
          echo "Dataset: ${dataset}"
          echo "Example: ${example}"
          cd ${{ github.workspace }}/GenAIEval
          pip install -r requirements.txt
          # Example-specific extras; the CodeGen install runs from the GenAIEval checkout.
          if [[ "${example}" == "AudioQnA" ]]; then
            cd ${{ github.workspace }}/GenAIExamples/AudioQnA/benchmark/accuracy
            pip install -r requirements.txt
          elif [[ "${example}" == "CodeGen" ]]; then
            pip install -e .
          elif [[ "${example}" == "FaqGen" ]]; then
            pip install ragas==0.1.19
          fi

      - name: Evaluation Prepare
        shell: bash
        # Single expression: text outside `${{ }}` would turn the condition into
        # a non-empty string, which is always true. The second operand checks the
        # example (the original compared the dataset to 'ChatQnA').
        if: ${{ matrix.dataset == 'en' && matrix.example == 'ChatQnA' }}
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          mkdir -p acc-log
          acc_dir="${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy"
          cp "${{ github.workspace }}/Validation/.github/scripts/acc_test.sh" "${acc_dir}/"
          bash "${acc_dir}/acc_test.sh" --eval_prepare "${example}" "${dataset}" \
            | tee "${{ github.workspace }}/acc-log/${example}-${dataset}-eval_prepare.txt"

      - name: Launch Service
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
          IMAGE_REPO: "100.83.111.229:5000/opea"
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          mkdir -p acc-log
          acc_dir="${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy"
          cp "${{ github.workspace }}/Validation/.github/scripts/acc_test.sh" "${acc_dir}/"
          # Without pipefail the step would report tee's exit status and hide a
          # failing acc_test.sh.
          set -o pipefail
          bash "${acc_dir}/acc_test.sh" --launch_service "${example}" "${dataset}" \
            | tee "${{ github.workspace }}/acc-log/${example}-${dataset}-launch_service.txt"

      - name: Run Evaluation
        env:
          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
          port: "9001"
        run: |
          set -x
          export PATH=${HOME}/miniforge3/bin/:$PATH
          conda_version=$(conda --version 2>&1)
          version=$(echo $conda_version | grep -oP 'conda \K[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f2)
          if [[ $version -ge 6 ]]; then
            source activate OPEA_acc
          else
            source ${HOME}/miniforge3/etc/profile.d/conda.sh
            conda activate OPEA_acc
          fi
          # Make the GenAIEval checkout importable by the evaluation scripts.
          DPATH=${{ github.workspace }}/GenAIEval/
          export PYTHONPATH=$PYTHONPATH:$DPATH
          export PATH=$PATH:/bin:/usr/bin
          mkdir -p acc-log
          acc_dir="${{ github.workspace }}/GenAIExamples/${example}/benchmark/accuracy"
          cp "${{ github.workspace }}/Validation/.github/scripts/acc_test.sh" "${acc_dir}/"
          # Without pipefail the step would report tee's exit status and hide a
          # failing acc_test.sh.
          set -o pipefail
          bash "${acc_dir}/acc_test.sh" --launch_acc "${example}" "${dataset}" \
            | tee "${{ github.workspace }}/acc-log/${example}-${dataset}-acc_test.txt"

      - name: Clean up container
        if: always()
        env:
          example: ${{ matrix.example }}
        run: |
          # Stop and remove the compose stack first so the prune that follows
          # can reclaim what the stack was still holding.
          cd ${{ github.workspace }}/GenAIExamples/${example}/docker_compose/intel/hpu/gaudi
          docker compose stop && docker compose rm -f
          docker system prune -f
          docker rmi $(docker images --filter reference="100.83.111.229:5000/opea/*:latest" -q) || true

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.example }}-${{ matrix.dataset }}
          path: ${{ github.workspace }}/acc-log/*.txt

  # Downloads the logs of each (example, dataset) pair and appends the processed
  # results to acc-log/summary.txt, which is published as `acc-summary`.
  Process-test-log:
    runs-on: aise-acc
    needs: [setup-acc, get-test-matrix]
    strategy:
      matrix:
        example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
        dataset: ${{ fromJson(needs.get-test-matrix.outputs.datasets) }}
        exclude:
          - example: AudioQnA
            dataset: zh
          - example: CodeGen
            dataset: zh
          - example: FaqGen
            dataset: zh
      fail-fast: false
    steps:
      # acc_test.sh is copied from this checkout below; fetching it here means
      # the job does not depend on files left behind by an earlier job.
      - name: Checkout out Validation
        uses: actions/checkout@v4
        with:
          path: Validation

      - name: Download artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.example }}-${{ matrix.dataset }}
          path: ${{ github.workspace }}/acc-log/
        continue-on-error: false

      - name: Use the downloaded artifact
        # The script arguments below read these variables, so they must be
        # defined for this job as well.
        env:
          example: ${{ matrix.example }}
          dataset: ${{ matrix.dataset }}
        run: |
          ls acc-log
          cp "${{ github.workspace }}/Validation/.github/scripts/acc_test.sh" "${{ github.workspace }}/acc-log/acc_test.sh"
          bash "${{ github.workspace }}/acc-log/acc_test.sh" --process_results "${example}" "${dataset}" | tee -a acc-log/summary.txt
          echo "" >> acc-log/summary.txt

      # NOTE(review): every matrix leg uploads the same `acc-summary` artifact
      # with overwrite enabled; presumably this relies on a workspace shared
      # between legs so summary.txt accumulates. Confirm.
      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/acc-log/summary.txt
          name: acc-summary
          overwrite: true