# Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch #29
# Scheduled slow-test workflow: builds and installs the Jetstream Pytorch TGI
# server, then runs the slow pytest selections for Llama, Gemma and Mixtral.
name: Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch

on:
  schedule:
    - cron: '0 3 * * *'  # run at 3 AM UTC
  # This can be used to allow manually triggering nightlies from the web interface
  workflow_dispatch:

# One concurrency group per workflow + ref/run; a newer run in the same group
# cancels the one in progress.
# NOTE(review): github.head_ref is presumably empty for the schedule and
# workflow_dispatch triggers above, so the group would fall back to
# github.run_id (unique per run) - confirm against the GitHub contexts docs.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Build and Run slow tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    container:
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
      # Extra container flags come from the V5_LITEPOD_8_ENV repository
      # variable; /mnt/hf_cache is bind-mounted so HF_HUB_CACHE below points
      # at a path that exists inside the container.
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV }} -v /mnt/hf_cache:/mnt/hf_cache
    env:
      PJRT_DEVICE: TPU
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Build via make, then pip-install every text_generation_server wheel
      # found under text-generation-inference/.
      - name: Build and install Jetstream Pytorch TGI
        run: |
          make jetstream_requirements tgi_server test_installs
          find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;

      # Each step below runs the same test directory with JETSTREAM_PT=1 and
      # --runslow, narrowed to one model family by the pytest -k expression.
      - name: Run TGI Jetstream Pytorch - Llama
        run: |
          JETSTREAM_PT=1 python -m \
            pytest -sv text-generation-inference/tests --runslow -k "jetstream and slow and Llama"

      - name: Run TGI Jetstream Pytorch - Gemma
        run: |
          JETSTREAM_PT=1 python -m \
            pytest -sv text-generation-inference/tests --runslow -k "jetstream and slow and gemma"

      - name: Run TGI Jetstream Pytorch - Mixtral greedy
        run: |
          JETSTREAM_PT=1 python -m \
            pytest -sv text-generation-inference/tests --runslow -k "jetstream and slow and Mixtral and greedy"