Skip to content

Update debug-metadata.yml #28

Update debug-metadata.yml

Update debug-metadata.yml #28

# Debug workflow: from inside the job container, launch the same XLA image
# again with `docker run` and assert that an XLA device is visible to
# torch_xla in that nested container.
name: Debug DIND

on:
  push:

jobs:
  debug-dind:
    name: Run TGI tests
    runs-on: gcp-ct5lp-hightpu-8t-usc1-public-211
    container:
      # Pinned by digest. The nested `docker run` in the last step uses the
      # same digest; keep the two in sync when bumping the image.
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526
      # Folded scalar: the lines below are joined with single spaces into one
      # option string. The first entry is expanded from a repository/org
      # variable whose content is not visible in this file.
      options: >-
        ${{ vars.V5_LITEPOD_8_ENV }}
        --shm-size "16gb"
        --privileged
        --ipc host
        -v /mnt/hf_cache:/mnt/cache/
        -e PJRT_DEVICE=TPU
    # NOTE(review): placed at job level (applies to every step); the
    # flattened original did not show whether this was `container.env`
    # instead -- confirm.
    env:
      PJRT_DEVICE: TPU
    steps:
      # NOTE(review): this installs the docker packages but nothing here
      # starts a daemon; the next step presumably relies on a reachable
      # Docker daemon/socket (e.g. provided through the container options)
      # -- confirm.
      - name: Install docker
        run: |
          apt-get update -y
          apt-get install -y docker.io

      # `-e NAME` without a value forwards NAME from the environment of the
      # calling shell. The TPU_* variables are presumably set in the job
      # container by vars.V5_LITEPOD_8_ENV -- TODO confirm.
      - name: Debug XLA (run container)
        run: |
          docker run \
            --privileged \
            --ipc host \
            --shm-size=16G \
            -v /mnt/hf_cache:/mnt/cache/ \
            -e PJRT_DEVICE=TPU \
            -e TPU_TOPOLOGY \
            -e TPU_WORKER_ID \
            -e TPU_SKIP_MDS_QUERY \
            -e TPU_TOPOLOGY_WRAP \
            -e TPU_CHIPS_PER_HOST_BOUNDS \
            -e TPU_ACCELERATOR_TYPE \
            -e TPU_TOPOLOGY_ALT \
            -e TPU_HOST_BOUNDS \
            -e TPU_WORKER_HOSTNAMES \
            us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526 \
            bash -c "python -c 'import torch_xla.core.xla_model as xm; assert xm.xla_device().type == \"xla\", \"XLA device not available\"'"