Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
baptistecolle committed Dec 4, 2024
1 parent a361254 commit db8c3ca
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 54 deletions.
76 changes: 23 additions & 53 deletions .github/workflows/debug-metadata.yml
Original file line number Diff line number Diff line change
@@ -1,61 +1,31 @@
# NOTE(review): these lines are a rendered diff of
# .github/workflows/debug-metadata.yml. Removed and added lines appear
# interleaved without +/- markers and without their original indentation,
# so the text below is not a loadable workflow as shown.
name: Debug Metadata

# Trigger section: both a manual trigger (workflow_dispatch) and a push
# trigger appear here; presumably one belongs to each side of the diff.
on:
# push:
workflow_dispatch:

# One concurrency group per workflow and head ref, falling back to the run id
# when head_ref is empty; a newer run in the same group cancels the one in
# progress.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
push:

jobs:
# Job that selects its runner through a runner group.
integration-tests:
name: Run TGI Integration Tests
runs-on:
group: gcp-ct5lp-hightpu-8t

# Job that targets a single runner label.
debug-dind:
name: Run TGI tests
runs-on: gcp-ct5lp-hightpu-8t-usc1-public-211
# Job container: image pinned by sha256 digest. The options add 16gb of shared
# memory, privileged mode, host IPC and a bind mount of /mnt/hf_cache at
# /mnt/cache/, on top of whatever the V5_LITEPOD_8_ENV repository variable
# expands to (its value is not visible here).
container:
image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526
options: ${{ vars.V5_LITEPOD_8_ENV }} --shm-size "16gb" --privileged --ipc host -v /mnt/hf_cache:/mnt/cache/
env:
# presumably selects the TPU backend for PyTorch/XLA - TODO confirm
PJRT_DEVICE: TPU
steps:
# NOTE(review): two versions of the step list are interleaved below (rendered
# diff without +/- markers). "Install DNS utilities" and "Debug metadata"
# appear to be one version, "Install docker" and "Debug XLA (run container)"
# the other; the apt-get lines under a given header may belong to the other
# version.
- name: Install DNS utilities
- name: Install docker
run: |
sudo apt-get update -y
sudo apt-get install -y dnsutils iputils-ping curl

# The "Debug metadata" script (the nslookup/ping/curl lines further down)
# checks that metadata.google.internal resolves and answers ping, also pings
# 169.254.169.254, then requests instance/image and
# instance/attributes/tpu-env over HTTP and exits 1 when either request does
# not return status 200.
- name: Debug metadata
apt-get update -y
apt-get install -y docker.io
- name: Debug XLA (run container)
run: |
echo "nslookup metadata.google.internal"
nslookup metadata.google.internal
echo "ping -c 3 metadata.google.internal"
ping -c 3 metadata.google.internal
echo "ping -c 3 169.254.169.254"
ping -c 3 169.254.169.254
echo "Get the IP address of metadata.google.internal using getent"
# Get the IP address of metadata.google.internal using getent
METADATA_IP=$(getent hosts metadata.google.internal | awk '{ print $1 }')
echo "Metadata server IP address: ${METADATA_IP}"
echo "Fetching instance metadata directory:"
curl -s http://metadata.google.internal/computeMetadata/v1/instance/ -H "Metadata-Flavor: Google"
echo
STATUS_CODE=$(curl -s -o /dev/null -w "%{http_code}" "http://metadata.google.internal/computeMetadata/v1/instance/image" -H "Metadata-Flavor: Google")
RESPONSE=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/image" -H "Metadata-Flavor: Google")
echo "Status code: $STATUS_CODE"
echo "Metadata response: $RESPONSE"
if [ "$STATUS_CODE" -ne 200 ]; then
echo "Failed to fetch instance image metadata"
exit 1
fi
STATUS_CODE=$(curl -s -o /dev/null -w "%{http_code}" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/tpu-env" -H "Metadata-Flavor: Google")
RESPONSE=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/attributes/tpu-env" -H "Metadata-Flavor: Google")
echo "Status code: $STATUS_CODE"
echo "Metadata response: $RESPONSE"
if [ "$STATUS_CODE" -ne 200 ]; then
echo "Failed to fetch TPU environment metadata"
exit 1
fi
# Smoke-test PyTorch/XLA inside the pinned image: the Python assertion fails
# (non-zero exit, which fails this step) unless xm.xla_device() reports a
# device of type xla.
# The Python string literals use escaped double quotes because the whole
# script is wrapped in single quotes for `bash -c`; a bare single quote in
# there would close that wrapper early, split the command into several words
# and leave bash with an unterminated double quote.
docker run \
  --privileged \
  --ipc host \
  --shm-size=16G \
  -v /mnt/hf_cache:/mnt/cache/ \
  -e PJRT_DEVICE=TPU \
  us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526 \
  bash -c '
    python -c "import torch_xla.core.xla_model as xm; assert xm.xla_device().type == \"xla\", \"XLA device not available\""
  '
2 changes: 1 addition & 1 deletion .github/workflows/test-pytorch-xla-tpu-tgi-integration.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Optimum TPU / Test TGI on TPU / Integration Tests

on:
push:
# push:
pull_request:
branches: [ main ]
paths:
Expand Down

0 comments on commit db8c3ca

Please sign in to comment.