Skip to content

Commit

Permalink
feat: Update Preset Configs (#243)
Browse files Browse the repository at this point in the history
Update the preset configs so that each model can be deployed as either a StatefulSet or a Deployment.
Add the mistral and phi-2 models to the preset configs.
Adding these configs prevents the missing-file error seen in this e2e run:
https://github.com/Azure/kaito/actions/runs/7839888760
  • Loading branch information
ishaansehgal99 authored Feb 13, 2024
1 parent c4bc7aa commit 133cab3
Show file tree
Hide file tree
Showing 25 changed files with 498 additions and 255 deletions.
6 changes: 6 additions & 0 deletions .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@
"node-vm-size": "Standard_NC12s_v3",
"node-osdisk-size": 100
},
{
"name": "phi-2",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 30
},
{
"name": "llama-2-7b",
"node-count": 1,
Expand Down
40 changes: 26 additions & 14 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -234,19 +234,25 @@ jobs:
done
echo "Service IP is $SERVICE_IP"
echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT
- name: Replace IP and Deploy Statefulset to K8s
- name: Get Resource Type
  id: resource
  run: |
    # Map the matrix model name to the kind of workload its manifest creates:
    # names containing "llama" -> statefulset, every other name -> deployment.
    # The value is published as a step output and read by later steps
    # (rollout status and cleanup) via steps.resource.outputs.RESOURCE_TYPE.
    case "${{ matrix.model.name }}" in
      *llama*) RESOURCE_TYPE="statefulset" ;;
      *) RESOURCE_TYPE="deployment" ;;
    esac
    echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> $GITHUB_OUTPUT
- name: Replace IP and Deploy Resource to K8s
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: |
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
- name: Wait for Statefulset to be ready
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
- name: Wait for Resource to be ready
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: |
kubectl rollout status statefulset/${{ matrix.model.name }}
kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }}
- name: Test home endpoint
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
Expand Down Expand Up @@ -359,16 +365,22 @@ jobs:
- name: Cleanup
if: always()
run: |
# Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
if [ -n "${{ steps.resource.outputs.RESOURCE_TYPE }}" ]; then
# Use RESOURCE_TYPE from the previous step
RESOURCE_TYPE=${{ steps.resource.outputs.RESOURCE_TYPE }}
# Check and Delete K8s Resource (Deployment or StatefulSet)
if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
fi
fi
# Check and Delete K8s Service if it exists
if kubectl get svc ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete svc ${{ matrix.model.name }}
fi
# Check and Delete K8s StatefulSet if it exists
if kubectl get statefulset ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete statefulset ${{ matrix.model.name }}
fi
# Check and Delete AKS Nodepool if it exists
if [ -n "${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}" ]; then
NODEPOOL_EXIST=$(az aks nodepool show \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ metadata:
spec:
selector:
app: falcon
statefulset.kubernetes.io/pod-name: falcon-40b-instruct-0
ports:
- protocol: TCP
port: 80
targetPort: 5000
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
publishNotReadyAddresses: true

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Test manifest: single-replica Deployment for the falcon-40b-instruct preset.
# REPO_HERE and TAG_HERE are placeholders; the e2e workflow rewrites them with
# sed before running `kubectl apply` on this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: falcon-40b-instruct
spec:
  replicas: 1
  selector:
    matchLabels:
      # NOTE(review): this label is not model-specific, and the falcon Service
      # manifests select on `app: falcon` alone - confirm that no two falcon
      # presets are ever deployed into the same namespace at the same time.
      app: falcon
  template:
    metadata:
      labels:
        app: falcon
    spec:
      # Schedule only onto nodes labelled for this preset's pool.
      nodeSelector:
        pool: n40binstruct
      # Tolerate the NoSchedule taints sku=gpu and nvidia.com/gpu (any value).
      tolerations:
        - key: sku
          operator: Equal
          value: gpu
          effect: NoSchedule
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      volumes:
        # Memory-backed scratch volume, mounted at /dev/shm in the container.
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: falcon-container
          image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE
          # Folded scalar: the lines below are joined with single spaces into
          # one shell command string passed to `/bin/sh -c`.
          command:
            - /bin/sh
            - -c
            - >-
              accelerate launch
              --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all
              inference-api.py
              --pipeline text-generation --torch_dtype bfloat16
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          resources:
            limits:
              nvidia.com/gpu: 4
            requests:
              nvidia.com/gpu: 4  # Requesting 4 GPUs
          # Readiness is probed from 30 s after start; liveness probing is held
          # back for 600 s (presumably to cover model load time - TODO confirm).
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600  # 10 Min
            periodSeconds: 10
7 changes: 3 additions & 4 deletions presets/test/manifests/falcon-40b/falcon-40b-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ metadata:
spec:
selector:
app: falcon
statefulset.kubernetes.io/pod-name: falcon-40b-0
ports:
- protocol: TCP
port: 80
targetPort: 5000
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
publishNotReadyAddresses: true
56 changes: 0 additions & 56 deletions presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml

This file was deleted.

55 changes: 55 additions & 0 deletions presets/test/manifests/falcon-40b/falcon-40b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Test manifest: single-replica Deployment for the falcon-40b preset.
# REPO_HERE and TAG_HERE are placeholders; the e2e workflow rewrites them with
# sed before running `kubectl apply` on this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: falcon-40b
spec:
  replicas: 1
  selector:
    matchLabels:
      # NOTE(review): this label is not model-specific, and the falcon Service
      # manifests select on `app: falcon` alone - confirm that no two falcon
      # presets are ever deployed into the same namespace at the same time.
      app: falcon
  template:
    metadata:
      labels:
        app: falcon
    spec:
      # Schedule only onto nodes labelled for this preset's pool.
      nodeSelector:
        pool: falcon40b
      # Tolerate the NoSchedule taints sku=gpu and nvidia.com/gpu (any value).
      tolerations:
        - key: sku
          operator: Equal
          value: gpu
          effect: NoSchedule
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      volumes:
        # Memory-backed scratch volume, mounted at /dev/shm in the container.
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: falcon-container
          image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE
          # Folded scalar: the lines below are joined with single spaces into
          # one shell command string passed to `/bin/sh -c`.
          command:
            - /bin/sh
            - -c
            - >-
              accelerate launch
              --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all
              inference-api.py
              --pipeline text-generation --torch_dtype bfloat16
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          resources:
            limits:
              nvidia.com/gpu: 4
            requests:
              nvidia.com/gpu: 4  # Requesting 4 GPUs
          # Readiness is probed from 30 s after start; liveness probing is held
          # back for 600 s (presumably to cover model load time - TODO confirm).
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5000
            initialDelaySeconds: 600  # 10 Min
            periodSeconds: 10
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ metadata:
spec:
selector:
app: falcon
statefulset.kubernetes.io/pod-name: falcon-7b-instruct-0
ports:
- protocol: TCP
port: 80
targetPort: 5000
- protocol: TCP
port: 80
targetPort: 5000
type: LoadBalancer
publishNotReadyAddresses: true
Loading

0 comments on commit 133cab3

Please sign in to comment.