feat: Update Preset Configs (#243)

Update the preset configs so they can deploy either a StatefulSet or a Deployment. Add the mistral and phi-2 models to the preset configs. Adding these configs prevents the missing-file error in e2e: https://github.com/Azure/kaito/actions/runs/7839888760
kaito-project · Feb 13, 2024 · 133cab3 · 133cab3
1 parent c4bc7aa
commit 133cab3
Show file tree

Hide file tree

Showing 25 changed files with 498 additions and 255 deletions.
diff --git a/.github/e2e-preset-configs.json b/.github/e2e-preset-configs.json
@@ -37,6 +37,12 @@
         "node-vm-size": "Standard_NC12s_v3",
         "node-osdisk-size": 100
       },
+      {
+        "name": "phi-2",
+        "node-count": 1,
+        "node-vm-size": "Standard_NC6s_v3",
+        "node-osdisk-size": 30
+      },
       {
         "name": "llama-2-7b",
         "node-count": 1,

diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
@@ -234,19 +234,25 @@ jobs:
             done 
             echo "Service IP is $SERVICE_IP"
             echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT
-        
-      - name: Replace IP and Deploy Statefulset to K8s
+
+      - name: Get Resource Type
+        id: resource
+        run: |
+            RESOURCE_TYPE=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "statefulset" || echo "deployment")
+            echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> $GITHUB_OUTPUT
+      
+      - name: Replace IP and Deploy Resource to K8s
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
         run: |
-            sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-            sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-            sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-            kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-    
-      - name: Wait for Statefulset to be ready
+            sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+            sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+            sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+            kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+
+      - name: Wait for Resource to be ready
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
         run: |
-            kubectl rollout status statefulset/${{ matrix.model.name }}
+            kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }}
         
       - name: Test home endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
@@ -359,16 +365,22 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
+            # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
+            if [ -n "${{ steps.resource.outputs.RESOURCE_TYPE }}" ]; then
+                # Use RESOURCE_TYPE from the previous step
+                RESOURCE_TYPE=${{ steps.resource.outputs.RESOURCE_TYPE }}
+                
+                # Check and Delete K8s Resource (Deployment or StatefulSet)
+                if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
+                    kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
+                fi
+            fi
+
             # Check and Delete K8s Service if it exists
             if kubectl get svc ${{ matrix.model.name }} > /dev/null 2>&1; then
                 kubectl delete svc ${{ matrix.model.name }}
             fi
         
-            # Check and Delete K8s StatefulSet if it exists
-            if kubectl get statefulset ${{ matrix.model.name }} > /dev/null 2>&1; then
-                kubectl delete statefulset ${{ matrix.model.name }}
-            fi
-
             # Check and Delete AKS Nodepool if it exists            
             if [ -n "${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}" ]; then
                 NODEPOOL_EXIST=$(az aks nodepool show \

diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml
@@ -5,10 +5,9 @@ metadata:
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-40b-instruct-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment  # e2e test workload for the falcon-40b-instruct preset image
+metadata:
+  name: falcon-40b-instruct
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon  # must equal the pod template label below
+  template:
+    metadata:
+      labels:
+        app: falcon  # also the label the falcon-40b-instruct Service selects on
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE  # REPO_HERE / TAG_HERE are placeholders rewritten by the e2e workflow's sed steps before `kubectl apply`
+        command:  # run through /bin/sh -c so the launch line is passed as one command string
+          - /bin/sh
+          - -c
+          - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 4  # Requesting 4 GPUs
+          limits:
+            nvidia.com/gpu: 4  # same value as the request
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:  # same /healthz endpoint as the liveness probe, first checked after 30 s
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory  # memory-backed emptyDir, mounted at /dev/shm above
+      tolerations:
+      - effect: NoSchedule  # tolerates the sku=gpu:NoSchedule taint
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule  # tolerates any NoSchedule taint whose key is nvidia.com/gpu
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: n40binstruct  # only schedule onto nodes labelled pool=n40binstruct
diff --git a/presets/test/manifests/falcon-40b/falcon-40b-service.yaml b/presets/test/manifests/falcon-40b/falcon-40b-service.yaml
@@ -5,10 +5,9 @@ metadata:
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-40b-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
diff --git a/presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml b/presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml
diff --git a/presets/test/manifests/falcon-40b/falcon-40b.yaml b/presets/test/manifests/falcon-40b/falcon-40b.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment  # e2e test workload for the falcon-40b preset image
+metadata:
+  name: falcon-40b
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon  # must equal the pod template label below
+  template:
+    metadata:
+      labels:
+        app: falcon  # also the label the falcon-40b Service selects on
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE  # REPO_HERE / TAG_HERE are placeholders rewritten by the e2e workflow's sed steps before `kubectl apply`
+        command:  # run through /bin/sh -c so the launch line is passed as one command string
+          - /bin/sh
+          - -c
+          - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 4  # Requesting 4 GPUs
+          limits:
+            nvidia.com/gpu: 4  # same value as the request
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:  # same /healthz endpoint as the liveness probe, first checked after 30 s
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory  # memory-backed emptyDir, mounted at /dev/shm above
+      tolerations:
+      - effect: NoSchedule  # tolerates the sku=gpu:NoSchedule taint
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule  # tolerates any NoSchedule taint whose key is nvidia.com/gpu
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: falcon40b  # only schedule onto nodes labelled pool=falcon40b
diff --git a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml
@@ -5,10 +5,9 @@ metadata:
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-7b-instruct-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true