diff --git a/.github/e2e-preset-configs.json b/.github/e2e-preset-configs.json
index acbfa98f2..a499d28d6 100644
--- a/.github/e2e-preset-configs.json
+++ b/.github/e2e-preset-configs.json
@@ -37,6 +37,12 @@
         "node-vm-size": "Standard_NC12s_v3",
         "node-osdisk-size": 100
       },
+      {
+        "name": "phi-2",
+        "node-count": 1,
+        "node-vm-size": "Standard_NC6s_v3",
+        "node-osdisk-size": 30
+      },
       {
         "name": "llama-2-7b",
         "node-count": 1,
diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
index 784c292bd..641ac2ab1 100644
--- a/.github/workflows/e2e-preset-test.yml
+++ b/.github/workflows/e2e-preset-test.yml
@@ -234,19 +234,25 @@ jobs:
           done
           echo "Service IP is $SERVICE_IP"
           echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT
-
-      - name: Replace IP and Deploy Statefulset to K8s
+
+      - name: Get Resource Type
+        id: resource
+        run: |
+          RESOURCE_TYPE=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "statefulset" || echo "deployment")
+          echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> $GITHUB_OUTPUT
+
+      - name: Replace IP and Deploy Resource to K8s
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
         run: |
-          sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-          sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-          kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
-
-      - name: Wait for Statefulset to be ready
+          sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+          sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+          kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}.yaml
+
+      - name: Wait for Resource to be ready
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
         run: |
-          kubectl rollout status statefulset/${{ matrix.model.name }}
+          kubectl rollout status ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ matrix.model.name }}

       - name: Test home endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
@@ -359,16 +365,22 @@ jobs:
       - name: Cleanup
         if: always()
         run: |
+          # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
+          if [ -n "${{ steps.resource.outputs.RESOURCE_TYPE }}" ]; then
+            # Use RESOURCE_TYPE from the previous step
+            RESOURCE_TYPE=${{ steps.resource.outputs.RESOURCE_TYPE }}
+
+            # Check and Delete K8s Resource (Deployment or StatefulSet)
+            if kubectl get $RESOURCE_TYPE ${{ matrix.model.name }} > /dev/null 2>&1; then
+              kubectl delete $RESOURCE_TYPE ${{ matrix.model.name }}
+            fi
+          fi
+
           # Check and Delete K8s Service if it exists
           if kubectl get svc ${{ matrix.model.name }} > /dev/null 2>&1; then
             kubectl delete svc ${{ matrix.model.name }}
           fi

-          # Check and Delete K8s StatefulSet if it exists
-          if kubectl get statefulset ${{ matrix.model.name }} > /dev/null 2>&1; then
-            kubectl delete statefulset ${{ matrix.model.name }}
-          fi
-
           # Check and Delete AKS Nodepool if it exists
           if [ -n "${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}" ]; then
             NODEPOOL_EXIST=$(az aks nodepool show \
diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml
index cc41fb6f7..27f21ec46 100644
--- a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml
+++ b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-service.yaml
@@ -5,10 +5,9 @@
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-40b-instruct-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
deleted file mode 100644
index 57326ddf7..000000000
--- a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct-statefulset.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: falcon-40b-instruct
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: falcon
-  podManagementPolicy: Parallel
-  template:
-    metadata:
-      labels:
-        app: falcon
-    spec:
-      containers:
-      - name: falcon-container
-        image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE
-        command:
-        - /bin/sh
-        - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
-        resources:
-          requests:
-            nvidia.com/gpu: 4
-          limits:
-            nvidia.com/gpu: 4 # Requesting 4 GPUs
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 600 # 10 Min
-          periodSeconds: 10
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 30
-          periodSeconds: 10
-        volumeMounts:
-        - name: dshm
-          mountPath: /dev/shm
-      volumes:
-      - name: dshm
-        emptyDir:
-          medium: Memory
-      tolerations:
-      - effect: NoSchedule
-        key: sku
-        operator: Equal
-        value: gpu
-      - effect: NoSchedule
-        key: nvidia.com/gpu
-        operator: Exists
-      nodeSelector:
-        pool: n40binstruct
diff --git a/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct.yaml b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct.yaml
new file mode 100644
index 000000000..bd6280b9f
--- /dev/null
+++ b/presets/test/manifests/falcon-40b-instruct/falcon-40b-instruct.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: falcon-40b-instruct
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-40b-instruct:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 4 # Requesting 4 GPUs
+          limits:
+            nvidia.com/gpu: 4
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: n40binstruct
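
A note on the new "Get Resource Type" step above: the grep pipeline maps any model name containing "llama" to a StatefulSet and everything else to a Deployment. A minimal local sketch of that dispatch logic, using a few illustrative preset names:

  # llama presets keep their StatefulSets; all other presets are Deployments now
  for name in falcon-7b llama-2-7b-chat phi-2; do
    RESOURCE_TYPE=$(echo "$name" | grep -q "llama" && echo "statefulset" || echo "deployment")
    echo "$name -> $RESOURCE_TYPE"
  done
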
diff --git a/presets/test/manifests/falcon-40b/falcon-40b-service.yaml b/presets/test/manifests/falcon-40b/falcon-40b-service.yaml
index 599f70ca5..689361052 100644
--- a/presets/test/manifests/falcon-40b/falcon-40b-service.yaml
+++ b/presets/test/manifests/falcon-40b/falcon-40b-service.yaml
@@ -5,10 +5,9 @@
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-40b-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
diff --git a/presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml b/presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml
deleted file mode 100644
index 7b77d20df..000000000
--- a/presets/test/manifests/falcon-40b/falcon-40b-statefulset.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: falcon-40b
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: falcon
-  podManagementPolicy: Parallel
-  template:
-    metadata:
-      labels:
-        app: falcon
-    spec:
-      containers:
-      - name: falcon-container
-        image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE
-        command:
-        - /bin/sh
-        - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
-        resources:
-          requests:
-            nvidia.com/gpu: 4
-          limits:
-            nvidia.com/gpu: 4 # Requesting 4 GPUs
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 600 # 10 Min
-          periodSeconds: 10
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 30
-          periodSeconds: 10
-        volumeMounts:
-        - name: dshm
-          mountPath: /dev/shm
-      volumes:
-      - name: dshm
-        emptyDir:
-          medium: Memory
-      tolerations:
-      - effect: NoSchedule
-        key: sku
-        operator: Equal
-        value: gpu
-      - effect: NoSchedule
-        key: nvidia.com/gpu
-        operator: Exists
-      nodeSelector:
-        pool: falcon40b
diff --git a/presets/test/manifests/falcon-40b/falcon-40b.yaml b/presets/test/manifests/falcon-40b/falcon-40b.yaml
new file mode 100644
index 000000000..a125d838d
--- /dev/null
+++ b/presets/test/manifests/falcon-40b/falcon-40b.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: falcon-40b
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-40b:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 4 # Requesting 4 GPUs
+          limits:
+            nvidia.com/gpu: 4
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: falcon40b
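
The renamed manifests (model-name.yaml instead of model-name-statefulset.yaml) are what the workflow's sed/apply sequence consumes. A hedged local dry run of that sequence, with placeholder values standing in for the workflow's matrix values and secrets (MODEL, TAG, and REPO below are assumptions, not repo values):

  MODEL=falcon-40b   # matrix.model.name
  TAG=latest         # matrix.model.tag (assumed)
  REPO=myregistry    # secrets.ACR_AMRT_USERNAME (assumed)
  MANIFEST=presets/test/manifests/$MODEL/$MODEL.yaml
  sed -i "s/TAG_HERE/$TAG/g; s/REPO_HERE/$REPO/g" "$MANIFEST"
  kubectl apply -f "$MANIFEST"
  kubectl rollout status deployment/$MODEL
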
diff --git a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml
index e3c20e3b3..6acbe2405 100644
--- a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml
+++ b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-service.yaml
@@ -5,10 +5,9 @@
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-7b-instruct-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
diff --git a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-statefulset.yaml b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-statefulset.yaml
deleted file mode 100644
index 0387a092b..000000000
--- a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct-statefulset.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: falcon-7b-instruct
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: falcon
-  podManagementPolicy: Parallel
-  template:
-    metadata:
-      labels:
-        app: falcon
-    spec:
-      containers:
-      - name: falcon-container
-        image: REPO_HERE.azurecr.io/falcon-7b-instruct:TAG_HERE
-        command:
-        - /bin/sh
-        - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
-        resources:
-          requests:
-            nvidia.com/gpu: 2
-          limits:
-            nvidia.com/gpu: 2 # Requesting 2 GPUs
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 600 # 10 Min
-          periodSeconds: 10
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 30
-          periodSeconds: 10
-        volumeMounts:
-        - name: dshm
-          mountPath: /dev/shm
-      volumes:
-      - name: dshm
-        emptyDir:
-          medium: Memory
-      tolerations:
-      - effect: NoSchedule
-        key: sku
-        operator: Equal
-        value: gpu
-      - effect: NoSchedule
-        key: nvidia.com/gpu
-        operator: Exists
-      nodeSelector:
-        pool: on7binstruct
diff --git a/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct.yaml b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct.yaml
new file mode 100644
index 000000000..ed8913e76
--- /dev/null
+++ b/presets/test/manifests/falcon-7b-instruct/falcon-7b-instruct.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: falcon-7b-instruct
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-7b-instruct:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 2
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: on7binstruct
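
The probes in these manifests poll /healthz on port 5000 inside the container. One way to exercise that endpoint by hand with kubectl, assuming cluster access (the app=falcon selector matches the manifests above; the rest is illustrative):

  POD=$(kubectl get pod -l app=falcon -o jsonpath='{.items[0].metadata.name}')
  kubectl port-forward "$POD" 5000:5000 &
  PF_PID=$!
  sleep 2   # give port-forward a moment to bind
  curl -sf http://localhost:5000/healthz && echo healthy
  kill $PF_PID
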
diff --git a/presets/test/manifests/falcon-7b/falcon-7b-service.yaml b/presets/test/manifests/falcon-7b/falcon-7b-service.yaml
index 1f8fef330..acf56ba74 100644
--- a/presets/test/manifests/falcon-7b/falcon-7b-service.yaml
+++ b/presets/test/manifests/falcon-7b/falcon-7b-service.yaml
@@ -5,11 +5,9 @@
 spec:
   selector:
     app: falcon
-    statefulset.kubernetes.io/pod-name: falcon-7b-0
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 5000
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
   type: LoadBalancer
   publishNotReadyAddresses: true
-
diff --git a/presets/test/manifests/falcon-7b/falcon-7b-statefulset.yaml b/presets/test/manifests/falcon-7b/falcon-7b-statefulset.yaml
deleted file mode 100644
index 317da8a40..000000000
--- a/presets/test/manifests/falcon-7b/falcon-7b-statefulset.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: falcon-7b
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: falcon
-  podManagementPolicy: Parallel
-  template:
-    metadata:
-      labels:
-        app: falcon
-    spec:
-      containers:
-      - name: falcon-container
-        image: REPO_HERE.azurecr.io/falcon-7b:TAG_HERE
-        command:
-        - /bin/sh
-        - -c
-        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
-        resources:
-          requests:
-            nvidia.com/gpu: 2
-          limits:
-            nvidia.com/gpu: 2 # Requesting 2 GPUs
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 600 # 10 Min
-          periodSeconds: 10
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 5000
-          initialDelaySeconds: 30
-          periodSeconds: 10
-        volumeMounts:
-        - name: dshm
-          mountPath: /dev/shm
-      volumes:
-      - name: dshm
-        emptyDir:
-          medium: Memory
-      tolerations:
-      - effect: NoSchedule
-        key: sku
-        operator: Equal
-        value: gpu
-      - effect: NoSchedule
-        key: nvidia.com/gpu
-        operator: Exists
-      nodeSelector:
-        pool: falcon7b
diff --git a/presets/test/manifests/falcon-7b/falcon-7b.yaml b/presets/test/manifests/falcon-7b/falcon-7b.yaml
new file mode 100644
index 000000000..2f1aff077
--- /dev/null
+++ b/presets/test/manifests/falcon-7b/falcon-7b.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: falcon-7b
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: falcon
+  template:
+    metadata:
+      labels:
+        app: falcon
+    spec:
+      containers:
+      - name: falcon-container
+        image: REPO_HERE.azurecr.io/falcon-7b:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 2
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: falcon7b
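
With the statefulset.kubernetes.io/pod-name selector dropped, the services now select on the app label alone and still route port 80 to the container's port 5000 through the LoadBalancer. A hedged smoke test once an external IP is assigned (this mirrors what the workflow's endpoint tests do; the jsonpath assumes an IP-based, not hostname-based, ingress):

  SERVICE_IP=$(kubectl get svc falcon-7b -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
  curl -s "http://$SERVICE_IP/healthz"
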
diff --git a/presets/test/manifests/llama-2-13b-chat/llama-2-13b-chat-statefulset.yaml b/presets/test/manifests/llama-2-13b-chat/llama-2-13b-chat.yaml
similarity index 100%
rename from presets/test/manifests/llama-2-13b-chat/llama-2-13b-chat-statefulset.yaml
rename to presets/test/manifests/llama-2-13b-chat/llama-2-13b-chat.yaml
diff --git a/presets/test/manifests/llama-2-13b/llama-2-13b-statefulset.yaml b/presets/test/manifests/llama-2-13b/llama-2-13b.yaml
similarity index 100%
rename from presets/test/manifests/llama-2-13b/llama-2-13b-statefulset.yaml
rename to presets/test/manifests/llama-2-13b/llama-2-13b.yaml
diff --git a/presets/test/manifests/llama-2-7b-chat/llama-2-7b-chat-statefulset.yaml b/presets/test/manifests/llama-2-7b-chat/llama-2-7b-chat.yaml
similarity index 100%
rename from presets/test/manifests/llama-2-7b-chat/llama-2-7b-chat-statefulset.yaml
rename to presets/test/manifests/llama-2-7b-chat/llama-2-7b-chat.yaml
diff --git a/presets/test/manifests/llama-2-7b/llama-2-7b-statefulset.yaml b/presets/test/manifests/llama-2-7b/llama-2-7b.yaml
similarity index 100%
rename from presets/test/manifests/llama-2-7b/llama-2-7b-statefulset.yaml
rename to presets/test/manifests/llama-2-7b/llama-2-7b.yaml
diff --git a/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct-service.yaml b/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct-service.yaml
new file mode 100644
index 000000000..31b9206bc
--- /dev/null
+++ b/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct-service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: mistral-7b-instruct
+spec:
+  selector:
+    app: mistral
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
+  type: LoadBalancer
+  publishNotReadyAddresses: true
diff --git a/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct.yaml b/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct.yaml
new file mode 100644
index 000000000..cacfbd484
--- /dev/null
+++ b/presets/test/manifests/mistral-7b-instruct/mistral-7b-instruct.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mistral-7b-instruct
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: mistral
+  template:
+    metadata:
+      labels:
+        app: mistral
+    spec:
+      containers:
+      - name: mistral-instruct-container
+        image: REPO_HERE.azurecr.io/mistral-7b-instruct:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 2
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: al7binstruct
diff --git a/presets/test/manifests/mistral-7b/mistral-7b-service.yaml b/presets/test/manifests/mistral-7b/mistral-7b-service.yaml
new file mode 100644
index 000000000..650422c7c
--- /dev/null
+++ b/presets/test/manifests/mistral-7b/mistral-7b-service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: mistral-7b
+spec:
+  selector:
+    app: mistral
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
+  type: LoadBalancer
+  publishNotReadyAddresses: true
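
A quick way to confirm what the new label-only selectors resolve to, using mistral-7b as the example (purely illustrative commands):

  kubectl get pods -l app=mistral -o wide   # pods the selector matches
  kubectl get endpoints mistral-7b          # addresses the service will route to
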
diff --git a/presets/test/manifests/mistral-7b/mistral-7b.yaml b/presets/test/manifests/mistral-7b/mistral-7b.yaml
new file mode 100644
index 000000000..287d435a7
--- /dev/null
+++ b/presets/test/manifests/mistral-7b/mistral-7b.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mistral-7b
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: mistral
+  template:
+    metadata:
+      labels:
+        app: mistral
+    spec:
+      containers:
+      - name: mistral-container
+        image: REPO_HERE.azurecr.io/mistral-7b:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 2
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: mistral7b
diff --git a/presets/test/manifests/phi-2/phi-2-service.yaml b/presets/test/manifests/phi-2/phi-2-service.yaml
new file mode 100644
index 000000000..b81036bd8
--- /dev/null
+++ b/presets/test/manifests/phi-2/phi-2-service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: phi-2
+spec:
+  selector:
+    app: phi-2
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: 5000
+  type: LoadBalancer
+  publishNotReadyAddresses: true
diff --git a/presets/test/manifests/phi-2/phi-2.yaml b/presets/test/manifests/phi-2/phi-2.yaml
new file mode 100644
index 000000000..b250d6248
--- /dev/null
+++ b/presets/test/manifests/phi-2/phi-2.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: phi-2
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: phi-2
+  template:
+    metadata:
+      labels:
+        app: phi-2
+    spec:
+      containers:
+      - name: phi-2-container
+        image: REPO_HERE.azurecr.io/phi-2:TAG_HERE
+        command:
+        - /bin/sh
+        - -c
+        - accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference-api.py --pipeline text-generation --torch_dtype bfloat16
+        resources:
+          requests:
+            nvidia.com/gpu: 2
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 600 # 10 Min
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 5000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        volumeMounts:
+        - name: dshm
+          mountPath: /dev/shm
+      volumes:
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
+      nodeSelector:
+        pool: phi2
diff --git a/presets/test/manifests/playground/python.yaml b/presets/test/manifests/playground/python.yaml
new file mode 100644
index 000000000..7cbcb876e
--- /dev/null
+++ b/presets/test/manifests/playground/python.yaml
@@ -0,0 +1,30 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: python-gpu-app
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: python-gpu-app
+  template:
+    metadata:
+      labels:
+        app: python-gpu-app
+    spec:
+      containers:
+      - name: python-gpu-container
+        image: python:3.10-slim # Replace with your Docker image
+        command: ["/bin/sh"]
+        args: ["-c", "sleep infinity"]
+        resources:
+          limits:
+            nvidia.com/gpu: 2 # Requesting 2 GPUs
+      tolerations:
+      - effect: NoSchedule
+        key: sku
+        operator: Equal
+        value: gpu
+      - effect: NoSchedule
+        key: nvidia.com/gpu
+        operator: Exists
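
A hypothetical smoke test for the playground deployment: verify that the device plugin actually exposed GPUs to the container. nvidia-smi is typically injected by the NVIDIA container runtime; if the base image or runtime doesn't provide it, the first command fails and the device-file listing is a fallback:

  kubectl exec deploy/python-gpu-app -- nvidia-smi -L || \
    kubectl exec deploy/python-gpu-app -- ls /dev/ | grep -i nvidia
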