Add FP8 and marlin 2:4 tests for lm-eval (#244)

neuralmagic · May 16, 2024 · 59cf939 · github-actions · May 17, 2024
1 parent 2fcfced
commit 59cf939
Showing 1 changed file with 38 additions and 8 deletions.
diff --git a/tests/accuracy/lm-eval-tasks.yaml b/tests/accuracy/lm-eval-tasks.yaml
@@ -1,12 +1,13 @@
 # Llama 2 7B: FP16, FP16 sparse, marlin
-- model_name: "NousResearch/Llama-2-7b-chat-hf"
-  tasks:
-  - name: "gsm8k"
-    metrics:
-    - name: "exact_match,strict-match"
-      value: 0.2266868840030326
-    - name: "exact_match,flexible-extract"
-      value: 0.22820318423047764
+# NOTE: Disabled; this Llama 2 FP16 entry is superseded by the Llama 3 entry
+# - model_name: "NousResearch/Llama-2-7b-chat-hf"
+#   tasks:
+#   - name: "gsm8k"
+#     metrics:
+#     - name: "exact_match,strict-match"
+#       value: 0.2266868840030326
+#     - name: "exact_match,flexible-extract"
+#       value: 0.22820318423047764
 - model_name: "neuralmagic/Llama-2-7b-pruned50-retrained-ultrachat"
   tasks:
   - name: "gsm8k"
@@ -52,6 +53,25 @@
 #     - name: "exact_match,flexible-extract"
 #       value: 0.5868081880212282
 
+# Llama 3 8B Instruct: FP16, FP8
+- model_name: "NousResearch/Meta-Llama-3-8B-Instruct"
+  tasks:
+  - name: "gsm8k"
+    metrics:
+    - name: "exact_match,strict-match"
+      value: 0.7566
+    - name: "exact_match,flexible-extract"
+      value: 0.7551
+# NOTE: Disabled; FP8 needs a GPU with CUDA compute capability >= 8.9
+# - model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
+#   tasks:
+#   - name: "gsm8k"
+#     metrics:
+#     - name: "exact_match,strict-match"
+#       value: 0.7445
+#     - name: "exact_match,flexible-extract"
+#       value: 0.7445
+
 # Phi 2: marlin
 # - model_name: "neuralmagic/phi-2-super-marlin"
 #   tasks:
@@ -62,6 +82,16 @@
 #     - name: "exact_match,flexible-extract"
 #       value: 0.5041698256254739
 
+# Llama 2 7B: 2:4 sparse marlin
+- model_name: "nm-testing/Llama-2-7b-pruned2.4-Marlin"
+  tasks:
+  - name: "gsm8k"
+    metrics:
+    - name: "exact_match,strict-match"
+      value: 0.1857
+    - name: "exact_match,flexible-extract"
+      value: 0.0425
+
 # Mixtral: FP16
 # g5.12xlarge runner (4x 24GB A10 GPUs) has insufficient VRAM
 # - model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1"