From d2a0e426e79b789a51112026f8c2f1327aad4cfc Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Mon, 13 May 2024 15:13:18 +0000 Subject: [PATCH 1/5] Include TEST-MULTI job in weekly --- .github/workflows/build-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 0fee152825309..df2acd54b17af 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -139,7 +139,7 @@ jobs: TEST-MULTI: needs: [BUILD] - if: success() && contains(fromJSON('["NIGHTLY", "RELEASE"]'), inputs.wf_category) + if: success() && contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category) uses: ./.github/workflows/test.yml with: test_label: ${{ inputs.test_label_multi }} From 25bfdaf4169f38f126b4d45ab6d9c49725b5ea3f Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Mon, 13 May 2024 15:14:09 +0000 Subject: [PATCH 2/5] Unify naming for accuracy (lm-eval) jobs --- .github/workflows/build-test.yml | 4 ++-- .../{nm-lm-eval-accuracy.yml => nm-test-accuracy-full.yml} | 4 ++-- .../{nm-lm-eval-smoke.yml => nm-test-accuracy-smoke.yml} | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename .github/workflows/{nm-lm-eval-accuracy.yml => nm-test-accuracy-full.yml} (98%) rename .github/workflows/{nm-lm-eval-smoke.yml => nm-test-accuracy-smoke.yml} (98%) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index df2acd54b17af..2100261c0e48a 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -180,7 +180,7 @@ jobs: TEST-ACCURACY-SMOKE: needs: [BUILD] if: inputs.wf_category == 'NIGHTLY' - uses: ./.github/workflows/nm-lm-eval-smoke.yml + uses: ./.github/workflows/nm-test-accuracy-smoke.yml with: label: ${{ inputs.test_label_solo }} timeout: ${{ inputs.benchmark_timeout }} @@ -194,7 +194,7 @@ jobs: TEST-ACCURACY-FULL: needs: [BUILD] if: ${{ inputs.wf_category == 'WEEKLY' || inputs.wf_category == 'RELEASE' }} - uses: ./.github/workflows/nm-lm-eval-accuracy.yml + uses: ./.github/workflows/nm-test-accuracy-full.yml with: label: ${{ inputs.test_label_multi }} timeout: ${{ inputs.benchmark_timeout }} diff --git a/.github/workflows/nm-lm-eval-accuracy.yml b/.github/workflows/nm-test-accuracy-full.yml similarity index 98% rename from .github/workflows/nm-lm-eval-accuracy.yml rename to .github/workflows/nm-test-accuracy-full.yml index f1612ad81d0c2..07f736cbdfbfa 100644 --- a/.github/workflows/nm-lm-eval-accuracy.yml +++ b/.github/workflows/nm-test-accuracy-full.yml @@ -1,4 +1,4 @@ -name: nm-lm-eval-accuracy +name: nm-test-accuracy-full on: # makes workflow reusable workflow_call: @@ -68,7 +68,7 @@ env: VENV_BASE: "LM_EVAL" jobs: - LM-EVAL-FULL: + TEST-ACCURACY-FULL: runs-on: ${{ inputs.label }} timeout-minutes: ${{ fromJSON(inputs.timeout) }} diff --git a/.github/workflows/nm-lm-eval-smoke.yml b/.github/workflows/nm-test-accuracy-smoke.yml similarity index 98% rename from .github/workflows/nm-lm-eval-smoke.yml rename to .github/workflows/nm-test-accuracy-smoke.yml index ddedc1110f796..994a33a1beba9 100644 --- a/.github/workflows/nm-lm-eval-smoke.yml +++ b/.github/workflows/nm-test-accuracy-smoke.yml @@ -1,4 +1,4 @@ -name: nm-lm-eval-smoke +name: nm-test-accuracy-smoke on: # makes workflow reusable workflow_call: @@ -68,7 +68,7 @@ env: VENV_BASE: "LM_EVAL" jobs: - LM-EVAL-SMOKE: + TEST-ACCURACY-SMOKE: runs-on: ${{ inputs.label }} timeout-minutes: ${{ fromJSON(inputs.timeout) }} From 35a7b12caa4654f6290089142cf450a959ec1ac9 Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Mon, 13 May 2024 15:34:20 +0000 Subject: [PATCH 3/5] Increase server startup wait time --- tests/utils/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/server.py b/tests/utils/server.py index e055040a144d5..4f1a2accaaa1b 100644 --- a/tests/utils/server.py +++ b/tests/utils/server.py @@ -12,7 +12,7 @@ from tests.utils.logging import log_banner -MAX_SERVER_START_WAIT = 600 # time (seconds) to wait for server to start +MAX_SERVER_START_WAIT = 15 * 60 # time (seconds) to wait for server to start @ray.remote(num_gpus=torch.cuda.device_count()) From 21f189f7a9e7fa6bad3b539f290bfa2e7767bfec Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Mon, 13 May 2024 16:00:56 +0000 Subject: [PATCH 4/5] Disable too-large model --- tests/accuracy/lm-eval-tasks.yaml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/accuracy/lm-eval-tasks.yaml b/tests/accuracy/lm-eval-tasks.yaml index 97b420f325495..61b9e14723442 100644 --- a/tests/accuracy/lm-eval-tasks.yaml +++ b/tests/accuracy/lm-eval-tasks.yaml @@ -63,12 +63,13 @@ # value: 0.5041698256254739 # Mixtral: FP16 -- model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1" - tasks: - - name: "gsm8k" - metrics: - - name: "exact_match,strict-match" - value: 0.6550416982562547 - - name: "exact_match,flexible-extract" - value: 0.6603487490523123 - enable_tensor_parallel: true +# g5.12xlarge runner (4x 24GB A10 GPUs) has insufficient VRAM +# - model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1" +# tasks: +# - name: "gsm8k" +# metrics: +# - name: "exact_match,strict-match" +# value: 0.6550416982562547 +# - name: "exact_match,flexible-extract" +# value: 0.6603487490523123 +# enable_tensor_parallel: true From 3b85dc96441b88c6770a21db13cda2b97c3df397 Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Mon, 13 May 2024 16:04:49 +0000 Subject: [PATCH 5/5] Increase benchmarking server startup time limit --- neuralmagic/benchmarks/run_benchmark_serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralmagic/benchmarks/run_benchmark_serving.py b/neuralmagic/benchmarks/run_benchmark_serving.py index 42b81dc56be7c..de6be7a3e0368 100644 --- a/neuralmagic/benchmarks/run_benchmark_serving.py +++ b/neuralmagic/benchmarks/run_benchmark_serving.py @@ -37,7 +37,7 @@ def get_tensor_parallel_size(config: NamedTuple) -> int: return tensor_parallel_size -def is_server_running(host: str, port: int, timeout=300) -> bool: +def is_server_running(host: str, port: int, timeout=600) -> bool: def try_connection() -> bool: try: