From 3a2545670126854a4a685edd889fe68f2fe250c3 Mon Sep 17 00:00:00 2001 From: Domenic Barbuzzi Date: Tue, 14 May 2024 10:41:26 -0400 Subject: [PATCH] Misc CI/CD updates (#240) --- .github/workflows/build-test.yml | 6 +++--- ...accuracy.yml => nm-test-accuracy-full.yml} | 4 ++-- ...l-smoke.yml => nm-test-accuracy-smoke.yml} | 4 ++-- .../benchmarks/run_benchmark_serving.py | 2 +- tests/accuracy/lm-eval-tasks.yaml | 19 ++++++++++--------- tests/utils/server.py | 2 +- 6 files changed, 19 insertions(+), 18 deletions(-) rename .github/workflows/{nm-lm-eval-accuracy.yml => nm-test-accuracy-full.yml} (98%) rename .github/workflows/{nm-lm-eval-smoke.yml => nm-test-accuracy-smoke.yml} (98%) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 0fee152825309..2100261c0e48a 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -139,7 +139,7 @@ jobs: TEST-MULTI: needs: [BUILD] - if: success() && contains(fromJSON('["NIGHTLY", "RELEASE"]'), inputs.wf_category) + if: success() && contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category) uses: ./.github/workflows/test.yml with: test_label: ${{ inputs.test_label_multi }} @@ -180,7 +180,7 @@ jobs: TEST-ACCURACY-SMOKE: needs: [BUILD] if: inputs.wf_category == 'NIGHTLY' - uses: ./.github/workflows/nm-lm-eval-smoke.yml + uses: ./.github/workflows/nm-test-accuracy-smoke.yml with: label: ${{ inputs.test_label_solo }} timeout: ${{ inputs.benchmark_timeout }} @@ -194,7 +194,7 @@ jobs: TEST-ACCURACY-FULL: needs: [BUILD] if: ${{ inputs.wf_category == 'WEEKLY' || inputs.wf_category == 'RELEASE' }} - uses: ./.github/workflows/nm-lm-eval-accuracy.yml + uses: ./.github/workflows/nm-test-accuracy-full.yml with: label: ${{ inputs.test_label_multi }} timeout: ${{ inputs.benchmark_timeout }} diff --git a/.github/workflows/nm-lm-eval-accuracy.yml b/.github/workflows/nm-test-accuracy-full.yml similarity index 98% rename from .github/workflows/nm-lm-eval-accuracy.yml rename to .github/workflows/nm-test-accuracy-full.yml index f1612ad81d0c2..07f736cbdfbfa 100644 --- a/.github/workflows/nm-lm-eval-accuracy.yml +++ b/.github/workflows/nm-test-accuracy-full.yml @@ -1,4 +1,4 @@ -name: nm-lm-eval-accuracy +name: nm-test-accuracy-full on: # makes workflow reusable workflow_call: @@ -68,7 +68,7 @@ env: VENV_BASE: "LM_EVAL" jobs: - LM-EVAL-FULL: + TEST-ACCURACY-FULL: runs-on: ${{ inputs.label }} timeout-minutes: ${{ fromJSON(inputs.timeout) }} diff --git a/.github/workflows/nm-lm-eval-smoke.yml b/.github/workflows/nm-test-accuracy-smoke.yml similarity index 98% rename from .github/workflows/nm-lm-eval-smoke.yml rename to .github/workflows/nm-test-accuracy-smoke.yml index ddedc1110f796..994a33a1beba9 100644 --- a/.github/workflows/nm-lm-eval-smoke.yml +++ b/.github/workflows/nm-test-accuracy-smoke.yml @@ -1,4 +1,4 @@ -name: nm-lm-eval-smoke +name: nm-test-accuracy-smoke on: # makes workflow reusable workflow_call: @@ -68,7 +68,7 @@ env: VENV_BASE: "LM_EVAL" jobs: - LM-EVAL-SMOKE: + TEST-ACCURACY-SMOKE: runs-on: ${{ inputs.label }} timeout-minutes: ${{ fromJSON(inputs.timeout) }} diff --git a/neuralmagic/benchmarks/run_benchmark_serving.py b/neuralmagic/benchmarks/run_benchmark_serving.py index 42b81dc56be7c..de6be7a3e0368 100644 --- a/neuralmagic/benchmarks/run_benchmark_serving.py +++ b/neuralmagic/benchmarks/run_benchmark_serving.py @@ -37,7 +37,7 @@ def get_tensor_parallel_size(config: NamedTuple) -> int: return tensor_parallel_size -def is_server_running(host: str, port: int, timeout=300) -> bool: +def is_server_running(host: str, port: int, timeout=600) -> bool: def try_connection() -> bool: try: diff --git a/tests/accuracy/lm-eval-tasks.yaml b/tests/accuracy/lm-eval-tasks.yaml index 97b420f325495..61b9e14723442 100644 --- a/tests/accuracy/lm-eval-tasks.yaml +++ b/tests/accuracy/lm-eval-tasks.yaml @@ -63,12 +63,13 @@ # value: 0.5041698256254739 # Mixtral: FP16 -- model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1" - tasks: - - name: "gsm8k" - metrics: - - name: "exact_match,strict-match" - value: 0.6550416982562547 - - name: "exact_match,flexible-extract" - value: 0.6603487490523123 - enable_tensor_parallel: true +# g5.12xlarge runner (4x 24GB A10 GPUs) has insufficient VRAM +# - model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1" +# tasks: +# - name: "gsm8k" +# metrics: +# - name: "exact_match,strict-match" +# value: 0.6550416982562547 +# - name: "exact_match,flexible-extract" +# value: 0.6603487490523123 +# enable_tensor_parallel: true diff --git a/tests/utils/server.py b/tests/utils/server.py index e055040a144d5..4f1a2accaaa1b 100644 --- a/tests/utils/server.py +++ b/tests/utils/server.py @@ -12,7 +12,7 @@ from tests.utils.logging import log_banner -MAX_SERVER_START_WAIT = 600 # time (seconds) to wait for server to start +MAX_SERVER_START_WAIT = 15 * 60 # time (seconds) to wait for server to start @ray.remote(num_gpus=torch.cuda.device_count())