Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Misc CI/CD updates #240

Merged
merged 5 commits
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ jobs:

TEST-MULTI:
needs: [BUILD]
if: success() && contains(fromJSON('["NIGHTLY", "RELEASE"]'), inputs.wf_category)
if: success() && contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category)
uses: ./.github/workflows/test.yml
with:
test_label: ${{ inputs.test_label_multi }}
Expand Down Expand Up @@ -180,7 +180,7 @@ jobs:
TEST-ACCURACY-SMOKE:
needs: [BUILD]
if: inputs.wf_category == 'NIGHTLY'
uses: ./.github/workflows/nm-lm-eval-smoke.yml
uses: ./.github/workflows/nm-test-accuracy-smoke.yml
with:
label: ${{ inputs.test_label_solo }}
timeout: ${{ inputs.benchmark_timeout }}
Expand All @@ -194,7 +194,7 @@ jobs:
TEST-ACCURACY-FULL:
needs: [BUILD]
if: ${{ inputs.wf_category == 'WEEKLY' || inputs.wf_category == 'RELEASE' }}
uses: ./.github/workflows/nm-lm-eval-accuracy.yml
uses: ./.github/workflows/nm-test-accuracy-full.yml
with:
label: ${{ inputs.test_label_multi }}
timeout: ${{ inputs.benchmark_timeout }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: nm-lm-eval-accuracy
name: nm-test-accuracy-full
on:
# makes workflow reusable
workflow_call:
Expand Down Expand Up @@ -68,7 +68,7 @@ env:
VENV_BASE: "LM_EVAL"

jobs:
LM-EVAL-FULL:
TEST-ACCURACY-FULL:

runs-on: ${{ inputs.label }}
timeout-minutes: ${{ fromJSON(inputs.timeout) }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: nm-lm-eval-smoke
name: nm-test-accuracy-smoke
on:
# makes workflow reusable
workflow_call:
Expand Down Expand Up @@ -68,7 +68,7 @@ env:
VENV_BASE: "LM_EVAL"

jobs:
LM-EVAL-SMOKE:
TEST-ACCURACY-SMOKE:

runs-on: ${{ inputs.label }}
timeout-minutes: ${{ fromJSON(inputs.timeout) }}
Expand Down
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/run_benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_tensor_parallel_size(config: NamedTuple) -> int:
return tensor_parallel_size


def is_server_running(host: str, port: int, timeout=300) -> bool:
def is_server_running(host: str, port: int, timeout=600) -> bool:

def try_connection() -> bool:
try:
Expand Down
19 changes: 10 additions & 9 deletions tests/accuracy/lm-eval-tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@
# value: 0.5041698256254739

# Mixtral: FP16
- model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.6550416982562547
- name: "exact_match,flexible-extract"
value: 0.6603487490523123
enable_tensor_parallel: true
# g5.12xlarge runner (4x 24GB A10 GPUs) has insufficient VRAM
# - model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1"
# tasks:
# - name: "gsm8k"
# metrics:
# - name: "exact_match,strict-match"
# value: 0.6550416982562547
# - name: "exact_match,flexible-extract"
# value: 0.6603487490523123
# enable_tensor_parallel: true
2 changes: 1 addition & 1 deletion tests/utils/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from tests.utils.logging import log_banner

MAX_SERVER_START_WAIT = 600 # time (seconds) to wait for server to start
MAX_SERVER_START_WAIT = 15 * 60 # time (seconds) to wait for server to start


@ray.remote(num_gpus=torch.cuda.device_count())
Expand Down
Loading