requirements-dev.txt and workflow patches (#255)

SUMMARY: * update `requirements-dev.txt` to address `urllib` dependency * update "build test" workflow to be reusable only * update "benchmark" workflow to disambiguate artifacts TEST PLAN: runs on remote push --------- Co-authored-by: Michael Goin <[email protected]> Co-authored-by: dhuangnm <[email protected]> Co-authored-by: andy-neuma <[email protected]> Co-authored-by: Domenic Barbuzzi <[email protected]>
neuralmagic · May 22, 2024 · a10b831 · a10b831 · github-actions · May 23, 2024
1 parent 93183d6
commit a10b831
Show file tree

Hide file tree

Showing 11 changed files with 117 additions and 65 deletions.
diff --git a/.github/actions/nm-copy-benchmark-data-to-efs/action.yml b/.github/actions/nm-copy-benchmark-data-to-efs/action.yml
@@ -1,33 +1,33 @@
-name: copy benchmark data to EFS 
+name: copy benchmark data to EFS
 description: "Copies the given input folder to EFS under a well-defined directory structure.
               The directory structure is as follows,
               date/github_event/label/branch/commit-hash/github_run_id/.
               The directory structure is created if it doesn't exist"
 inputs:
   label:
     description: "requested runner label (specifies instance)"
-    type: string
+    required: true
+  python:
+    description: 'python version, e.g. 3.10.12'
     required: true
   src:
     description: "Src benchmark folder to copy"
-    type: string
     required: true
   efs_dst:
     description: "Destination EFS path to copy the src folder to"
-    type: string
     required: true
 
 runs:
   using: composite
   steps:
-  - id: copy_benchmark_data_to_efs 
+  - id: copy_benchmark_data_to_efs
     run: |
       echo "event name ${{ github.event_name }}"
       echo "ref ${{ github.ref }}"
       echo "sha ${{ github.sha }}"
       echo "run ${{ github.run_id }}"
       SUCCESS=0
-      ./.github/scripts/nm-store-benchmark-data.sh -i ${{ inputs.src }}  -o ${{ inputs.efs_dst }} -l ${{ inputs.label }} -e ${{ github.event_name }} -b ${{ github.ref }} -c ${{ github.sha }} -r ${{ github.run_id }} || SUCCESS=$?
+      ./.github/scripts/nm-store-benchmark-data.sh -i ${{ inputs.src }}  -o ${{ inputs.efs_dst }} -l ${{ inputs.label }} -p ${{ inputs.python }} -e ${{ github.event_name }} -b ${{ github.ref }} -c ${{ github.sha }} -r ${{ github.run_id }} || SUCCESS=$?
       echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
       exit ${SUCCESS}
     shell: bash
diff --git a/.github/scripts/nm-store-benchmark-data.sh b/.github/scripts/nm-store-benchmark-data.sh
@@ -17,6 +17,7 @@ usage() {
     echo "  -o    - path to the destination-root"
     echo "  -e    - github event name"
     echo "  -l    - github instance label"
+    echo "  -p    - python version"
     echo "  -b    - github branch name"
     echo "  -c    - github commit hash"
     echo "  -r    - github run id"
@@ -28,11 +29,12 @@ INPUT_PATH=""
 OUTPUT_PATH=""
 GITHUB_EVENT_NAME=""
 GITHUB_LABEL=""
+PYTHON_VERSION=""
 GITHUB_BRANCH=""
 GITHUB_COMMIT=""
 GITHUB_RUN_ID=""
 
-while getopts "hi:o:e:l:b:c:r:" OPT; do
+while getopts "hi:o:e:l:p:b:c:r:" OPT; do
     case "${OPT}" in
         h)
             usage
@@ -50,6 +52,9 @@ while getopts "hi:o:e:l:b:c:r:" OPT; do
         l)
             GITHUB_LABEL="${OPTARG}"
             ;;
+        p)
+            PYTHON_VERSION="${OPTARG}"
+            ;;
         b)
             GITHUB_BRANCH="${OPTARG}"
             ;;
@@ -68,12 +73,13 @@ echo "INPUT_PATH : ${INPUT_PATH}"
 echo "OUTPUT_PATH : ${OUTPUT_PATH}"
 echo "GITHUB_EVENT_NAME : ${GITHUB_EVENT_NAME}"
 echo "GITHUB_LABEL : ${GITHUB_LABEL}"
+echo "PYTHON VERSION: ${PYTHON_VERSION}"
 echo "GITHUB_BRANCH : ${GITHUB_BRANCH}"
 echo "GITHUB_COMMIT : ${GITHUB_COMMIT}"
 echo "GITHUB_RUN_ID : ${GITHUB_RUN_ID}"
 
 # Make sure we have all the information to construct a correct path
-if [[ "${INPUT_PATH}" == "" || "${OUTPUT_PATH}" == "" || "${GITHUB_EVENT_NAME}" == "" || "${GITHUB_LABEL}" == "" || "${GITHUB_BRANCH}" == "" || "${GITHUB_COMMIT}" == "" || "${GITHUB_RUN_ID}" == "" ]];
+if [[ "${INPUT_PATH}" == "" || "${OUTPUT_PATH}" == "" || "${GITHUB_EVENT_NAME}" == "" || "${GITHUB_LABEL}" == "" || "${PYTHON_VERSION}" == "" || "${GITHUB_BRANCH}" == "" || "${GITHUB_COMMIT}" == "" || "${GITHUB_RUN_ID}" == "" ]];
 then
   echo "Error : Incomplete arg list - Atleast one of the arguments is an empty string"
   exit 1
@@ -86,7 +92,7 @@ GITHUB_COMMIT=${GITHUB_COMMIT:0:7}
 # Get today's date
 TODAY=`date '+%Y-%m-%d'`
 
-DESTINATION_DIR=${OUTPUT_PATH}/${TODAY}/${GITHUB_EVENT_NAME}/${GITHUB_LABEL}/${GITHUB_BRANCH}/${GITHUB_COMMIT}/${GITHUB_RUN_ID}
+DESTINATION_DIR=${OUTPUT_PATH}/${TODAY}/${GITHUB_EVENT_NAME}/${GITHUB_LABEL}/${PYTHON_VERSION}/${GITHUB_BRANCH}/${GITHUB_COMMIT}/${GITHUB_RUN_ID}
 echo "Destination DIR : ${DESTINATION_DIR}"
 
 # Create destination dir
@@ -102,6 +108,6 @@ then
   exit 1
 fi
 # Tar file
-tar -cvf ${DESTINATION_TAR} ${INPUT_PATH} 
+tar -cvf ${DESTINATION_TAR} ${INPUT_PATH}
 
 exit 0
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
@@ -67,50 +67,6 @@ on:
         type: string
         default: "false"
 
-  # makes workflow manually callable
-  workflow_dispatch:
-    inputs:
-      build_label:
-        description: "requested runner label (specifies instance)"
-        type: string
-        required: true
-      build_timeout:
-        description: "time limit for build in minutes "
-        type: string
-        required: true
-      test_label_solo:
-        description: "requested runner label (specifies instance)"
-        type: string
-        required: true
-      test_label_multi:
-        description: "requested runner label (specifies instance)"
-        type: string
-        required: true
-      test_timeout:
-        description: "time limit for test run in minutes "
-        type: string
-        required: true
-      gitref:
-        description: "git commit hash or branch name"
-        type: string
-        required: true
-      Gi_per_thread:
-        description: 'requested GiB to reserve per thread'
-        type: string
-        required: true
-      nvcc_threads:
-        description: "number of threads nvcc build threads"
-        type: string
-        required: true
-      python:
-        description: "python version, e.g. 3.10.12"
-        type: string
-        required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
-        type: string
-        required: true
-
 jobs:
 
     BUILD:

diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml
@@ -135,7 +135,7 @@ jobs:
         if: success()
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ github.run_id }}-${{ inputs.label }}
+          name: ${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}
           path: benchmark-results
           retention-days: 2
 
@@ -144,6 +144,7 @@ jobs:
         uses: ./.github/actions/nm-copy-benchmark-data-to-efs
         with:
           label: ${{ inputs.label }}
+          python: ${{ inputs.python }}
           src: benchmark-results
           efs_dst: /EFS/benchmark_results
 
@@ -164,7 +165,7 @@ jobs:
       - name: set gh action benchmark input artifact name
         id: set_gh_action_benchmark_input_artifact_name
         run: |
-          GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME=`echo "gh_action_benchmark_jsons-${{ github.run_id }}-${{ inputs.label }}"`
+          GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME=`echo "gh_action_benchmark_jsons-${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}"`
           echo "gh_action_benchmark_input_artifact_name=$GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME" >> $GITHUB_OUTPUT
 
       - name: store gh action benchmark input artifacts
@@ -180,6 +181,7 @@ jobs:
         uses: ./.github/actions/nm-copy-benchmark-data-to-efs
         with:
           label: ${{ inputs.label }}
+          python: ${{ inputs.python }}
           src: gh-action-benchmark-jsons
           efs_dst: /EFS/benchmark_results
 

diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml
@@ -11,7 +11,23 @@ concurrency:
 
 jobs:
 
-    BUILD-TEST:
+    BUILD-TEST-3-8:
+        uses: ./.github/workflows/build-test.yml
+        with:
+            python: 3.8.17
+            gitref: ${{ github.ref }}
+
+            test_label_solo: aws-avx2-32G-a10g-24G
+            test_label_multi: ignore
+            test_timeout: 480
+            test_skip_list: neuralmagic/tests/skip-for-remote-push.txt
+
+            benchmark_label: aws-avx2-32G-a10g-24G
+            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
+            benchmark_timeout: 480
+        secrets: inherit
+
+    BUILD-TEST-3-10:
         uses: ./.github/workflows/build-test.yml
         with:
             python: 3.10.12
@@ -24,5 +40,5 @@ jobs:
 
             benchmark_label: aws-avx2-32G-a10g-24G
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
-            benchmark_timeout: 180
+            benchmark_timeout: 480
         secrets: inherit
diff --git a/neuralmagic/tests/skip-for-remote-push-tmp.txt b/neuralmagic/tests/skip-for-remote-push-tmp.txt
@@ -0,0 +1,53 @@
+tests/test_sequence.py
+tests/metrics/test_metrics.py
+tests/kernels/test_prefix_prefill.py
+tests/kernels/test_pos_encoding.py
+tests/kernels/test_activation.py
+tests/kernels/test_moe.py
+tests/kernels/test_layernorm.py
+tests/kernels/test_attention.py
+tests/core/test_block_manager.py
+tests/distributed/test_basic_distributed_correctness.py
+tests/distributed/test_chunked_prefill_distributed.py
+tests/distributed/test_comm_ops.py
+tests/distributed/test_custom_all_reduce.py
+tests/distributed/test_pynccl_library.py
+tests/distributed/test_pynccl.py
+tests/prefix_caching/test_prefix_caching.py
+tests/models/test_models_logprobs.py
+tests/models/test_models.py
+tests/spec_decode/test_utils.py
+tests/spec_decode/test_multi_step_worker.py
+tests/spec_decode/test_spec_decode_worker.py
+tests/spec_decode/test_batch_expansion.py
+tests/spec_decode/test_metrics.py
+tests/spec_decode/test_ngram_worker.py
+tests/spec_decode/e2e/test_logprobs.py
+tests/spec_decode/e2e/test_ngram_correctness.py
+tests/spec_decode/e2e/test_compatibility.py
+tests/spec_decode/e2e/test_multistep_correctness.py
+tests/spec_decode/test_metrics.py
+tests/test_sampling_params.py
+tests/async_engine/test_async_llm_engine.py
+tests/async_engine/test_chat_template.py
+tests/async_engine/test_request_tracker.py
+tests/samplers/test_logprobs.py
+tests/samplers/test_seeded_generate.py
+tests/samplers/test_rejection_sampler.py
+tests/samplers/test_sampler.py
+tests/entrypoints/test_guided_processors.py
+tests/entrypoints/test_openai_server.py
+tests/lora/test_utils.py
+tests/lora/test_tokenizer.py
+tests/lora/test_layer_variation.py
+tests/lora/test_gemma.py
+tests/lora/test_lora_manager.py
+tests/lora/test_worker.py
+tests/lora/test_mixtral.py
+tests/lora/test_punica.py
+tests/lora/test_lora.py
+tests/worker/test_model_runner.py
+tests/engine/test_detokenize.py
+tests/engine/test_computed_prefix_blocks.py
+tests/accuracy/test_lm_eval_correctness.py
+tests/tensorizer_loader/test_tensorizer.py
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -9,7 +9,7 @@ isort==5.13.2
 # type checking
 mypy==1.9.0
 types-PyYAML
-types-requests
+types-requests==2.31.0.2
 types-setuptools
 
 # testing
@@ -21,7 +21,7 @@ pytest-rerunfailures
 pytest-shard
 httpx
 einops # required for MPT
-requests
+requests==2.31
 ray
 peft
 awscli

diff --git a/tests/engine/test_multiproc_workers.py b/tests/engine/test_multiproc_workers.py
@@ -1,4 +1,6 @@
 import asyncio
+# UPSTREAM SYNC
+import sys
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from time import sleep
@@ -100,6 +102,11 @@ def execute_workers(worker_input: str) -> None:
 def test_local_workers_clean_shutdown() -> None:
     """Test clean shutdown"""
 
+    # UPSTREAM SYNC
+    pytest.mark.skipif(sys.version_info < (3, 10),
+                       reason="This test is inexplicably failing in CI "
+                       "on Python < 3.10")
+
     workers, worker_monitor = _start_workers()
 
     assert worker_monitor.is_alive()

diff --git a/tests/models/test_big_models.py b/tests/models/test_big_models.py
@@ -4,6 +4,9 @@
 
 Run `pytest tests/models/test_big_models.py`.
 """
+# UPSTREAM SYNC
+import sys
+
 import pytest
 
 MODELS = [
@@ -27,6 +30,11 @@
     "EleutherAI/gpt-j-6b",
 ]
 
+# UPSTREAM SYNC
+SKIPPED_MODELS_PY38 = [
+    "mosaicml/mpt-7b",
+]
+
 
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
@@ -45,6 +53,10 @@ def test_models(
     if model in SKIPPED_MODELS_OOM:
         pytest.skip(reason="These models cause OOM issue on the CPU"
                     "because it is a fp32 checkpoint.")
+    # UPSTREAM SYNC
+    if model in SKIPPED_MODELS_PY38 and sys.version_info < (3, 9):
+        pytest.skip(reason="This model has custom code that does not "
+                    "support Python 3.8")
 
     hf_model = hf_runner(model, dtype=dtype)
     hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
@@ -1,8 +1,8 @@
 import asyncio
 import time
 from functools import partial
-from typing import (Any, AsyncIterator, Callable, Dict, Iterable, List,
-                    Optional, Set, Tuple, Type, Union)
+from typing import (AsyncIterator, Callable, Dict, Iterable, List, Optional,
+                    Set, Tuple, Type, Union)
 
 from transformers import PreTrainedTokenizer
 
@@ -327,7 +327,7 @@ def __init__(self,
         # We need to keep a reference to unshielded
         # task as well to prevent it from being garbage
         # collected
-        self._background_loop_unshielded: Optional[asyncio.Task[Any]] = None
+        self._background_loop_unshielded: Optional[asyncio.Task] = None
         self.start_engine_loop = start_engine_loop
         self._errored_with: Optional[BaseException] = None
 

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
@@ -4,7 +4,7 @@
 import re
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Any, Set
+from typing import Set
 
 import fastapi
 import uvicorn
@@ -34,7 +34,7 @@
 openai_serving_completion: OpenAIServingCompletion
 logger = init_logger(__name__)
 
-_running_tasks: Set[asyncio.Task[Any]] = set()
+_running_tasks: Set[asyncio.Task] = set()
 
 
 @asynccontextmanager