From 26148120b3c05704409a425d017f0a51fca3b7cc Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Thu, 16 May 2024 22:58:25 -0500 Subject: [PATCH] [Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) --- .buildkite/run-amd-test.sh | 11 ++++++----- .buildkite/test-pipeline.yaml | 18 +++++++++++++++--- .buildkite/test-template.j2 | 3 +-- tests/engine/test_stop_reason.py | 6 +++++- vllm/config.py | 10 +--------- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index ce508e4748aba..7452423479521 100644 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -1,4 +1,4 @@ -# This script build the ROCm docker image and runs test inside it. +# This script runs tests inside the corresponding ROCm docker container. set -ex # Print ROCm version @@ -19,15 +19,16 @@ done echo "--- Building container" sha=$(git rev-parse --short HEAD) -container_name=rocm_${sha} +image_name=rocm_${sha} +container_name=rocm_${sha}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo) docker build \ - -t ${container_name} \ + -t ${image_name} \ -f Dockerfile.rocm \ --progress plain \ . 
remove_docker_container() { - docker rm -f ${container_name} || docker image rm -f ${container_name} || true + docker rm -f ${container_name} || docker image rm -f ${image_name} || true } trap remove_docker_container EXIT @@ -39,6 +40,6 @@ docker run \ --rm \ -e HF_TOKEN \ --name ${container_name} \ - ${container_name} \ + ${image_name} \ /bin/bash -c "${@}" diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index aa74672f4bf67..d9819881fbbfc 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -5,13 +5,16 @@ steps: - label: Regression Test + mirror_hardwares: [amd] command: pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional - label: AsyncEngine Test + #mirror_hardwares: [amd] command: pytest -v -s async_engine - label: Basic Correctness Test + mirror_hardwares: [amd] commands: - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py @@ -24,14 +27,15 @@ steps: command: pytest -v -s core - label: Distributed Comm Ops Test + #mirror_hardwares: [amd] command: pytest -v -s distributed/test_comm_ops.py working_dir: "/vllm-workspace/tests" num_gpus: 2 - label: Distributed Tests + mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" num_gpus: 2 - mirror_hardwares: [amd] commands: - pytest -v -s distributed/test_pynccl_library.py - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py @@ -45,16 +49,18 @@ steps: - pytest -v -s spec_decode/e2e/test_integration_dist.py - label: Distributed Tests (Multiple Groups) + #mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" num_gpus: 4 commands: - pytest -v -s distributed/test_pynccl.py - label: Engine Test - #mirror_hardwares: [amd] + mirror_hardwares: [amd] command: pytest -v -s engine tokenization test_sequence.py test_config.py 
test_logger.py - label: Entrypoints Test + #mirror_hardwares: [amd] commands: # these tests have to be separated, because each one will allocate all posible GPU memory - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py @@ -74,6 +80,7 @@ steps: - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors - label: Kernels Test %N + #mirror_hardwares: [amd] command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 @@ -84,7 +91,7 @@ steps: - pytest -v -s models --ignore=models/test_llava.py - label: Llava Test - #mirror_hardwares: [amd] + mirror_hardwares: [amd] commands: - bash ../.buildkite/download-images.sh - pytest -v -s models/test_llava.py @@ -95,6 +102,7 @@ steps: - pytest -v -s prefix_caching - label: Samplers Test + #mirror_hardwares: [amd] command: pytest -v -s samplers - label: LogitsProcessor Test @@ -110,16 +118,20 @@ steps: command: pytest -v -s spec_decode - label: LoRA Test %N + #mirror_hardwares: [amd] command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 - label: Tensorizer Test + #mirror_hardwares: [amd] command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader - label: Metrics Test + mirror_hardwares: [amd] command: pytest -v -s metrics - label: Quantization Test + #mirror_hardwares: [amd] command: pytest -v -s quantization - label: Benchmarks diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 174c756ae74a3..265833e2ccf6e 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -3,9 +3,8 @@ {% set default_working_dir = "/vllm-workspace/tests" %} steps: - - label: ":docker: build image" - commands: + commands: - "docker build --build-arg 
max_jobs=16 --tag {{ docker_image }} --target test --progress plain ." - "docker push {{ docker_image }}" env: diff --git a/tests/engine/test_stop_reason.py b/tests/engine/test_stop_reason.py index b2f521a8ae4ce..7b886507c04f2 100644 --- a/tests/engine/test_stop_reason.py +++ b/tests/engine/test_stop_reason.py @@ -32,6 +32,7 @@ def test_stop_reason(vllm_model, example_prompts): # test stop token outputs = llm.generate(example_prompts, sampling_params=SamplingParams( + ignore_eos=True, seed=SEED, max_tokens=MAX_TOKENS, stop_token_ids=[stop_token_id])) @@ -43,7 +44,10 @@ def test_stop_reason(vllm_model, example_prompts): # test stop string outputs = llm.generate(example_prompts, sampling_params=SamplingParams( - seed=SEED, max_tokens=MAX_TOKENS, stop=".")) + ignore_eos=True, + seed=SEED, + max_tokens=MAX_TOKENS, + stop=".")) for output in outputs: output = output.outputs[0] assert output.finish_reason == "stop" diff --git a/vllm/config.py b/vllm/config.py index 77ce8c318d8f1..6be8f353aa389 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1060,7 +1060,7 @@ def get_image_input_enum_type( "bfloat16": torch.bfloat16, } -_ROCM_NOT_SUPPORTED_DTYPE = ["float", "float32"] +_ROCM_NOT_SUPPORTED_DTYPE: List[str] = [] # def _get_and_verify_dtype( @@ -1092,14 +1092,6 @@ def _get_and_verify_dtype( else: raise ValueError(f"Unknown dtype: {dtype}") - if is_hip() and torch_dtype == torch.float32: - rocm_supported_dtypes = [ - k for k, v in _STR_DTYPE_TO_TORCH_DTYPE.items() - if (k not in _ROCM_NOT_SUPPORTED_DTYPE) - ] - raise ValueError(f"dtype '{dtype}' is not supported in ROCm. " - f"Supported dtypes are {rocm_supported_dtypes}") - # Verify the dtype. if torch_dtype != config_dtype: if torch_dtype == torch.float32: