From 26148120b3c05704409a425d017f0a51fca3b7cc Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Thu, 16 May 2024 22:58:25 -0500 Subject: [PATCH] [Build/CI] Extending the set of AMD tests with Regression, Basic Correctness, Distributed, Engine, Llava Tests (#4797) --- .buildkite/run-amd-test.sh | 11 ++++++----- .buildkite/test-pipeline.yaml | 18 +++++++++++++++--- .buildkite/test-template.j2 | 3 +-- tests/engine/test_stop_reason.py | 6 +++++- vllm/config.py | 10 +--------- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index ce508e4748aba..7452423479521 100644 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -1,4 +1,4 @@ -# This script build the ROCm docker image and runs test inside it. +# This script runs tests inside the corresponding ROCm docker container. set -ex # Print ROCm version @@ -19,15 +19,16 @@ done echo "--- Building container" sha=$(git rev-parse --short HEAD) -container_name=rocm_${sha} +image_name=rocm_${sha} +container_name=rocm_${sha}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo) docker build \ - -t ${container_name} \ + -t ${image_name} \ -f Dockerfile.rocm \ --progress plain \ . 
remove_docker_container() { - docker rm -f ${container_name} || docker image rm -f ${container_name} || true + docker rm -f ${container_name} || docker image rm -f ${image_name} || true } trap remove_docker_container EXIT @@ -39,6 +40,6 @@ docker run \ --rm \ -e HF_TOKEN \ --name ${container_name} \ - ${container_name} \ + ${image_name} \ /bin/bash -c "${@}" diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index aa74672f4bf67..d9819881fbbfc 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -5,13 +5,16 @@ steps: - label: Regression Test + mirror_hardwares: [amd] command: pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional - label: AsyncEngine Test + #mirror_hardwares: [amd] command: pytest -v -s async_engine - label: Basic Correctness Test + mirror_hardwares: [amd] commands: - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py @@ -24,14 +27,15 @@ steps: command: pytest -v -s core - label: Distributed Comm Ops Test + #mirror_hardwares: [amd] command: pytest -v -s distributed/test_comm_ops.py working_dir: "/vllm-workspace/tests" num_gpus: 2 - label: Distributed Tests + mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" num_gpus: 2 - mirror_hardwares: [amd] commands: - pytest -v -s distributed/test_pynccl_library.py - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py @@ -45,16 +49,18 @@ steps: - pytest -v -s spec_decode/e2e/test_integration_dist.py - label: Distributed Tests (Multiple Groups) + #mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" num_gpus: 4 commands: - pytest -v -s distributed/test_pynccl.py - label: Engine Test - #mirror_hardwares: [amd] + mirror_hardwares: [amd] command: pytest -v -s engine tokenization test_sequence.py test_config.py 
test_logger.py - label: Entrypoints Test + #mirror_hardwares: [amd] commands: # these tests have to be separated, because each one will allocate all posible GPU memory - pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py @@ -74,6 +80,7 @@ steps: - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors - label: Kernels Test %N + #mirror_hardwares: [amd] command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 @@ -84,7 +91,7 @@ steps: - pytest -v -s models --ignore=models/test_llava.py - label: Llava Test - #mirror_hardwares: [amd] + mirror_hardwares: [amd] commands: - bash ../.buildkite/download-images.sh - pytest -v -s models/test_llava.py @@ -95,6 +102,7 @@ steps: - pytest -v -s prefix_caching - label: Samplers Test + #mirror_hardwares: [amd] command: pytest -v -s samplers - label: LogitsProcessor Test @@ -110,16 +118,20 @@ steps: command: pytest -v -s spec_decode - label: LoRA Test %N + #mirror_hardwares: [amd] command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 - label: Tensorizer Test + #mirror_hardwares: [amd] command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader - label: Metrics Test + mirror_hardwares: [amd] command: pytest -v -s metrics - label: Quantization Test + #mirror_hardwares: [amd] command: pytest -v -s quantization - label: Benchmarks diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 174c756ae74a3..265833e2ccf6e 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -3,9 +3,8 @@ {% set default_working_dir = "/vllm-workspace/tests" %} steps: - - label: ":docker: build image" - commands: + commands: - "docker build --build-arg 
max_jobs=16 --tag {{ docker_image }} --target test --progress plain ." - "docker push {{ docker_image }}" env: diff --git a/tests/engine/test_stop_reason.py b/tests/engine/test_stop_reason.py index b2f521a8ae4ce..7b886507c04f2 100644 --- a/tests/engine/test_stop_reason.py +++ b/tests/engine/test_stop_reason.py @@ -32,6 +32,7 @@ def test_stop_reason(vllm_model, example_prompts): # test stop token outputs = llm.generate(example_prompts, sampling_params=SamplingParams( + ignore_eos=True, seed=SEED, max_tokens=MAX_TOKENS, stop_token_ids=[stop_token_id])) @@ -43,7 +44,10 @@ def test_stop_reason(vllm_model, example_prompts): # test stop string outputs = llm.generate(example_prompts, sampling_params=SamplingParams( - seed=SEED, max_tokens=MAX_TOKENS, stop=".")) + ignore_eos=True, + seed=SEED, + max_tokens=MAX_TOKENS, + stop=".")) for output in outputs: output = output.outputs[0] assert output.finish_reason == "stop" diff --git a/vllm/config.py b/vllm/config.py index 77ce8c318d8f1..6be8f353aa389 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1060,7 +1060,7 @@ def get_image_input_enum_type( "bfloat16": torch.bfloat16, } -_ROCM_NOT_SUPPORTED_DTYPE = ["float", "float32"] +_ROCM_NOT_SUPPORTED_DTYPE: List[str] = [] # def _get_and_verify_dtype( @@ -1092,14 +1092,6 @@ def _get_and_verify_dtype( else: raise ValueError(f"Unknown dtype: {dtype}") - if is_hip() and torch_dtype == torch.float32: - rocm_supported_dtypes = [ - k for k, v in _STR_DTYPE_TO_TORCH_DTYPE.items() - if (k not in _ROCM_NOT_SUPPORTED_DTYPE) - ] - raise ValueError(f"dtype '{dtype}' is not supported in ROCm. " - f"Supported dtypes are {rocm_supported_dtypes}") - # Verify the dtype. if torch_dtype != config_dtype: if torch_dtype == torch.float32: