From a1e30813761f460d07ed838cd8a65b37a8adf74a Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Thu, 26 Sep 2024 10:06:10 +0000
Subject: [PATCH] p

Signed-off-by: kevin <kevin@anyscale.com>
---
 .../pipeline_generator/pipeline_generator.py  | 64 +++++++++--------
 scripts/pipeline_generator/plugin.py          |  1 +
 scripts/pipeline_generator/step.py            |  5 +-
 .../test_pipeline_generator.py                | 69 ++++++++++++-------
 4 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/scripts/pipeline_generator/pipeline_generator.py b/scripts/pipeline_generator/pipeline_generator.py
index 768b6b0..573f6ff 100644
--- a/scripts/pipeline_generator/pipeline_generator.py
+++ b/scripts/pipeline_generator/pipeline_generator.py
@@ -22,13 +22,14 @@
     get_multi_node_test_command,
 )
 from .step import (
-    TestStep, 
-    BuildkiteStep, 
-    BuildkiteBlockStep, 
-    get_block_step, 
+    TestStep,
+    BuildkiteStep,
+    BuildkiteBlockStep,
+    get_block_step,
     get_step_key
 )
 
+
 class PipelineGenerator:
     def __init__(self, run_all: bool, list_file_diff: List[str]):
         self.run_all = run_all
@@ -47,8 +48,8 @@ def step_should_run(self, step: TestStep) -> bool:
             return False
         if not step.source_file_dependencies or self.run_all:
             return True
-        return any(source_file in diff_file 
-                   for source_file in step.source_file_dependencies 
+        return any(source_file in diff_file
+                   for source_file in step.source_file_dependencies
                    for diff_file in self.list_file_diff)
 
     def process_step(self, step: TestStep) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]:
@@ -70,21 +71,25 @@ def generate_build_step(self) -> BuildkiteStep:
         build_commands = self.get_build_commands(docker_image)
 
         return BuildkiteStep(
-            label=":docker: build image", 
-            key="build", 
-            agents={"queue": AgentQueue.AWS_CPU.value}, 
-            env={"DOCKER_BUILDKIT": "1"}, 
+            label=":docker: build image",
+            key="build",
+            agents={"queue": AgentQueue.AWS_CPU.value},
+            env={"DOCKER_BUILDKIT": "1"},
             retry={
                 "automatic": [
-                    {"exit_status": -1, "limit": 2}, 
+                    {"exit_status": -1, "limit": 2},
                     {"exit_status": -10, "limit": 2}
                 ]
-            }, 
+            },
             commands=build_commands,
             depends_on=None,
         )
-    
-    def write_buildkite_steps(self, buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]], output_file_path: str) -> None:
+
+    def write_buildkite_steps(
+            self,
+            buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]],
+            output_file_path: str
+            ) -> None:
         """Output the buildkite steps to the Buildkite pipeline yaml file."""
         buildkite_steps_dict = {"steps": [step.dict(exclude_none=True) for step in buildkite_steps]}
         with open(output_file_path, "w") as f:
@@ -119,8 +124,8 @@ def get_plugin_config(self, step: TestStep) -> Dict:
 
     def create_buildkite_step(self, step: TestStep) -> BuildkiteStep:
         buildkite_step = BuildkiteStep(
-            label=step.label, 
-            key=get_step_key(step.label), 
+            label=step.label,
+            key=get_step_key(step.label),
             parallelism=step.parallelism,
             soft_fail=step.soft_fail,
             plugins=[self.get_plugin_config(step)],
@@ -132,10 +137,10 @@ def create_buildkite_step(self, step: TestStep) -> BuildkiteStep:
 
     def _configure_multi_node_step(self, current_step: BuildkiteStep, step: TestStep):
         current_step.commands = [get_multi_node_test_command(
-                step.commands, 
-                step.working_dir, 
-                step.num_nodes, 
-                step.num_gpus, 
+                step.commands,
+                step.working_dir,
+                step.num_nodes,
+                step.num_gpus,
                 f"{VLLM_ECR_REPO}:{self.commit}"
             )
         ]
@@ -190,22 +195,23 @@ def _mirror_amd_test_steps(self, test_steps: List[TestStep]) -> List[BuildkiteSt
             if test_step.mirror_hardwares and "amd" in test_step.mirror_hardwares:
                 test_commands = [test_step.command] if test_step.command else test_step.commands
                 amd_test_command = [
-                    "bash", 
-                    ".buildkite/run-amd-test.sh", 
+                    "bash",
+                    ".buildkite/run-amd-test.sh",
                     f"'{get_full_test_command(test_commands, test_step.working_dir)}'",
                 ]
                 mirrored_buildkite_step = BuildkiteStep(
-                    label = f"AMD: {test_step.label}",
-                    key = f"amd_{get_step_key(test_step.label)}",
-                    depends_on = "amd-build",
-                    agents = {"queue": AgentQueue.AMD_GPU.value},
-                    soft_fail = test_step.soft_fail,
-                    env = {"DOCKER_BUILDKIT": "1"},
-                    commands = [" ".join(amd_test_command)],
+                    label=f"AMD: {test_step.label}",
+                    key=f"amd_{get_step_key(test_step.label)}",
+                    depends_on="amd-build",
+                    agents={"queue": AgentQueue.AMD_GPU.value},
+                    soft_fail=test_step.soft_fail,
+                    env={"DOCKER_BUILDKIT": "1"},
+                    commands=[" ".join(amd_test_command)],
                 )
                 mirrored_buildkite_steps.append(mirrored_buildkite_step)
         return mirrored_buildkite_steps
 
+
 @click.command()
 @click.option("--run_all", type=str)
 @click.option("--list_file_diff", type=str)
diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py
index d36c0a2..3a79e0e 100644
--- a/scripts/pipeline_generator/plugin.py
+++ b/scripts/pipeline_generator/plugin.py
@@ -39,6 +39,7 @@
 ]
 DEFAULT_KUBERNETES_NODE_SELECTOR = {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"}
 
+
 class DockerPluginConfig(BaseModel):
     """
     Configuration for Docker plugin running in a Buildkite step.
diff --git a/scripts/pipeline_generator/step.py b/scripts/pipeline_generator/step.py
index 07b0d29..bdf34ee 100644
--- a/scripts/pipeline_generator/step.py
+++ b/scripts/pipeline_generator/step.py
@@ -1,10 +1,11 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from typing import List, Dict, Any, Optional
 
 from .utils import AgentQueue
 
 BUILD_STEP_KEY = "build"
 
+
 class TestStep(BaseModel):
     """This class represents a test step defined in the test configuration file."""
     label: str
@@ -22,6 +23,7 @@ class TestStep(BaseModel):
     command: Optional[str] = None
     commands: Optional[List[str]] = None
 
+
 class BuildkiteStep(BaseModel):
     """This class represents a step in Buildkite format."""
     label: str
@@ -35,6 +37,7 @@ class BuildkiteStep(BaseModel):
     env: Optional[Dict[str, str]] = None
     retry: Optional[Dict[str, Any]] = None
 
+
 class BuildkiteBlockStep(BaseModel):
     """This class represents a block step in Buildkite format."""
     block: str
diff --git a/scripts/tests/pipeline_generator/test_pipeline_generator.py b/scripts/tests/pipeline_generator/test_pipeline_generator.py
index a405bb6..40236c2 100644
--- a/scripts/tests/pipeline_generator/test_pipeline_generator.py
+++ b/scripts/tests/pipeline_generator/test_pipeline_generator.py
@@ -7,11 +7,17 @@
 from scripts.pipeline_generator.step import TestStep, BuildkiteStep, BuildkiteBlockStep
 from scripts.pipeline_generator.utils import (
     AgentQueue,
-    get_full_test_command,
     VLLM_ECR_REPO,
     MULTI_NODE_TEST_SCRIPT,
 )
-from scripts.pipeline_generator.plugin import DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, DEFAULT_DOCKER_VOLUMES, DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, DEFAULT_KUBERNETES_NODE_SELECTOR, DEFAULT_KUBERNETES_POD_VOLUMES
+from scripts.pipeline_generator.plugin import (
+    DEFAULT_DOCKER_ENVIRONMENT_VARIBLES,
+    DEFAULT_DOCKER_VOLUMES,
+    DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS,
+    DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES,
+    DEFAULT_KUBERNETES_NODE_SELECTOR,
+    DEFAULT_KUBERNETES_POD_VOLUMES,
+)
 
 TEST_COMMIT = "123456789abcdef123456789abcdef123456789a"
 TEST_FILE_PATH = "scripts/tests/pipeline_generator/tests.yaml"
@@ -35,7 +41,7 @@ def test_read_test_steps():
     assert steps[2].num_gpus == 2
     assert steps[2].num_nodes == 2
     assert steps[3].gpu == "a100"
-    assert steps[3].optional == True
+    assert steps[3].optional is True
 
 
 @pytest.mark.parametrize(
@@ -96,7 +102,6 @@ def test_read_test_steps():
 )
 def test_get_plugin_config(test_step, expected_plugin_config):
     pipeline_generator = get_test_pipeline_generator()
-    container_image_path = f"{VLLM_ECR_REPO}:{TEST_COMMIT}"
 
     plugin_config = pipeline_generator.get_plugin_config(test_step)
     assert plugin_config == expected_plugin_config
@@ -153,7 +158,7 @@ def test_get_plugin_config(test_step, expected_plugin_config):
                             "podSpec": {
                                 "containers": [
                                     {
-                                        "image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a",
+                                        "image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}",
                                         "command": [
                                             'bash -c "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2"'
                                         ],
@@ -184,9 +189,11 @@ def test_get_plugin_config(test_step, expected_plugin_config):
                 label="Test 2",
                 key="test-2",
                 agents={"queue": AgentQueue.AWS_4xL4.value},
-                commands=[f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'"],
+                commands=[
+                    f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'"
+                ],
             ),
-        )
+        ),
     ],
 )
 def test_create_buildkite_step(test_step, expected_buildkite_step):
@@ -195,6 +202,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
     buildkite_step = pipeline_generator.create_buildkite_step(test_step)
     assert buildkite_step == expected_buildkite_step
 
+
 @pytest.mark.parametrize(
     ("test_step", "expected_value_without_runall", "expected_value_with_runall"),
     [
@@ -205,7 +213,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
                 commands=["test command 1", "test command 2"],
             ),
             True,
-            True
+            True,
         ),
         (
             TestStep(
@@ -213,7 +221,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
                 commands=["test command 1", "test command 2"],
             ),
             True,
-            True
+            True,
         ),
         (
             TestStep(
@@ -222,7 +230,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
                 commands=["test command 1", "test command 2"],
             ),
             False,
-            True
+            True,
         ),
         (
             TestStep(
@@ -233,19 +241,24 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
                 num_gpus=4,
             ),
             False,
-            False
+            False,
         ),
     ],
 )
-def test_step_should_run(test_step, expected_value_without_runall, expected_value_with_runall):
+def test_step_should_run(
+    test_step, expected_value_without_runall, expected_value_with_runall
+):
     pipeline_generator = get_test_pipeline_generator()
     pipeline_generator.list_file_diff = ["dir1/a.py", "dir3/file2.py"]
-    assert pipeline_generator.step_should_run(test_step) == expected_value_without_runall
+    assert (
+        pipeline_generator.step_should_run(test_step) == expected_value_without_runall
+    )
 
-    # With run_all 
+    # With run_all
     pipeline_generator.run_all = True
     assert pipeline_generator.step_should_run(test_step) == expected_value_with_runall
 
+
 @pytest.mark.parametrize(
     ("test_step", "expected_buildkite_steps"),
     [
@@ -279,7 +292,7 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu
                         }
                     ],
                 ),
-            ]
+            ],
         ),
         # Test doesn't automatically run because dependencies are not matched -> with block step
         (
@@ -314,34 +327,38 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu
                         }
                     ],
                 ),
-            ]
-        )
-    ]
+            ],
+        ),
+    ],
 )
 def test_process_step(test_step, expected_buildkite_steps):
     pipeline_generator = get_test_pipeline_generator()
     buildkite_steps = pipeline_generator.process_step(test_step)
     assert buildkite_steps == expected_buildkite_steps
 
+
 def test_generate_build_step():
     pipeline_generator = get_test_pipeline_generator()
-    pipeline_generator.get_build_commands = mock.MagicMock(return_value=["build command 1", "build command 2"])
+    pipeline_generator.get_build_commands = mock.MagicMock(
+        return_value=["build command 1", "build command 2"]
+    )
     build_step = pipeline_generator.generate_build_step()
     expected_build_step = BuildkiteStep(
-        label=":docker: build image", 
-        key="build", 
-        agents={"queue": AgentQueue.AWS_CPU.value}, 
-        env={"DOCKER_BUILDKIT": "1"}, 
+        label=":docker: build image",
+        key="build",
+        agents={"queue": AgentQueue.AWS_CPU.value},
+        env={"DOCKER_BUILDKIT": "1"},
         retry={
             "automatic": [
-                {"exit_status": -1, "limit": 2}, 
-                {"exit_status": -10, "limit": 2}
+                {"exit_status": -1, "limit": 2},
+                {"exit_status": -10, "limit": 2},
             ]
-        }, 
+        },
         commands=["build command 1", "build command 2"],
         depends_on=None,
     )
     assert build_step == expected_build_step
 
+
 if __name__ == "__main__":
     sys.exit(pytest.main(["-v", __file__]))