From a1e30813761f460d07ed838cd8a65b37a8adf74a Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 26 Sep 2024 10:06:10 +0000 Subject: [PATCH] p Signed-off-by: kevin --- .../pipeline_generator/pipeline_generator.py | 64 +++++++++-------- scripts/pipeline_generator/plugin.py | 1 + scripts/pipeline_generator/step.py | 5 +- .../test_pipeline_generator.py | 69 ++++++++++++------- 4 files changed, 83 insertions(+), 56 deletions(-) diff --git a/scripts/pipeline_generator/pipeline_generator.py b/scripts/pipeline_generator/pipeline_generator.py index 768b6b0..573f6ff 100644 --- a/scripts/pipeline_generator/pipeline_generator.py +++ b/scripts/pipeline_generator/pipeline_generator.py @@ -22,13 +22,14 @@ get_multi_node_test_command, ) from .step import ( - TestStep, - BuildkiteStep, - BuildkiteBlockStep, - get_block_step, + TestStep, + BuildkiteStep, + BuildkiteBlockStep, + get_block_step, get_step_key ) + class PipelineGenerator: def __init__(self, run_all: bool, list_file_diff: List[str]): self.run_all = run_all @@ -47,8 +48,8 @@ def step_should_run(self, step: TestStep) -> bool: return False if not step.source_file_dependencies or self.run_all: return True - return any(source_file in diff_file - for source_file in step.source_file_dependencies + return any(source_file in diff_file + for source_file in step.source_file_dependencies for diff_file in self.list_file_diff) def process_step(self, step: TestStep) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]: @@ -70,21 +71,25 @@ def generate_build_step(self) -> BuildkiteStep: build_commands = self.get_build_commands(docker_image) return BuildkiteStep( - label=":docker: build image", - key="build", - agents={"queue": AgentQueue.AWS_CPU.value}, - env={"DOCKER_BUILDKIT": "1"}, + label=":docker: build image", + key="build", + agents={"queue": AgentQueue.AWS_CPU.value}, + env={"DOCKER_BUILDKIT": "1"}, retry={ "automatic": [ - {"exit_status": -1, "limit": 2}, + {"exit_status": -1, "limit": 2}, {"exit_status": -10, "limit": 2} ] - }, + }, commands=build_commands, depends_on=None, ) - - def write_buildkite_steps(self, buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]], output_file_path: str) -> None: + + def write_buildkite_steps( + self, + buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]], + output_file_path: str + ) -> None: """Output the buildkite steps to the Buildkite pipeline yaml file.""" buildkite_steps_dict = {"steps": [step.dict(exclude_none=True) for step in buildkite_steps]} with open(output_file_path, "w") as f: @@ -119,8 +124,8 @@ def get_plugin_config(self, step: TestStep) -> Dict: def create_buildkite_step(self, step: TestStep) -> BuildkiteStep: buildkite_step = BuildkiteStep( - label=step.label, - key=get_step_key(step.label), + label=step.label, + key=get_step_key(step.label), parallelism=step.parallelism, soft_fail=step.soft_fail, plugins=[self.get_plugin_config(step)], @@ -132,10 +137,10 @@ def create_buildkite_step(self, step: TestStep) -> BuildkiteStep: def _configure_multi_node_step(self, current_step: BuildkiteStep, step: TestStep): current_step.commands = [get_multi_node_test_command( - step.commands, - step.working_dir, - step.num_nodes, - step.num_gpus, + step.commands, + step.working_dir, + step.num_nodes, + step.num_gpus, f"{VLLM_ECR_REPO}:{self.commit}" ) ] @@ -190,22 +195,23 @@ def _mirror_amd_test_steps(self, test_steps: List[TestStep]) -> List[BuildkiteSt if test_step.mirror_hardwares and "amd" in test_step.mirror_hardwares: test_commands = [test_step.command] if test_step.command else test_step.commands amd_test_command = [ - "bash", - ".buildkite/run-amd-test.sh", + "bash", + ".buildkite/run-amd-test.sh", f"'{get_full_test_command(test_commands, test_step.working_dir)}'", ] mirrored_buildkite_step = BuildkiteStep( - label = f"AMD: {test_step.label}", - key = f"amd_{get_step_key(test_step.label)}", - depends_on = "amd-build", - agents = {"queue": AgentQueue.AMD_GPU.value}, - soft_fail = test_step.soft_fail, - env = {"DOCKER_BUILDKIT": "1"}, - commands = [" ".join(amd_test_command)], + label=f"AMD: {test_step.label}", + key=f"amd_{get_step_key(test_step.label)}", + depends_on="amd-build", + agents={"queue": AgentQueue.AMD_GPU.value}, + soft_fail=test_step.soft_fail, + env={"DOCKER_BUILDKIT": "1"}, + commands=[" ".join(amd_test_command)], ) mirrored_buildkite_steps.append(mirrored_buildkite_step) return mirrored_buildkite_steps + @click.command() @click.option("--run_all", type=str) @click.option("--list_file_diff", type=str) diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py index d36c0a2..3a79e0e 100644 --- a/scripts/pipeline_generator/plugin.py +++ b/scripts/pipeline_generator/plugin.py @@ -39,6 +39,7 @@ ] DEFAULT_KUBERNETES_NODE_SELECTOR = {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"} + class DockerPluginConfig(BaseModel): """ Configuration for Docker plugin running in a Buildkite step. diff --git a/scripts/pipeline_generator/step.py b/scripts/pipeline_generator/step.py index 07b0d29..bdf34ee 100644 --- a/scripts/pipeline_generator/step.py +++ b/scripts/pipeline_generator/step.py @@ -1,10 +1,11 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel from typing import List, Dict, Any, Optional from .utils import AgentQueue BUILD_STEP_KEY = "build" + class TestStep(BaseModel): """This class represents a test step defined in the test configuration file.""" label: str @@ -22,6 +23,7 @@ class TestStep(BaseModel): command: Optional[str] = None commands: Optional[List[str]] = None + class BuildkiteStep(BaseModel): """This class represents a step in Buildkite format.""" label: str @@ -35,6 +37,7 @@ class BuildkiteStep(BaseModel): env: Optional[Dict[str, str]] = None retry: Optional[Dict[str, Any]] = None + class BuildkiteBlockStep(BaseModel): """This class represents a block step in Buildkite format.""" block: str diff --git a/scripts/tests/pipeline_generator/test_pipeline_generator.py b/scripts/tests/pipeline_generator/test_pipeline_generator.py index a405bb6..40236c2 100644 --- a/scripts/tests/pipeline_generator/test_pipeline_generator.py +++ b/scripts/tests/pipeline_generator/test_pipeline_generator.py @@ -7,11 +7,17 @@ from scripts.pipeline_generator.step import TestStep, BuildkiteStep, BuildkiteBlockStep from scripts.pipeline_generator.utils import ( AgentQueue, - get_full_test_command, VLLM_ECR_REPO, MULTI_NODE_TEST_SCRIPT, ) -from scripts.pipeline_generator.plugin import DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, DEFAULT_DOCKER_VOLUMES, DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, DEFAULT_KUBERNETES_NODE_SELECTOR, DEFAULT_KUBERNETES_POD_VOLUMES +from scripts.pipeline_generator.plugin import ( + DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + DEFAULT_DOCKER_VOLUMES, + DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, + DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, + DEFAULT_KUBERNETES_NODE_SELECTOR, + DEFAULT_KUBERNETES_POD_VOLUMES, +) TEST_COMMIT = "123456789abcdef123456789abcdef123456789a" TEST_FILE_PATH = "scripts/tests/pipeline_generator/tests.yaml" @@ -35,7 +41,7 @@ def test_read_test_steps(): assert steps[2].num_gpus == 2 assert steps[2].num_nodes == 2 assert steps[3].gpu == "a100" - assert steps[3].optional == True + assert steps[3].optional is True @pytest.mark.parametrize( @@ -96,7 +102,6 @@ def test_read_test_steps(): ) def test_get_plugin_config(test_step, expected_plugin_config): pipeline_generator = get_test_pipeline_generator() - container_image_path = f"{VLLM_ECR_REPO}:{TEST_COMMIT}" plugin_config = pipeline_generator.get_plugin_config(test_step) assert plugin_config == expected_plugin_config @@ -153,7 +158,7 @@ def test_get_plugin_config(test_step, expected_plugin_config): "podSpec": { "containers": [ { - "image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a", + "image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}", "command": [ 'bash -c "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2"' ], @@ -184,9 +189,11 @@ def test_get_plugin_config(test_step, expected_plugin_config): label="Test 2", key="test-2", agents={"queue": AgentQueue.AWS_4xL4.value}, - commands=[f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'"], + commands=[ + f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'" + ], ), - ) + ), ], ) def test_create_buildkite_step(test_step, expected_buildkite_step): @@ -195,6 +202,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step): buildkite_step = pipeline_generator.create_buildkite_step(test_step) assert buildkite_step == expected_buildkite_step + @pytest.mark.parametrize( ("test_step", "expected_value_without_runall", "expected_value_with_runall"), [ @@ -205,7 +213,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step): commands=["test command 1", "test command 2"], ), True, - True + True, ), ( TestStep( @@ -213,7 +221,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step): commands=["test command 1", "test command 2"], ), True, - True + True, ), ( TestStep( @@ -222,7 +230,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step): commands=["test command 1", "test command 2"], ), False, - True + True, ), ( TestStep( @@ -233,19 +241,24 @@ def test_create_buildkite_step(test_step, expected_buildkite_step): num_gpus=4, ), False, - False + False, ), ], ) -def test_step_should_run(test_step, expected_value_without_runall, expected_value_with_runall): +def test_step_should_run( + test_step, expected_value_without_runall, expected_value_with_runall +): pipeline_generator = get_test_pipeline_generator() pipeline_generator.list_file_diff = ["dir1/a.py", "dir3/file2.py"] - assert pipeline_generator.step_should_run(test_step) == expected_value_without_runall + assert ( + pipeline_generator.step_should_run(test_step) == expected_value_without_runall + ) - # With run_all + # With run_all pipeline_generator.run_all = True assert pipeline_generator.step_should_run(test_step) == expected_value_with_runall + @pytest.mark.parametrize( ("test_step", "expected_buildkite_steps"), [ @@ -279,7 +292,7 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu } ], ), - ] + ], ), # Test doesn't automatically run because dependencies are not matched -> with block step ( @@ -314,34 +327,38 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu } ], ), - ] - ) - ] + ], + ), + ], ) def test_process_step(test_step, expected_buildkite_steps): pipeline_generator = get_test_pipeline_generator() buildkite_steps = pipeline_generator.process_step(test_step) assert buildkite_steps == expected_buildkite_steps + def test_generate_build_step(): pipeline_generator = get_test_pipeline_generator() - pipeline_generator.get_build_commands = mock.MagicMock(return_value=["build command 1", "build command 2"]) + pipeline_generator.get_build_commands = mock.MagicMock( + return_value=["build command 1", "build command 2"] + ) build_step = pipeline_generator.generate_build_step() expected_build_step = BuildkiteStep( - label=":docker: build image", - key="build", - agents={"queue": AgentQueue.AWS_CPU.value}, - env={"DOCKER_BUILDKIT": "1"}, + label=":docker: build image", + key="build", + agents={"queue": AgentQueue.AWS_CPU.value}, + env={"DOCKER_BUILDKIT": "1"}, retry={ "automatic": [ - {"exit_status": -1, "limit": 2}, - {"exit_status": -10, "limit": 2} + {"exit_status": -1, "limit": 2}, + {"exit_status": -10, "limit": 2}, ] - }, + }, commands=["build command 1", "build command 2"], depends_on=None, ) assert build_step == expected_build_step + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__]))