Skip to content

Commit

Permalink
p
Browse files Browse the repository at this point in the history
Signed-off-by: kevin <[email protected]>
  • Loading branch information
khluu committed Sep 26, 2024
1 parent 081017a commit a1e3081
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 56 deletions.
64 changes: 35 additions & 29 deletions scripts/pipeline_generator/pipeline_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@
get_multi_node_test_command,
)
from .step import (
TestStep,
BuildkiteStep,
BuildkiteBlockStep,
get_block_step,
TestStep,
BuildkiteStep,
BuildkiteBlockStep,
get_block_step,
get_step_key
)


class PipelineGenerator:
def __init__(self, run_all: bool, list_file_diff: List[str]):
self.run_all = run_all
Expand All @@ -47,8 +48,8 @@ def step_should_run(self, step: TestStep) -> bool:
return False
if not step.source_file_dependencies or self.run_all:
return True
return any(source_file in diff_file
for source_file in step.source_file_dependencies
return any(source_file in diff_file
for source_file in step.source_file_dependencies
for diff_file in self.list_file_diff)

def process_step(self, step: TestStep) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]:
Expand All @@ -70,21 +71,25 @@ def generate_build_step(self) -> BuildkiteStep:
build_commands = self.get_build_commands(docker_image)

return BuildkiteStep(
label=":docker: build image",
key="build",
agents={"queue": AgentQueue.AWS_CPU.value},
env={"DOCKER_BUILDKIT": "1"},
label=":docker: build image",
key="build",
agents={"queue": AgentQueue.AWS_CPU.value},
env={"DOCKER_BUILDKIT": "1"},
retry={
"automatic": [
{"exit_status": -1, "limit": 2},
{"exit_status": -1, "limit": 2},
{"exit_status": -10, "limit": 2}
]
},
},
commands=build_commands,
depends_on=None,
)

def write_buildkite_steps(self, buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]], output_file_path: str) -> None:

def write_buildkite_steps(
self,
buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]],
output_file_path: str
) -> None:
"""Output the buildkite steps to the Buildkite pipeline yaml file."""
buildkite_steps_dict = {"steps": [step.dict(exclude_none=True) for step in buildkite_steps]}
with open(output_file_path, "w") as f:
Expand Down Expand Up @@ -119,8 +124,8 @@ def get_plugin_config(self, step: TestStep) -> Dict:

def create_buildkite_step(self, step: TestStep) -> BuildkiteStep:
buildkite_step = BuildkiteStep(
label=step.label,
key=get_step_key(step.label),
label=step.label,
key=get_step_key(step.label),
parallelism=step.parallelism,
soft_fail=step.soft_fail,
plugins=[self.get_plugin_config(step)],
Expand All @@ -132,10 +137,10 @@ def create_buildkite_step(self, step: TestStep) -> BuildkiteStep:

def _configure_multi_node_step(self, current_step: BuildkiteStep, step: TestStep):
current_step.commands = [get_multi_node_test_command(
step.commands,
step.working_dir,
step.num_nodes,
step.num_gpus,
step.commands,
step.working_dir,
step.num_nodes,
step.num_gpus,
f"{VLLM_ECR_REPO}:{self.commit}"
)
]
Expand Down Expand Up @@ -190,22 +195,23 @@ def _mirror_amd_test_steps(self, test_steps: List[TestStep]) -> List[BuildkiteSt
if test_step.mirror_hardwares and "amd" in test_step.mirror_hardwares:
test_commands = [test_step.command] if test_step.command else test_step.commands
amd_test_command = [
"bash",
".buildkite/run-amd-test.sh",
"bash",
".buildkite/run-amd-test.sh",
f"'{get_full_test_command(test_commands, test_step.working_dir)}'",
]
mirrored_buildkite_step = BuildkiteStep(
label = f"AMD: {test_step.label}",
key = f"amd_{get_step_key(test_step.label)}",
depends_on = "amd-build",
agents = {"queue": AgentQueue.AMD_GPU.value},
soft_fail = test_step.soft_fail,
env = {"DOCKER_BUILDKIT": "1"},
commands = [" ".join(amd_test_command)],
label=f"AMD: {test_step.label}",
key=f"amd_{get_step_key(test_step.label)}",
depends_on="amd-build",
agents={"queue": AgentQueue.AMD_GPU.value},
soft_fail=test_step.soft_fail,
env={"DOCKER_BUILDKIT": "1"},
commands=[" ".join(amd_test_command)],
)
mirrored_buildkite_steps.append(mirrored_buildkite_step)
return mirrored_buildkite_steps


@click.command()
@click.option("--run_all", type=str)
@click.option("--list_file_diff", type=str)
Expand Down
1 change: 1 addition & 0 deletions scripts/pipeline_generator/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
]
DEFAULT_KUBERNETES_NODE_SELECTOR = {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"}


class DockerPluginConfig(BaseModel):
"""
Configuration for Docker plugin running in a Buildkite step.
Expand Down
5 changes: 4 additions & 1 deletion scripts/pipeline_generator/step.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from pydantic import BaseModel, Field
from pydantic import BaseModel
from typing import List, Dict, Any, Optional

from .utils import AgentQueue

BUILD_STEP_KEY = "build"


class TestStep(BaseModel):
"""This class represents a test step defined in the test configuration file."""
label: str
Expand All @@ -22,6 +23,7 @@ class TestStep(BaseModel):
command: Optional[str] = None
commands: Optional[List[str]] = None


class BuildkiteStep(BaseModel):
"""This class represents a step in Buildkite format."""
label: str
Expand All @@ -35,6 +37,7 @@ class BuildkiteStep(BaseModel):
env: Optional[Dict[str, str]] = None
retry: Optional[Dict[str, Any]] = None


class BuildkiteBlockStep(BaseModel):
"""This class represents a block step in Buildkite format."""
block: str
Expand Down
69 changes: 43 additions & 26 deletions scripts/tests/pipeline_generator/test_pipeline_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@
from scripts.pipeline_generator.step import TestStep, BuildkiteStep, BuildkiteBlockStep
from scripts.pipeline_generator.utils import (
AgentQueue,
get_full_test_command,
VLLM_ECR_REPO,
MULTI_NODE_TEST_SCRIPT,
)
from scripts.pipeline_generator.plugin import DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, DEFAULT_DOCKER_VOLUMES, DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, DEFAULT_KUBERNETES_NODE_SELECTOR, DEFAULT_KUBERNETES_POD_VOLUMES
from scripts.pipeline_generator.plugin import (
DEFAULT_DOCKER_ENVIRONMENT_VARIBLES,
DEFAULT_DOCKER_VOLUMES,
DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS,
DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES,
DEFAULT_KUBERNETES_NODE_SELECTOR,
DEFAULT_KUBERNETES_POD_VOLUMES,
)

TEST_COMMIT = "123456789abcdef123456789abcdef123456789a"
TEST_FILE_PATH = "scripts/tests/pipeline_generator/tests.yaml"
Expand All @@ -35,7 +41,7 @@ def test_read_test_steps():
assert steps[2].num_gpus == 2
assert steps[2].num_nodes == 2
assert steps[3].gpu == "a100"
assert steps[3].optional == True
assert steps[3].optional is True


@pytest.mark.parametrize(
Expand Down Expand Up @@ -96,7 +102,6 @@ def test_read_test_steps():
)
def test_get_plugin_config(test_step, expected_plugin_config):
pipeline_generator = get_test_pipeline_generator()
container_image_path = f"{VLLM_ECR_REPO}:{TEST_COMMIT}"

plugin_config = pipeline_generator.get_plugin_config(test_step)
assert plugin_config == expected_plugin_config
Expand Down Expand Up @@ -153,7 +158,7 @@ def test_get_plugin_config(test_step, expected_plugin_config):
"podSpec": {
"containers": [
{
"image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a",
"image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}",
"command": [
'bash -c "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2"'
],
Expand Down Expand Up @@ -184,9 +189,11 @@ def test_get_plugin_config(test_step, expected_plugin_config):
label="Test 2",
key="test-2",
agents={"queue": AgentQueue.AWS_4xL4.value},
commands=[f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'"],
commands=[
f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'"
],
),
)
),
],
)
def test_create_buildkite_step(test_step, expected_buildkite_step):
Expand All @@ -195,6 +202,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
buildkite_step = pipeline_generator.create_buildkite_step(test_step)
assert buildkite_step == expected_buildkite_step


@pytest.mark.parametrize(
("test_step", "expected_value_without_runall", "expected_value_with_runall"),
[
Expand All @@ -205,15 +213,15 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
commands=["test command 1", "test command 2"],
),
True,
True
True,
),
(
TestStep(
label="Test 0",
commands=["test command 1", "test command 2"],
),
True,
True
True,
),
(
TestStep(
Expand All @@ -222,7 +230,7 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
commands=["test command 1", "test command 2"],
),
False,
True
True,
),
(
TestStep(
Expand All @@ -233,19 +241,24 @@ def test_create_buildkite_step(test_step, expected_buildkite_step):
num_gpus=4,
),
False,
False
False,
),
],
)
def test_step_should_run(
    test_step, expected_value_without_runall, expected_value_with_runall
):
    """Check ``step_should_run`` for one parametrized step, without and with ``run_all``.

    The generator's ``list_file_diff`` is pinned to two changed files before the
    first check; ``run_all`` is then switched on and the step is checked again.
    """
    pipeline_generator = get_test_pipeline_generator()
    pipeline_generator.list_file_diff = ["dir1/a.py", "dir3/file2.py"]
    assert (
        pipeline_generator.step_should_run(test_step) == expected_value_without_runall
    )

    # With run_all
    pipeline_generator.run_all = True
    assert pipeline_generator.step_should_run(test_step) == expected_value_with_runall


@pytest.mark.parametrize(
("test_step", "expected_buildkite_steps"),
[
Expand Down Expand Up @@ -279,7 +292,7 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu
}
],
),
]
],
),
# Test doesn't automatically run because dependencies are not matched -> with block step
(
Expand Down Expand Up @@ -314,34 +327,38 @@ def test_step_should_run(test_step, expected_value_without_runall, expected_valu
}
],
),
]
)
]
],
),
],
)
def test_process_step(test_step, expected_buildkite_steps):
    """Check that ``process_step`` returns the expected Buildkite steps for a test step."""
    generator = get_test_pipeline_generator()
    assert generator.process_step(test_step) == expected_buildkite_steps


def test_generate_build_step():
    """Check that ``generate_build_step`` returns the expected Docker build step.

    ``get_build_commands`` is replaced with a mock so that the commands in the
    generated step are fixed and can be compared exactly.
    """
    pipeline_generator = get_test_pipeline_generator()
    pipeline_generator.get_build_commands = mock.MagicMock(
        return_value=["build command 1", "build command 2"]
    )
    build_step = pipeline_generator.generate_build_step()
    expected_build_step = BuildkiteStep(
        label=":docker: build image",
        key="build",
        agents={"queue": AgentQueue.AWS_CPU.value},
        env={"DOCKER_BUILDKIT": "1"},
        retry={
            "automatic": [
                {"exit_status": -1, "limit": 2},
                {"exit_status": -10, "limit": 2},
            ]
        },
        commands=["build command 1", "build command 2"],
        depends_on=None,
    )
    assert build_step == expected_build_step


if __name__ == "__main__":
    # Run this file's tests verbosely and use pytest's result as the exit status.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)

0 comments on commit a1e3081

Please sign in to comment.