From 582d1cda9c098997d8c6f01303981520a274990c Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Tue, 24 Sep 2024 17:18:44 -0700
Subject: [PATCH] Pipeline generator plugins (#33)

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* add req

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

* p

Signed-off-by: kevin

---------

Signed-off-by: kevin
---
 scripts/pipeline_generator/plugin.py          | 125 ++++++++++++++++++
 .../tests/pipeline_generator/test_plugin.py   | 116 +++++++++++++++++
 2 files changed, 241 insertions(+)
 create mode 100644 scripts/pipeline_generator/plugin.py
 create mode 100644 scripts/tests/pipeline_generator/test_plugin.py

diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py
new file mode 100644
index 0000000..db093bc
--- /dev/null
+++ b/scripts/pipeline_generator/plugin.py
@@ -0,0 +1,125 @@
+from pydantic import BaseModel, Field
+from typing import List, Dict, Any, Optional
+
+from .utils import HF_HOME
+
+DOCKER_PLUGIN_NAME = "docker#v5.2.0"
+KUBERNETES_PLUGIN_NAME = "kubernetes"
+
+
+class DockerPluginConfig(BaseModel):
+    """
+    Configuration for Docker plugin running in a Buildkite step.
+    The specification is based on:
+    https://github.com/buildkite-plugins/docker-buildkite-plugin?tab=readme-ov-file#configuration
+    """
+    image: str = ""
+    always_pull: bool = Field(default=True, alias="always-pull")
+    propagate_environment: bool = Field(default=True, alias="propagate-environment")
+    gpus: Optional[str] = "all"
+    mount_buildkite_agent: Optional[bool] = Field(default=False, alias="mount-buildkite-agent")
+    command: List[str] = Field(default_factory=list)
+    environment: List[str] = [
+        f"HF_HOME={HF_HOME}",
+        "VLLM_USAGE_SOURCE=ci-test",
+        "HF_TOKEN",
+        "BUILDKITE_ANALYTICS_TOKEN"
+    ]
+    volumes: List[str] = [
+        "/dev/shm:/dev/shm",
+        f"{HF_HOME}:{HF_HOME}"
+    ]
+
+
+class KubernetesPodContainerConfig(BaseModel):
+    """
+    Configuration for a container running in a Kubernetes pod.
+    """
+    image: str
+    command: List[str]
+    resources: Dict[str, Dict[str, int]]
+    volume_mounts: List[Dict[str, str]] = Field(
+        alias="volumeMounts",
+        default=[
+            {"name": "devshm", "mountPath": "/dev/shm"},
+            {"name": "hf-cache", "mountPath": HF_HOME}
+        ]
+    )
+    # Values are Any, not str: the HF_TOKEN entry holds a nested "valueFrom" mapping.
+    env: List[Dict[str, Any]] = Field(
+        default=[
+            {"name": "HF_HOME", "value": HF_HOME},
+            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
+            {
+                "name": "HF_TOKEN",
+                "valueFrom": {
+                    "secretKeyRef": {
+                        "name": "hf-token-secret",
+                        "key": "token"
+                    }
+                }
+            },
+        ],
+    )
+
+
+class KubernetesPodSpec(BaseModel):
+    """
+    Configuration for a Kubernetes pod running in a Buildkite step.
+    """
+    containers: List[KubernetesPodContainerConfig]
+    priority_class_name: str = Field(default="ci", alias="priorityClassName")
+    node_selector: Dict[str, Any] = Field(
+        default={"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
+        alias="nodeSelector"
+    )
+    volumes: List[Dict[str, Any]] = Field(
+        default=[
+            {"name": "devshm", "emptyDir": {"medium": "Memory"}},
+            {"name": "hf-cache", "hostPath": {"path": HF_HOME, "type": "Directory"}}
+        ]
+    )
+
+
+class KubernetesPluginConfig(BaseModel):
+    """
+    Configuration for Kubernetes plugin running in a Buildkite step.
+    """
+    pod_spec: KubernetesPodSpec = Field(alias="podSpec")
+
+
+def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[str], num_gpus: int) -> Dict:
+    """
+    Build the Kubernetes plugin entry for a Buildkite step.
+
+    The tokens of test_bash_command are joined with spaces into a single
+    command string for one container that requests num_gpus GPUs.
+    Returns a dict keyed by KUBERNETES_PLUGIN_NAME, serialized by alias.
+    """
+    pod_spec = KubernetesPodSpec(
+        containers=[
+            KubernetesPodContainerConfig(
+                image=container_image,
+                command=[" ".join(test_bash_command)],
+                resources={"limits": {"nvidia.com/gpu": num_gpus}}
+            )
+        ]
+    )
+    return {KUBERNETES_PLUGIN_NAME: KubernetesPluginConfig(podSpec=pod_spec).dict(by_alias=True)}
+
+
+def get_docker_plugin_config(docker_image_path: str, test_bash_command: List[str], no_gpu: bool) -> Dict:
+    """
+    Build the Docker plugin entry for a Buildkite step.
+
+    When no_gpu is true, gpus is set to None and therefore omitted, since
+    None-valued options are excluded from the serialized output.
+    Returns a dict keyed by DOCKER_PLUGIN_NAME, serialized by alias.
+    """
+    docker_plugin_config = DockerPluginConfig(
+        image=docker_image_path,
+        command=test_bash_command
+    )
+    if no_gpu:
+        docker_plugin_config.gpus = None
+    return {DOCKER_PLUGIN_NAME: docker_plugin_config.dict(exclude_none=True, by_alias=True)}
diff --git a/scripts/tests/pipeline_generator/test_plugin.py b/scripts/tests/pipeline_generator/test_plugin.py
new file mode 100644
index 0000000..7e85ac0
--- /dev/null
+++ b/scripts/tests/pipeline_generator/test_plugin.py
@@ -0,0 +1,116 @@
+import pytest
+import sys
+
+from scripts.pipeline_generator.plugin import (
+    get_kubernetes_plugin_config,
+    get_docker_plugin_config,
+    DOCKER_PLUGIN_NAME,
+    KUBERNETES_PLUGIN_NAME,
+)
+
+
+def test_get_kubernetes_plugin_config():
+    docker_image_path = "test_image:latest"
+    test_bash_command = ["echo", "Hello, Kubernetes!"]
+    num_gpus = 1
+
+    expected_config = {
+        KUBERNETES_PLUGIN_NAME: {
+            "podSpec": {
+                "containers": [
+                    {
+                        "image": docker_image_path,
+                        "command": [" ".join(test_bash_command)],
+                        "resources": {"limits": {"nvidia.com/gpu": num_gpus}},
+                        "volumeMounts": [
+                            {"name": "devshm", "mountPath": "/dev/shm"},
+                            {"name": "hf-cache", "mountPath": "/root/.cache/huggingface"}
+                        ],
+                        "env": [
+                            {"name": "HF_HOME", "value": "/root/.cache/huggingface"},
+                            {"name": "VLLM_USAGE_SOURCE", "value": "ci-test"},
+                            {
+                                "name": "HF_TOKEN",
+                                "valueFrom": {
+                                    "secretKeyRef": {
+                                        "name": "hf-token-secret",
+                                        "key": "token"
+                                    }
+                                }
+                            },
+                        ],
+                    }
+                ],
+                "priorityClassName": "ci",
+                "nodeSelector": {"nvidia.com/gpu.product": "NVIDIA-A100-SXM4-80GB"},
+                "volumes": [
+                    {"name": "devshm", "emptyDir": {"medium": "Memory"}},
+                    {"name": "hf-cache", "hostPath": {"path": "/root/.cache/huggingface", "type": "Directory"}}
+                ]
+            }
+        }
+    }
+
+    assert get_kubernetes_plugin_config(docker_image_path, test_bash_command, num_gpus) == expected_config
+
+
+@pytest.mark.parametrize(
+    "docker_image_path, test_bash_command, no_gpu, expected_config",
+    [
+        (
+            "test_image:latest",
+            ["bash", "-c", "echo A;\npytest -v -s a.py"],
+            False,
+            {
+                DOCKER_PLUGIN_NAME: {
+                    "image": "test_image:latest",
+                    "always-pull": True,
+                    "propagate-environment": True,
+                    "gpus": "all",
+                    "command": ["bash", "-c", "echo A;\npytest -v -s a.py"],
+                    "environment": [
+                        "HF_HOME=/root/.cache/huggingface",
+                        "VLLM_USAGE_SOURCE=ci-test",
+                        "HF_TOKEN",
+                        "BUILDKITE_ANALYTICS_TOKEN"
+                    ],
+                    "mount-buildkite-agent": False,
+                    "volumes": [
+                        "/dev/shm:/dev/shm",
+                        "/root/.cache/huggingface:/root/.cache/huggingface"
+                    ]
+                }
+            }
+        ),
+        (
+            "cpu_image:latest",
+            ["bash", "-c", "echo B;\npytest -v -s b.py"],
+            True,
+            {
+                DOCKER_PLUGIN_NAME: {
+                    "image": "cpu_image:latest",
+                    "always-pull": True,
+                    "propagate-environment": True,
+                    "command": ["bash", "-c", "echo B;\npytest -v -s b.py"],
+                    "environment": [
+                        "HF_HOME=/root/.cache/huggingface",
+                        "VLLM_USAGE_SOURCE=ci-test",
+                        "HF_TOKEN",
+                        "BUILDKITE_ANALYTICS_TOKEN"
+                    ],
+                    "mount-buildkite-agent": False,
+                    "volumes": [
+                        "/dev/shm:/dev/shm",
+                        "/root/.cache/huggingface:/root/.cache/huggingface"
+                    ]
+                }
+            }
+        ),
+    ]
+)
+def test_get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu, expected_config):
+    assert get_docker_plugin_config(docker_image_path, test_bash_command, no_gpu) == expected_config
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-v", __file__]))