From 5c2bf34532c67b75e62e5892bd028b86f3977bfb Mon Sep 17 00:00:00 2001 From: HR Wu <5631010+heiruwu@users.noreply.github.com> Date: Thu, 11 Apr 2024 00:24:07 +0800 Subject: [PATCH] refactor(ray): make model resource config private (#132) Because - we will not expose model resource config through model decorator This commit - make resource config methods private - update default autoscaling config --- .github/workflows/test.yml | 18 ++++++------- instill/helpers/const.py | 6 ++--- instill/helpers/ray_config.py | 49 +++++++++++++++++----------------- samples/tinyllama-cpu/model.py | 4 --- samples/tinyllama-gpu/model.py | 3 --- samples/yolov7-cpu/model.py | 4 --- samples/yolov7-gpu/model.py | 3 --- 7 files changed, 36 insertions(+), 51 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c637ad7..f5b99d1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,12 +38,12 @@ jobs: - name: Test code run: make test - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - directory: ./ - env_vars: OS,PYTHON - fail_ci_if_error: true - files: ./coverage.xml - name: codecov-umbrella - verbose: true + # - name: Upload coverage to Codecov + # uses: codecov/codecov-action@v3 + # with: + # directory: ./ + # env_vars: OS,PYTHON + # fail_ci_if_error: true + # files: ./coverage.xml + # name: codecov-umbrella + # verbose: true diff --git a/instill/helpers/const.py b/instill/helpers/const.py index 416ee7f..03eac93 100644 --- a/instill/helpers/const.py +++ b/instill/helpers/const.py @@ -85,12 +85,12 @@ class VisualQuestionAnsweringInput: "num_cpus": 2, } DEFAULT_AUTOSCALING_CONFIG = { - "target_num_ongoing_requests_per_replica": 1, + "target_num_ongoing_requests_per_replica": 2, "initial_replicas": 1, "min_replicas": 0, "max_replicas": 10, - "upscale_delay_s": 4, - "downscale_delay_s": 600, + "upscale_delay_s": 180, + "downscale_delay_s": 120, "smoothing_factor": 1.0, "upscale_smoothing_factor": 0.8, "downscale_smoothing_factor": 0.8, diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py index 5d5bd6b..619243e 100644 --- a/instill/helpers/ray_config.py +++ b/instill/helpers/ray_config.py @@ -29,39 +29,38 @@ class InstillDeployable: def __init__(self, deployable: Deployment) -> None: self._deployment: Deployment = deployable - accelerator_type = os.getenv(ENV_RAY_ACCELERATOR_TYPE) - if accelerator_type is not None and accelerator_type != "": - self.update_accelerator_type(accelerator_type) + num_of_cpus = os.getenv(ENV_NUM_OF_CPUS) + if num_of_cpus is not None and num_of_cpus != "": + self._update_num_cpus(float(num_of_cpus)) + else: + self._update_num_cpus(1) num_of_gpus = os.getenv(ENV_NUM_OF_GPUS) + vram = os.getenv(ENV_TOTAL_VRAM) if num_of_gpus is not None and num_of_gpus != "": - self.update_num_gpus(float(num_of_gpus)) + self._update_num_gpus(float(num_of_gpus)) + elif vram is not None and vram != "": + self._update_num_gpus(self._determine_vram_usage(os.getcwd(), vram)) - num_of_cpus = os.getenv(ENV_NUM_OF_CPUS) - if num_of_cpus is not None and num_of_cpus != "": - self.update_num_cpus(float(num_of_cpus)) - else: - self.update_num_cpus(1) + accelerator_type = os.getenv(ENV_RAY_ACCELERATOR_TYPE) + if accelerator_type is not None and accelerator_type != "": + self._update_accelerator_type(accelerator_type) memory = os.getenv(ENV_MEMORY) if memory is not None and memory != "": - self.update_memory(float(memory)) + self._update_memory(float(memory)) num_of_min_replicas = os.getenv(ENV_NUM_OF_MIN_REPLICAS) if num_of_min_replicas is not None and num_of_min_replicas != "": - self.update_min_replicas(int(num_of_min_replicas)) + self._update_min_replicas(int(num_of_min_replicas)) else: - self.update_min_replicas(0) + self._update_min_replicas(0) num_of_max_replicas = os.getenv(ENV_NUM_OF_MAX_REPLICAS) if num_of_max_replicas is not None and num_of_max_replicas != "": - self.update_max_replicas(int(num_of_max_replicas)) + self._update_max_replicas(int(num_of_max_replicas)) else: - self.update_max_replicas(1) - - vram = os.getenv(ENV_TOTAL_VRAM) - if vram is not None and vram != "": - self.update_num_gpus(self._determine_vram_usage(os.getcwd(), vram)) + self._update_max_replicas(1) def _determine_vram_usage(self, model_path: str, total_vram: str): warn( @@ -109,25 +108,25 @@ def _determine_ram_usage(self, model_path: str): ) raise ModelPathException - def update_num_cpus(self, num_cpus: float): + def _update_num_cpus(self, num_cpus: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"num_cpus": num_cpus}) return self - def update_memory(self, memory: float): + def _update_memory(self, memory: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"memory": memory}) return self - def update_num_gpus(self, num_gpus: float): + def _update_num_gpus(self, num_gpus: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update({"num_gpus": num_gpus}) return self - def update_accelerator_type(self, accelerator_type: str): + def _update_accelerator_type(self, accelerator_type: str): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update( {"accelerator_type": accelerator_type} @@ -135,7 +134,7 @@ def update_accelerator_type(self, accelerator_type: str): return self - def update_num_custom_resource(self, resource_name: str, num: float): + def _update_num_custom_resource(self, resource_name: str, num: float): if self._deployment.ray_actor_options is not None: self._deployment.ray_actor_options.update( {"resources": {resource_name: num}} @@ -143,7 +142,7 @@ def update_num_custom_resource(self, resource_name: str, num: float): return self - def update_min_replicas(self, num_replicas: int): + def _update_min_replicas(self, num_replicas: int): new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG new_autoscaling_config["min_replicas"] = num_replicas self._deployment = self._deployment.options( @@ -152,7 +151,7 @@ def update_min_replicas(self, num_replicas: int): return self - def update_max_replicas(self, num_replicas: int): + def _update_max_replicas(self, num_replicas: int): new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG new_autoscaling_config["max_replicas"] = num_replicas self._deployment = self._deployment.options( diff --git a/samples/tinyllama-cpu/model.py b/samples/tinyllama-cpu/model.py index 52fdfa7..6330676 100644 --- a/samples/tinyllama-cpu/model.py +++ b/samples/tinyllama-cpu/model.py @@ -147,9 +147,5 @@ async def __call__(self, request): entrypoint = ( InstillDeployable(TinyLlama) - .update_max_replicas(4) - .update_min_replicas(0) - .update_num_cpus(4) - .update_memory(4 * (1024 * 1024 * 1024)) .get_deployment_handle() ) diff --git a/samples/tinyllama-gpu/model.py b/samples/tinyllama-gpu/model.py index 39382a7..985b6f5 100644 --- a/samples/tinyllama-gpu/model.py +++ b/samples/tinyllama-gpu/model.py @@ -147,8 +147,5 @@ async def __call__(self, request): entrypoint = ( InstillDeployable(TinyLlama) - .update_max_replicas(4) - .update_min_replicas(0) - .update_num_gpus(0.25) .get_deployment_handle() ) diff --git a/samples/yolov7-cpu/model.py b/samples/yolov7-cpu/model.py index 83e9e3f..9f41f89 100644 --- a/samples/yolov7-cpu/model.py +++ b/samples/yolov7-cpu/model.py @@ -407,9 +407,5 @@ async def __call__(self, req): entrypoint = ( InstillDeployable(Yolov7) - .update_max_replicas(4) - .update_min_replicas(0) - .update_num_cpus(1) - .update_memory(4 * (1024 * 1024 * 1024)) .get_deployment_handle() ) diff --git a/samples/yolov7-gpu/model.py b/samples/yolov7-gpu/model.py index c1bf201..ac44aed 100644 --- a/samples/yolov7-gpu/model.py +++ b/samples/yolov7-gpu/model.py @@ -409,8 +409,5 @@ async def __call__(self, req): entrypoint = ( InstillDeployable(Yolov7) - .update_max_replicas(4) - .update_min_replicas(0) - .update_num_gpus(0.25) .get_deployment_handle() )