Skip to content

Commit

Permalink
feat(ray): support shorter downscale config for test models (#234)
Browse files Browse the repository at this point in the history
Because

- `30` minutes downscale delay is not suitable for testing scenario

This commit

- support shorter downscale config for test models
  • Loading branch information
heiruwu authored Oct 15, 2024
1 parent b19a59c commit 954f94b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
1 change: 1 addition & 0 deletions instill/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint: disable=no-name-in-module
from instill.helpers.protobufs.ray_pb2 import CallRequest, CallResponse
from instill.helpers.ray_config import InstillDeployable, instill_deployment
from instill.helpers.ray_io import (
construct_custom_output,
construct_task_chat_output,
Expand Down
15 changes: 9 additions & 6 deletions instill/helpers/ray_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
class InstillDeployable:
def __init__(self, deployable: Deployment) -> None:
self._deployment: Deployment = deployable
self._autoscaling_config = DEFAULT_AUTOSCALING_CONFIG.copy()

num_of_cpus = os.getenv(ENV_NUM_OF_CPUS)
if num_of_cpus is not None and num_of_cpus != "":
Expand All @@ -39,6 +40,7 @@ def __init__(self, deployable: Deployment) -> None:
is_test_model = os.getenv(ENV_IS_TEST_MODEL)
if is_test_model is not None and is_test_model.lower() == "true":
self._update_num_cpus(float(0.001))
self._update_downscale_delay(60)

memory = os.getenv(ENV_MEMORY)
if memory is not None and memory != "":
Expand Down Expand Up @@ -164,23 +166,24 @@ def _update_custom_resource(self, resource_name: str, ratio=0.001):
return self

def _update_min_replicas(self, num_replicas: int):
    """Set the autoscaling ``min_replicas`` and re-apply the config.

    The value is written into this instance's ``_autoscaling_config``
    instead of the module-level ``DEFAULT_AUTOSCALING_CONFIG``, so the
    change neither leaks into the shared default nor discards other keys
    already stored on this instance's config.

    Args:
        num_replicas: Value stored under the ``"min_replicas"`` key.

    Returns:
        This instance, so that update calls can be chained.
    """
    self._autoscaling_config["min_replicas"] = num_replicas
    # Re-issue the whole per-instance config; ``options`` returns a new
    # deployment object, which replaces the one held here.
    self._deployment = self._deployment.options(
        autoscaling_config=self._autoscaling_config
    )

    return self

def _update_max_replicas(self, num_replicas: int):
    """Set the autoscaling ``max_replicas`` and re-apply the config.

    The value is written into this instance's ``_autoscaling_config``
    instead of the module-level ``DEFAULT_AUTOSCALING_CONFIG``, so the
    change neither leaks into the shared default nor discards other keys
    already stored on this instance's config.

    Args:
        num_replicas: Value stored under the ``"max_replicas"`` key.

    Returns:
        This instance, so that update calls can be chained.
    """
    self._autoscaling_config["max_replicas"] = num_replicas
    # Re-issue the whole per-instance config; ``options`` returns a new
    # deployment object, which replaces the one held here.
    self._deployment = self._deployment.options(
        autoscaling_config=self._autoscaling_config
    )

    return self

def _update_downscale_delay(self, downscale_delay_s: int):
    """Set the autoscaling ``downscale_delay_s`` and re-apply the config.

    Storing the value in ``_autoscaling_config`` alone does not change the
    wrapped deployment; the config has to be passed through ``options`` for
    it to take effect, as the min/max replica updates do.

    Args:
        downscale_delay_s: Value stored under the ``"downscale_delay_s"``
            key (seconds, going by the ``_s`` suffix).

    Returns:
        This instance, so that update calls can be chained.
    """
    self._autoscaling_config["downscale_delay_s"] = downscale_delay_s
    # Apply immediately so the delay does not depend on a later replica
    # update happening to re-send the config.
    self._deployment = self._deployment.options(
        autoscaling_config=self._autoscaling_config
    )

    return self

def get_deployment_handle(self):
    """Return the result of calling ``bind()`` on the wrapped deployment."""
    deployment = self._deployment
    return deployment.bind()

Expand Down

0 comments on commit 954f94b

Please sign in to comment.