
refactor(ray): make model resource config private (#132)
Because

- we will not expose the model resource config through the model decorator

This commit

- makes the resource config methods private
- updates the default autoscaling config
heiruwu authored Apr 10, 2024
1 parent 84dba68 commit 5c2bf34
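The net effect on user code, reconstructed from the sample diffs below (TinyLlama is one of the bundled sample models; the replica and resource values are taken from the removed lines):

# Before #132: resources configured by chaining the public update_* methods
entrypoint = (
    InstillDeployable(TinyLlama)
    .update_max_replicas(4)
    .update_min_replicas(0)
    .update_num_cpus(4)
    .update_memory(4 * (1024 * 1024 * 1024))
    .get_deployment_handle()
)

# After #132: the update_* methods are private; defaults and environment
# variables read in InstillDeployable.__init__ supply the resource config
entrypoint = InstillDeployable(TinyLlama).get_deployment_handle()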
Showing 7 changed files with 36 additions and 51 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/test.yml
@@ -38,12 +38,12 @@ jobs:
       - name: Test code
         run: make test
 
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v3
-        with:
-          directory: ./
-          env_vars: OS,PYTHON
-          fail_ci_if_error: true
-          files: ./coverage.xml
-          name: codecov-umbrella
-          verbose: true
+      # - name: Upload coverage to Codecov
+      #   uses: codecov/codecov-action@v3
+      #   with:
+      #     directory: ./
+      #     env_vars: OS,PYTHON
+      #     fail_ci_if_error: true
+      #     files: ./coverage.xml
+      #     name: codecov-umbrella
+      #     verbose: true
6 changes: 3 additions & 3 deletions instill/helpers/const.py
@@ -85,12 +85,12 @@ class VisualQuestionAnsweringInput:
     "num_cpus": 2,
 }
 DEFAULT_AUTOSCALING_CONFIG = {
-    "target_num_ongoing_requests_per_replica": 1,
+    "target_num_ongoing_requests_per_replica": 2,
     "initial_replicas": 1,
     "min_replicas": 0,
     "max_replicas": 10,
-    "upscale_delay_s": 4,
-    "downscale_delay_s": 600,
+    "upscale_delay_s": 180,
+    "downscale_delay_s": 120,
     "smoothing_factor": 1.0,
     "upscale_smoothing_factor": 0.8,
     "downscale_smoothing_factor": 0.8,
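The new defaults make autoscaling noticeably more conservative: each replica now targets two in-flight requests instead of one, upscaling waits 180 s (previously 4 s), and downscaling reacts after 120 s (previously 600 s). For reference, a minimal sketch of how a dict with these keys plugs into Ray Serve, assuming the standard autoscaling_config deployment option (the Echo class is hypothetical, not part of this repository):

from ray import serve

# Hypothetical toy deployment illustrating where values like those in
# DEFAULT_AUTOSCALING_CONFIG are consumed by Ray Serve
@serve.deployment(
    autoscaling_config={
        "target_num_ongoing_requests_per_replica": 2,  # new default
        "min_replicas": 0,
        "max_replicas": 10,
        "upscale_delay_s": 180,    # was 4
        "downscale_delay_s": 120,  # was 600
    }
)
class Echo:
    async def __call__(self, request) -> str:
        return "ok"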
49 changes: 24 additions & 25 deletions instill/helpers/ray_config.py
@@ -29,39 +29,38 @@ class InstillDeployable:
     def __init__(self, deployable: Deployment) -> None:
         self._deployment: Deployment = deployable
 
-        accelerator_type = os.getenv(ENV_RAY_ACCELERATOR_TYPE)
-        if accelerator_type is not None and accelerator_type != "":
-            self.update_accelerator_type(accelerator_type)
+        num_of_cpus = os.getenv(ENV_NUM_OF_CPUS)
+        if num_of_cpus is not None and num_of_cpus != "":
+            self._update_num_cpus(float(num_of_cpus))
+        else:
+            self._update_num_cpus(1)
 
         num_of_gpus = os.getenv(ENV_NUM_OF_GPUS)
+        vram = os.getenv(ENV_TOTAL_VRAM)
         if num_of_gpus is not None and num_of_gpus != "":
-            self.update_num_gpus(float(num_of_gpus))
+            self._update_num_gpus(float(num_of_gpus))
+        elif vram is not None and vram != "":
+            self._update_num_gpus(self._determine_vram_usage(os.getcwd(), vram))
 
-        num_of_cpus = os.getenv(ENV_NUM_OF_CPUS)
-        if num_of_cpus is not None and num_of_cpus != "":
-            self.update_num_cpus(float(num_of_cpus))
-        else:
-            self.update_num_cpus(1)
+        accelerator_type = os.getenv(ENV_RAY_ACCELERATOR_TYPE)
+        if accelerator_type is not None and accelerator_type != "":
+            self._update_accelerator_type(accelerator_type)
 
         memory = os.getenv(ENV_MEMORY)
         if memory is not None and memory != "":
-            self.update_memory(float(memory))
+            self._update_memory(float(memory))
 
         num_of_min_replicas = os.getenv(ENV_NUM_OF_MIN_REPLICAS)
         if num_of_min_replicas is not None and num_of_min_replicas != "":
-            self.update_min_replicas(int(num_of_min_replicas))
+            self._update_min_replicas(int(num_of_min_replicas))
         else:
-            self.update_min_replicas(0)
+            self._update_min_replicas(0)
 
         num_of_max_replicas = os.getenv(ENV_NUM_OF_MAX_REPLICAS)
         if num_of_max_replicas is not None and num_of_max_replicas != "":
-            self.update_max_replicas(int(num_of_max_replicas))
+            self._update_max_replicas(int(num_of_max_replicas))
         else:
-            self.update_max_replicas(1)
-
-        vram = os.getenv(ENV_TOTAL_VRAM)
-        if vram is not None and vram != "":
-            self.update_num_gpus(self._determine_vram_usage(os.getcwd(), vram))
+            self._update_max_replicas(1)
 
     def _determine_vram_usage(self, model_path: str, total_vram: str):
         warn(
@@ -109,41 +108,41 @@ def _determine_ram_usage(self, model_path: str):
         )
         raise ModelPathException
 
-    def update_num_cpus(self, num_cpus: float):
+    def _update_num_cpus(self, num_cpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_cpus": num_cpus})
 
         return self
 
-    def update_memory(self, memory: float):
+    def _update_memory(self, memory: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"memory": memory})
 
         return self
 
-    def update_num_gpus(self, num_gpus: float):
+    def _update_num_gpus(self, num_gpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_gpus": num_gpus})
 
         return self
 
-    def update_accelerator_type(self, accelerator_type: str):
+    def _update_accelerator_type(self, accelerator_type: str):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update(
                 {"accelerator_type": accelerator_type}
             )
 
         return self
 
-    def update_num_custom_resource(self, resource_name: str, num: float):
+    def _update_num_custom_resource(self, resource_name: str, num: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update(
                 {"resources": {resource_name: num}}
             )
 
         return self
 
-    def update_min_replicas(self, num_replicas: int):
+    def _update_min_replicas(self, num_replicas: int):
         new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
         new_autoscaling_config["min_replicas"] = num_replicas
         self._deployment = self._deployment.options(
@@ -152,7 +151,7 @@ def update_min_replicas(self, num_replicas: int):
 
         return self
 
-    def update_max_replicas(self, num_replicas: int):
+    def _update_max_replicas(self, num_replicas: int):
         new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
         new_autoscaling_config["max_replicas"] = num_replicas
         self._deployment = self._deployment.options(
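With the public update_* methods gone, resource settings can only be injected through the environment variables read in __init__ above. A hedged usage sketch; the import paths of the ENV_* constants and the example values are assumptions (the defining module is outside this diff):

import os

# Assumed import locations; ray_config.py imports these names, but the
# modules that define them are not shown in this diff
from instill.helpers.const import ENV_NUM_OF_CPUS, ENV_NUM_OF_MAX_REPLICAS
from instill.helpers.ray_config import InstillDeployable
from model import TinyLlama  # one of the sample model classes

# Must be set before InstillDeployable.__init__ runs; the values are then
# applied through the now-private _update_* helpers
os.environ[ENV_NUM_OF_CPUS] = "4"
os.environ[ENV_NUM_OF_MAX_REPLICAS] = "4"

entrypoint = InstillDeployable(TinyLlama).get_deployment_handle()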
4 changes: 0 additions & 4 deletions samples/tinyllama-cpu/model.py
@@ -147,9 +147,5 @@ async def __call__(self, request):
 
 entrypoint = (
     InstillDeployable(TinyLlama)
-    .update_max_replicas(4)
-    .update_min_replicas(0)
-    .update_num_cpus(4)
-    .update_memory(4 * (1024 * 1024 * 1024))
     .get_deployment_handle()
 )
3 changes: 0 additions & 3 deletions samples/tinyllama-gpu/model.py
@@ -147,8 +147,5 @@ async def __call__(self, request):
 
 entrypoint = (
     InstillDeployable(TinyLlama)
-    .update_max_replicas(4)
-    .update_min_replicas(0)
-    .update_num_gpus(0.25)
     .get_deployment_handle()
 )
4 changes: 0 additions & 4 deletions samples/yolov7-cpu/model.py
@@ -407,9 +407,5 @@ async def __call__(self, req):
 
 entrypoint = (
     InstillDeployable(Yolov7)
-    .update_max_replicas(4)
-    .update_min_replicas(0)
-    .update_num_cpus(1)
-    .update_memory(4 * (1024 * 1024 * 1024))
     .get_deployment_handle()
 )
3 changes: 0 additions & 3 deletions samples/yolov7-gpu/model.py
@@ -409,8 +409,5 @@ async def __call__(self, req):
 
 entrypoint = (
     InstillDeployable(Yolov7)
-    .update_max_replicas(4)
-    .update_min_replicas(0)
-    .update_num_gpus(0.25)
     .get_deployment_handle()
 )
