From c5a7b275a9aa241d62aa2cb6d1025a46e88f9729 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 22 Aug 2024 09:22:33 -0700 Subject: [PATCH 01/11] DREIMP-10951: autoscale vtgate deployments using HPA --- paasta_tools/setup_kubernetes_cr.py | 4 + paasta_tools/vitesscluster_tools.py | 306 +++++++++++++++++++++++++++- 2 files changed, 303 insertions(+), 7 deletions(-) diff --git a/paasta_tools/setup_kubernetes_cr.py b/paasta_tools/setup_kubernetes_cr.py index 5972b4da72..ea7179704c 100644 --- a/paasta_tools/setup_kubernetes_cr.py +++ b/paasta_tools/setup_kubernetes_cr.py @@ -362,6 +362,7 @@ def reconcile_kubernetes_resource( instance=inst, cluster=cluster, soa_dir=DEFAULT_SOA_DIR, + kube_client=kube_client, ) git_sha = get_git_sha_from_dockerurl(soa_config.get_docker_url(), long=True) formatted_resource = format_custom_resource( @@ -414,6 +415,9 @@ def reconcile_kubernetes_resource( ) else: log.info(f"{desired_resource} is up to date, no action taken") + log.info(f"Ensuring related API objects for {desired_resource} are in sync") + if hasattr(soa_config, "update_related_api_objects"): + soa_config.update_related_api_objects(kube_client) except Exception as e: log.error(str(e)) succeeded = False diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 7d0676334f..af9741be46 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -11,12 +11,25 @@ import service_configuration_lib from kubernetes.client import ApiClient - +from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1OwnerReference +from kubernetes.client import V2beta2CrossVersionObjectReference +from kubernetes.client import V2beta2HorizontalPodAutoscaler +from kubernetes.client import V2beta2HorizontalPodAutoscalerSpec +from kubernetes.client import V2beta2MetricSpec +from kubernetes.client import V2beta2MetricTarget +from kubernetes.client import V2beta2ResourceMetricSource +from kubernetes.client.rest import ApiException + +from paasta_tools.kubernetes_tools import get_cr +from paasta_tools.kubernetes_tools import KubeClient from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig from paasta_tools.kubernetes_tools import KubernetesDeploymentConfigDict from paasta_tools.kubernetes_tools import limit_size_with_hash +from paasta_tools.kubernetes_tools import paasta_prefixed from paasta_tools.kubernetes_tools import sanitised_cr_name from paasta_tools.long_running_service_tools import load_service_namespace_config +from paasta_tools.long_running_service_tools import DEFAULT_AUTOSCALING_SETPOINT from paasta_tools.utils import BranchDictV2 from paasta_tools.utils import deep_merge_dictionaries from paasta_tools.utils import DEFAULT_SOA_DIR @@ -125,6 +138,8 @@ class RequestsDict(TypedDict, total=False): class ResourceConfigDict(TypedDict, total=False): replicas: int + min_instances: int + max_instances: int requests: Dict[str, RequestsDict] limits: Dict[str, RequestsDict] @@ -164,6 +179,8 @@ class VtAdminConfigDict(TypedDict, total=False): extraFlags: Dict[str, str] extraLabels: Dict[str, str] replicas: int + min_instances: Optional[int] + max_instances: Optional[int] readOnly: bool apiResources: Dict[str, Any] webResources: Dict[str, Any] @@ -242,7 +259,6 @@ def get_cell_config( """ get vtgate config """ - replicas = vtgate_resources.get("replicas") requests = vtgate_resources.get( "requests", RequestsDict(cpu="100m", memory="256Mi") ) @@ -288,7 +304,9 @@ def get_cell_config( "mysql_auth_vault_ttl": "60s", }, extraLabels=labels, 
- replicas=replicas, + replicas=vtgate_resources.get("replicas"), + min_instances=vtgate_resources.get("min_instances"), + max_instances=vtgate_resources.get("max_instances"), resources={ "requests": requests, "limits": requests, @@ -351,6 +369,7 @@ def get_vt_admin_config( get vtadmin config """ replicas = vtadmin_resources.get("replicas") + requests = vtadmin_resources.get( "requests", RequestsDict(cpu="100m", memory="256Mi") ) @@ -487,8 +506,8 @@ def get_tablet_pool_config( "name": "etc-srv-configs", "hostPath": {"path": "/nail/etc/srv-configs"}, }, - {"name": "vttablet-fake-credentials", "hostPath": {"path": "/dev/null"}}, - {"name": "keyspace-fake-init-script", "hostPath": {"path": "/dev/null"}}, + {"name": "vttablet-fake-credentials", "emptyDir": {}}, + {"name": "keyspace-fake-init-script", "emptyDir": {}}, ], replicas=replicas, vttablet={ @@ -834,7 +853,250 @@ def get_keyspaces(self) -> List[KeyspaceConfigDict]: def get_update_strategy(self) -> Dict[str, str]: return {"type": "Immediate"} - def get_vitess_config(self) -> VitessDeploymentConfigDict: + def is_vtgate_autoscaling_enabled(self, cell_config) -> bool: + return cell_config["gateway"]["max_instances"] is not None + + def get_desired_scalable( + self, + kube_client: KubeClient, + name: str, + owner_uid: str, + pod_selector: Dict[str, str], + ) -> Optional[Dict[str, Any]]: + return { + "apiVersion": "yelp.com/v1alpha1", + "kind": "Scalable", + "metadata": { + "name": name, + "namespace": self.get_namespace(), + "labels": { + paasta_prefixed("service"): self.service, + paasta_prefixed("instance"): self.instance, + paasta_prefixed("pool"): self.get_pool(), + paasta_prefixed("managed"): "true", + }, + "ownerReferences": [ + { + "apiVersion": "planetscale.com/v2", + "kind": "VitessCluster", + "uid": owner_uid, + "name": sanitised_cr_name(self.service, self.instance), + "controller": True, + "blockOwnerDeletion": True, + } + ], + }, + "spec": { + "selector": ",".join([f"{k}={v}" for k, v in pod_selector.items()]), + }, + } + + def get_desired_horizontal_pod_autoscaler( + self, + kube_client: KubeClient, + name: str, + owner_uid: str, + min_replicas: Optional[int], + max_replicas: Optional[int], + ) -> Optional[V2beta2HorizontalPodAutoscaler]: + # Returns None if an HPA should not be attached based on the config, + # or the config is invalid. + + if min_replicas == 0 or max_replicas == 0: + log.error( + f"Invalid value for min or max_instances on {name}: {min_replicas}, {max_replicas}" + ) + return None + + return V2beta2HorizontalPodAutoscaler( + kind="HorizontalPodAutoscaler", + metadata=V1ObjectMeta( + name=name, + namespace=self.get_namespace(), + annotations=dict(), + labels={ + paasta_prefixed("service"): self.service, + paasta_prefixed("instance"): self.instance, + paasta_prefixed("pool"): self.get_pool(), + paasta_prefixed("managed"): "true", + }, + owner_references=[ + V1OwnerReference( + api_version="planetscale.com/v2", + kind="VitessCluster", + name=sanitised_cr_name(self.service, self.instance), + uid=owner_uid, + controller=True, + block_owner_deletion=True, + ), + ], + ), + spec=V2beta2HorizontalPodAutoscalerSpec( + behavior={ + "scaleDown": { + "stabilizationWindowSeconds": 300, + # the policy in a human-readable way: scale down every 60s by + # at most 30% of current replicas. 
+ "selectPolicy": "Max", + "policies": [ + {"type": "Percent", "value": 30, "periodSeconds": 60} + ], + } + }, + max_replicas=max_replicas, + min_replicas=min_replicas, + metrics=[ + V2beta2MetricSpec( + type="Resource", + resource=V2beta2ResourceMetricSource( + name="cpu", + target=V2beta2MetricTarget( + type="Utilization", + average_utilization=int( + DEFAULT_AUTOSCALING_SETPOINT * 100 + ), + ), + ), + ) + ], + scale_target_ref=V2beta2CrossVersionObjectReference( + api_version="yelp.com/v1alpha1", kind="Scalable", name=name + ), + ), + ) + + def reconcile_vtgate_hpa( + self, + kube_client: KubeClient, + owner_uid: str, + cell_config: CellConfigDict, + ): + should_exist = self.is_vtgate_autoscaling_enabled(cell_config) + name = self.get_vtgate_hpa_name(cell_config["name"]) + exists = ( + len( + kube_client.autoscaling.list_namespaced_horizontal_pod_autoscaler( + field_selector=f"metadata.name={name}", + namespace=self.get_namespace(), + ).items + ) + > 0 + ) + + if should_exist: + hpa = self.get_desired_horizontal_pod_autoscaler( + kube_client=kube_client, + name=name, + owner_uid=owner_uid, + min_replicas=cell_config["gateway"]["min_instances"], + max_replicas=cell_config["gateway"]["max_instances"], + ) + if exists: + kube_client.autoscaling.replace_namespaced_horizontal_pod_autoscaler( + name=name, + namespace=self.get_namespace(), + body=hpa, + ) + else: + kube_client.autoscaling.create_namespaced_horizontal_pod_autoscaler( + namespace=self.get_namespace(), + body=hpa, + ) + elif exists: + kube_client.autoscaling.delete_namespaced_horizontal_pod_autoscaler( + name=name, + namespace=self.get_namespace(), + ) + return + + def get_vtgate_hpa_name(self, cell_name: str) -> str: + return f"{sanitised_cr_name(self.service, self.instance)}-cell-{cell_name}" + + def reconcile_vtgate_scalable( + self, + kube_client: KubeClient, + owner_uid: str, + cell_config: CellConfigDict, + ): + should_exist = self.is_vtgate_autoscaling_enabled(cell_config) + name = self.get_vtgate_hpa_name(cell_config["name"]) + scalable = self.get_desired_scalable( + kube_client=kube_client, + name=name, + owner_uid=owner_uid, + pod_selector={ + "planetscale.com/component": "vtgate", + "planetscale.com/cell": cell_config["name"], + "planetscale.com/cluster": sanitised_cr_name( + self.service, self.instance + ), + }, + ) + + cr = None + try: + cr = kube_client.custom.get_namespaced_custom_object( + group="yelp.com", + version="v1alpha1", + namespace=self.get_namespace(), + plural="scalables", + name=name, + ) + except ApiException as e: + if e.status != 404: + raise e + exists = cr is not None + + if should_exist: + if exists: + scalable["metadata"]["resourceVersion"] = cr["metadata"][ + "resourceVersion" + ] + kube_client.custom.replace_namespaced_custom_object( + name=name, + group="yelp.com", + version="v1alpha1", + namespace=self.get_namespace(), + plural="scalables", + body=scalable, + ) + else: + kube_client.custom.create_namespaced_custom_object( + group="yelp.com", + version="v1alpha1", + namespace=self.get_namespace(), + plural="scalables", + body=scalable, + ) + elif exists: + kube_client.custom.delete_namespaced_custom_object( + name=name, + group="yelp.com", + version="v1alpha1", + namespace=self.get_namespace(), + plural="scalables", + ) + + def update_related_api_objects(self, kube_client: KubeClient): + vitesscluster_cr = get_cr(kube_client, cr_id(self.service, self.instance)) + if not vitesscluster_cr: + # Nothing to do, HPAs are deleted when the VitessCluster is deleted via the owner reference + return + + 
vitesscluster_uid = vitesscluster_cr["metadata"]["uid"] + for cell_config in self.get_cells(): + self.reconcile_vtgate_hpa( + kube_client, + owner_uid=vitesscluster_uid, + cell_config=cell_config, + ) + self.reconcile_vtgate_scalable( + kube_client, + owner_uid=vitesscluster_uid, + cell_config=cell_config, + ) + + def get_vitess_config(self, kube_client: KubeClient) -> VitessDeploymentConfigDict: vitess_config = VitessDeploymentConfigDict( namespace=self.get_namespace(), images=self.get_images(), @@ -845,6 +1107,35 @@ def get_vitess_config(self) -> VitessDeploymentConfigDict: keyspaces=self.get_keyspaces(), updateStrategy=self.get_update_strategy(), ) + + for cell in vitess_config["cells"]: + if not self.is_vtgate_autoscaling_enabled(cell): + continue + + name = self.get_vtgate_hpa_name(cell["name"]) + scalable = None + try: + scalable = kube_client.custom.get_namespaced_custom_object( + group="yelp.com", + version="v1alpha1", + namespace=self.get_namespace(), + plural="scalables", + name=name, + ) + except ApiException as e: + if e.status != 404: + raise e + else: + log.error(f"Scalable {name} has been created yet found") + + if not scalable: + log.error(f"Scalable {name} not found") + continue + + autoscaled_replicas = scalable["spec"].get("replicas") + if autoscaled_replicas is not None: + cell["gateway"]["replicas"] = autoscaled_replicas + return vitess_config def validate( @@ -916,11 +1207,12 @@ def load_vitess_service_instance_configs( service: str, instance: str, cluster: str, + kube_client: KubeClient, soa_dir: str = DEFAULT_SOA_DIR, ) -> VitessDeploymentConfigDict: vitess_service_instance_configs = load_vitess_instance_config( service, instance, cluster, soa_dir=soa_dir - ).get_vitess_config() + ).get_vitess_config(kube_client=kube_client) return vitess_service_instance_configs From 39b71f6e05296caa3c50e5ae43673c0a0f46ce46 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 22 Aug 2024 09:53:09 -0700 Subject: [PATCH 02/11] rename Scalable to Vtgate --- paasta_tools/vitesscluster_tools.py | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index af9741be46..56f33614dc 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -856,7 +856,7 @@ def get_update_strategy(self) -> Dict[str, str]: def is_vtgate_autoscaling_enabled(self, cell_config) -> bool: return cell_config["gateway"]["max_instances"] is not None - def get_desired_scalable( + def get_desired_vtgate_cr( self, kube_client: KubeClient, name: str, @@ -865,7 +865,7 @@ def get_desired_scalable( ) -> Optional[Dict[str, Any]]: return { "apiVersion": "yelp.com/v1alpha1", - "kind": "Scalable", + "kind": "Vtgate", "metadata": { "name": name, "namespace": self.get_namespace(), @@ -960,7 +960,7 @@ def get_desired_horizontal_pod_autoscaler( ) ], scale_target_ref=V2beta2CrossVersionObjectReference( - api_version="yelp.com/v1alpha1", kind="Scalable", name=name + api_version="yelp.com/v1alpha1", kind="Vtgate", name=name ), ), ) @@ -1012,7 +1012,7 @@ def reconcile_vtgate_hpa( def get_vtgate_hpa_name(self, cell_name: str) -> str: return f"{sanitised_cr_name(self.service, self.instance)}-cell-{cell_name}" - def reconcile_vtgate_scalable( + def reconcile_vtgate_cr( self, kube_client: KubeClient, owner_uid: str, @@ -1020,7 +1020,7 @@ def reconcile_vtgate_scalable( ): should_exist = self.is_vtgate_autoscaling_enabled(cell_config) name = self.get_vtgate_hpa_name(cell_config["name"]) - 
scalable = self.get_desired_scalable( + vtgate_cr = self.get_desired_vtgate_cr( kube_client=kube_client, name=name, owner_uid=owner_uid, @@ -1039,7 +1039,7 @@ def reconcile_vtgate_scalable( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="scalables", + plural="vtgates", name=name, ) except ApiException as e: @@ -1049,7 +1049,7 @@ def reconcile_vtgate_scalable( if should_exist: if exists: - scalable["metadata"]["resourceVersion"] = cr["metadata"][ + vtgate_cr["metadata"]["resourceVersion"] = cr["metadata"][ "resourceVersion" ] kube_client.custom.replace_namespaced_custom_object( @@ -1057,16 +1057,16 @@ def reconcile_vtgate_scalable( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="scalables", - body=scalable, + plural="vtgates", + body=vtgate_cr, ) else: kube_client.custom.create_namespaced_custom_object( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="scalables", - body=scalable, + plural="vtgates", + body=vtgate_cr, ) elif exists: kube_client.custom.delete_namespaced_custom_object( @@ -1074,7 +1074,7 @@ def reconcile_vtgate_scalable( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="scalables", + plural="vtgates", ) def update_related_api_objects(self, kube_client: KubeClient): @@ -1090,7 +1090,7 @@ def update_related_api_objects(self, kube_client: KubeClient): owner_uid=vitesscluster_uid, cell_config=cell_config, ) - self.reconcile_vtgate_scalable( + self.reconcile_vtgate_cr( kube_client, owner_uid=vitesscluster_uid, cell_config=cell_config, @@ -1113,26 +1113,26 @@ def get_vitess_config(self, kube_client: KubeClient) -> VitessDeploymentConfigDi continue name = self.get_vtgate_hpa_name(cell["name"]) - scalable = None + vtgate_cr = None try: - scalable = kube_client.custom.get_namespaced_custom_object( + vtgate_cr = kube_client.custom.get_namespaced_custom_object( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="scalables", + plural="vtgates", name=name, ) except ApiException as e: if e.status != 404: raise e else: - log.error(f"Scalable {name} has been created yet found") + log.error(f"Vtgate {name} has been created yet found") - if not scalable: - log.error(f"Scalable {name} not found") + if not vtgate_cr: + log.error(f"Vtgate {name} not found") continue - autoscaled_replicas = scalable["spec"].get("replicas") + autoscaled_replicas = vtgate_cr["spec"].get("replicas") if autoscaled_replicas is not None: cell["gateway"]["replicas"] = autoscaled_replicas From 8758c28846072f702932c6979b48ff5fff94a563 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Tue, 3 Sep 2024 02:44:41 -0700 Subject: [PATCH 03/11] Make it explicit which CRs update related objects --- paasta_tools/setup_kubernetes_cr.py | 20 +++++++++++++++++--- paasta_tools/vitesscluster_tools.py | 12 ++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/paasta_tools/setup_kubernetes_cr.py b/paasta_tools/setup_kubernetes_cr.py index ea7179704c..a4f8dc9e2c 100644 --- a/paasta_tools/setup_kubernetes_cr.py +++ b/paasta_tools/setup_kubernetes_cr.py @@ -50,6 +50,9 @@ from paasta_tools.utils import load_all_configs from paasta_tools.utils import load_system_paasta_config from paasta_tools.vitesscluster_tools import load_vitess_service_instance_configs +from paasta_tools.vitesscluster_tools import ( + update_related_api_objects as update_vitess_cluster_related_api_objects, +) log = logging.getLogger(__name__) @@ -58,6 +61,11 @@ INSTANCE_TYPE_TO_CONFIG_LOADER = 
{"vitesscluster": load_vitess_service_instance_configs} +INSTANCE_TYPE_TO_RELATED_OBJECTS_UPDATER = { + "vitesscluster": update_vitess_cluster_related_api_objects, +} + + class StdoutKubeClient: """Replace all destructive operations in Kubernetes APIs with writing out YAML to stdout.""" @@ -415,9 +423,15 @@ def reconcile_kubernetes_resource( ) else: log.info(f"{desired_resource} is up to date, no action taken") - log.info(f"Ensuring related API objects for {desired_resource} are in sync") - if hasattr(soa_config, "update_related_api_objects"): - soa_config.update_related_api_objects(kube_client) + + if crd.file_prefix in INSTANCE_TYPE_TO_RELATED_OBJECTS_UPDATER: + INSTANCE_TYPE_TO_RELATED_OBJECTS_UPDATER[crd.file_prefix]( + service=service, + instance=inst, + cluster=cluster, + kube_client=kube_client, + soa_dir=DEFAULT_SOA_DIR, + ) except Exception as e: log.error(str(e)) succeeded = False diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 56f33614dc..d1002da095 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -1216,6 +1216,18 @@ def load_vitess_service_instance_configs( return vitess_service_instance_configs +def update_related_api_objects( + service: str, + instance: str, + cluster: str, + kube_client: KubeClient, + soa_dir: str = DEFAULT_SOA_DIR, +) -> None: + load_vitess_instance_config( + service, instance, cluster, soa_dir=soa_dir + ).update_related_api_objects(kube_client) + + # TODO: read this from CRD in service configs def cr_id(service: str, instance: str) -> Mapping[str, str]: return dict( From bb1f1497ed360ddade943a6a5842ef262c430a38 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Tue, 3 Sep 2024 03:09:49 -0700 Subject: [PATCH 04/11] Remove hard-coded HPA specs --- paasta_tools/vitesscluster_tools.py | 130 ++++++++++++++-------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index d1002da095..6407e8f478 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -12,13 +12,9 @@ import service_configuration_lib from kubernetes.client import ApiClient from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1OwnerReference from kubernetes.client import V2beta2CrossVersionObjectReference from kubernetes.client import V2beta2HorizontalPodAutoscaler from kubernetes.client import V2beta2HorizontalPodAutoscalerSpec -from kubernetes.client import V2beta2MetricSpec -from kubernetes.client import V2beta2MetricTarget -from kubernetes.client import V2beta2ResourceMetricSource from kubernetes.client.rest import ApiException from paasta_tools.kubernetes_tools import get_cr @@ -29,7 +25,6 @@ from paasta_tools.kubernetes_tools import paasta_prefixed from paasta_tools.kubernetes_tools import sanitised_cr_name from paasta_tools.long_running_service_tools import load_service_namespace_config -from paasta_tools.long_running_service_tools import DEFAULT_AUTOSCALING_SETPOINT from paasta_tools.utils import BranchDictV2 from paasta_tools.utils import deep_merge_dictionaries from paasta_tools.utils import DEFAULT_SOA_DIR @@ -854,7 +849,9 @@ def get_update_strategy(self) -> Dict[str, str]: return {"type": "Immediate"} def is_vtgate_autoscaling_enabled(self, cell_config) -> bool: - return cell_config["gateway"]["max_instances"] is not None + min_instances = self.get_min_instances() + max_instances = self.get_max_instances() + return min_instances and max_instances def 
get_desired_vtgate_cr( self, @@ -891,88 +888,89 @@ def get_desired_vtgate_cr( }, } - def get_desired_horizontal_pod_autoscaler( + def get_autoscaling_target(self, name: str) -> V2beta2CrossVersionObjectReference: + return V2beta2CrossVersionObjectReference( + api_version="yelp.com/v1alpha1", kind="Vtgate", name=name + ) + + def get_autoscaling_metric_spec( self, - kube_client: KubeClient, name: str, - owner_uid: str, - min_replicas: Optional[int], - max_replicas: Optional[int], + cluster: str, + kube_client: KubeClient, + namespace: str, ) -> Optional[V2beta2HorizontalPodAutoscaler]: # Returns None if an HPA should not be attached based on the config, # or the config is invalid. + if self.get_desired_state() == "stop": + return None + + if not self.is_autoscaling_enabled(): + return None + + autoscaling_params = self.get_autoscaling_params() + if autoscaling_params["metrics_providers"][0]["decision_policy"] == "bespoke": + return None + + min_replicas = self.get_min_instances() + max_replicas = self.get_max_instances() if min_replicas == 0 or max_replicas == 0: log.error( f"Invalid value for min or max_instances on {name}: {min_replicas}, {max_replicas}" ) return None - return V2beta2HorizontalPodAutoscaler( + metrics = [] + for provider in autoscaling_params["metrics_providers"]: + spec = self.get_autoscaling_provider_spec(name, namespace, provider) + if spec is not None: + metrics.append(spec) + scaling_policy = self.get_autoscaling_scaling_policy( + max_replicas, + autoscaling_params, + ) + + labels = { + paasta_prefixed("service"): self.service, + paasta_prefixed("instance"): self.instance, + paasta_prefixed("pool"): self.get_pool(), + paasta_prefixed("managed"): "true", + } + + hpa = V2beta2HorizontalPodAutoscaler( kind="HorizontalPodAutoscaler", metadata=V1ObjectMeta( - name=name, - namespace=self.get_namespace(), - annotations=dict(), - labels={ - paasta_prefixed("service"): self.service, - paasta_prefixed("instance"): self.instance, - paasta_prefixed("pool"): self.get_pool(), - paasta_prefixed("managed"): "true", - }, - owner_references=[ - V1OwnerReference( - api_version="planetscale.com/v2", - kind="VitessCluster", - name=sanitised_cr_name(self.service, self.instance), - uid=owner_uid, - controller=True, - block_owner_deletion=True, - ), - ], + name=name, namespace=namespace, annotations=dict(), labels=labels ), spec=V2beta2HorizontalPodAutoscalerSpec( - behavior={ - "scaleDown": { - "stabilizationWindowSeconds": 300, - # the policy in a human-readable way: scale down every 60s by - # at most 30% of current replicas. 
- "selectPolicy": "Max", - "policies": [ - {"type": "Percent", "value": 30, "periodSeconds": 60} - ], - } - }, + behavior=scaling_policy, max_replicas=max_replicas, min_replicas=min_replicas, - metrics=[ - V2beta2MetricSpec( - type="Resource", - resource=V2beta2ResourceMetricSource( - name="cpu", - target=V2beta2MetricTarget( - type="Utilization", - average_utilization=int( - DEFAULT_AUTOSCALING_SETPOINT * 100 - ), - ), - ), - ) - ], - scale_target_ref=V2beta2CrossVersionObjectReference( - api_version="yelp.com/v1alpha1", kind="Vtgate", name=name - ), + metrics=metrics, + scale_target_ref=self.get_autoscaling_target(name), ), ) + return hpa + + def get_min_instances(self) -> Optional[int]: + vtgate_resources = self.config_dict.get("vtgate_resources") + return vtgate_resources.get("min_instances", 1) + + def get_max_instances(self) -> Optional[int]: + vtgate_resources = self.config_dict.get("vtgate_resources") + return vtgate_resources.get("max_instances") + def reconcile_vtgate_hpa( self, kube_client: KubeClient, owner_uid: str, cell_config: CellConfigDict, ): - should_exist = self.is_vtgate_autoscaling_enabled(cell_config) name = self.get_vtgate_hpa_name(cell_config["name"]) + should_exist = self.is_vtgate_autoscaling_enabled(cell_config) + exists = ( len( kube_client.autoscaling.list_namespaced_horizontal_pod_autoscaler( @@ -984,13 +982,15 @@ def reconcile_vtgate_hpa( ) if should_exist: - hpa = self.get_desired_horizontal_pod_autoscaler( + hpa = self.get_autoscaling_metric_spec( + name=sanitised_cr_name(self.service, self.instance), + cluster=self.get_cluster(), kube_client=kube_client, - name=name, - owner_uid=owner_uid, - min_replicas=cell_config["gateway"]["min_instances"], - max_replicas=cell_config["gateway"]["max_instances"], + namespace=self.get_namespace(), ) + if not hpa: + return + if exists: kube_client.autoscaling.replace_namespaced_horizontal_pod_autoscaler( name=name, From 91411fb20e1d2bdcd8489629379448a6d9782937 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 03:01:32 -0700 Subject: [PATCH 05/11] Rename Vtgate to ScalableVtgate --- paasta_tools/vitesscluster_tools.py | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 6407e8f478..e5a3e25b5a 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -853,7 +853,7 @@ def is_vtgate_autoscaling_enabled(self, cell_config) -> bool: max_instances = self.get_max_instances() return min_instances and max_instances - def get_desired_vtgate_cr( + def get_desired_scalablevtgate_cr( self, kube_client: KubeClient, name: str, @@ -862,7 +862,7 @@ def get_desired_vtgate_cr( ) -> Optional[Dict[str, Any]]: return { "apiVersion": "yelp.com/v1alpha1", - "kind": "Vtgate", + "kind": "ScalableVtgate", "metadata": { "name": name, "namespace": self.get_namespace(), @@ -890,7 +890,7 @@ def get_desired_vtgate_cr( def get_autoscaling_target(self, name: str) -> V2beta2CrossVersionObjectReference: return V2beta2CrossVersionObjectReference( - api_version="yelp.com/v1alpha1", kind="Vtgate", name=name + api_version="yelp.com/v1alpha1", kind="ScalableVtgate", name=name ) def get_autoscaling_metric_spec( @@ -1012,7 +1012,7 @@ def reconcile_vtgate_hpa( def get_vtgate_hpa_name(self, cell_name: str) -> str: return f"{sanitised_cr_name(self.service, self.instance)}-cell-{cell_name}" - def reconcile_vtgate_cr( + def reconcile_scalablevtgate_cr( self, kube_client: KubeClient, owner_uid: 
str, @@ -1020,7 +1020,7 @@ def reconcile_vtgate_cr( ): should_exist = self.is_vtgate_autoscaling_enabled(cell_config) name = self.get_vtgate_hpa_name(cell_config["name"]) - vtgate_cr = self.get_desired_vtgate_cr( + scalablevtgate_cr = self.get_desired_scalablevtgate_cr( kube_client=kube_client, name=name, owner_uid=owner_uid, @@ -1039,7 +1039,7 @@ def reconcile_vtgate_cr( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="vtgates", + plural="scalablevtgates", name=name, ) except ApiException as e: @@ -1049,7 +1049,7 @@ def reconcile_vtgate_cr( if should_exist: if exists: - vtgate_cr["metadata"]["resourceVersion"] = cr["metadata"][ + scalablevtgate_cr["metadata"]["resourceVersion"] = cr["metadata"][ "resourceVersion" ] kube_client.custom.replace_namespaced_custom_object( @@ -1057,16 +1057,16 @@ def reconcile_vtgate_cr( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="vtgates", - body=vtgate_cr, + plural="scalablevtgates", + body=scalablevtgate_cr, ) else: kube_client.custom.create_namespaced_custom_object( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="vtgates", - body=vtgate_cr, + plural="scalablevtgates", + body=scalablevtgate_cr, ) elif exists: kube_client.custom.delete_namespaced_custom_object( @@ -1074,7 +1074,7 @@ def reconcile_vtgate_cr( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="vtgates", + plural="scalablevtgates", ) def update_related_api_objects(self, kube_client: KubeClient): @@ -1090,7 +1090,7 @@ def update_related_api_objects(self, kube_client: KubeClient): owner_uid=vitesscluster_uid, cell_config=cell_config, ) - self.reconcile_vtgate_cr( + self.reconcile_scalablevtgate_cr( kube_client, owner_uid=vitesscluster_uid, cell_config=cell_config, @@ -1113,26 +1113,26 @@ def get_vitess_config(self, kube_client: KubeClient) -> VitessDeploymentConfigDi continue name = self.get_vtgate_hpa_name(cell["name"]) - vtgate_cr = None + scalablevtgate_cr = None try: - vtgate_cr = kube_client.custom.get_namespaced_custom_object( + scalablevtgate_cr = kube_client.custom.get_namespaced_custom_object( group="yelp.com", version="v1alpha1", namespace=self.get_namespace(), - plural="vtgates", + plural="scalablevtgates", name=name, ) except ApiException as e: if e.status != 404: raise e else: - log.error(f"Vtgate {name} has been created yet found") + log.error(f"ScalableVtgate {name} has been created yet found") - if not vtgate_cr: - log.error(f"Vtgate {name} not found") + if not scalablevtgate_cr: + log.error(f"ScalableVtgate {name} not found") continue - autoscaled_replicas = vtgate_cr["spec"].get("replicas") + autoscaled_replicas = scalablevtgate_cr["spec"].get("replicas") if autoscaled_replicas is not None: cell["gateway"]["replicas"] = autoscaled_replicas From eb997b56cdef8bf78774d0cf99624bf64cc8e5d9 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 03:11:46 -0700 Subject: [PATCH 06/11] Define ScalableVtgate metadata in one place --- paasta_tools/vitesscluster_tools.py | 52 ++++++++++++++++++----------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index e5a3e25b5a..0c57d40814 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -114,6 +114,14 @@ "disable_active_reparents": "true", } +SCALABLEVTGATE_CRD = { + "group": "yelp.com", + "version": "v1alpha1", + "api_version": "yelp.com/v1alpha1", + "plural": "scalablevtgates", + 
"kind": "ScalableVtgate", +} + class KVEnvVar(TypedDict, total=False): name: str @@ -861,8 +869,8 @@ def get_desired_scalablevtgate_cr( pod_selector: Dict[str, str], ) -> Optional[Dict[str, Any]]: return { - "apiVersion": "yelp.com/v1alpha1", - "kind": "ScalableVtgate", + "apiVersion": SCALABLEVTGATE_CRD["api_version"], + "kind": SCALABLEVTGATE_CRD["kind"], "metadata": { "name": name, "namespace": self.get_namespace(), @@ -890,7 +898,9 @@ def get_desired_scalablevtgate_cr( def get_autoscaling_target(self, name: str) -> V2beta2CrossVersionObjectReference: return V2beta2CrossVersionObjectReference( - api_version="yelp.com/v1alpha1", kind="ScalableVtgate", name=name + api_version=SCALABLEVTGATE_CRD["api_version"], + kind=SCALABLEVTGATE_CRD["kind"], + name=name, ) def get_autoscaling_metric_spec( @@ -1036,10 +1046,10 @@ def reconcile_scalablevtgate_cr( cr = None try: cr = kube_client.custom.get_namespaced_custom_object( - group="yelp.com", - version="v1alpha1", + group=SCALABLEVTGATE_CRD["group"], + version=SCALABLEVTGATE_CRD["version"], namespace=self.get_namespace(), - plural="scalablevtgates", + plural=SCALABLEVTGATE_CRD["plural"], name=name, ) except ApiException as e: @@ -1054,27 +1064,27 @@ def reconcile_scalablevtgate_cr( ] kube_client.custom.replace_namespaced_custom_object( name=name, - group="yelp.com", - version="v1alpha1", + group=SCALABLEVTGATE_CRD["group"], + version=SCALABLEVTGATE_CRD["version"], namespace=self.get_namespace(), - plural="scalablevtgates", + plural=SCALABLEVTGATE_CRD["plural"], body=scalablevtgate_cr, ) else: kube_client.custom.create_namespaced_custom_object( - group="yelp.com", - version="v1alpha1", + group=SCALABLEVTGATE_CRD["group"], + version=SCALABLEVTGATE_CRD["version"], namespace=self.get_namespace(), - plural="scalablevtgates", + plural=SCALABLEVTGATE_CRD["plural"], body=scalablevtgate_cr, ) elif exists: kube_client.custom.delete_namespaced_custom_object( name=name, - group="yelp.com", - version="v1alpha1", + group=SCALABLEVTGATE_CRD["group"], + version=SCALABLEVTGATE_CRD["version"], namespace=self.get_namespace(), - plural="scalablevtgates", + plural=SCALABLEVTGATE_CRD["plural"], ) def update_related_api_objects(self, kube_client: KubeClient): @@ -1116,20 +1126,22 @@ def get_vitess_config(self, kube_client: KubeClient) -> VitessDeploymentConfigDi scalablevtgate_cr = None try: scalablevtgate_cr = kube_client.custom.get_namespaced_custom_object( - group="yelp.com", - version="v1alpha1", + group=SCALABLEVTGATE_CRD["group"], + version=SCALABLEVTGATE_CRD["version"], namespace=self.get_namespace(), - plural="scalablevtgates", + plural=SCALABLEVTGATE_CRD["plural"], name=name, ) except ApiException as e: if e.status != 404: raise e else: - log.error(f"ScalableVtgate {name} has been created yet found") + log.error( + f"{SCALABLEVTGATE_CRD['kind']} {name} has been created yet found" + ) if not scalablevtgate_cr: - log.error(f"ScalableVtgate {name} not found") + log.error(f"{SCALABLEVTGATE_CRD['kind']} {name} not found") continue autoscaled_replicas = scalablevtgate_cr["spec"].get("replicas") From 0daabb8194660463aa23db5944b7b555ed66da25 Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 03:58:53 -0700 Subject: [PATCH 07/11] HPA name should be the same as the cell, not cluster --- paasta_tools/vitesscluster_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 0c57d40814..bd8a36f44a 100644 --- a/paasta_tools/vitesscluster_tools.py +++ 
b/paasta_tools/vitesscluster_tools.py @@ -993,7 +993,7 @@ def reconcile_vtgate_hpa( if should_exist: hpa = self.get_autoscaling_metric_spec( - name=sanitised_cr_name(self.service, self.instance), + name=name, cluster=self.get_cluster(), kube_client=kube_client, namespace=self.get_namespace(), From b382e1f8cdb90f5d1857256a862ddc23239ba15c Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 04:38:02 -0700 Subject: [PATCH 08/11] Set owner_references for vtgate HPAs --- paasta_tools/vitesscluster_tools.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index bd8a36f44a..dd22a999f6 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -12,6 +12,7 @@ import service_configuration_lib from kubernetes.client import ApiClient from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1OwnerReference from kubernetes.client import V2beta2CrossVersionObjectReference from kubernetes.client import V2beta2HorizontalPodAutoscaler from kubernetes.client import V2beta2HorizontalPodAutoscalerSpec @@ -909,6 +910,7 @@ def get_autoscaling_metric_spec( cluster: str, kube_client: KubeClient, namespace: str, + owner_uid: str, ) -> Optional[V2beta2HorizontalPodAutoscaler]: # Returns None if an HPA should not be attached based on the config, # or the config is invalid. @@ -951,7 +953,20 @@ def get_autoscaling_metric_spec( hpa = V2beta2HorizontalPodAutoscaler( kind="HorizontalPodAutoscaler", metadata=V1ObjectMeta( - name=name, namespace=namespace, annotations=dict(), labels=labels + name=name, + namespace=namespace, + annotations=dict(), + labels=labels, + owner_references=[ + V1OwnerReference( + api_version="planetscale.com/v2", + kind="VitessCluster", + name=sanitised_cr_name(self.service, self.instance), + uid=owner_uid, + controller=True, + block_owner_deletion=True, + ), + ], ), spec=V2beta2HorizontalPodAutoscalerSpec( behavior=scaling_policy, @@ -997,6 +1012,7 @@ def reconcile_vtgate_hpa( cluster=self.get_cluster(), kube_client=kube_client, namespace=self.get_namespace(), + owner_uid=owner_uid, ) if not hpa: return From e0a8bc9247971d69b83748a3f36dec6ecec2c53b Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 05:48:03 -0700 Subject: [PATCH 09/11] Fix error message --- paasta_tools/vitesscluster_tools.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index dd22a999f6..3154bf22cc 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -1152,9 +1152,7 @@ def get_vitess_config(self, kube_client: KubeClient) -> VitessDeploymentConfigDi if e.status != 404: raise e else: - log.error( - f"{SCALABLEVTGATE_CRD['kind']} {name} has been created yet found" - ) + log.error(f"{SCALABLEVTGATE_CRD['kind']} {name} not found") if not scalablevtgate_cr: log.error(f"{SCALABLEVTGATE_CRD['kind']} {name} not found") From 7288c60cfa05f85a8b059637e5bf1c9309bc80fc Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 06:01:23 -0700 Subject: [PATCH 10/11] Fix mypy errors --- paasta_tools/vitesscluster_tools.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 3154bf22cc..6a7082d230 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -142,8 +142,8 @@ 
class RequestsDict(TypedDict, total=False): class ResourceConfigDict(TypedDict, total=False): replicas: int - min_instances: int - max_instances: int + min_instances: Optional[int] + max_instances: Optional[int] requests: Dict[str, RequestsDict] limits: Dict[str, RequestsDict] @@ -155,6 +155,8 @@ class GatewayConfigDict(TypedDict, total=False): extraLabels: Dict[str, str] lifecycle: Dict[str, Dict[str, Dict[str, List[str]]]] replicas: int + min_instances: int + max_instances: int resources: Dict[str, Any] annotations: Mapping[str, Any] @@ -858,9 +860,8 @@ def get_update_strategy(self) -> Dict[str, str]: return {"type": "Immediate"} def is_vtgate_autoscaling_enabled(self, cell_config) -> bool: - min_instances = self.get_min_instances() max_instances = self.get_max_instances() - return min_instances and max_instances + return max_instances is not None def get_desired_scalablevtgate_cr( self, @@ -904,7 +905,7 @@ def get_autoscaling_target(self, name: str) -> V2beta2CrossVersionObjectReferenc name=name, ) - def get_autoscaling_metric_spec( + def get_desired_hpa_spec( self, name: str, cluster: str, @@ -1007,7 +1008,7 @@ def reconcile_vtgate_hpa( ) if should_exist: - hpa = self.get_autoscaling_metric_spec( + hpa = self.get_desired_hpa_spec( name=name, cluster=self.get_cluster(), kube_client=kube_client, From d078a306663f7e15cf38a69172493c50fffa43af Mon Sep 17 00:00:00 2001 From: Sina Siadat Date: Thu, 12 Sep 2024 06:20:30 -0700 Subject: [PATCH 11/11] Fix tests --- paasta_tools/vitesscluster_tools.py | 4 ++-- tests/test_vitesscluster_tools.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/paasta_tools/vitesscluster_tools.py b/paasta_tools/vitesscluster_tools.py index 6a7082d230..5350379b1d 100644 --- a/paasta_tools/vitesscluster_tools.py +++ b/paasta_tools/vitesscluster_tools.py @@ -512,8 +512,8 @@ def get_tablet_pool_config( "name": "etc-srv-configs", "hostPath": {"path": "/nail/etc/srv-configs"}, }, - {"name": "vttablet-fake-credentials", "emptyDir": {}}, - {"name": "keyspace-fake-init-script", "emptyDir": {}}, + {"name": "vttablet-fake-credentials", "hostPath": {"path": "/dev/null"}}, + {"name": "keyspace-fake-init-script", "hostPath": {"path": "/dev/null"}}, ], replicas=replicas, vttablet={ diff --git a/tests/test_vitesscluster_tools.py b/tests/test_vitesscluster_tools.py index 19438a2bb8..abd6ce1dae 100644 --- a/tests/test_vitesscluster_tools.py +++ b/tests/test_vitesscluster_tools.py @@ -138,6 +138,8 @@ }, }, "replicas": 1, + "min_instances": None, + "max_instances": None, "resources": { "limits": {"cpu": "100m", "memory": "256Mi"}, "requests": {"cpu": "100m", "memory": "256Mi"}, @@ -690,6 +692,7 @@ def test_load_vitess_service_instance_configs( soa_dir="fake_soa_dir", cluster="fake_cluster", instance="fake_instance", + kube_client="fake_kube_client", ) assert vitess_service_instance_configs == VITESS_CONFIG
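
Taken together, the series turns on vtgate autoscaling whenever max_instances is set under vtgate_resources in the vitesscluster SOA config: setup_kubernetes_cr calls the new per-instance-type hook (INSTANCE_TYPE_TO_RELATED_OBJECTS_UPDATER) after the VitessCluster CR is reconciled, that hook creates one ScalableVtgate CR plus one HPA per cell (or deletes them when autoscaling is disabled), and get_vitess_config then copies the autoscaled spec.replicas from each ScalableVtgate back into the cell's gateway replicas. The sketch below is not part of the patch; it only shows how the new module-level hook could be exercised by hand against these changes. The service/instance/cluster names are hypothetical, and it assumes KubeClient() can be constructed with defaults.

    from paasta_tools.kubernetes_tools import KubeClient
    from paasta_tools.vitesscluster_tools import (
        load_vitess_instance_config,
        update_related_api_objects,
    )

    # Assumption: a default-constructed client is sufficient in this context.
    kube_client = KubeClient()

    # Autoscaling is considered enabled when vtgate_resources.max_instances is set;
    # min_instances defaults to 1 (see get_min_instances / get_max_instances).
    config = load_vitess_instance_config("my_database", "main", "dev-cluster")
    for cell in config.get_cells():
        print(cell["name"], config.is_vtgate_autoscaling_enabled(cell))

    # setup_kubernetes_cr invokes this after the VitessCluster CR itself is in sync;
    # it creates/replaces the ScalableVtgate CR and HPA for each cell, or deletes
    # them when autoscaling is turned off for the instance.
    update_related_api_objects(
        service="my_database",
        instance="main",
        cluster="dev-cluster",
        kube_client=kube_client,
    )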