Merge remote-tracking branch 'origin/master' into COMPINFRA-2833

Yelp · Jul 19, 2023 · 48577bf · 48577bf
2 parents 960f850 + 8b7ca3d
commit 48577bf
Show file tree

Hide file tree

Showing 16 changed files with 642 additions and 106 deletions.
diff --git a/debian/changelog b/debian/changelog
@@ -1,3 +1,50 @@
+paasta-tools (0.189.0) xenial; urgency=medium
+
+  * 0.189.0 tagged with 'make release'
+    Commit: Revert "TRON-1636: Setup tron secret_volumes in
+    setup_tron_namespace" (#3652)
+
+ -- Wilmer Bandres <[email protected]>  Wed, 19 Jul 2023 05:26:34 -0700
+
+paasta-tools (0.188.0) xenial; urgency=medium
+
+  * 0.188.0 tagged with 'make release'
+    Commit: Fixing script bug (#3651)  * Fixing script bug
+
+ -- Wilmer Bandres <[email protected]>  Wed, 19 Jul 2023 01:34:56 -0700
+
+paasta-tools (0.187.0) xenial; urgency=medium
+
+  * 0.187.0 tagged with 'make release'
+    Commit: Merge pull request #3615 from Yelp/u/vit/tron-1636-add-
+    secret-volume  TRON-1636: Setup tron secret_volumes in
+    setup_tron_namespace
+
+ -- Vincent Thibault <[email protected]>  Tue, 18 Jul 2023 14:12:30 -0700
+
+paasta-tools (0.186.0) xenial; urgency=medium
+
+  * 0.186.0 tagged with 'make release'
+    Commit: Adding extensions to update_crds function (#3649)  * Adding
+    extensions to update_crds function  * removing unused client  *
+    Fixing internal update crd  * fixing internal update crd
+
+ -- Wilmer Bandres <[email protected]>  Mon, 17 Jul 2023 01:23:12 -0700
+
+paasta-tools (0.185.0) xenial; urgency=medium
+
+  * 0.185.0 tagged with 'make release'
+    Commit: Adding support for extensions v1 v1beta1 (#3648)  * Adding
+    support for extensions v1 v1beta1  * Fixing tests  * updating
+    setup_kubernetes_cr  * Fixing call to super  * fixing typo  * adding
+    minimal req for tests  * putting older version back  * Rebasing  *
+    Removing second client  * Adding comments for clarification  *
+    Update paasta_tools/setup_kubernetes_cr.py  Co-authored-by: Luis
+    Pérez <[email protected]>  * Fixing some whitespace errors  ---------
+    Co-authored-by: Luis Pérez <[email protected]>
+
+ -- Wilmer Bandres <[email protected]>  Fri, 14 Jul 2023 00:18:35 -0700
+
 paasta-tools (0.184.0) xenial; urgency=medium
 
   * 0.184.0 tagged with 'make release'

diff --git a/docs/source/autoscaling.rst b/docs/source/autoscaling.rst
@@ -80,6 +80,10 @@ The currently available metrics providers are:
     the port that your uWSGI master process will respond to with stats.
     Defaults to 8889.
 
+:gunicorn:
+  With the ``gunicorn`` metrics provider, Paasta will configure your pods to run an additional container with the `statsd_exporter <https://github.com/prometheus/statsd_exporter>`_ image.
+  This sidecar will listen on port 9117 and receive stats from the gunicorn service. The ``statsd_exporter`` will translate the stats into Prometheus format, which Prometheus will scrape.
+
 
 Decision policies
 ^^^^^^^^^^^^^^^^^

diff --git a/docs/source/yelpsoa_configs.rst b/docs/source/yelpsoa_configs.rst
@@ -387,7 +387,7 @@ instance MAY have:
   * ``autoscaling``: See the `autoscaling docs <autoscaling.html>`_ for details
 
     * ``metrics_provider``: Which method the autoscaler will use to determine a service's utilization.
-      Should be ``cpu`` or ``uwsgi``.
+      Should be ``cpu``, ``uwsgi``, or ``gunicorn``.
 
     * ``decision_policy``: Which method the autoscaler will use to determine when to autoscale a service.
       Should be ``proportional`` or ``bespoke``.

diff --git a/paasta_tools/__init__.py b/paasta_tools/__init__.py
@@ -17,4 +17,4 @@
 # setup phase, the dependencies may not exist on disk yet.
 #
 # Don't bump version manually. See `make release` docs in ./Makefile
-__version__ = "0.184.0"
+__version__ = "0.189.0"
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
@@ -47,6 +47,8 @@
 from kubernetes.client import models
 from kubernetes.client import V1Affinity
 from kubernetes.client import V1AWSElasticBlockStoreVolumeSource
+from kubernetes.client import V1beta1CustomResourceDefinition
+from kubernetes.client import V1beta1CustomResourceDefinitionList
 from kubernetes.client import V1beta1PodDisruptionBudget
 from kubernetes.client import V1beta1PodDisruptionBudgetSpec
 from kubernetes.client import V1Capabilities
@@ -64,12 +66,12 @@
 from kubernetes.client import V1EnvVar
 from kubernetes.client import V1EnvVarSource
 from kubernetes.client import V1ExecAction
+from kubernetes.client import V1Handler
 from kubernetes.client import V1HostPathVolumeSource
 from kubernetes.client import V1HTTPGetAction
 from kubernetes.client import V1KeyToPath
 from kubernetes.client import V1LabelSelector
 from kubernetes.client import V1Lifecycle
-from kubernetes.client import V1LifecycleHandler
 from kubernetes.client import V1Namespace
 from kubernetes.client import V1Node
 from kubernetes.client import V1NodeAffinity
@@ -172,7 +174,12 @@
 }
 HACHECK_POD_NAME = "hacheck"
 UWSGI_EXPORTER_POD_NAME = "uwsgi--exporter"
-SIDECAR_CONTAINER_NAMES = [HACHECK_POD_NAME, UWSGI_EXPORTER_POD_NAME]
+GUNICORN_EXPORTER_POD_NAME = "gunicorn--exporter"
+SIDECAR_CONTAINER_NAMES = [
+    HACHECK_POD_NAME,
+    UWSGI_EXPORTER_POD_NAME,
+    GUNICORN_EXPORTER_POD_NAME,
+]
 KUBERNETES_NAMESPACE = "paasta"
 PAASTA_WORKLOAD_OWNER = "compute_infra_platform_experience"
 MAX_EVENTS_TO_RETRIEVE = 200
@@ -320,6 +327,7 @@ def _set_disrupted_pods(self: Any, disrupted_pods: Mapping[str, datetime]) -> No
         "paasta.yelp.com/prometheus_shard": str,
         "paasta.yelp.com/scrape_uwsgi_prometheus": str,
         "paasta.yelp.com/scrape_piscina_prometheus": str,
+        "paasta.yelp.com/scrape_gunicorn_prometheus": str,
         "paasta.yelp.com/service": str,
         "paasta.yelp.com/autoscaled": str,
         "yelp.com/paasta_git_sha": str,
@@ -517,6 +525,13 @@ def __init__(
         self.core = kube_client.CoreV1Api(self.api_client)
         self.policy = kube_client.PolicyV1beta1Api(self.api_client)
         self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client)
+
+        # We need to support both apiextensions /v1 and /v1beta1 in order
+        # to make our upgrade to k8s 1.22 smooth; otherwise,
+        # updating the CRDs makes this script fail
+        self.apiextensions_v1_beta1 = kube_client.ApiextensionsV1beta1Api(
+            self.api_client
+        )
         self.custom = kube_client.CustomObjectsApi(self.api_client)
         self.autoscaling = kube_client.AutoscalingV2beta2Api(self.api_client)
         self.rbac = kube_client.RbacAuthorizationV1Api(self.api_client)
@@ -772,7 +787,7 @@ def get_autoscaling_metric_spec(
                         ),
                     )
                 )
-        elif metrics_provider in {"uwsgi", "piscina"}:
+        elif metrics_provider in {"uwsgi", "piscina", "gunicorn"}:
             metrics.append(
                 V2beta2MetricSpec(
                     type="Object",
@@ -951,12 +966,17 @@ def get_sidecar_containers(
         uwsgi_exporter_container = self.get_uwsgi_exporter_sidecar_container(
             system_paasta_config
         )
+        gunicorn_exporter_container = self.get_gunicorn_exporter_sidecar_container(
+            system_paasta_config
+        )
 
         sidecars = []
         if hacheck_container:
             sidecars.append(hacheck_container)
         if uwsgi_exporter_container:
             sidecars.append(uwsgi_exporter_container)
+        if gunicorn_exporter_container:
+            sidecars.append(gunicorn_exporter_container)
         return sidecars
 
     def get_readiness_check_prefix(
@@ -1016,7 +1036,7 @@ def get_hacheck_sidecar_container(
             return V1Container(
                 image=system_paasta_config.get_hacheck_sidecar_image_url(),
                 lifecycle=V1Lifecycle(
-                    pre_stop=V1LifecycleHandler(
+                    pre_stop=V1Handler(
                         _exec=V1ExecAction(
                             command=[
                                 "/bin/sh",
@@ -1064,7 +1084,7 @@ def get_uwsgi_exporter_sidecar_container(
                 env=self.get_kubernetes_environment() + [stats_port_env],
                 ports=[V1ContainerPort(container_port=9117)],
                 lifecycle=V1Lifecycle(
-                    pre_stop=V1LifecycleHandler(
+                    pre_stop=V1Handler(
                         _exec=V1ExecAction(
                             command=[
                                 "/bin/sh",
@@ -1095,6 +1115,42 @@ def should_run_uwsgi_exporter_sidecar(
                     return True
         return False
 
+    def get_gunicorn_exporter_sidecar_container(
+        self,
+        system_paasta_config: SystemPaastaConfig,
+    ) -> Optional[V1Container]:
+
+        if self.should_run_gunicorn_exporter_sidecar():
+            return V1Container(
+                image=system_paasta_config.get_gunicorn_exporter_sidecar_image_url(),
+                resources=self.get_sidecar_resource_requirements("gunicorn_exporter"),
+                name=GUNICORN_EXPORTER_POD_NAME,
+                env=self.get_kubernetes_environment(),
+                ports=[V1ContainerPort(container_port=9117)],
+                lifecycle=V1Lifecycle(
+                    pre_stop=V1Handler(
+                        _exec=V1ExecAction(
+                            command=[
+                                "/bin/sh",
+                                "-c",
+                                # we sleep for the same amount of time as we do after an hadown to ensure that we have accurate
+                                # metrics up until our Pod dies
+                                f"sleep {DEFAULT_HADOWN_PRESTOP_SLEEP_SECONDS}",
+                            ]
+                        )
+                    )
+                ),
+            )
+
+        return None
+
+    def should_run_gunicorn_exporter_sidecar(self) -> bool:
+        if self.is_autoscaling_enabled():
+            autoscaling_params = self.get_autoscaling_params()
+            if autoscaling_params["metrics_provider"] == "gunicorn":
+                return True
+        return False
+
     def should_setup_piscina_prometheus_scraping(
         self,
     ) -> bool:
@@ -1395,20 +1451,20 @@ def get_readiness_probe(
         else:
             return self.get_liveness_probe(service_namespace_config)
 
-    def get_kubernetes_container_termination_action(self) -> V1LifecycleHandler:
+    def get_kubernetes_container_termination_action(self) -> V1Handler:
         command = self.config_dict.get("lifecycle", KubeLifecycleDict({})).get(
             "pre_stop_command", []
         )
         # default pre stop hook for the container
         if not command:
-            return V1LifecycleHandler(
+            return V1Handler(
                 _exec=V1ExecAction(
                     command=["/bin/sh", "-c", f"sleep {DEFAULT_PRESTOP_SLEEP_SECONDS}"]
                 )
             )
         if isinstance(command, str):
             command = [command]
-        return V1LifecycleHandler(_exec=V1ExecAction(command=command))
+        return V1Handler(_exec=V1ExecAction(command=command))
 
     def get_pod_volumes(
         self,
@@ -1919,14 +1975,15 @@ def has_routable_ip(
     ) -> str:
         """Return whether the routable_ip label should be true or false.
 
-        Services with a `prometheus_port` defined or that use the uwsgi_exporter sidecar must have a routable IP
+        Services with a `prometheus_port` defined or that use certain sidecars must have a routable IP
         address to allow Prometheus shards to scrape metrics.
         """
         if (
             self.config_dict.get("routable_ip", False)
             or service_namespace_config.is_in_smartstack()
             or self.get_prometheus_port() is not None
             or self.should_run_uwsgi_exporter_sidecar(system_paasta_config)
+            or self.should_run_gunicorn_exporter_sidecar()
         ):
             return "true"
         return "false"
@@ -2099,6 +2156,10 @@ def get_pod_template_spec(
             labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
             labels["paasta.yelp.com/scrape_piscina_prometheus"] = "true"
 
+        elif self.should_run_gunicorn_exporter_sidecar():
+            labels["paasta.yelp.com/deploy_group"] = self.get_deploy_group()
+            labels["paasta.yelp.com/scrape_gunicorn_prometheus"] = "true"
+
         return V1PodTemplateSpec(
             metadata=V1ObjectMeta(
                 labels=labels,
@@ -3818,30 +3879,37 @@ def mode_to_int(mode: Optional[Union[str, int]]) -> Optional[int]:
 
 def update_crds(
     kube_client: KubeClient,
-    desired_crds: Collection[V1CustomResourceDefinition],
-    existing_crds: V1CustomResourceDefinitionList,
+    desired_crds: Collection[
+        Union[V1CustomResourceDefinition, V1beta1CustomResourceDefinition]
+    ],
+    existing_crds: Union[
+        V1CustomResourceDefinitionList, V1beta1CustomResourceDefinitionList
+    ],
 ) -> bool:
-    success = True
     for desired_crd in desired_crds:
         existing_crd = None
         for crd in existing_crds.items:
             if crd.metadata.name == desired_crd.metadata["name"]:
                 existing_crd = crd
                 break
-
         try:
+
+            if "apiextensions.k8s.io/v1beta1" == desired_crd.api_version:
+                apiextensions = kube_client.apiextensions_v1_beta1
+            else:
+                apiextensions = kube_client.apiextensions
+
             if existing_crd:
                 desired_crd.metadata[
                     "resourceVersion"
                 ] = existing_crd.metadata.resource_version
-                kube_client.apiextensions.replace_custom_resource_definition(
+
+                apiextensions.replace_custom_resource_definition(
                     name=desired_crd.metadata["name"], body=desired_crd
                 )
             else:
                 try:
-                    kube_client.apiextensions.create_custom_resource_definition(
-                        body=desired_crd
-                    )
+                    apiextensions.create_custom_resource_definition(body=desired_crd)
                 except ValueError as err:
                     # TODO: kubernetes server will sometimes reply with conditions:null,
                     # figure out how to deal with this correctly, for more details:
@@ -3857,9 +3925,9 @@ def update_crds(
                 f"status: {exc.status}, reason: {exc.reason}"
             )
             log.debug(exc.body)
-            success = False
+            return False
 
-    return success
+    return True
 
 
 def sanitise_label_value(value: str) -> str:

diff --git a/paasta_tools/long_running_service_tools.py b/paasta_tools/long_running_service_tools.py
@@ -31,6 +31,7 @@
 DEFAULT_AUTOSCALING_SETPOINT = 0.8
 DEFAULT_UWSGI_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
 DEFAULT_PISCINA_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
+DEFAULT_GUNICORN_AUTOSCALING_MOVING_AVERAGE_WINDOW = 1800
 # we set a different default moving average window so that we can reuse our existing PromQL
 # without having to write a different query for existing users that want to autoscale on
 # instantaneous CPU