From ef436ce2f5c0808f219842cfd83908901fab14a3 Mon Sep 17 00:00:00 2001
From: Luis Perez
Date: Tue, 16 Jul 2024 11:08:25 -0700
Subject: [PATCH 1/4] Ensure default Spark executor SA is created in Spark
 clusters

This is mostly a disaster-recovery thing since this SA exists already -
but to avoid any unpleasant times should we decide to start these
clusters from scratch, it'd be best to ensure that this SA actually
exists :)
---
 paasta_tools/setup_tron_namespace.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/paasta_tools/setup_tron_namespace.py b/paasta_tools/setup_tron_namespace.py
index 58784f90c7..cf46b8482a 100755
--- a/paasta_tools/setup_tron_namespace.py
+++ b/paasta_tools/setup_tron_namespace.py
@@ -97,6 +97,26 @@ def ensure_service_accounts(job_configs: List[TronJobConfig]) -> None:
                 namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
                 kube_client=spark_kube_client,
             )
+        elif (
+            action.get_executor() == "spark"
+            # NOTE: we only enter this block if there's no IAM role specified as otherwise the above block would do
+            # the same thing as calling ensure_service_account() with iam_role=get_spark_executor_iam_role()
+            and not action.get_iam_role()
+            # the default spark executor IAM role comes from SystemPaastaConfig - so let's guard against it missing
+            # (that should never happen - but operator error is a thing :p)
+            and action.get_spark_executor_iam_role()
+        ):
+            # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
+            # if a Spark job is created without an explicit IAM role, we'll use a default one that only has access
+            # to spark-required resources (e.g., event logs and whatnot)
+            spark_kube_client = KubeClient(
+                config_file=system_paasta_config.get_spark_kubeconfig()
+            )
+            ensure_service_account(
+                action.get_spark_executor_iam_role(),
+                namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
+                kube_client=spark_kube_client,
+            )
 
 
 def main():
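For context: ensure_service_account() is used above as an idempotent "create the ServiceAccount if it's missing" helper. A minimal sketch of that behavior, assuming the official kubernetes Python client, an invented role-to-SA naming scheme, and the standard EKS IRSA annotation (the real paasta_tools implementation may differ on all three):

    from kubernetes import client
    from kubernetes.client.rest import ApiException


    def ensure_service_account_sketch(
        iam_role: str, namespace: str, core: client.CoreV1Api
    ) -> None:
        # hypothetical naming scheme: derive the SA name from the IAM role name
        sa_name = "paasta--" + iam_role.rsplit("/", 1)[-1].lower()
        try:
            # no-op if the ServiceAccount already exists (this is what makes
            # re-running the setup script safe)
            core.read_namespaced_service_account(name=sa_name, namespace=namespace)
            return
        except ApiException as e:
            if e.status != 404:
                raise
        core.create_namespaced_service_account(
            namespace=namespace,
            body=client.V1ServiceAccount(
                metadata=client.V1ObjectMeta(
                    name=sa_name,
                    namespace=namespace,
                    # IRSA annotation: pods running under this SA can assume the role
                    annotations={"eks.amazonaws.com/role-arn": iam_role},
                )
            ),
        )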
From 2bf40c28cd6e7b86d9b4030083ad61eca706ce65 Mon Sep 17 00:00:00 2001
From: Luis Perez
Date: Wed, 17 Jul 2024 08:55:44 -0700
Subject: [PATCH 2/4] Fix spark executor iam role handling

The previous iteration of this would never have been hit, since in
normal operation action.get_iam_role() will always be truthy for
executor=spark.
---
 paasta_tools/setup_tron_namespace.py | 33 +++++++++++-----------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/paasta_tools/setup_tron_namespace.py b/paasta_tools/setup_tron_namespace.py
index cf46b8482a..472355df07 100755
--- a/paasta_tools/setup_tron_namespace.py
+++ b/paasta_tools/setup_tron_namespace.py
@@ -92,31 +92,24 @@ def ensure_service_accounts(job_configs: List[TronJobConfig]) -> None:
             spark_kube_client = KubeClient(
                 config_file=system_paasta_config.get_spark_kubeconfig()
             )
+            # NOTE: the above get_iam_role() should always return a value for executor=spark since we fall back
+            # to a default role so that the drivers have access to a basic set of resources that are required
+            # for how we run spark
             ensure_service_account(
                 action.get_iam_role(),
                 namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
                 kube_client=spark_kube_client,
             )
-        elif (
-            action.get_executor() == "spark"
-            # NOTE: we only enter this block if there's no IAM role specified as otherwise the above block would do
-            # the same thing as calling ensure_service_account() with iam_role=get_spark_executor_iam_role()
-            and not action.get_iam_role()
-            # the default spark executor IAM role comes from SystemPaastaConfig - so let's guard against it missing
-            # (that should never happen - but operator error is a thing :p)
-            and action.get_spark_executor_iam_role()
-        ):
-            # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
-            # if a Spark job is created without an explicit IAM role, we'll use a default one that only has access
-            # to spark-required resources (e.g., event logs and whatnot)
-            spark_kube_client = KubeClient(
-                config_file=system_paasta_config.get_spark_kubeconfig()
-            )
-            ensure_service_account(
-                action.get_spark_executor_iam_role(),
-                namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
-                kube_client=spark_kube_client,
-            )
+            # this should always be truthy, but let's be safe since this comes from SystemPaastaConfig
+            if action.get_spark_executor_iam_role():
+                # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
+                # this one is for the actual spark executors to use. if an iam_role is set, we'll use that, otherwise
+                # there's an executor-specific default role just like there is for the drivers :)
+                ensure_service_account(
+                    action.get_spark_executor_iam_role(),
+                    namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
+                    kube_client=spark_kube_client,
+                )
 
 
 def main():

From 1cd8303cc4f892ff8ad90ccbf6fe3e0448c86d3a Mon Sep 17 00:00:00 2001
From: Luis Perez
Date: Wed, 17 Jul 2024 09:05:41 -0700
Subject: [PATCH 3/4] drivers don't run in the paasta-spark ns

i am dumb and sameer reminded me about how this all actually works (i
think i was getting my wires crossed between how spark works from the
batch boxes vs on k8s :p)
---
 paasta_tools/setup_tron_namespace.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/paasta_tools/setup_tron_namespace.py b/paasta_tools/setup_tron_namespace.py
index 472355df07..255acf8125 100755
--- a/paasta_tools/setup_tron_namespace.py
+++ b/paasta_tools/setup_tron_namespace.py
@@ -85,21 +85,13 @@ def ensure_service_accounts(job_configs: List[TronJobConfig]) -> None:
                 kube_client=kube_client,
             )
         # spark executors are special in that we want the SA to exist in two namespaces:
-        # the tron namespace - for the spark driver
-        # and the spark namespace - for the spark executor
+        # the tron namespace - for the spark driver (which will be created by the ensure_service_account() above)
+        # and the spark namespace - for the spark executor (which we'll create below)
         if action.get_executor() == "spark":
             # this kubeclient creation is lru_cache'd so it should be fine to call this for every spark action
             spark_kube_client = KubeClient(
                 config_file=system_paasta_config.get_spark_kubeconfig()
             )
-            # NOTE: the above get_iam_role() should always return a value for executor=spark since we fall back
-            # to a default role so that the drivers have access to a basic set of resources that are required
-            # for how we run spark
-            ensure_service_account(
-                action.get_iam_role(),
-                namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
-                kube_client=spark_kube_client,
-            )
             # this should always be truthy, but let's be safe since this comes from SystemPaastaConfig
             if action.get_spark_executor_iam_role():
                 # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
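One note on the "lru_cache'd" comment in the hunk above: the claim is that constructing KubeClient(config_file=...) repeatedly with the same kubeconfig is cheap because the construction is memoized. A sketch of that pattern, under the assumption that the caching lives on a factory function (how paasta_tools actually wires this up may differ):

    from functools import lru_cache

    from paasta_tools.kubernetes_tools import KubeClient  # assumed import path


    @lru_cache(maxsize=None)
    def make_kube_client(config_file: str) -> KubeClient:
        # same kubeconfig path -> same cached client instance, so calling this
        # once per spark action does not rebuild a client every time
        return KubeClient(config_file=config_file)

Because lookups key on the config path, the tron-namespace client and the spark-namespace client stay distinct while repeated calls within one run are effectively free.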
From 44c4199a5b8518c01db27d79971861c0dbee5f88 Mon Sep 17 00:00:00 2001
From: Luis Perez
Date: Wed, 17 Jul 2024 09:09:04 -0700
Subject: [PATCH 4/4] Refactor a tiny bit

---
 paasta_tools/setup_tron_namespace.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/paasta_tools/setup_tron_namespace.py b/paasta_tools/setup_tron_namespace.py
index 255acf8125..be5112707e 100755
--- a/paasta_tools/setup_tron_namespace.py
+++ b/paasta_tools/setup_tron_namespace.py
@@ -87,21 +87,23 @@ def ensure_service_accounts(job_configs: List[TronJobConfig]) -> None:
         # spark executors are special in that we want the SA to exist in two namespaces:
         # the tron namespace - for the spark driver (which will be created by the ensure_service_account() above)
         # and the spark namespace - for the spark executor (which we'll create below)
-        if action.get_executor() == "spark":
+        if (
+            action.get_executor() == "spark"
+            # this should always be truthy, but let's be safe since this comes from SystemPaastaConfig
+            and action.get_spark_executor_iam_role()
+        ):
             # this kubeclient creation is lru_cache'd so it should be fine to call this for every spark action
             spark_kube_client = KubeClient(
                 config_file=system_paasta_config.get_spark_kubeconfig()
             )
-            # this should always be truthy, but let's be safe since this comes from SystemPaastaConfig
-            if action.get_spark_executor_iam_role():
-                # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
-                # this one is for the actual spark executors to use. if an iam_role is set, we'll use that, otherwise
-                # there's an executor-specific default role just like there is for the drivers :)
-                ensure_service_account(
-                    action.get_spark_executor_iam_role(),
-                    namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
-                    kube_client=spark_kube_client,
-                )
+            # this will look quite similar to the above, but we're ensuring that a potentially different SA exists:
+            # this one is for the actual spark executors to use. if an iam_role is set, we'll use that, otherwise
+            # there's an executor-specific default role just like there is for the drivers :)
+            ensure_service_account(
+                action.get_spark_executor_iam_role(),
+                namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
+                kube_client=spark_kube_client,
+            )
 
 
 def main():
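For reference, the net effect of the series: after all four patches, the spark-specific branch of ensure_service_accounts() reads as below (reconstructed from the hunks above; names like action, system_paasta_config, and the enclosing loop come from surrounding code the diffs only show as context):

    # spark executors are special in that we want the SA to exist in two namespaces:
    # the tron namespace - for the spark driver (created by the ensure_service_account() call above it)
    # and the spark namespace - for the spark executor (created here)
    if (
        action.get_executor() == "spark"
        # this should always be truthy, but let's be safe since this comes from SystemPaastaConfig
        and action.get_spark_executor_iam_role()
    ):
        # this kubeclient creation is lru_cache'd so it should be fine to call this for every spark action
        spark_kube_client = KubeClient(
            config_file=system_paasta_config.get_spark_kubeconfig()
        )
        ensure_service_account(
            action.get_spark_executor_iam_role(),
            namespace=spark_tools.SPARK_EXECUTOR_NAMESPACE,
            kube_client=spark_kube_client,
        )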