diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 194d90a8d3..63fe4b1e45 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -531,8 +531,8 @@ def limit_size_with_hash(name: str, limit: int = 63, suffix: int = 4) -> str: """ if len(name) > limit: digest = hashlib.md5(name.encode()).digest() - hash = base64.b32encode(digest).decode().replace("=", "").lower() - return f"{name[:(limit-suffix-1)]}-{hash[:suffix]}" + hashed = base64.b32encode(digest).decode().replace("=", "").lower() + return f"{name[:(limit-suffix-1)]}-{hashed[:suffix]}" else: return name diff --git a/paasta_tools/tron_tools.py b/paasta_tools/tron_tools.py index b3a94f798c..16db8e2e16 100644 --- a/paasta_tools/tron_tools.py +++ b/paasta_tools/tron_tools.py @@ -364,6 +364,18 @@ def build_spark_config(self) -> Dict[str, str]: ), user=spark_tools.SPARK_JOB_USER, ) + # delete the dynamically generated spark.app.id to prevent frequent config updates in Tron. + # spark.app.id will be generated later by yelp spark-submit wrapper or Spark itself. + spark_conf.pop("spark.app.id", None) + # use a static spark.app.name to prevent frequent config updates in Tron. + # limit_size_with_hash is deterministic (md5 digest + base32 encoding), so the same name always yields the same result. + # This spark.app.name might be overridden by yelp spark-submit wrapper. 
+ if "spark.app.name" in spark_conf: + spark_conf["spark.app.name"] = limit_size_with_hash( + f"tron_spark_{self.get_service()}_{self.get_instance()}_{self.get_action_name()}" + if "spark.app.name" not in stringified_spark_args + else stringified_spark_args["spark.app.name"] + ) # TODO: Remove this once dynamic pod template is generated inside the driver using spark-submit wrapper if "spark.kubernetes.executor.podTemplateFile" in spark_conf: print( diff --git a/tests/test_tron_tools.py b/tests/test_tron_tools.py index 7c1886658d..43ff04c01f 100644 --- a/tests/test_tron_tools.py +++ b/tests/test_tron_tools.py @@ -1216,12 +1216,9 @@ def test_format_tron_action_dict_spark( confs = result["command"].split(" ") spark_app_name = "" - spark_app_id = "" for s in confs: if s.startswith("spark.app.name"): spark_app_name = s.split("=")[1] - if s.startswith("spark.app.id"): - spark_app_id = s.split("=")[1] expected = { "command": "timeout 12h spark-submit " @@ -1230,7 +1227,6 @@ def test_format_tron_action_dict_spark( "--conf spark.executor.memory=1g " "--conf spark.executor.cores=2 " f"--conf spark.app.name={spark_app_name} " - f"--conf spark.app.id={spark_app_id} " "--conf spark.ui.port=39091 " "--conf spark.executor.instances=0 " "--conf spark.kubernetes.executor.limit.cores=2 "