diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 05996ccf1c1..1a423d67b88 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -323,7 +323,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | | kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | | kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | -| kyuubi.kubernetes.spark.deleteDriverPodOnTermination.enabled | false | If set to true then Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. | boolean | 1.8.1 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. Available options are NONE, ALL and COMPLETED. The default value is NONE, which means no driver pod will be deleted. | string | 1.8.1 | | kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | | kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. 
If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | | kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 35629b66768..78896bfe1a9 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -1231,13 +1231,20 @@ object KyuubiConf { .checkValue(_ > 0, "must be positive number") .createWithDefault(Duration.ofMinutes(5).toMillis) - val KUBERNETES_SPARK_DELETE_DRIVER_POD_ON_TERMINATION_ENABLED: ConfigEntry[Boolean] = - buildConf("kyuubi.kubernetes.spark.deleteDriverPodOnTermination.enabled") - .doc("If set to true then Kyuubi server will delete the spark driver pod after " + - s"the application terminates for ${KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD.key}.") + val KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD: ConfigEntry[String] = + buildConf("kyuubi.kubernetes.spark.cleanupTerminatedDriverPod") + .doc("Kyuubi server will delete the spark driver pod after " + + s"the application terminates for ${KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD.key}. 
" + + "Available options are NONE, ALL and COMPLETED. " + + "The default value is NONE, which means no driver pod will be deleted.") .version("1.8.1") - .booleanConf - .createWithDefault(false) + .stringConf + .createWithDefault(KubernetesCleanupDriverPodStrategy.NONE.toString) + + object KubernetesCleanupDriverPodStrategy extends Enumeration { + type KubernetesCleanupDriverPodStrategy = Value + val NONE, ALL, COMPLETED = Value + } val KUBERNETES_APPLICATION_STATE_CONTAINER: ConfigEntry[String] = buildConf("kyuubi.kubernetes.application.state.container") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala index 95f68d4b639..c8828f5d83c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala @@ -30,8 +30,9 @@ import io.fabric8.kubernetes.client.informers.{ResourceEventHandler, SharedIndex import org.apache.kyuubi.{KyuubiException, Logging, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.KubernetesApplicationStateSource +import org.apache.kyuubi.config.KyuubiConf.{KubernetesApplicationStateSource, KubernetesCleanupDriverPodStrategy} import org.apache.kyuubi.config.KyuubiConf.KubernetesApplicationStateSource.KubernetesApplicationStateSource +import org.apache.kyuubi.config.KyuubiConf.KubernetesCleanupDriverPodStrategy.{ALL, COMPLETED, NONE} import org.apache.kyuubi.engine.ApplicationState.{isTerminated, ApplicationState, FAILED, FINISHED, NOT_FOUND, PENDING, RUNNING, UNKNOWN} import org.apache.kyuubi.util.KubernetesUtils @@ -107,14 +108,19 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { submitTimeout = conf.get(KyuubiConf.ENGINE_KUBERNETES_SUBMIT_TIMEOUT) // Defer cleaning terminated application information 
val retainPeriod = conf.get(KyuubiConf.KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD) - val deleteSparkDriverPodOnTermination = - conf.get(KyuubiConf.KUBERNETES_SPARK_DELETE_DRIVER_POD_ON_TERMINATION_ENABLED) + val cleanupDriverPodStrategy = KubernetesCleanupDriverPodStrategy.withName( + conf.get(KyuubiConf.KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD)) cleanupTerminatedAppInfoTrigger = CacheBuilder.newBuilder() .expireAfterWrite(retainPeriod, TimeUnit.MILLISECONDS) .removalListener((notification: RemovalNotification[String, ApplicationState]) => { Option(appInfoStore.remove(notification.getKey)).foreach { case (kubernetesInfo, removed) => val appLabel = notification.getKey - if (deleteSparkDriverPodOnTermination) { + val shouldDelete = cleanupDriverPodStrategy match { + case NONE => false + case ALL => true + case COMPLETED => !ApplicationState.isFailed(notification.getValue) + } + if (shouldDelete) { val podName = removed.name try { val kubernetesClient = getOrCreateKubernetesClient(kubernetesInfo)