From ccc80b832e8eecf2ed8370333b81ef668b592e96 Mon Sep 17 00:00:00 2001 From: Daniel Posada Date: Fri, 8 Oct 2021 16:30:31 -0500 Subject: [PATCH 1/3] Allows choosing init and sidecar image on job submission --- scheduler/src/cook/kubernetes/api.clj | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scheduler/src/cook/kubernetes/api.clj b/scheduler/src/cook/kubernetes/api.clj index 4f879f8b5d..88a9aea20f 100644 --- a/scheduler/src/cook/kubernetes/api.clj +++ b/scheduler/src/cook/kubernetes/api.clj @@ -1394,9 +1394,11 @@ resources (V1ResourceRequirements.)] ; container (.setName container cook-init-container-name) - (.setImage container (resolve-image-from-incremental-config - job passport-event-base passport/init-container-image-selected - image image-fallback)) + (.setImage container + (or (get main-env "COOK_INIT_CONTAINER_IMAGE") + (resolve-image-from-incremental-config + job passport-event-base passport/init-container-image-selected + image image-fallback))) (.setCommand container command) (.setWorkingDir container init-container-workdir) (.setEnv container main-env-vars) @@ -1415,9 +1417,11 @@ resources (V1ResourceRequirements.)] ; container (.setName container cook-container-name-for-file-server) - (.setImage container (resolve-image-from-incremental-config - job passport-event-base passport/sidecar-image-selected - image image-fallback)) + (.setImage container + (or (get main-env "COOK_SIDECAR_CONTAINER_IMAGE") + (resolve-image-from-incremental-config + job passport-event-base passport/sidecar-image-selected + image image-fallback))) (.setCommand container (conj command (str port))) (.setWorkingDir container sidecar-workdir) (.setPorts container [(.containerPort (V1ContainerPort.) 
(int port))]) From 54de2cf8bddecd42c605946fa1e321c0e4c4e413 Mon Sep 17 00:00:00 2001 From: Daniel Posada Date: Mon, 11 Oct 2021 10:01:26 -0500 Subject: [PATCH 2/3] Exposes environment variables for all 3 container images --- scheduler/src/cook/kubernetes/api.clj | 59 +++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/scheduler/src/cook/kubernetes/api.clj b/scheduler/src/cook/kubernetes/api.clj index 88a9aea20f..76bb1e4ab5 100644 --- a/scheduler/src/cook/kubernetes/api.clj +++ b/scheduler/src/cook/kubernetes/api.clj @@ -1283,6 +1283,45 @@ (and include-telemetry telemetry-version-var-name) (assoc telemetry-version-var-name (or (:application/version application) "undefined"))) + ; Calculate and add to the env the main container image + main-container-image-var-name "COOK_MAIN_CONTAINER_IMAGE" + main-env (let [{:keys [image]} docker + main-container-image + (if (synthetic-pod? pod-name) + image + (let [checkpoint (calculate-effective-checkpointing-config job task-id) + job-submit-time (tools/job->submit-time job)] + (calculate-effective-image + (config/kubernetes) job-submit-time image checkpoint task-id)))] + (assoc main-env main-container-image-var-name main-container-image)) + ; Calculate and add to the env the init container image, if applicable + init-container-image-var-name "COOK_INIT_CONTAINER_IMAGE" + main-env (if use-cook-init? 
+ (let [{:keys [image image-fallback]} init-container + init-container-image + (or + ; We allow the init container image to + ; be overridden via the same env variable + (get main-env-base init-container-image-var-name) + (resolve-image-from-incremental-config + job passport-event-base passport/init-container-image-selected + image image-fallback))] + (assoc main-env init-container-image-var-name init-container-image)) + main-env) + ; Calculate and add to the env the sidecar container image, if applicable + sidecar-container-image-var-name "COOK_SIDECAR_CONTAINER_IMAGE" + main-env (if use-cook-sidecar? + (let [{:keys [image image-fallback]} sidecar + sidecar-container-image + (or + ; We allow the sidecar container image to + ; be overridden via the same env variable + (get main-env sidecar-container-image-var-name) + (resolve-image-from-incremental-config + job passport-event-base passport/sidecar-image-selected + image image-fallback))] + (assoc main-env sidecar-container-image-var-name sidecar-container-image)) + main-env) main-env-vars (cond->> (-> main-env (merge (get-default-env-for-pool pool-name)) make-filtered-env-vars) @@ -1309,7 +1348,7 @@ (let [{:keys [resources]} task-request ;; NOTE: The scheduler's adjust-job-resources-for-pool-fn may modify :resources, ;; whereas :scalar-requests always contains the unmodified job resource values. - {:keys [image port-mapping]} docker + {:keys [port-mapping]} docker ; gpu count is not stored in scalar-requests because Fenzo does not handle gpus in binpacking gpus (or (:gpus resources) 0) gpu-model-requested (constraints/job->gpu-model-requested gpus job pool-name) @@ -1321,11 +1360,7 @@ disk-limit (when enable-disk-constraint? (-> resources :disk :limit)) ;; if user did not specify disk type, use default on pool disk-type (when enable-disk-constraint? 
(constraints/job-resources->disk-type resources pool-name)) - checkpoint (calculate-effective-checkpointing-config job task-id) - job-submit-time (tools/job->submit-time job) - image (if (synthetic-pod? pod-name) - image - (calculate-effective-image (config/kubernetes) job-submit-time image checkpoint task-id)) + image (get main-env main-container-image-var-name) container (V1Container.) resources (V1ResourceRequirements.)] @@ -1382,7 +1417,7 @@ ;init container (when use-cook-init? - (let [{:keys [command image image-fallback]} init-container + (let [{:keys [command]} init-container container (V1Container.) get-resource-requirements-fn (fn [fieldname] (if use-cook-sidecar? (get-in sidecar [:resource-requirements fieldname]) @@ -1394,11 +1429,15 @@ resources (V1ResourceRequirements.)] ; container (.setName container cook-init-container-name) +<<<<<<< Updated upstream (.setImage container (or (get main-env "COOK_INIT_CONTAINER_IMAGE") (resolve-image-from-incremental-config job passport-event-base passport/init-container-image-selected image image-fallback))) +======= + (.setImage container (get main-env init-container-image-var-name)) +>>>>>>> Stashed changes (.setCommand container command) (.setWorkingDir container init-container-workdir) (.setEnv container main-env-vars) @@ -1411,17 +1450,21 @@ ; sandbox file server container (when use-cook-sidecar? - (let [{:keys [command health-check-endpoint image image-fallback port resource-requirements]} sidecar + (let [{:keys [command health-check-endpoint port resource-requirements]} sidecar {:keys [cpu-request cpu-limit memory-request memory-limit]} resource-requirements container (V1Container.) 
resources (V1ResourceRequirements.)] ; container (.setName container cook-container-name-for-file-server) +<<<<<<< Updated upstream (.setImage container (or (get main-env "COOK_SIDECAR_CONTAINER_IMAGE") (resolve-image-from-incremental-config job passport-event-base passport/sidecar-image-selected image image-fallback))) +======= + (.setImage container (get main-env sidecar-container-image-var-name)) +>>>>>>> Stashed changes (.setCommand container (conj command (str port))) (.setWorkingDir container sidecar-workdir) (.setPorts container [(.containerPort (V1ContainerPort.) (int port))]) From bd48fb3484c216ee463e82ed77f159348ff88144 Mon Sep 17 00:00:00 2001 From: Daniel Posada Date: Mon, 11 Oct 2021 10:03:14 -0500 Subject: [PATCH 3/3] Fixes merge conflicts --- scheduler/src/cook/kubernetes/api.clj | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/scheduler/src/cook/kubernetes/api.clj b/scheduler/src/cook/kubernetes/api.clj index 76bb1e4ab5..88998aaee5 100644 --- a/scheduler/src/cook/kubernetes/api.clj +++ b/scheduler/src/cook/kubernetes/api.clj @@ -1429,15 +1429,7 @@ resources (V1ResourceRequirements.)] ; container (.setName container cook-init-container-name) -<<<<<<< Updated upstream - (.setImage container - (or (get main-env "COOK_INIT_CONTAINER_IMAGE") - (resolve-image-from-incremental-config - job passport-event-base passport/init-container-image-selected - image image-fallback))) -======= (.setImage container (get main-env init-container-image-var-name)) ->>>>>>> Stashed changes (.setCommand container command) (.setWorkingDir container init-container-workdir) (.setEnv container main-env-vars) @@ -1456,15 +1448,7 @@ resources (V1ResourceRequirements.)] ; container (.setName container cook-container-name-for-file-server) -<<<<<<< Updated upstream - (.setImage container - (or (get main-env "COOK_SIDECAR_CONTAINER_IMAGE") - (resolve-image-from-incremental-config - job passport-event-base passport/sidecar-image-selected - image image-fallback))) 
-======= (.setImage container (get main-env sidecar-container-image-var-name)) ->>>>>>> Stashed changes (.setCommand container (conj command (str port))) (.setWorkingDir container sidecar-workdir) (.setPorts container [(.containerPort (V1ContainerPort.) (int port))])