Changes required for Jupyter-Scheduler integration #1832

Merged · 17 commits · Jul 20, 2023 · changes shown from 13 commits
6 changes: 6 additions & 0 deletions src/_nebari/stages/input_vars.py
@@ -300,6 +300,12 @@ def stage_07_kubernetes_services(stage_outputs, config):
"*/*": ["viewer"],
},
},
"argo-workflows-jupyter-scheduler": {
"primary_namespace": "",
"role_bindings": {
"*/*": ["viewer"],
},
},
},
"conda-store-default-namespace": config.get("conda_store", {}).get(
"default_namespace", "nebari-git"
13 changes: 7 additions & 6 deletions src/_nebari/template/stages/07-kubernetes-services/jupyterhub.tf
@@ -98,12 +98,13 @@ module "jupyterhub" {

shared-pvc = module.jupyterhub-nfs-mount.persistent_volume_claim.name

conda-store-pvc = module.conda-store-nfs-mount.persistent_volume_claim.name
conda-store-mount = "/home/conda"
conda-store-environments = var.conda-store-environments
default-conda-store-namespace = var.conda-store-default-namespace
conda-store-cdsdashboard-token = module.kubernetes-conda-store-server.service-tokens.cdsdashboards
conda-store-service-name = module.kubernetes-conda-store-server.service_name
conda-store-pvc = module.conda-store-nfs-mount.persistent_volume_claim.name
conda-store-mount = "/home/conda"
conda-store-environments = var.conda-store-environments
default-conda-store-namespace = var.conda-store-default-namespace
conda-store-cdsdashboard-token = module.kubernetes-conda-store-server.service-tokens.cdsdashboards
conda-store-argo-workflows-jupyter-scheduler-token = module.kubernetes-conda-store-server.service-tokens.argo-workflows-jupyter-scheduler
Member: passing token so it can be set on jupyter user pods?

Member: Is jupyter scheduler a pod that runs?

@iameskild (Member, Author) · Jul 11, 2023: Jupyter-Scheduler is a lab extension that runs while the user pod is running. Having view access to conda-store lets us set the path for the conda-store environments more reliably (mostly for this function).

The conda-store feature is also a traitlet, albeit just a basic toggle.

conda-store-service-name = module.kubernetes-conda-store-server.service_name

extra-mounts = {
"/etc/dask" = {
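
Aside on the Jupyter-Scheduler thread above: since the extension runs inside the user pod and resolves conda-store environment paths, a minimal sketch of what that resolution could look like is below. The helper name, the CONDA_STORE_MOUNT fallback, and the exact directory layout are assumptions for illustration (based on the conda-store-mount = "/home/conda" setting in this file), not the extension's actual code.

import os

# Hypothetical helper: resolve the on-disk path of a conda-store environment
# for a scheduled job, assuming environments are mounted per namespace under
# the shared mount, e.g. /home/conda/analyst/envs/analyst-my-env.
def resolve_conda_store_env(namespace: str, name: str, use_conda_store_env: bool = True) -> str:
    mount = os.environ.get("CONDA_STORE_MOUNT", "/home/conda")
    if use_conda_store_env:
        return os.path.join(mount, namespace, "envs", f"{namespace}-{name}")
    # fall back to a plain local conda environment
    return os.path.join(os.environ.get("CONDA_DIR", "/opt/conda"), "envs", name)
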
@@ -1,6 +1,10 @@
locals {
name = "argo-workflows"
argo-workflows-prefix = "argo"
# roles
admin = "argo-admin"
developer = "argo-developer"
viewer = "argo-viewer"
}

resource "helm_release" "argo-workflows" {
@@ -30,7 +34,7 @@ resource "helm_release" "argo-workflows" {

server = {
# `sso` for OIDC/OAuth
extraArgs = ["--auth-mode=sso", "--insecure-skip-verify"]
extraArgs = ["--auth-mode=sso", "--auth-mode=client", "--insecure-skip-verify"]
# to enable TLS, `secure = true`
secure = false
baseHref = "/${local.argo-workflows-prefix}/"
@@ -83,9 +87,9 @@ module "argo-workflow-openid-client" {
client_id = "argo-server-sso"
external-url = var.external-url
role_mapping = {
"admin" = ["argo_admin"]
"developer" = ["argo_developer"]
"analyst" = ["argo_viewer"]
"admin" = ["${local.admin}"]
"developer" = ["${local.developer}"]
"analyst" = ["${local.viewer}"]
}

callback-url-paths = [
@@ -183,18 +187,18 @@ resource "kubernetes_manifest" "argo-workflows-ingress-route" {

resource "kubernetes_service_account_v1" "argo-admin-sa" {
metadata {
name = "argo-admin"
name = local.admin
namespace = var.namespace
annotations = {
"workflows.argoproj.io/rbac-rule" : "'argo_admin' in groups"
"workflows.argoproj.io/rbac-rule" : "'${local.admin}' in groups"
"workflows.argoproj.io/rbac-rule-precedence" : "11"
}
}
}

resource "kubernetes_secret_v1" "argo_admin_sa_token" {
resource "kubernetes_secret_v1" "argo-admin-sa-token" {
metadata {
name = "argo-admin.service-account-token"
name = "${local.admin}.service-account-token"
namespace = var.namespace
annotations = {
"kubernetes.io/service-account.name" = kubernetes_service_account_v1.argo-admin-sa.metadata[0].name
@@ -206,7 +210,7 @@ resource "kubernetes_secret_v1" "argo_admin_sa_token" {

resource "kubernetes_cluster_role_binding" "argo-admin-rb" {
metadata {
name = "argo-admin"
name = local.admin
}

role_ref {
@@ -221,31 +225,31 @@ resource "kubernetes_cluster_role_binding" "argo-admin-rb" {
}
}

resource "kubernetes_service_account_v1" "argo-dev-sa" {
resource "kubernetes_service_account_v1" "argo-developer-sa" {
metadata {
name = "argo-dev"
name = local.developer
namespace = var.namespace
annotations = {
"workflows.argoproj.io/rbac-rule" : "'argo_developer' in groups"
"workflows.argoproj.io/rbac-rule" : "'${local.developer}' in groups"
"workflows.argoproj.io/rbac-rule-precedence" : "10"
}
}
}

resource "kubernetes_secret_v1" "argo_dev_sa_token" {
metadata {
name = "argo-dev.service-account-token"
name = "${local.developer}.service-account-token"
namespace = var.namespace
annotations = {
"kubernetes.io/service-account.name" = kubernetes_service_account_v1.argo-dev-sa.metadata[0].name
"kubernetes.io/service-account.name" = kubernetes_service_account_v1.argo-developer-sa.metadata[0].name
}
}
type = "kubernetes.io/service-account-token"
}

resource "kubernetes_cluster_role_binding" "argo-dev-rb" {
resource "kubernetes_cluster_role_binding" "argo-developer-rb" {
metadata {
name = "argo-dev"
name = local.developer
}

role_ref {
@@ -255,7 +259,7 @@ resource "kubernetes_cluster_role_binding" "argo-dev-rb" {
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account_v1.argo-dev-sa.metadata.0.name
name = kubernetes_service_account_v1.argo-developer-sa.metadata.0.name
namespace = var.namespace
}
}
@@ -266,13 +270,13 @@ resource "kubernetes_service_account_v1" "argo-view-sa" {
name = "argo-viewer"
namespace = var.namespace
annotations = {
"workflows.argoproj.io/rbac-rule" : "'argo_viewer' in groups"
"workflows.argoproj.io/rbac-rule" : "'${local.viewer}' in groups"
"workflows.argoproj.io/rbac-rule-precedence" : "9"
}
}
}

resource "kubernetes_secret_v1" "argo_viewer_sa_token" {
resource "kubernetes_secret_v1" "argo-viewer-sa-token" {
metadata {
name = "argo-viewer.service-account-token"
namespace = var.namespace
@@ -534,10 +538,25 @@ resource "kubernetes_manifest" "deployment_admission_controller" {
"value" = var.namespace
},
]
"volumeMounts" = [
{
"mountPath" = "/etc/config"
"name" = "valid-argo-roles"
"readOnly" = true
},
]
"image" = "quay.io/nebari/nebari-workflow-controller:${var.workflow-controller-image-tag}"
"name" = "admission-controller"
},
]
"volumes" = [
{
"name" = "valid-argo-roles"
"configMap" = {
"name" = "valid-argo-roles"
}
},
]
}
}
}
@@ -566,3 +585,14 @@ resource "kubernetes_manifest" "service_admission_controller" {
}
}
}

resource "kubernetes_config_map" "valid-argo-roles" {
metadata {
name = "valid-argo-roles"
namespace = var.namespace
}

data = {
"valid-argo-roles" = jsonencode([local.admin, local.developer])
}
}
@@ -1,5 +1,5 @@
locals {
jupyter-notebook-config-py-template = templatefile("${path.module}/files/jupyter/jupyter_notebook_config.py.tpl", {
jupyter-notebook-config-py-template = templatefile("${path.module}/files/jupyter/jupyter_server_config.py.tpl", {
terminal_cull_inactive_timeout = var.idle-culler-settings.terminal_cull_inactive_timeout
terminal_cull_interval = var.idle-culler-settings.terminal_cull_interval
kernel_cull_idle_timeout = var.idle-culler-settings.kernel_cull_idle_timeout
Expand All @@ -12,9 +12,9 @@ locals {
}


resource "local_file" "jupyter_notebook_config_py" {
resource "local_file" "jupyter_server_config_py" {
content = local.jupyter-notebook-config-py-template
filename = "${path.module}/files/jupyter/jupyter_notebook_config.py"
filename = "${path.module}/files/jupyter/jupyter_server_config.py"
}


@@ -33,7 +33,7 @@ resource "kubernetes_config_map" "etc-ipython" {

resource "kubernetes_config_map" "etc-jupyter" {
depends_on = [
local_file.jupyter_notebook_config_py
local_file.jupyter_server_config_py
]

metadata {
@@ -42,7 +42,7 @@ resource "kubernetes_config_map" "etc-jupyter" {
}

data = {
"jupyter_notebook_config.py" : local_file.jupyter_notebook_config_py.content
"jupyter_server_config.py" : local_file.jupyter_server_config_py.content
}
}

@@ -36,3 +36,10 @@ c.NotebookApp.shutdown_no_activity_timeout = ${server_shutdown_no_activity_timeo
###############################################################################
# JupyterHub idle culler total timeout corresponds (approximately) to:
# max(cull_idle_timeout, cull_inactive_timeout) + shutdown_no_activity_timeout

from argo_workflows_executor.executor import ArgoExecutor
from argo_workflows_executor.scheduler import ArgoScheduler

c.Scheduler.execution_manager_class=ArgoExecutor
c.SchedulerApp.scheduler_class=ArgoScheduler
c.SchedulerApp.scheduler_class.use_conda_store_env=True
@@ -311,6 +311,62 @@ def configure_user(username, groups, uid=1000, gid=100):
}


def profile_argo_token(groups):
# TODO: create a more robust check of the user's Argo-Workflows role

domain = z2jh.get_config("custom.external-url")

ADMIN = "admin"
DEVELOPER = "developer"
ANALYST = "analyst"

base = "argo-"
argo_sa = None

if ANALYST in groups:
argo_sa = base + "view"
if DEVELOPER in groups:
argo_sa = base + "developer"
if ADMIN in groups:
argo_sa = base + "admin"
if not argo_sa:
return {}

return {
"ARGO_BASE_HREF": "/argo",
"ARGO_SERVER": f"{domain}:443",
"ARGO_TOKEN": {
"valueFrom": {
"secretKeyRef": {
"name": f"{argo_sa}.service-account-token",
Member: Did the modifications you made create this token or did it already exist?

@iameskild (Member, Author): This token already exists; it's the default argo-admin, argo-dev and argo-view secrets. I'm not entirely sure if they were being used prior to this... The ARGO_TOKEN used when someone creates a workflow from the UI (and the one that can be copied from there) is not the same as any of those above.

I have also included this in the enhancements issue, namely the ability to create short-lived tokens on a per-user basis.

"key": "token",
}
}
},
}
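
The thread above notes these service-account token secrets already exist; for context, here is a minimal sketch of how code inside a user pod could consume the injected ARGO_SERVER and ARGO_TOKEN variables. GET /api/v1/workflows/{namespace} is the Argo Workflows server's list endpoint; the helper itself and the exact token format are assumptions for illustration.

import os
import urllib.request

# Hypothetical consumer of the env vars injected by profile_argo_token above.
def list_argo_workflows(namespace: str) -> bytes:
    server = os.environ["ARGO_SERVER"]  # e.g. "<domain>:443"
    token = os.environ["ARGO_TOKEN"]    # service-account token from the secret
    req = urllib.request.Request(
        f"https://{server}/api/v1/workflows/{namespace}",
        headers={"Authorization": f"Bearer {token}"},
    )
    with urllib.request.urlopen(req) as resp:
        return resp.read()
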


def profile_conda_store_viewer_token():
return {
"CONDA_STORE_TOKEN": {
Member: Any way this token could be misused by the user? It's used by ARGO_WORKFLOWS_EXECUTOR, I see - https://github.com/search?q=repo%3Anebari-dev%2Fargo-workflows-executor+CONDA_STORE_TOKEN&type=code

Member: Is Argo Workflows Executor a package that's installable, a pod that's running, or is it included on the jupyterhub pod?

@iameskild (Member, Author): This token is view-only in scope, so if the user were motivated enough, they could view all of the namespaces/environments that exist.

Member: Later down the road we should scope this token to just what the user can see. But for now this works.

"valueFrom": {
"secretKeyRef": {
"name": "argo-workflows-conda-store-token",
"key": "conda-store-api-token",
}
}
},
"CONDA_STORE_SERVICE": {
"valueFrom": {
"secretKeyRef": {
"name": "argo-workflows-conda-store-token",
"key": "conda-store-service-name",
}
}
},
}
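
As a rough illustration of what the view-only token discussed above permits, the sketch below lists environments through the conda-store REST API from inside the pod. The /conda-store/api/v1/environment/ route and the {"data": [...]} response shape are assumptions about conda-store's API, and the helper is hypothetical.

import json
import os
import urllib.request

# Hypothetical consumer of the env vars injected by profile_conda_store_viewer_token.
def list_conda_store_envs() -> list:
    service = os.environ["CONDA_STORE_SERVICE"]  # internal service url (host:port)
    token = os.environ["CONDA_STORE_TOKEN"]      # view-only conda-store API token
    req = urllib.request.Request(
        f"http://{service}/conda-store/api/v1/environment/",
        headers={"Authorization": f"Bearer {token}"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)["data"]
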


def render_profile(profile, username, groups, keycloak_profilenames):
"""Render each profile for user.

@@ -366,7 +422,12 @@ def render_profile(profile, username, groups, keycloak_profilenames):
def preserve_envvars(spawner):
# This adds in JUPYTERHUB_ANYONE/GROUP rather than overwrite all env vars,
# if set in the spawner for a dashboard to control access.
return {**envvars_fixed, **spawner.environment}
return {
**envvars_fixed,
**spawner.environment,
**profile_argo_token(groups),
**profile_conda_store_viewer_token(),
}

profile["kubespawner_override"]["environment"] = preserve_envvars

@@ -404,6 +465,10 @@ def render_profiles(spawner):
)


c.KubeSpawner.args = ["--debug"]
c.KubeSpawner.environment = {
"JUPYTERHUB_SINGLEUSER_APP": "jupyter_server.serverapp.ServerApp",
}
c.KubeSpawner.profile_list = render_profiles


@@ -24,6 +24,7 @@ resource "helm_release" "jupyterhub" {
jsonencode({
# custom values can be accessed via z2jh.get_config('custom.<path>')
custom = {
external-url = var.external-url
theme = var.theme
profiles = var.profiles
cdsdashboards = var.cdsdashboards
Expand All @@ -34,6 +35,7 @@ resource "helm_release" "jupyterhub" {
default-conda-store-namespace = var.default-conda-store-namespace
conda-store-service-name = var.conda-store-service-name
conda-store-cdsdashboards = var.conda-store-cdsdashboard-token
conda-store-jupyter-scheduler = var.conda-store-argo-workflows-jupyter-scheduler-token
skel-mount = {
name = kubernetes_config_map.etc-skel.metadata.0.name
namespace = kubernetes_config_map.etc-skel.metadata.0.namespace
@@ -209,3 +211,18 @@ module "jupyterhub-openid-client" {
]
jupyterlab_profiles_mapper = true
}


resource "kubernetes_secret" "argo-workflows-conda-store-token" {
metadata {
name = "argo-workflows-conda-store-token"
namespace = var.namespace
}

data = {
"conda-store-api-token" = var.conda-store-argo-workflows-jupyter-scheduler-token
"conda-store-service-name" = var.conda-store-service-name
Member: This is a url I guess. I want to double check this later and make sure there's not a better way.

@iameskild (Member, Author): yeah, this is an internal service url. conda-store-service-name, as a variable name, is used elsewhere in the other services so I just kept the same name.

}

type = "Opaque"
}
@@ -128,6 +128,11 @@ variable "conda-store-cdsdashboard-token" {
default = ""
}

variable "conda-store-argo-workflows-jupyter-scheduler-token" {
description = "Token for argo-workflows-jupyter-schedule to use conda-store"
type = string
}

variable "jupyterhub-logout-redirect-url" {
description = "Next redirect destination following a Keycloak logout"
type = string