kfp-operators upgrades from 2.0.0-alpha.7 -> 2.0.3 for CKF 1.8 release #362

Merged: 15 commits, Nov 20, 2023
11 changes: 9 additions & 2 deletions .github/workflows/integrate.yaml
@@ -14,6 +14,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -33,6 +34,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -52,6 +54,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -106,7 +109,11 @@ jobs:
test-bundle:
name: Test the bundle
runs-on: ubuntu-20.04

strategy:
matrix:
sdk:
- v1
- v2
steps:
# This is a workaround for https://github.com/canonical/kfp-operators/issues/250
# Ideally we'd use self-hosted runners, but this effort is still not stable
@@ -142,7 +149,7 @@ jobs:
# Run integration tests against the 1.7 generic install bundle definition
# Using destructive mode because of https://github.com/canonical/charmcraft/issues/1132
# and https://github.com/canonical/charmcraft/issues/1138
sg snap_microk8s -c "tox -e bundle-integration -- --model kubeflow --bundle=./tests/integration/bundles/kfp_1.7_stable_install.yaml.j2 --destructive-mode"
sg snap_microk8s -c "tox -e bundle-integration-${{ matrix.sdk }} -- --model kubeflow --bundle=./tests/integration/bundles/kfp_latest_edge.yaml.j2 --destructive-mode"

- name: Get all
run: kubectl get all -A
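Note: the new `sdk` matrix fans the bundle test out into one run per SDK version, so this job now executes both `tox -e bundle-integration-v1` and `tox -e bundle-integration-v2` against the latest-edge bundle.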
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ __pycache__/
.coverage
.idea
.vscode
venv
2 changes: 1 addition & 1 deletion charms/kfp-api/config.yaml
@@ -41,7 +41,7 @@ options:
Used if service account is left unspecified when creating a run
init-connection-timeout:
type: string
default: '5s'
default: '6m'
description: |
Connection timeout used when initializing clients for related services.
The format used can be anything accepted by `time.ParseDuration`.
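Since `init-connection-timeout` must be parseable by Go's `time.ParseDuration`, valid values look like `300ms`, `5s`, `6m`, or `1h30m`. A minimal sketch of an equivalent validity check (a hypothetical helper, not part of this PR):

```python
import re

# Approximation of Go's time.ParseDuration grammar: a signed sequence of
# decimal numbers, each with an optional fraction and a required unit
# (ns, us/µs, ms, s, m, h), e.g. "6m" or "1h30m".
GO_DURATION_RE = re.compile(r"^[+-]?(\d+(\.\d*)?(ns|us|µs|ms|s|m|h))+$")

def is_valid_go_duration(value: str) -> bool:
    """Return True if Go's time.ParseDuration would likely accept `value`."""
    # Go also accepts a bare "0" with no unit as a special case.
    return value == "0" or bool(GO_DURATION_RE.match(value))

assert is_valid_go_duration("6m")     # the new default
assert is_valid_go_duration("5s")     # the old default
assert not is_valid_go_duration("6")  # a bare number (other than 0) needs a unit
```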
4 changes: 2 additions & 2 deletions charms/kfp-api/metadata.yaml
@@ -9,13 +9,13 @@ website: https://charmhub.io/kfp-api
source: https://github.com/canonical/kfp-operators
issues: https://github.com/canonical/kfp-operators/issues
containers:
ml-pipeline-api-server:
apiserver:
resource: oci-image
resources:
oci-image:
type: oci-image
description: Backing OCI image
upstream-source: charmedkubeflow/api-server:2.0.0-alpha.7_20.04_1
upstream-source: gcr.io/ml-pipeline/api-server:2.0.3
requires:
mysql:
interface: mysql
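Note: the workload container is renamed from `ml-pipeline-api-server` to `apiserver`, and the image moves from the Charmed Kubeflow build to the upstream `gcr.io/ml-pipeline/api-server:2.0.3` image, matching the 2.0.3 upgrade in the PR title.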
144 changes: 52 additions & 92 deletions charms/kfp-api/src/charm.py
@@ -7,7 +7,6 @@
https://github.com/canonical/kfp-operators/
"""

import json
import logging
from pathlib import Path

@@ -24,14 +23,7 @@
from lightkube.models.core_v1 import ServicePort
from ops.charm import CharmBase
from ops.main import main
from ops.model import (
ActiveStatus,
BlockedStatus,
Container,
MaintenanceStatus,
ModelError,
WaitingStatus,
)
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, ModelError, WaitingStatus
from ops.pebble import CheckStatus, Layer
from serialized_data_interface import (
NoCompatibleVersions,
@@ -77,7 +69,7 @@ def __init__(self, *args):
f"--sampleconfig={SAMPLE_CONFIG} "
"-logtostderr=true "
)
self._container_name = "ml-pipeline-api-server"
self._container_name = "apiserver"
self._database_name = "mlpipeline"
self._container = self.unit.get_container(self._container_name)

@@ -101,7 +93,7 @@ def __init__(self, *args):
# setup events to be handled by main event handler
self.framework.observe(self.on.leader_elected, self._on_event)
self.framework.observe(self.on.config_changed, self._on_event)
self.framework.observe(self.on.ml_pipeline_api_server_pebble_ready, self._on_event)
self.framework.observe(self.on.apiserver_pebble_ready, self._on_event)
change_events = [
self.on["object-storage"].relation_changed,
self.on["kfp-viz"].relation_changed,
@@ -175,9 +167,7 @@ def k8s_resource_handler(self, handler: KubernetesResourceHandler):
@property
def service_environment(self):
"""Return environment variables based on model configuration."""
ret_env_vars = {"POD_NAMESPACE": self.model.name}

return ret_env_vars
return self._generate_environment()

@property
def _kfp_api_layer(self) -> Layer:
@@ -210,76 +200,71 @@ def _kfp_api_layer(self) -> Layer:

return Layer(layer_config)

def _generate_config(self, interfaces):
"""Generate configuration based on supplied data.
def _generate_environment(self) -> dict:
"""Generate environment based on supplied data.

The environment is generated based on:
- Supplied interfaces.
- Database data: from MySQL relation data or from data platform library.
- Model configuration.

Return:
env_vars(dict): a dictionary of environment variables for the api server.
"""

config = self.model.config
try:
interfaces = self._get_interfaces()
db_data = self._get_db_data()
os = self._get_object_storage(interfaces)
viz = self._get_viz(interfaces)
object_storage = self._get_object_storage(interfaces)
viz_data = self._get_viz(interfaces)
except ErrorWithStatus as error:
self.logger.error("Failed to generate container configuration.")
raise error

# at this point all data is correctly populated and proper config can be generated
config_json = {
"DBConfig": {
"ConMaxLifeTime": "120s",
"DBName": db_data["db_name"],
"DriverName": "mysql",
"GroupConcatMaxLen": "4194304",
"Host": db_data["db_host"],
"Password": db_data["db_password"],
"Port": db_data["db_port"],
"User": db_data["db_username"],
},
"ObjectStoreConfig": {
"AccessKey": os["access-key"],
"BucketName": config["object-store-bucket-name"],
"Host": f"{os['service']}.{os['namespace']}",
"Multipart": {"Disable": "true"},
"PipelinePath": "pipelines",
"Port": str(os["port"]),
"Region": "",
"SecretAccessKey": os["secret-key"],
"Secure": str(os["secure"]).lower(),
},
"ARCHIVE_CONFIG_LOG_FILE_NAME": config["log-archive-filename"],
"ARCHIVE_CONFIG_LOG_PATH_PREFIX": config["log-archive-prefix"],
"AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": str(
config["auto-update-default-version"]
).lower(),
"CACHE_IMAGE": config["cache-image"],
"CACHE_NODE_RESTRICTIONS": "false",
"CacheEnabled": str(config["cache-enabled"]).lower(),
"DefaultPipelineRunnerServiceAccount": config["runner-sa"],
"InitConnectionTimeout": config["init-connection-timeout"],
env_vars = {
# Configurations that are also defined in the upstream manifests
"AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": self.model.config[
"auto-update-default-version"
],
"KFP_API_SERVICE_NAME": KFP_API_SERVICE_NAME,
"KUBEFLOW_USERID_HEADER": "kubeflow-userid",
"KUBEFLOW_USERID_PREFIX": "",
"POD_NAMESPACE": self.model.name,
"OBJECTSTORECONFIG_SECURE": "false",
"OBJECTSTORECONFIG_BUCKETNAME": self.model.config["object-store-bucket-name"],
"DBCONFIG_CONMAXLIFETIME": "120s",
"DB_DRIVER_NAME": "mysql",
"DBCONFIG_MYSQLCONFIG_USER": db_data["db_username"],
"DBCONFIG_MYSQLCONFIG_PASSWORD": db_data["db_password"],
"DBCONFIG_MYSQLCONFIG_DBNAME": db_data["db_name"],
"DBCONFIG_MYSQLCONFIG_HOST": db_data["db_host"],
"DBCONFIG_MYSQLCONFIG_PORT": db_data["db_port"],
"OBJECTSTORECONFIG_ACCESSKEY": object_storage["access-key"],
"OBJECTSTORECONFIG_SECRETACCESSKEY": object_storage["secret-key"],
"DEFAULTPIPELINERUNNERSERVICEACCOUNT": "default-editor",
"MULTIUSER": "true",
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz["service-name"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz["service-port"],
"VISUALIZATIONSERVICE_NAME": viz_data["service-name"],
"VISUALIZATIONSERVICE_PORT": viz_data["service-port"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz_data["service-name"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz_data["service-port"],
"CACHE_IMAGE": self.model.config["cache-image"],
# Configurations charmed-kubeflow adds to those of upstream
"ARCHIVE_CONFIG_LOG_FILE_NAME": self.model.config["log-archive-filename"],
"ARCHIVE_CONFIG_LOG_PATH_PREFIX": self.model.config["log-archive-prefix"],
# OBJECTSTORECONFIG_HOST and _PORT set the object storage configurations,
# taking precedence over configuration in the config.json or
# MINIO_SERVICE_SERVICE_* environment variables.
# NOTE: While OBJECTSTORECONFIG_HOST and _PORT control the object store
# that the apiserver connects to, other parts of kfp currently cannot use
# object stores with arbitrary names. See
# https://github.com/kubeflow/pipelines/issues/9689 and
# https://github.com/canonical/minio-operator/pull/151 for more details.
"OBJECTSTORECONFIG_HOST": f"{object_storage['service']}.{object_storage['namespace']}",
"OBJECTSTORECONFIG_PORT": str(object_storage["port"]),
"OBJECTSTORECONFIG_REGION": "",
}
return config_json

def _check_container_connection(self, container: Container) -> None:
"""Check if connection can be made with container.

Args:
container: the named container in a unit to check.

Raises:
ErrorWithStatus if the connection cannot be made.
"""
if not container.can_connect():
raise ErrorWithStatus("Pod startup is not complete", MaintenanceStatus)
return env_vars
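The effect of this hunk is that the nested `config.json` keys now map onto flat environment variables (for example, `DBConfig.Host` becomes `DBCONFIG_MYSQLCONFIG_HOST`), which Pebble injects into the workload service. A minimal sketch of the assumed wiring (names and command elided, not verbatim from this PR):

```python
from ops.pebble import Layer

def kfp_api_layer_sketch(charm) -> Layer:
    """Sketch only: how service_environment is assumed to reach the workload."""
    return Layer(
        {
            "services": {
                charm._container_name: {  # "apiserver" after this PR
                    "override": "replace",
                    "summary": "kfp-api server",
                    "command": "...",  # elided; assembled in __init__
                    "startup": "enabled",
                    # the env vars built by _generate_environment()
                    "environment": charm.service_environment,
                }
            }
        }
    )
```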

def _check_status(self):
"""Check status of workload and set status accordingly."""
@@ -301,28 +286,6 @@ def _check_status(self):
raise ErrorWithStatus("Workload failed health check", MaintenanceStatus)
self.model.unit.status = ActiveStatus()

def _upload_files_to_container(self, config_json):
"""Upload required files to container."""
try:
self._check_container_connection(self.container)
except ErrorWithStatus as error:
self.model.unit.status = error.status
raise error
try:
with open("src/sample_config.json", "r") as sample_config:
file_content = sample_config.read()
self.container.push(SAMPLE_CONFIG, file_content, make_dirs=True)
except ErrorWithStatus as error:
self.logger.error("Failed to upload sample config to container.")
raise error
try:
file_content = json.dumps(config_json)
config = CONFIG_DIR / "config.json"
self.container.push(config, file_content, make_dirs=True)
except ErrorWithStatus as error:
self.logger.error("Failed to upload config to container.")
raise error

def _send_info(self, interfaces):
if interfaces["kfp-api"]:
interfaces["kfp-api"].send_data(
@@ -680,12 +643,9 @@ def _on_event(self, event, force_conflicts: bool = False) -> None:
# Set up all relations/fetch required data
try:
self._check_leader()
interfaces = self._get_interfaces()
config_json = self._generate_config(interfaces)
self._upload_files_to_container(config_json)
self._apply_k8s_resources(force_conflicts=force_conflicts)
update_layer(self._container_name, self._container, self._kfp_api_layer, self.logger)
self._send_info(interfaces)
self._send_info(self._get_interfaces())
except ErrorWithStatus as err:
self.model.unit.status = err.status
self.logger.error(f"Failed to handle {event} with error: {err}")
12 changes: 0 additions & 12 deletions charms/kfp-api/src/sample_config.json

This file was deleted.
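This deletion follows from the charm.py changes above: with all configuration delivered as environment variables, the charm no longer pushes `sample_config.json` (or a generated `config.json`) into the workload container.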

28 changes: 24 additions & 4 deletions charms/kfp-api/src/templates/auth_manifests.yaml.j2
@@ -1,7 +1,12 @@
# Source manifests/apps/pipeline/upstream/base/installs/multi-user/api-service/cluster-role**.yaml
# These manifest files have been modified to suit the needs of the charm; the app label, metadata name,
# and namespace fields will be rendered with information from the application and the model.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ app_name }}
labels:
app: {{ app_name }}
name: {{ app_name }}-role
rules:
- apiGroups:
- ''
@@ -51,21 +56,32 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ app_name }}
labels:
app: {{ app_name }}
name: {{ app_name }}-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ app_name }}
name: {{ app_name }}-role
subjects:
- kind: ServiceAccount
name: {{ app_name }}
name: {{ app_name }}-sa
namespace: {{ namespace }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: {{ app_name }}
name: {{ app_name }}-sa
namespace: {{ namespace }}
---
# manifests/apps/pipeline/upstream/base/installs/multi-user/view-edit-cluster-roles.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
name: kubeflow-pipelines-edit
aggregationRule:
@@ -81,6 +97,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pipelines-edit: "true"
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
name: kubeflow-pipelines-view
@@ -97,6 +114,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
app.kubernetes.io/component: ml-pipeline
app.kubernetes.io/name: kubeflow-pipelines
application-crd-id: kubeflow-pipelines
@@ -165,6 +183,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
app.kubernetes.io/component: ml-pipeline
app.kubernetes.io/name: kubeflow-pipelines
application-crd-id: kubeflow-pipelines
@@ -208,6 +227,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
application-crd-id: kubeflow-pipelines
rbac.authorization.k8s.io/aggregate-to-admin: "true"
name: argo-aggregate-to-admin
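To illustrate the renamed resources above, rendering the new ServiceAccount block with illustrative values (`app_name=kfp-api`, `namespace=kubeflow`) would produce a `kfp-api-sa` account; a minimal sketch using jinja2:

```python
from jinja2 import Template

SERVICE_ACCOUNT_TEMPLATE = Template(
    """\
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: {{ app_name }}
  name: {{ app_name }}-sa
  namespace: {{ namespace }}
"""
)

# Illustrative values; the charm supplies these from the application and model.
print(SERVICE_ACCOUNT_TEMPLATE.render(app_name="kfp-api", namespace="kubeflow"))
```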
5 changes: 4 additions & 1 deletion charms/kfp-api/src/templates/ml-pipeline-service.yaml.j2
@@ -14,4 +14,7 @@ spec:
protocol: TCP
targetPort: 8888
selector:
app.kubernetes.io/name: {{ service }}
# This selector ensures this Service identifies
# the kfp-api Pod correctly, as it will have
# the same label
app.kubernetes.io/name: {{ app_name }}