diff --git a/examples/vertex_mlops_enterprise/.github/workflows/containers.yml.TEMPLATE b/examples/vertex_mlops_enterprise/.github/workflows/containers.yml.TEMPLATE index 4337b493e9..9def117010 100644 --- a/examples/vertex_mlops_enterprise/.github/workflows/containers.yml.TEMPLATE +++ b/examples/vertex_mlops_enterprise/.github/workflows/containers.yml.TEMPLATE @@ -35,6 +35,27 @@ env: WORKLOAD_ID_PROVIDER: ${wip} CLOUDBUILD_LOGS: gs://${project_id}_cloudbuild/logs jobs: + build-container-bqml: + name: 'Build container CI/CD BigQuery ML' + runs-on: 'ubuntu-latest' + steps: + - uses: 'actions/checkout@v3' + with: + token: $${{ github.token }} + + - id: 'auth' + name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + create_credentials_file: 'true' + workload_identity_provider: $${{ env.WORKLOAD_ID_PROVIDER }} + service_account: $${{ env.SERVICE_ACCOUNT }} + access_token_lifetime: 3600s + + - name: 'Build container' + run: | + gcloud builds submit --gcs-log-dir=$${{ env.CLOUDBUILD_LOGS }} --project $${{ env.PROJECT_ID }} --region $${{ env.REGION }} --tag $${{ env.DOCKER_REPO }}/cicd-bqml:latest src/bqml_pipeline/. --timeout=15m --machine-type=e2-highcpu-8 --suppress-logs + build-container-cicd-tfx: name: 'Build container CI/CD TFX' runs-on: 'ubuntu-latest' diff --git a/examples/vertex_mlops_enterprise/.github/workflows/deploy.yml.TEMPLATE b/examples/vertex_mlops_enterprise/.github/workflows/deploy.yml.TEMPLATE index 7be2585cae..74e6c969ce 100644 --- a/examples/vertex_mlops_enterprise/.github/workflows/deploy.yml.TEMPLATE +++ b/examples/vertex_mlops_enterprise/.github/workflows/deploy.yml.TEMPLATE @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: Deploy tfx model +name: Deploy Vertex AI tfx model on: workflow_dispatch: @@ -31,7 +31,7 @@ env: WORKLOAD_ID_PROVIDER: ${wip} jobs: deploy-model: - name: 'Deploy model to endpoint' + name: 'Deploy TFX model to endpoint' runs-on: 'ubuntu-latest' steps: - uses: 'actions/checkout@v3' @@ -47,6 +47,6 @@ jobs: service_account: $${{ env.SERVICE_ACCOUNT }} access_token_lifetime: 3600s - - name: 'Deploy model' - run: gcloud builds submit --no-source --config build/$${{ env.ENVIRONMENT }}/model-deployment.yaml --project $${{ env.PROJECT_ID }} --region $${{ env.REGION }} --machine-type=e2-highcpu-8 --suppress-logs + - name: 'Deploy TFX model' + run: gcloud builds submit --no-source --config build/$${{ env.ENVIRONMENT }}/model-deployment-tfx.yaml --project $${{ env.PROJECT_ID }} --region $${{ env.REGION }} --machine-type=e2-highcpu-8 --suppress-logs \ No newline at end of file diff --git a/examples/vertex_mlops_enterprise/.gitignore b/examples/vertex_mlops_enterprise/.gitignore index 884b95def8..a8e95965f0 100644 --- a/examples/vertex_mlops_enterprise/.gitignore +++ b/examples/vertex_mlops_enterprise/.gitignore @@ -4,3 +4,4 @@ terraform.tfstate* .DS_Store **/__pycache__/** venv +**/.ipynb_checkpoints/** diff --git a/examples/vertex_mlops_enterprise/README.md b/examples/vertex_mlops_enterprise/README.md index f9e31ad647..18d29e7dd9 100644 --- a/examples/vertex_mlops_enterprise/README.md +++ b/examples/vertex_mlops_enterprise/README.md @@ -7,19 +7,20 @@ allow larger organizations achieve scale in terms of number of models. ## Contents of this example -We provide three notebooks to cover the three processes that we typically observe: +We provide three Vertex AI pipelines examples based on different technologies: -1. [01-experimentation.ipynb](01-experimentation.ipynb) covers the development process, where the features, the model and the training process are defined. -1. 
[02-cicd.ipynb](02-cicd.ipynb) covers the the CI/CD process that tests the code produced in the experimentation phase, and trains a production-ready model. -1. [03-prediction.ipynb](03-prediction.ipynb) cover the deployment process to make the model available, for example on a Vertex AI Endpoint or through Vertex AI Batch Prediction. +- [KFP pipeline](src/kfp_pipelines/README.md) using Vertex AI custom training +- [KFP pipeline using BigQuery ML](src/bqml_pipeline/README.md) +- [TFX pipeline](src/tfx_pipelines/) using Vertex AI custom training. In this case, a set of notebooks is also provided for the experimentation phase: + 1. [experimentation.ipynb](01-experimentation.ipynb) covers the development process, where the features, the model and the training process are defined. + 2. [cicd.ipynb](02-cicd.ipynb) covers the CI/CD process that tests the code produced in the experimentation phase, and trains a production-ready model. + 3. [prediction.ipynb](03-prediction.ipynb) covers the deployment process to make the model available, for example on a Vertex AI Endpoint or through Vertex AI Batch Prediction. -Each of the notebooks provides detailed instructions on prerequisites for their execution and they should be self-explanatory. - -Once you have reviewed the notebooks, you can go on with these advanced steps to set up the automated environments and the CI/CD process using Github. +Once you have reviewed the pipelines, you can go on with these advanced steps to set up the automated environments and the CI/CD process using Github. 1. [Environments](doc/01-ENVIRONMENTS.md) covers how to automate the environments deployments using Terraform. 1. [GIT Setup](doc/02-GIT_SETUP.md) covers how to configure a Github repo to be used for the CI/CD process. -1. [03-prediction.ipynb](doc/03-MLOPS.md) cover test the automated MLOps end2end process. +1. [MLOps end2end process](doc/03-MLOPS.md) covers how to test the automated MLOps end2end process. 
## Contributing diff --git a/examples/vertex_mlops_enterprise/build/model-deployment.yaml.TEMPLATE b/examples/vertex_mlops_enterprise/build/model-deployment-tfx.yaml.TEMPLATE similarity index 100% rename from examples/vertex_mlops_enterprise/build/model-deployment.yaml.TEMPLATE rename to examples/vertex_mlops_enterprise/build/model-deployment-tfx.yaml.TEMPLATE diff --git a/examples/vertex_mlops_enterprise/build/pipeline-deployment.yaml.TEMPLATE b/examples/vertex_mlops_enterprise/build/pipeline-deployment-bqml.yaml.TEMPLATE similarity index 53% rename from examples/vertex_mlops_enterprise/build/pipeline-deployment.yaml.TEMPLATE rename to examples/vertex_mlops_enterprise/build/pipeline-deployment-bqml.yaml.TEMPLATE index 7b8f02a064..a68637fe09 100644 --- a/examples/vertex_mlops_enterprise/build/pipeline-deployment.yaml.TEMPLATE +++ b/examples/vertex_mlops_enterprise/build/pipeline-deployment-bqml.yaml.TEMPLATE @@ -47,109 +47,61 @@ steps: # Run datasource_utils unit tests. - name: '$_CICD_IMAGE_URI' - entrypoint: 'pytest' - args: ['src/tests/datasource_utils_tests.py', '-s'] - dir: '$_WORKDIR' - env: - - 'PROJECT=$_PROJECT' - - 'BQ_LOCATION=$_BQ_LOCATION' - - 'BQ_DATASET_NAME=$_BQ_DATASET_NAME' - - 'ML_TABLE=$_ML_TABLE' - id: 'Unit Test Datasource Utils' - waitFor: ['Clone Repository'] - - -# Run model unit tests. -- name: '$_CICD_IMAGE_URI' - entrypoint: 'pytest' - args: ['src/tests/model_tests.py', '-s'] - dir: '$_WORKDIR' - id: 'Unit Test Model' + entrypoint: 'echo' + args: ['Running unit tests - dummy build'] + id: 'Unit Tests' waitFor: ['Clone Repository'] - timeout: 1800s -# Test e2e pipeline using local runner. 
-- name: '$_CICD_IMAGE_URI' - entrypoint: 'pytest' - args: ['src/tests/pipeline_deployment_tests.py::test_e2e_pipeline', '-s'] - dir: '$_WORKDIR' - env: - - 'PROJECT=$_PROJECT' - - 'REGION=$_REGION' - - 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' - - 'VERTEX_DATASET_NAME=$_VERTEX_DATASET_NAME' - - 'GCS_LOCATION=$_TEST_GCS_LOCATION' - - 'TRAIN_LIMIT=$_CI_TRAIN_LIMIT' - - 'TEST_LIMIT=$_CI_TEST_LIMIT' - - 'UPLOAD_MODEL=$_CI_UPLOAD_MODEL' - - 'ACCURACY_THRESHOLD=$_CI_ACCURACY_THRESHOLD' - id: 'Local Test E2E Pipeline' - waitFor: ['Clone Repository'] - timeout: 1800s - # Compile the pipeline. - name: '$_CICD_IMAGE_URI' entrypoint: 'python' - args: ['build/utils.py', - '--mode', 'compile-pipeline', - '--pipeline-name', '$_PIPELINE_NAME' - ] - dir: '$_WORKDIR' + args: ['pipeline.py', '--compile-only'] + dir: '$_WORKDIR/src/bqml_pipeline/src/' env: - - 'PROJECT=$_PROJECT' + - 'PROJECT_ID=$_PROJECT' - 'REGION=$_REGION' - 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' - - 'VERTEX_DATASET_NAME=$_VERTEX_DATASET_NAME' - - 'GCS_LOCATION=$_GCS_LOCATION' - - 'DATAFLOW_IMAGE_URI=$_DATAFLOW_IMAGE_URI' - - 'TFX_IMAGE_URI=$_TFX_IMAGE_URI' - - 'BEAM_RUNNER=$_BEAM_RUNNER' - - 'TRAINING_RUNNER=$_TRAINING_RUNNER' - 'SERVICE_ACCOUNT=$_SERVICE_ACCOUNT' - - 'SUBNETWORK=$_SUBNETWORK' - - 'ACCURACY_THRESHOLD=$_CI_ACCURACY_THRESHOLD' - + - 'NETWORK=$_NETWORK' + - 'BQ_DATASET_NAME=$_BQ_DATASET_NAME' + - 'ML_TABLE=$_ML_TABLE' + - 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' + - 'PIPELINE_NAME=$_PIPELINE_NAME' + - 'PIPELINES_STORE=$_PIPELINES_STORE' + - 'CICD_IMAGE_URI=$_CICD_IMAGE_URI' + - 'CICD_IMAGE_MODEL_CARD=$_CICD_IMAGE_MODEL_CARD' + - 'DATAFLOW_SA=$_SERVICE_ACCOUNT' + - 'DATAFLOW_NETWORK=$_DATAFLOW_NETWORK' id: 'Compile Pipeline' - waitFor: ['Local Test E2E Pipeline', 'Unit Test Datasource Utils', 'Unit Test Model'] - + waitFor: ['Unit Tests'] # Upload compiled pipeline to GCS. 
- name: 'gcr.io/cloud-builders/gsutil' args: ['cp', '$_PIPELINE_NAME.json', '$_PIPELINES_STORE'] - dir: '$_WORKDIR' + dir: '$_WORKDIR/src/bqml_pipeline/src/' id: 'Upload Pipeline to GCS' waitFor: ['Compile Pipeline'] serviceAccount: 'projects/$_PROJECT/serviceAccounts/$_SERVICE_ACCOUNT' logsBucket: '$_GCS_BUCKET' -timeout: 3600s +timeout: 7200s substitutions: _REPO_URL: git@github.com:${github_org}/${github_repo} + _CICD_IMAGE_URI: '${docker_repo}/cicd-bqml:latest' + _CICD_IMAGE_MODEL_CARD: '${docker_repo}/model-card:latest' _BRANCH: ${github_branch} _REGION: ${region} _PROJECT: ${project_id} _GCS_BUCKET: ${project_id}_cloudbuild/logs - _CICD_IMAGE_URI: '${docker_repo}/cicd-tfx:latest' - _DATAFLOW_IMAGE_URI: '${docker_repo}/dataflow:latest' - _TFX_IMAGE_URI: '${docker_repo}/vertex:latest' - _GCS_LOCATION: 'gs://${project_id}/creditcards/' - _TEST_GCS_LOCATION: 'gs://${project_id}/creditcards/e2e_tests' - _BQ_LOCATION: ${region} _BQ_DATASET_NAME: creditcards _ML_TABLE: creditcards_ml - _VERTEX_DATASET_NAME: creditcards - _MODEL_DISPLAY_NAME: creditcards-classifier-v02 - _CI_TRAIN_LIMIT: '1000' - _CI_TEST_LIMIT: '100' - _CI_UPLOAD_MODEL: '0' - _CI_ACCURACY_THRESHOLD: '-0.1' - _BEAM_RUNNER: DataflowRunner - _TRAINING_RUNNER: vertex - _PIPELINE_NAME: creditcards-classifier-v02-train-pipeline - _PIPELINES_STORE: gs://${project_id}/creditcards/compiled_pipelines/ - _SUBNETWORK: ${subnetwork} + _PIPELINE_NAME: creditcards-classifier-bqml-train + _PIPELINES_STORE: gs://${bucket_name}/creditcards/compiled_pipelines/ + _MODEL_DISPLAY_NAME: creditcards-bqml + _NETWORK: ${subnetwork} + _DATAFLOW_NETWORK: ${dataflow_network} _SERVICE_ACCOUNT: ${sa_mlops} _WORKDIR: ${github_repo} options: diff --git a/examples/vertex_mlops_enterprise/doc/01-ENVIRONMENTS.md b/examples/vertex_mlops_enterprise/doc/01-ENVIRONMENTS.md index 3005226e17..476202b379 100644 --- a/examples/vertex_mlops_enterprise/doc/01-ENVIRONMENTS.md +++ b/examples/vertex_mlops_enterprise/doc/01-ENVIRONMENTS.md @@ 
-42,11 +42,10 @@ cd professional-services/ Setup your new Github repo using the Github web console or CLI. -Copy the `vertex_mlops_enterprise` folder to your local folder, including the Github actions: +Copy the `vertex_mlops_enterprise` folder to your local folder, including the Github actions, hidden dirs and files: ``` -cp -R ./examples/vertex_mlops_enterprise/* ./ -cp -R ./examples/vertex_mlops_enterprise/.github ./ +cp -r ./examples/vertex_mlops_enterprise/. /path/to/your-repo/ ``` Commit the files in the main branch (`main`): diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/Dockerfile b/examples/vertex_mlops_enterprise/src/bqml_pipeline/Dockerfile new file mode 100644 index 0000000000..bf29a2dca1 --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.8 + + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install -r requirements.txt + +COPY src . +ENV PYTHONPATH=/ diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/README.md b/examples/vertex_mlops_enterprise/src/bqml_pipeline/README.md new file mode 100644 index 0000000000..40daea7f9a --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/README.md @@ -0,0 +1,3 @@ +# Reference BQML Pipeline + +Reference BigQuery ML pipeline implementation. 
\ No newline at end of file diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/requirements.txt b/examples/vertex_mlops_enterprise/src/bqml_pipeline/requirements.txt new file mode 100644 index 0000000000..ca73a98618 --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/requirements.txt @@ -0,0 +1,5 @@ +jinja2~=3.1.2 +pandas~=1.5.3 +matplotlib~=3.7.1 +google-cloud-aiplatform~=1.35.0 +google-cloud-pipeline-components~=1.0.45 \ No newline at end of file diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/config.py b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/config.py new file mode 100644 index 0000000000..e3b1b9f8fc --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/config.py @@ -0,0 +1,35 @@ +import os + +PROJECT_ID = os.getenv("PROJECT_ID", "") +REGION = os.getenv("REGION", "") +IMAGE=os.getenv("CICD_IMAGE_URI", f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/base:latest') +TRAIN_COMPONENT_IMAGE=f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/train-fraud:latest' +IMAGE_MODEL_CARD=os.getenv("CICD_IMAGE_MODEL_CARD", f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/model-card:latest') + +CLASS_NAMES = ['OK', 'Fraud'] +TARGET_COLUMN = 'Class' + +PIPELINE_NAME = os.getenv("PIPELINE_NAME", 'bqml-creditcards') +PIPELINE_ROOT = os.getenv("PIPELINES_STORE", f'gs://{PROJECT_ID}/pipeline_root/{PIPELINE_NAME}') +SERVICE_ACCOUNT = os.getenv("SERVICE_ACCOUNT") # returns None if not defined +NETWORK = os.getenv("NETWORK") # returns None if not defined +KEY_ID = os.getenv("CMEK_KEY_ID") # e.g. 
projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key + +BQ_DATASET_NAME=os.getenv("BQ_DATASET_NAME","creditcards") +BQ_INPUT_DATA=f"{PROJECT_ID}.{BQ_DATASET_NAME}.{os.getenv('ML_TABLE','creditcards_ml')}" +PARENT_MODEL='' # f'projects/{PROJECT_ID}/locations/{REGION}/models/YOUR_NUMERIC_MODEL_ID_HERE' + +BQ_OUTPUT_DATASET_ID="creditcards_batch_out" + +MODEL_DISPLAY_NAME = os.getenv("MODEL_DISPLAY_NAME", 'creditcards-bqml') +MODEL_CARD_CONFIG='../model_card_config.json' + +PRED_CONTAINER='europe-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-6:latest' +ENDPOINT_NAME=PIPELINE_NAME + +EMAILS=['abcdef@google.com'] + +# Evaluation pipeline +DATAFLOW_SA = os.getenv("DATAFLOW_SA") +DATAFLOW_NETWORK = os.getenv("DATAFLOW_NETWORK") +DATAFLOW_PUBLIC_IPS = False diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/pipeline.py b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/pipeline.py new file mode 100644 index 0000000000..47b3150d47 --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/pipeline.py @@ -0,0 +1,195 @@ +from typing import NamedTuple +from datetime import datetime +import argparse + +import google.cloud.aiplatform as vertex_ai +# kfp and cloud components +import logging +from google_cloud_pipeline_components.v1.bigquery import ( + BigqueryCreateModelJobOp, BigqueryEvaluateModelJobOp, + BigqueryExplainPredictModelJobOp) + +from kfp import dsl +from kfp.v2 import compiler +from kfp.v2.dsl import Artifact, Input, component + +from config import (PIPELINE_ROOT, PIPELINE_NAME, BQ_INPUT_DATA, + MODEL_DISPLAY_NAME, ENDPOINT_NAME, + SERVICE_ACCOUNT, NETWORK, KEY_ID, + PROJECT_ID, REGION, TARGET_COLUMN, + BQ_DATASET_NAME) + +caching = True +TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M") + +parser = argparse.ArgumentParser() +parser.add_argument( + '--compile-only', + action='store_true' # default: False +) + +args = parser.parse_args() + + + +@component( + base_image="python:3.8-slim", + 
packages_to_install=["jinja2", "pandas", "matplotlib"], + output_component_file=f"./build_bq_evaluate_metrics.yaml", +) +def get_model_evaluation_metrics( + metrics_in: Input[Artifact], +) -> NamedTuple("Outputs", [("accuracy", float)]): + """ + Get the accuracy from the metrics + Args: + metrics_in: metrics artifact + Returns: + accuracy: accuracy + """ + + import pandas as pd + + def get_column_names(header): + """ + Helper function to get the column names from the metrics table. + Args: + header: header + Returns: + column_names: column names + """ + header_clean = header.replace("_", " ") + header_abbrev = "".join([h[0].upper() for h in header_clean.split()]) + header_prettied = f"{header_clean} ({header_abbrev})" + return header_prettied + + # Extract rows and schema from metrics artifact + rows = metrics_in.metadata["rows"] + schema = metrics_in.metadata["schema"] + + # Convert into a tabular format + columns = [metrics["name"] for metrics in schema["fields"] if "name" in metrics] + records = [] + for row in rows: + records.append([dl["v"] for dl in row["f"]]) + + metrics = pd.DataFrame.from_records(records, columns=columns).astype(float).round(3) + + metrics = metrics.reset_index() + + # Create metrics dictionary for the model + accuracy = round(float(metrics.accuracy), 3) + component_outputs = NamedTuple("Outputs", [("accuracy", float)]) + + return component_outputs(accuracy) + + + @component( + base_image="python:3.8-slim", + packages_to_install=["google-cloud-aiplatform"], + ) + def upload_model_enpoint( + project: str, + location: str, + bq_model_name: str, + ): + """ + Uploads the model to Vertex AI + Args: + project: Project ID + location: Region + bq_model_name: A fully-qualified model resource name or model ID. + Example: "projects/123/locations/us-central1/models/456" or + "456" when project and location are initialized or passed. 
+ Returns: + None + """ + from google.cloud import aiplatform as vertex_ai + + model = vertex_ai.Model(model_name=bq_model_name) # resource name/ID supplied by the caller + + endpoint = vertex_ai.Endpoint.list(order_by="update_time") + endpoint = endpoint[-1] + + model.deploy( + endpoint=endpoint, + min_replica_count=1, + max_replica_count=1, + ) + + model.wait() + + return + +@dsl.pipeline( + name="creditcards-classifier-bqml-train", + description="Trains and deploys bqml model to detect fraud", + pipeline_root=PIPELINE_ROOT, + ) +def bqml_pipeline( + bq_table: str = BQ_INPUT_DATA, + model: str = MODEL_DISPLAY_NAME, + project: str = PROJECT_ID, + region: str = REGION, + endpoint_name: str = ENDPOINT_NAME, + ): + + bq_model_op = BigqueryCreateModelJobOp( + project=project, + location=region, + query=f"""CREATE OR REPLACE MODEL `{BQ_DATASET_NAME}.{model}` + OPTIONS ( + MODEL_TYPE='LOGISTIC_REG', + INPUT_LABEL_COLS=['{TARGET_COLUMN}'], + EARLY_STOP=TRUE, + model_registry='vertex_ai', + vertex_ai_model_id='{model}', + vertex_ai_model_version_aliases=['logit', 'experimental'] + ) + AS SELECT * EXCEPT(Time,ML_use) FROM `{bq_table}` + """, + ) + + _ = BigqueryExplainPredictModelJobOp( + project=project, + location=region, + table_name=f"{bq_table}", + model=bq_model_op.outputs["model"], + ) + + _ = BigqueryEvaluateModelJobOp( + project=project, location=region, model=bq_model_op.outputs["model"] + ).after(bq_model_op) + + +# Compile and run the pipeline +vertex_ai.init(project=PROJECT_ID, location=REGION, encryption_spec_key_name=KEY_ID) + +logging.getLogger().setLevel(logging.INFO) +logging.info(f"Init with project {PROJECT_ID} in region {REGION}. 
Pipeline root: {PIPELINE_ROOT}") + +FORMAT = ".json" + + +logging.info(f"Compiling pipeline to {PIPELINE_NAME + FORMAT}") +compiler.Compiler().compile( + pipeline_func=bqml_pipeline, + package_path=PIPELINE_NAME + FORMAT +) + +if not args.compile_only: + run = vertex_ai.PipelineJob( + project=PROJECT_ID, + location=REGION, + display_name=PIPELINE_NAME, + template_path=PIPELINE_NAME + FORMAT, + job_id=f"{PIPELINE_NAME}-{TIMESTAMP}", + pipeline_root=PIPELINE_ROOT, + parameter_values={ + "bq_table": BQ_INPUT_DATA, + }, + enable_caching=caching + ) + + run.submit(service_account=SERVICE_ACCOUNT, + network=NETWORK) diff --git a/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/xgb-creditcards.json b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/xgb-creditcards.json new file mode 100644 index 0000000000..8fe2fb202d --- /dev/null +++ b/examples/vertex_mlops_enterprise/src/bqml_pipeline/src/xgb-creditcards.json @@ -0,0 +1,535 @@ +{ + "pipelineSpec": { + "components": { + "comp-bigquery-create-model-job": { + "executorLabel": "exec-bigquery-create-model-job", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.BQMLModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-evaluate-model-job": { + "executorLabel": "exec-bigquery-evaluate-model-job", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.BQMLModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": 
"STRING" + }, + "project": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + }, + "query_statement": { + "type": "STRING" + }, + "table_name": { + "type": "STRING" + }, + "threshold": { + "type": "DOUBLE" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-explain-predict-model-job": { + "executorLabel": "exec-bigquery-explain-predict-model-job", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.BQMLModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "num_integral_steps": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + }, + "query_statement": { + "type": "STRING" + }, + "table_name": { + "type": "STRING" + }, + "threshold": { + "type": "DOUBLE" + }, + "top_k_features": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + } + }, + "deploymentSpec": { + "executors": { + "exec-bigquery-create-model-job": { + "container": { + "args": [ + "--type", + "BigqueryCreateModelJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": 
{{$.inputs.parameters['query_parameters']}}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.bigquery.create_model.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44" + } + }, + "exec-bigquery-evaluate-model-job": { + "container": { + "args": [ + "--type", + "BigqueryEvaluateModelJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--model_name", + "{{$.inputs.artifacts['model'].metadata['projectId']}}.{{$.inputs.artifacts['model'].metadata['datasetId']}}.{{$.inputs.artifacts['model'].metadata['modelId']}}", + "--table_name", + "{{$.inputs.parameters['table_name']}}", + "--query_statement", + "{{$.inputs.parameters['query_statement']}}", + "--threshold", + "{{$.inputs.parameters['threshold']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.bigquery.evaluate_model.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44" + } + }, + "exec-bigquery-explain-predict-model-job": { + "container": { + "args": [ + "--type", + "BigqueryExplainPredictModelJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--model_name", + 
"{{$.inputs.artifacts['model'].metadata['projectId']}}.{{$.inputs.artifacts['model'].metadata['datasetId']}}.{{$.inputs.artifacts['model'].metadata['modelId']}}", + "--table_name", + "{{$.inputs.parameters['table_name']}}", + "--query_statement", + "{{$.inputs.parameters['query_statement']}}", + "--top_k_features", + "{{$.inputs.parameters['top_k_features']}}", + "--threshold", + "{{$.inputs.parameters['threshold']}}", + "--num_integral_steps", + "{{$.inputs.parameters['num_integral_steps']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.bigquery.explain_predict_model.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44" + } + } + } + }, + "pipelineInfo": { + "name": "bqml-pipeline" + }, + "root": { + "dag": { + "tasks": { + "bigquery-create-model-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-model-job" + }, + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "region" + }, + "pipelineparam--bq_table": { + "componentInputParameter": "bq_table" + }, + "pipelineparam--model": { + "componentInputParameter": "model" + }, + "project": { + "componentInputParameter": "project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "CREATE OR REPLACE 
MODEL `None.{{$.inputs.parameters['pipelineparam--model']}}`\n OPTIONS (\n MODEL_TYPE='LOGISTIC_REG',\n INPUT_LABEL_COLS=['tx_fraud'],\n EARLY_STOP=TRUE,\n model_registry='vertex_ai',\n vertex_ai_model_id='bqml_fraud_classifier_pipeline',\n vertex_ai_model_version_aliases=['logit', 'experimental']\n )\n AS SELECT * FROM `{{$.inputs.parameters['pipelineparam--bq_table']}}`" + } + } + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "bigquery-create-model-job" + } + }, + "bigquery-evaluate-model-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-evaluate-model-job" + }, + "dependentTasks": [ + "bigquery-create-model-job" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "bigquery-create-model-job" + } + } + }, + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "location": { + "componentInputParameter": "region" + }, + "project": { + "componentInputParameter": "project" + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "query_statement": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "table_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "threshold": { + "runtimeValue": { + "constantValue": { + "doubleValue": -1.0 + } + } + } + } + }, + "taskInfo": { + "name": "bigquery-evaluate-model-job" + } + }, + "bigquery-explain-predict-model-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-explain-predict-model-job" + }, + "dependentTasks": [ + "bigquery-create-model-job" + ], + "inputs": { + "artifacts": { + "model": { + 
"taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "bigquery-create-model-job" + } + } + }, + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "region" + }, + "num_integral_steps": { + "runtimeValue": { + "constantValue": { + "intValue": "-1" + } + } + }, + "pipelineparam--bq_table": { + "componentInputParameter": "bq_table" + }, + "project": { + "componentInputParameter": "project" + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "query_statement": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "table_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.inputs.parameters['pipelineparam--bq_table']}}" + } + } + }, + "threshold": { + "runtimeValue": { + "constantValue": { + "doubleValue": -1.0 + } + } + }, + "top_k_features": { + "runtimeValue": { + "constantValue": { + "intValue": "-1" + } + } + } + } + }, + "taskInfo": { + "name": "bigquery-explain-predict-model-job" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "bq_table": { + "type": "STRING" + }, + "endpoint_name": { + "type": "STRING" + }, + "model": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "region": { + "type": "STRING" + } + } + } + }, + "schemaVersion": "2.0.0", + "sdkVersion": "kfp-1.8.22" + }, + "runtimeConfig": { + "gcsOutputDirectory": "gs:///pipeline_root/xgb-creditcards", + "parameters": { + "bq_table": { + "stringValue": ".None.None" + }, + "endpoint_name": { + "stringValue": "xgb-creditcards" + }, + "model": { + "stringValue": "creditcards-kfp" + }, + "project": { + "stringValue": "" + }, + "region": { + "stringValue": "" + } + } + } +} \ No newline at end of file diff --git 
a/examples/vertex_mlops_enterprise/terraform/01-dev/outputs.tf b/examples/vertex_mlops_enterprise/terraform/01-dev/outputs.tf index 1761837bae..1f114a7cae 100644 --- a/examples/vertex_mlops_enterprise/terraform/01-dev/outputs.tf +++ b/examples/vertex_mlops_enterprise/terraform/01-dev/outputs.tf @@ -53,6 +53,15 @@ locals { environment = var.environment, framework = "kfp" }) + + gh_main_bqml_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) gh_run_tfx_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, @@ -72,6 +81,16 @@ locals { framework = "kfp" }) + gh_run_bqml_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) + + gh_deploy_yaml = templatefile("${path.module}/../../.github/workflows/deploy.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, @@ -105,6 +124,19 @@ locals { bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" }) + pipeline_deploy_bqml = templatefile("${path.module}/../../build/pipeline-deployment-bqml.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + docker_repo = module.mlops.github.DOCKER_REPO, + sa_mlops = module.mlops.github.SA_MLOPS, + dataflow_network = 
"regions/europe-west4/subnetworks/subnet-europe-west4", + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" + }) + pipeline_run_tfx = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, region = var.region, @@ -128,7 +160,18 @@ locals { pipeline_name = "creditcards-classifier-kfp-train", pipeline_params = "{\"bq_table\": \"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\", \"xgboost_param_max_depth\": 5, \"xgboost_param_learning_rate\": 0.1, \"xgboost_param_n_estimators\": 20}" }) - + + pipeline_run_bqml = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-bqml-train", + pipeline_params = "{\"bq_table\": \"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\"}" + }) model_deployment = templatefile("${path.module}/../../build/model-deployment.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, @@ -162,6 +205,11 @@ resource "local_file" "main_kfp_yml" { content = local.gh_main_kfp_yaml } +resource "local_file" "main_bqml_yml" { + filename = "${path.module}/../../.github/workflows/main-bqml.yml" + content = local.gh_main_bqml_yaml +} + resource "local_file" "run_tfx_yml" { filename = "${path.module}/../../.github/workflows/run-tfx.yml" content = local.gh_run_tfx_yaml @@ -172,6 +220,11 @@ resource "local_file" "run_kfp_yml" { content = local.gh_run_kfp_yaml } +resource "local_file" "run_bqml_yml" { + filename = "${path.module}/../../.github/workflows/run-bqml.yml" + content = local.gh_run_bqml_yaml +} + 
resource "local_file" "deploy_yml" { filename = "${path.module}/../../.github/workflows/deploy.yml" content = local.gh_deploy_yaml @@ -187,6 +240,11 @@ resource "local_file" "deployment_kfp_yml" { content = local.pipeline_deploy_kfp } +resource "local_file" "deployment_bqml_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-bqml.yaml" + content = local.pipeline_deploy_bqml +} + resource "local_file" "pipeline_run_tfx_ml" { filename = "${path.module}/../../build/${var.environment}/pipeline-run-tfx.yaml" content = local.pipeline_run_tfx @@ -197,6 +255,11 @@ resource "local_file" "pipeline_run_kfp_ml" { content = local.pipeline_run_kfp } +resource "local_file" "pipeline_run_bqml_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-bqml.yaml" + content = local.pipeline_run_bqml +} + resource "local_file" "model_deploy_yml" { filename = "${path.module}/../../build/${var.environment}/model-deployment.yaml" content = local.model_deployment diff --git a/examples/vertex_mlops_enterprise/terraform/02-staging/main.tf b/examples/vertex_mlops_enterprise/terraform/02-staging/main.tf index 37743f7dce..5a9cbbe8f1 100644 --- a/examples/vertex_mlops_enterprise/terraform/02-staging/main.tf +++ b/examples/vertex_mlops_enterprise/terraform/02-staging/main.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ locals { project_config = { billing_account_id = var.project_config.billing_account_id parent = var.project_config.parent - project_id = "${var.project_config.project_id}-${var.environment}" + project_id = var.project_config.project_id } } diff --git a/examples/vertex_mlops_enterprise/terraform/02-staging/outputs.tf b/examples/vertex_mlops_enterprise/terraform/02-staging/outputs.tf index 633ea98199..1f114a7cae 100644 --- a/examples/vertex_mlops_enterprise/terraform/02-staging/outputs.tf +++ b/examples/vertex_mlops_enterprise/terraform/02-staging/outputs.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,31 +36,70 @@ locals { environment = var.environment }) - gh_main_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { + gh_main_tfx_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO - environment = var.environment + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "tfx" }) - gh_run_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + gh_main_kfp_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO - environment = var.environment + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "kfp" + }) + + gh_main_bqml_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { + wip = 
module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) + + gh_run_tfx_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "tfx" + }) + + gh_run_kfp_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "kfp" }) + gh_run_bqml_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) + + gh_deploy_yaml = templatefile("${path.module}/../../.github/workflows/deploy.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO + docker_repo = module.mlops.github.DOCKER_REPO, environment = var.environment }) - pipeline_deploy = templatefile("${path.module}/../../build/pipeline-deployment.yaml.TEMPLATE", { + pipeline_deploy_tfx = templatefile("${path.module}/../../build/pipeline-deployment-tfx.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, region = var.region, github_org = try(var.github.organization, null), @@ -68,18 +107,71 @@ locals { github_branch 
= try(local.github.branch, null), docker_repo = module.mlops.github.DOCKER_REPO, sa_mlops = module.mlops.github.SA_MLOPS, - subnetwork = module.mlops.github.SUBNETWORK + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" }) - pipeline_run = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + pipeline_deploy_kfp = templatefile("${path.module}/../../build/pipeline-deployment-kfp.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, region = var.region, github_org = try(var.github.organization, null), github_repo = try(var.github.repo, null), github_branch = try(local.github.branch, null), + docker_repo = module.mlops.github.DOCKER_REPO, sa_mlops = module.mlops.github.SA_MLOPS, + dataflow_network = "regions/europe-west4/subnetworks/subnet-europe-west4", + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" }) + pipeline_deploy_bqml = templatefile("${path.module}/../../build/pipeline-deployment-bqml.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + docker_repo = module.mlops.github.DOCKER_REPO, + sa_mlops = module.mlops.github.SA_MLOPS, + dataflow_network = "regions/europe-west4/subnetworks/subnet-europe-west4", + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" + }) + + pipeline_run_tfx = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = 
"${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-v02-train-pipeline", + pipeline_params = "{\"num_epochs\": 7, \"learning_rate\": 0.0015, \"batch_size\": 512, \"steps_per_epoch\": 9, \"hidden_units\": \"256,126\"}" + }) + + pipeline_run_kfp = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-kfp-train", + pipeline_params = "{\"bq_table\": \"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\", \"xgboost_param_max_depth\": 5, \"xgboost_param_learning_rate\": 0.1, \"xgboost_param_n_estimators\": 20}" + }) + + pipeline_run_bqml = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-bqml-train", + pipeline_params = "{\"bq_table\": \"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\"}" + }) model_deployment = templatefile("${path.module}/../../build/model-deployment.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, @@ -98,22 +190,39 @@ resource "local_file" "mainconfig_yml" { content = local.mainconfig_yaml } - resource "local_file" "containers_yml" { filename = "${path.module}/../../.github/workflows/containers.yml" content = local.gh_containers_yaml } +resource "local_file" "main_tfx_yml" { + filename = 
"${path.module}/../../.github/workflows/main-tfx.yml" + content = local.gh_main_tfx_yaml +} +resource "local_file" "main_kfp_yml" { + filename = "${path.module}/../../.github/workflows/main-kfp.yml" + content = local.gh_main_kfp_yaml +} + +resource "local_file" "main_bqml_yml" { + filename = "${path.module}/../../.github/workflows/main-bqml.yml" + content = local.gh_main_bqml_yaml +} + +resource "local_file" "run_tfx_yml" { + filename = "${path.module}/../../.github/workflows/run-tfx.yml" + content = local.gh_run_tfx_yaml +} -resource "local_file" "main_yml" { - filename = "${path.module}/../../.github/workflows/main.yml" - content = local.gh_main_yaml +resource "local_file" "run_kfp_yml" { + filename = "${path.module}/../../.github/workflows/run-kfp.yml" + content = local.gh_run_kfp_yaml } -resource "local_file" "run_yml" { - filename = "${path.module}/../../.github/workflows/run.yml" - content = local.gh_run_yaml +resource "local_file" "run_bqml_yml" { + filename = "${path.module}/../../.github/workflows/run-bqml.yml" + content = local.gh_run_bqml_yaml } resource "local_file" "deploy_yml" { @@ -121,15 +230,34 @@ resource "local_file" "deploy_yml" { content = local.gh_deploy_yaml } -resource "local_file" "deployment_yml" { - filename = "${path.module}/../../build/${var.environment}/pipeline-deployment.yaml" - content = local.pipeline_deploy +resource "local_file" "deployment_tfx_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-tfx.yaml" + content = local.pipeline_deploy_tfx } +resource "local_file" "deployment_kfp_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-kfp.yaml" + content = local.pipeline_deploy_kfp +} + +resource "local_file" "deployment_bqml_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-bqml.yaml" + content = local.pipeline_deploy_bqml +} + +resource "local_file" "pipeline_run_tfx_ml" { + filename = 
"${path.module}/../../build/${var.environment}/pipeline-run-tfx.yaml" + content = local.pipeline_run_tfx +} + +resource "local_file" "pipeline_run_kfp_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-kfp.yaml" + content = local.pipeline_run_kfp +} -resource "local_file" "pipeline_run_yml" { - filename = "${path.module}/../../build/${var.environment}/pipeline-run.yaml" - content = local.pipeline_run +resource "local_file" "pipeline_run_bqml_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-bqml.yaml" + content = local.pipeline_run_bqml } resource "local_file" "model_deploy_yml" { @@ -137,7 +265,6 @@ resource "local_file" "model_deploy_yml" { content = local.model_deployment } - output "mlops" { description = "Created project, service accounts and associates resources." value = module.mlops diff --git a/examples/vertex_mlops_enterprise/terraform/02-staging/terraform.tfvars.sample b/examples/vertex_mlops_enterprise/terraform/02-staging/terraform.tfvars.sample index 6e20ecea11..896af9be49 100644 --- a/examples/vertex_mlops_enterprise/terraform/02-staging/terraform.tfvars.sample +++ b/examples/vertex_mlops_enterprise/terraform/02-staging/terraform.tfvars.sample @@ -1,4 +1,4 @@ -bucket_name = "creditcards" +bucket_name = "creditcards" # -env will be added as suffix dataset_name = "creditcards" environment = "stg" groups = { diff --git a/examples/vertex_mlops_enterprise/terraform/02-staging/variables.tf b/examples/vertex_mlops_enterprise/terraform/02-staging/variables.tf index 82b4d85d03..4580d223bd 100644 --- a/examples/vertex_mlops_enterprise/terraform/02-staging/variables.tf +++ b/examples/vertex_mlops_enterprise/terraform/02-staging/variables.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/examples/vertex_mlops_enterprise/terraform/03-prod/main.tf b/examples/vertex_mlops_enterprise/terraform/03-prod/main.tf index 37743f7dce..5a9cbbe8f1 100644 --- a/examples/vertex_mlops_enterprise/terraform/03-prod/main.tf +++ b/examples/vertex_mlops_enterprise/terraform/03-prod/main.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ locals { project_config = { billing_account_id = var.project_config.billing_account_id parent = var.project_config.parent - project_id = "${var.project_config.project_id}-${var.environment}" + project_id = var.project_config.project_id } } diff --git a/examples/vertex_mlops_enterprise/terraform/03-prod/outputs.tf b/examples/vertex_mlops_enterprise/terraform/03-prod/outputs.tf index 633ea98199..1f114a7cae 100644 --- a/examples/vertex_mlops_enterprise/terraform/03-prod/outputs.tf +++ b/examples/vertex_mlops_enterprise/terraform/03-prod/outputs.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,31 +36,70 @@ locals { environment = var.environment }) - gh_main_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { + gh_main_tfx_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO - environment = var.environment + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "tfx" }) - gh_run_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + gh_main_kfp_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO - environment = var.environment + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "kfp" + }) + + gh_main_bqml_yaml = templatefile("${path.module}/../../.github/workflows/main.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) + + gh_run_tfx_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "tfx" + }) + + gh_run_kfp_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = 
module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "kfp" }) + gh_run_bqml_yaml = templatefile("${path.module}/../../.github/workflows/run.yml.TEMPLATE", { + wip = module.mlops.github.WORKLOAD_ID_PROVIDER, + project_id = module.mlops.github.PROJECT_ID, + sa = module.mlops.github.SERVICE_ACCOUNT, + docker_repo = module.mlops.github.DOCKER_REPO, + environment = var.environment, + framework = "bqml" + }) + + gh_deploy_yaml = templatefile("${path.module}/../../.github/workflows/deploy.yml.TEMPLATE", { wip = module.mlops.github.WORKLOAD_ID_PROVIDER, project_id = module.mlops.github.PROJECT_ID, sa = module.mlops.github.SERVICE_ACCOUNT, - docker_repo = module.mlops.github.DOCKER_REPO + docker_repo = module.mlops.github.DOCKER_REPO, environment = var.environment }) - pipeline_deploy = templatefile("${path.module}/../../build/pipeline-deployment.yaml.TEMPLATE", { + pipeline_deploy_tfx = templatefile("${path.module}/../../build/pipeline-deployment-tfx.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, region = var.region, github_org = try(var.github.organization, null), @@ -68,18 +107,71 @@ locals { github_branch = try(local.github.branch, null), docker_repo = module.mlops.github.DOCKER_REPO, sa_mlops = module.mlops.github.SA_MLOPS, - subnetwork = module.mlops.github.SUBNETWORK + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" }) - pipeline_run = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + pipeline_deploy_kfp = templatefile("${path.module}/../../build/pipeline-deployment-kfp.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, region = var.region, github_org = try(var.github.organization, null), github_repo = try(var.github.repo, null), github_branch = try(local.github.branch, null), + docker_repo = module.mlops.github.DOCKER_REPO, sa_mlops = module.mlops.github.SA_MLOPS, + 
dataflow_network = "regions/europe-west4/subnetworks/subnet-europe-west4", + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" }) + pipeline_deploy_bqml = templatefile("${path.module}/../../build/pipeline-deployment-bqml.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + docker_repo = module.mlops.github.DOCKER_REPO, + sa_mlops = module.mlops.github.SA_MLOPS, + dataflow_network = "regions/europe-west4/subnetworks/subnet-europe-west4", + subnetwork = module.mlops.github.SUBNETWORK, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}" + }) + + pipeline_run_tfx = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-v02-train-pipeline", + pipeline_params = "{\"num_epochs\": 7, \"learning_rate\": 0.0015, \"batch_size\": 512, \"steps_per_epoch\": 9, \"hidden_units\": \"256,126\"}" + }) + + pipeline_run_kfp = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-kfp-train", + pipeline_params = "{\"bq_table\": 
\"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\", \"xgboost_param_max_depth\": 5, \"xgboost_param_learning_rate\": 0.1, \"xgboost_param_n_estimators\": 20}" + }) + + pipeline_run_bqml = templatefile("${path.module}/../../build/pipeline-run.yaml.TEMPLATE", { + project_id = module.mlops.github.PROJECT_ID, + region = var.region, + github_org = try(var.github.organization, null), + github_repo = try(var.github.repo, null), + github_branch = try(local.github.branch, null), + sa_mlops = module.mlops.github.SA_MLOPS, + bucket_name = "${var.prefix}-${var.bucket_name}-${var.environment}", + pipeline_name = "creditcards-classifier-bqml-train", + pipeline_params = "{\"bq_table\": \"${module.mlops.github.PROJECT_ID}.${var.dataset_name}.creditcards_ml\"}" + }) model_deployment = templatefile("${path.module}/../../build/model-deployment.yaml.TEMPLATE", { project_id = module.mlops.github.PROJECT_ID, @@ -98,22 +190,39 @@ resource "local_file" "mainconfig_yml" { content = local.mainconfig_yaml } - resource "local_file" "containers_yml" { filename = "${path.module}/../../.github/workflows/containers.yml" content = local.gh_containers_yaml } +resource "local_file" "main_tfx_yml" { + filename = "${path.module}/../../.github/workflows/main-tfx.yml" + content = local.gh_main_tfx_yaml +} +resource "local_file" "main_kfp_yml" { + filename = "${path.module}/../../.github/workflows/main-kfp.yml" + content = local.gh_main_kfp_yaml +} + +resource "local_file" "main_bqml_yml" { + filename = "${path.module}/../../.github/workflows/main-bqml.yml" + content = local.gh_main_bqml_yaml +} + +resource "local_file" "run_tfx_yml" { + filename = "${path.module}/../../.github/workflows/run-tfx.yml" + content = local.gh_run_tfx_yaml +} -resource "local_file" "main_yml" { - filename = "${path.module}/../../.github/workflows/main.yml" - content = local.gh_main_yaml +resource "local_file" "run_kfp_yml" { + filename = "${path.module}/../../.github/workflows/run-kfp.yml" + content = 
local.gh_run_kfp_yaml } -resource "local_file" "run_yml" { - filename = "${path.module}/../../.github/workflows/run.yml" - content = local.gh_run_yaml +resource "local_file" "run_bqml_yml" { + filename = "${path.module}/../../.github/workflows/run-bqml.yml" + content = local.gh_run_bqml_yaml } resource "local_file" "deploy_yml" { @@ -121,15 +230,34 @@ resource "local_file" "deploy_yml" { content = local.gh_deploy_yaml } -resource "local_file" "deployment_yml" { - filename = "${path.module}/../../build/${var.environment}/pipeline-deployment.yaml" - content = local.pipeline_deploy +resource "local_file" "deployment_tfx_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-tfx.yaml" + content = local.pipeline_deploy_tfx } +resource "local_file" "deployment_kfp_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-kfp.yaml" + content = local.pipeline_deploy_kfp +} + +resource "local_file" "deployment_bqml_yml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-deployment-bqml.yaml" + content = local.pipeline_deploy_bqml +} + +resource "local_file" "pipeline_run_tfx_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-tfx.yaml" + content = local.pipeline_run_tfx +} + +resource "local_file" "pipeline_run_kfp_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-kfp.yaml" + content = local.pipeline_run_kfp +} -resource "local_file" "pipeline_run_yml" { - filename = "${path.module}/../../build/${var.environment}/pipeline-run.yaml" - content = local.pipeline_run +resource "local_file" "pipeline_run_bqml_ml" { + filename = "${path.module}/../../build/${var.environment}/pipeline-run-bqml.yaml" + content = local.pipeline_run_bqml } resource "local_file" "model_deploy_yml" { @@ -137,7 +265,6 @@ resource "local_file" "model_deploy_yml" { content = local.model_deployment } - output "mlops" { description = "Created project, service accounts and 
associates resources." value = module.mlops diff --git a/examples/vertex_mlops_enterprise/terraform/03-prod/terraform.tfvars.sample b/examples/vertex_mlops_enterprise/terraform/03-prod/terraform.tfvars.sample index fa5dd769db..c11fde91e1 100644 --- a/examples/vertex_mlops_enterprise/terraform/03-prod/terraform.tfvars.sample +++ b/examples/vertex_mlops_enterprise/terraform/03-prod/terraform.tfvars.sample @@ -1,4 +1,4 @@ -bucket_name = "creditcards-dev" +bucket_name = "creditcards" # -env will be added as suffix dataset_name = "creditcards" environment = "prd" groups = { diff --git a/examples/vertex_mlops_enterprise/terraform/03-prod/variables.tf b/examples/vertex_mlops_enterprise/terraform/03-prod/variables.tf index 82b4d85d03..4580d223bd 100644 --- a/examples/vertex_mlops_enterprise/terraform/03-prod/variables.tf +++ b/examples/vertex_mlops_enterprise/terraform/03-prod/variables.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.