-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Update model-deployment.yaml.TEMPLATE Fixed container * Renamed model deployment pipeline * Renamed model deployment pipeline * Added BQ ML pipeline * Added BQ ML pipeline * Added BQ ML pipeline * Update examples/vertex_mlops_enterprise/.github/workflows/deploy.yml.TEMPLATE Co-authored-by: Sergio Vidiella Pinto <[email protected]> * Update 01-ENVIRONMENTS.md * Update pipeline.py * Apply suggestions from code review Co-authored-by: Sergio Vidiella Pinto <[email protected]> --------- Co-authored-by: Andrew Gold <[email protected]> Co-authored-by: Sergio Vidiella Pinto <[email protected]>
- Loading branch information
1 parent
47de5bd
commit 1ed818b
Showing
22 changed files
with
1,221 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ terraform.tfstate* | |
.DS_Store | ||
**/__pycache__/** | ||
venv | ||
**/.ipynb_checkpoints/** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,109 +47,61 @@ steps: | |
|
||
# Run datasource_utils unit tests. | ||
- name: '$_CICD_IMAGE_URI' | ||
entrypoint: 'pytest' | ||
args: ['src/tests/datasource_utils_tests.py', '-s'] | ||
dir: '$_WORKDIR' | ||
env: | ||
- 'PROJECT=$_PROJECT' | ||
- 'BQ_LOCATION=$_BQ_LOCATION' | ||
- 'BQ_DATASET_NAME=$_BQ_DATASET_NAME' | ||
- 'ML_TABLE=$_ML_TABLE' | ||
id: 'Unit Test Datasource Utils' | ||
waitFor: ['Clone Repository'] | ||
|
||
|
||
# Run model unit tests. | ||
- name: '$_CICD_IMAGE_URI' | ||
entrypoint: 'pytest' | ||
args: ['src/tests/model_tests.py', '-s'] | ||
dir: '$_WORKDIR' | ||
id: 'Unit Test Model' | ||
entrypoint: 'echo' | ||
args: ['Running unit tests - dummy build'] | ||
id: 'Unit Tests' | ||
waitFor: ['Clone Repository'] | ||
timeout: 1800s | ||
|
||
|
||
# Test e2e pipeline using local runner. | ||
- name: '$_CICD_IMAGE_URI' | ||
entrypoint: 'pytest' | ||
args: ['src/tests/pipeline_deployment_tests.py::test_e2e_pipeline', '-s'] | ||
dir: '$_WORKDIR' | ||
env: | ||
- 'PROJECT=$_PROJECT' | ||
- 'REGION=$_REGION' | ||
- 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' | ||
- 'VERTEX_DATASET_NAME=$_VERTEX_DATASET_NAME' | ||
- 'GCS_LOCATION=$_TEST_GCS_LOCATION' | ||
- 'TRAIN_LIMIT=$_CI_TRAIN_LIMIT' | ||
- 'TEST_LIMIT=$_CI_TEST_LIMIT' | ||
- 'UPLOAD_MODEL=$_CI_UPLOAD_MODEL' | ||
- 'ACCURACY_THRESHOLD=$_CI_ACCURACY_THRESHOLD' | ||
id: 'Local Test E2E Pipeline' | ||
waitFor: ['Clone Repository'] | ||
timeout: 1800s | ||
|
||
# Compile the pipeline. | ||
- name: '$_CICD_IMAGE_URI' | ||
entrypoint: 'python' | ||
args: ['build/utils.py', | ||
'--mode', 'compile-pipeline', | ||
'--pipeline-name', '$_PIPELINE_NAME' | ||
] | ||
dir: '$_WORKDIR' | ||
args: ['pipeline.py', '--compile-only'] | ||
dir: '$_WORKDIR/src/bqml_pipeline/src/' | ||
env: | ||
- 'PROJECT=$_PROJECT' | ||
- 'PROJECT_ID=$_PROJECT' | ||
- 'REGION=$_REGION' | ||
- 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' | ||
- 'VERTEX_DATASET_NAME=$_VERTEX_DATASET_NAME' | ||
- 'GCS_LOCATION=$_GCS_LOCATION' | ||
- 'DATAFLOW_IMAGE_URI=$_DATAFLOW_IMAGE_URI' | ||
- 'TFX_IMAGE_URI=$_TFX_IMAGE_URI' | ||
- 'BEAM_RUNNER=$_BEAM_RUNNER' | ||
- 'TRAINING_RUNNER=$_TRAINING_RUNNER' | ||
- 'SERVICE_ACCOUNT=$_SERVICE_ACCOUNT' | ||
- 'SUBNETWORK=$_SUBNETWORK' | ||
- 'ACCURACY_THRESHOLD=$_CI_ACCURACY_THRESHOLD' | ||
|
||
- 'NETWORK=$_NETWORK' | ||
- 'BQ_DATASET_NAME=$_BQ_DATASET_NAME' | ||
- 'ML_TABLE=$_ML_TABLE' | ||
- 'MODEL_DISPLAY_NAME=$_MODEL_DISPLAY_NAME' | ||
- 'PIPELINE_NAME=$_PIPELINE_NAME' | ||
- 'PIPELINES_STORE=$_PIPELINES_STORE' | ||
- 'CICD_IMAGE_URI=$_CICD_IMAGE_URI' | ||
- 'CICD_IMAGE_MODEL_CARD=$_CICD_IMAGE_MODEL_CARD' | ||
- 'DATAFLOW_SA=$_SERVICE_ACCOUNT' | ||
- 'DATAFLOW_NETWORK=$_DATAFLOW_NETWORK' | ||
id: 'Compile Pipeline' | ||
waitFor: ['Local Test E2E Pipeline', 'Unit Test Datasource Utils', 'Unit Test Model'] | ||
|
||
waitFor: ['Unit Tests'] | ||
|
||
# Upload compiled pipeline to GCS. | ||
- name: 'gcr.io/cloud-builders/gsutil' | ||
args: ['cp', '$_PIPELINE_NAME.json', '$_PIPELINES_STORE'] | ||
dir: '$_WORKDIR' | ||
dir: '$_WORKDIR/src/bqml_pipeline/src/' | ||
id: 'Upload Pipeline to GCS' | ||
waitFor: ['Compile Pipeline'] | ||
|
||
|
||
serviceAccount: 'projects/$_PROJECT/serviceAccounts/$_SERVICE_ACCOUNT' | ||
logsBucket: '$_GCS_BUCKET' | ||
timeout: 3600s | ||
timeout: 7200s | ||
substitutions: | ||
_REPO_URL: git@github.com:${github_org}/${github_repo} | ||
_CICD_IMAGE_URI: '${docker_repo}/cicd-bqml:latest' | ||
_CICD_IMAGE_MODEL_CARD: '${docker_repo}/model-card:latest' | ||
_BRANCH: ${github_branch} | ||
_REGION: ${region} | ||
_PROJECT: ${project_id} | ||
_GCS_BUCKET: ${project_id}_cloudbuild/logs | ||
_CICD_IMAGE_URI: '${docker_repo}/cicd-tfx:latest' | ||
_DATAFLOW_IMAGE_URI: '${docker_repo}/dataflow:latest' | ||
_TFX_IMAGE_URI: '${docker_repo}/vertex:latest' | ||
_GCS_LOCATION: 'gs://${project_id}/creditcards/' | ||
_TEST_GCS_LOCATION: 'gs://${project_id}/creditcards/e2e_tests' | ||
_BQ_LOCATION: ${region} | ||
_BQ_DATASET_NAME: creditcards | ||
_ML_TABLE: creditcards_ml | ||
_VERTEX_DATASET_NAME: creditcards | ||
_MODEL_DISPLAY_NAME: creditcards-classifier-v02 | ||
_CI_TRAIN_LIMIT: '1000' | ||
_CI_TEST_LIMIT: '100' | ||
_CI_UPLOAD_MODEL: '0' | ||
_CI_ACCURACY_THRESHOLD: '-0.1' | ||
_BEAM_RUNNER: DataflowRunner | ||
_TRAINING_RUNNER: vertex | ||
_PIPELINE_NAME: creditcards-classifier-v02-train-pipeline | ||
_PIPELINES_STORE: gs://${project_id}/creditcards/compiled_pipelines/ | ||
_SUBNETWORK: ${subnetwork} | ||
_PIPELINE_NAME: creditcards-classifier-bqml-train | ||
_PIPELINES_STORE: gs://${bucket_name}/creditcards/compiled_pipelines/ | ||
_MODEL_DISPLAY_NAME: creditcards-bqml | ||
_NETWORK: ${subnetwork} | ||
_DATAFLOW_NETWORK: ${dataflow_network} | ||
_SERVICE_ACCOUNT: ${sa_mlops} | ||
_WORKDIR: ${github_repo} | ||
options: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
examples/vertex_mlops_enterprise/src/bqml_pipeline/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
FROM python:3.8 | ||
|
||
|
||
COPY requirements.txt . | ||
RUN pip install --upgrade pip | ||
RUN pip install -r requirements.txt | ||
|
||
COPY src . | ||
ENV PYTHONPATH=/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Reference BQML Pipeline | ||
|
||
Reference BigQuery ML pipeline implementation. |
5 changes: 5 additions & 0 deletions
5
examples/vertex_mlops_enterprise/src/bqml_pipeline/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
jinja2~=3.1.2 | ||
pandas~=1.5.3 | ||
matplotlib~=3.7.1 | ||
google-cloud-aiplatform~=1.35.0 | ||
google-cloud-pipeline-components~=1.0.45 |
35 changes: 35 additions & 0 deletions
35
examples/vertex_mlops_enterprise/src/bqml_pipeline/src/config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os | ||
|
||
PROJECT_ID = os.getenv("PROJECT_ID", "") | ||
REGION = os.getenv("REGION", "") | ||
IMAGE=os.getenv("CICD_IMAGE_URI", f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/base:latest') | ||
TRAIN_COMPONENT_IMAGE=f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/train-fraud:latest' | ||
IMAGE_MODEL_CARD=os.getenv("CICD_IMAGE_MODEL_CARD", f'{REGION}-docker.pkg.dev/{PROJECT_ID}/creditcards-kfp/model-card:latest') | ||
|
||
CLASS_NAMES = ['OK', 'Fraud'] | ||
TARGET_COLUMN = 'Class' | ||
|
||
PIPELINE_NAME = os.getenv("PIPELINE_NAME", 'bqml-creditcards') | ||
PIPELINE_ROOT = os.getenv("PIPELINES_STORE", f'gs://{PROJECT_ID}/pipeline_root/{PIPELINE_NAME}') | ||
SERVICE_ACCOUNT = os.getenv("SERVICE_ACCOUNT") # returns None if not defined | ||
NETWORK = os.getenv("NETWORK") # returns None if not defined | ||
KEY_ID = os.getenv("CMEK_KEY_ID") # e.g. projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key | ||
|
||
BQ_DATASET_NAME=os.getenv("BQ_DATASET_NAME","creditcards") | ||
BQ_INPUT_DATA=f"{PROJECT_ID}.{BQ_DATASET_NAME}.{os.getenv('ML_TABLE','creditcards_ml')}" | ||
PARENT_MODEL='' # f'projects/{PROJECT_ID}/locations/{REGION}/models/YOUR_NUMERIC_MODEL_ID_HERE' | ||
|
||
BQ_OUTPUT_DATASET_ID="creditcards_batch_out" | ||
|
||
MODEL_DISPLAY_NAME = os.getenv("MODEL_DISPLAY_NAME", 'creditcards-bqml') | ||
MODEL_CARD_CONFIG='../model_card_config.json' | ||
|
||
PRED_CONTAINER='europe-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-6:latest' | ||
ENDPOINT_NAME=PIPELINE_NAME | ||
|
||
EMAILS=['[email protected]'] | ||
|
||
# Evaluation pipeline | ||
DATAFLOW_SA = os.getenv("DATAFLOW_SA") | ||
DATAFLOW_NETWORK = os.getenv("DATAFLOW_NETWORK") | ||
DATAFLOW_PUBLIC_IPS = False |
Oops, something went wrong.