-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable automation test for featurestore samples (#2585)
* Enable automation test for featurestore samples * Update setup resources * Fix typo and reformat the notebook * Add python package to cluster * Add import package * Run script sequentially * Put script run in the same process * Update to python output command * Replace output command in all notebooks during test * Update mounted path * Update the relative path * Restructure the file hierarchy * Regenerate feature store notebook workflow * Update relative path * List folder in current path * Update relative folder path * Update storage account name and user object id * Get signed in user id * Update user object id * Update user id * Enable notebook 1 first
- Loading branch information
Showing
9 changed files
with
249 additions
and
6 deletions.
There are no files selected for viewing
80 changes: 80 additions & 0 deletions
80
.github/workflows/sdk-featurestore_sample-test_featurestore_sdk_samples.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.
#
# NOTE(review): because this file is generated, the action version bumps and
# the config.json path fix below also need to be made in the generator
# (sdk/python/readme.py), otherwise they are lost on the next regeneration.

name: sdk-featurestore_sample-test_featurestore_sdk_samples
# This file is created by sdk/python/readme.py.
# Please do not edit directly.
on:
  workflow_dispatch:
  schedule:
    # Twice a day: minute 22 of hours 2 and 14.
    - cron: "22 2/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - sdk/python/featurestore_sample/**
      - .github/workflows/sdk-featurestore_sample-test_featurestore_sdk_samples.yml
      - sdk/python/dev-requirements.txt
      - infra/bootstrapping/**
      - sdk/python/setup.sh
# One run per pull request (or per ref for non-PR triggers); a newer run
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v4
      - name: setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.8"
      - name: pip install notebook reqs
        run: pip install -r sdk/python/dev-requirements.txt
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup SDK
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: sdk/python
        continue-on-error: true
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: setup feature-store resources
        run: |
          bash -x setup-resources.sh test_featurestore_sdk_samples.ipynb
        working-directory: sdk/python/featurestore_sample
        # A failure here does not fail the job; it only surfaces later if the
        # notebook run below breaks because of it.
        continue-on-error: true
      - name: run featurestore_sample/test_featurestore_sdk_samples.ipynb
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "test_featurestore_sdk_samples.ipynb";
          [ -f "../../.azureml/config.json" ] && cat "../../.azureml/config.json";
          papermill -k python test_featurestore_sdk_samples.ipynb test_featurestore_sdk_samples.output.ipynb
        working-directory: sdk/python/featurestore_sample
      - name: upload notebook's working folder as an artifact
        # Upload even when an earlier step failed, so the output notebook is
        # available for debugging.
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test_featurestore_sdk_samples
          path: sdk/python/featurestore_sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
"""Run the converted feature store sample notebooks inside a single Spark job.

Each notebook is expected to exist as a ``.py`` script under
``notebooks/sdk_only/`` (relative to the working directory). The scripts are
executed one after another, in the same process and in this module's global
namespace.
"""
from pyspark.sql import SparkSession

# NOTE(review): `spark` is never referenced in this file. Presumably the
# notebook scripts expect a pre-created global session, the way notebook
# kernels provide one -- confirm before removing.
spark = SparkSession.builder.appName("AccessData").getOrCreate()


def run_notebook_script(label, script_path):
    """Print a banner, then execute the script at ``script_path``.

    Args:
        label: Text placed inside the banner line, e.g. ``"Test Notebook 1"``.
        script_path: Path of the Python script to execute.

    Raises:
        OSError: If the script cannot be opened.
        Exception: Anything raised while compiling or running the script
            propagates unchanged, so a failing notebook fails the job.
    """
    print(f"======={label}============")
    # Read as UTF-8 explicitly instead of depending on the host locale.
    with open(script_path, encoding="utf-8") as script_file:
        source = script_file.read()
    # Compile with the real path so tracebacks name the failing notebook
    # script instead of "<string>". Executing in the module globals keeps the
    # original behaviour: the script sees `spark`, and anything it defines
    # stays visible to scripts run after it.
    exec(compile(source, script_path, "exec"), globals())


run_notebook_script(
    "Test Notebook 1",
    "notebooks/sdk_only/1. Develop a feature set and register with managed feature store.py",
)

## Enable test for notebook 1 first
# run_notebook_script(
#     "Test Notebook 2",
#     "notebooks/sdk_only/2. Enable materialization and backfill feature data.py",
# )

# run_notebook_script(
#     "Test Notebook 3",
#     "notebooks/sdk_only/3. Experiment and train models using features.py",
# )

# run_notebook_script(
#     "Test Notebook 4",
#     "notebooks/sdk_only/4. Enable recurrent materialization and run batch inference.py",
# )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env bash
# Prepare the feature store sample for the automated test:
#   1. install jupytext and convert the four sample notebooks to .py scripts,
#   2. fill the Azure placeholders in the notebook passed as $1,
#   3. patch the converted scripts so they can run outside a notebook kernel.
#
# Usage: bash setup-resources.sh <notebook-to-template>

pip install --upgrade jupytext

# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# LOCATION is not referenced anywhere below.
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
OUTPUT_COMMAND="print"
FEATURE_STORAGE_ACCOUNT_NAME=${RESOURCE_GROUP}fs
# NOTE(review): hard-coded object id substituted for <USER_AAD_OBJECTID>;
# presumably the identity the CI run uses -- confirm it is valid for every
# environment this script runs in.
USER_ID="36b5b70a-a2b2-45e6-a496-df3c2ffde085"
# </create_variables>

# <convert_notebook_to_py>
NOTEBOOK_1="notebooks/sdk_only/1. Develop a feature set and register with managed feature store"
NOTEBOOK_2="notebooks/sdk_only/2. Enable materialization and backfill feature data"
NOTEBOOK_3="notebooks/sdk_only/3. Experiment and train models using features"
NOTEBOOK_4="notebooks/sdk_only/4. Enable recurrent materialization and run batch inference"
jupytext --to py "${NOTEBOOK_1}.ipynb"
jupytext --to py "${NOTEBOOK_2}.ipynb"
jupytext --to py "${NOTEBOOK_3}.ipynb"
jupytext --to py "${NOTEBOOK_4}.ipynb"
# </convert_notebook_to_py>

# <replace_template_values>
# Fill the subscription / resource group / workspace placeholders in the
# notebook given on the command line. The argument is quoted so paths with
# spaces work, and a missing argument is reported instead of handing sed an
# empty file list.
if [ -n "${1:-}" ]; then
    sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
        s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
        s/<AML_WORKSPACE_NAME>/$AML_WORKSPACE_NAME/g;" "$1"
else
    echo "usage: $0 <notebook-to-template>; skipping template replacement" >&2
fi

# Patch every converted script:
#   * display(...) calls become ${OUTPUT_COMMAND}(...). Only call sites are
#     rewritten (word boundary + opening parenthesis) so identifiers that
#     merely contain "display", such as display_name, are left intact.
#   * the "./Users/<your_user_alias>/featurestore_sample" prefix becomes "./".
for converted in "${NOTEBOOK_1}" "${NOTEBOOK_2}" "${NOTEBOOK_3}" "${NOTEBOOK_4}"; do
    sed -i "s/\bdisplay(/${OUTPUT_COMMAND}(/g;s/.\/Users\/<your_user_alias>\/featurestore_sample/.\//g;" "${converted}.py"
done

# Only the second script gets the storage account and user object id
# placeholders filled in.
sed -i "s/<FEATURE_STORAGE_ACCOUNT_NAME>/$FEATURE_STORAGE_ACCOUNT_NAME/g;
    s/<USER_AAD_OBJECTID>/$USER_ID/g;" "${NOTEBOOK_2}.py"
# </replace_template_values>
88 changes: 88 additions & 0 deletions
88
sdk/python/featurestore_sample/test_featurestore_sdk_samples.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Use a serverless Spark compute" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"You should have an attached Synapse Spark pool available in your workspace. Please see documentation page: [Attach and manage a Synapse Spark pool in Azure Machine Learning (preview)](https://learn.microsoft.com/azure/machine-learning/how-to-manage-synapse-spark-pool) for more details.\n", | ||
"\n", | ||
"**Note** - To ensure successful execution of Spark job, the identity being used for the Spark job should be assigned **Contributor** and **Storage Blob Data Contributor** roles on the Azure storage account used for data input and output." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from azure.ai.ml import MLClient, spark, Input, Output\n", | ||
"from azure.identity import DefaultAzureCredential\n", | ||
"from azure.ai.ml.entities import Environment\n", | ||
"\n", | ||
"subscription_id = \"<SUBSCRIPTION_ID>\"\n", | ||
"resource_group = \"<RESOURCE_GROUP>\"\n", | ||
"workspace = \"<AML_WORKSPACE_NAME>\"\n", | ||
"ml_client = MLClient(\n", | ||
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"spark_job = spark(\n", | ||
" display_name=\"featurestore_sample_test\",\n", | ||
" code=\"./\",\n", | ||
" entry={\"file\": \"featurestore_sdk_job.py\"},\n", | ||
" driver_cores=1,\n", | ||
" driver_memory=\"1g\",\n", | ||
" executor_cores=1,\n", | ||
" executor_memory=\"1g\",\n", | ||
" executor_instances=1,\n", | ||
" resources={\n", | ||
" \"instance_type\": \"Standard_E8S_V3\",\n", | ||
" \"runtime_version\": \"3.2.0\",\n", | ||
" },\n", | ||
" environment=Environment(conda_file=\"project/env/conda.yml\"),\n", | ||
")\n", | ||
"\n", | ||
"returned_spark_job = ml_client.jobs.create_or_update(spark_job)\n", | ||
"\n", | ||
"print(returned_spark_job.id)\n", | ||
"# Wait until the job completes\n", | ||
"ml_client.jobs.stream(returned_spark_job.name)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3.10 - SDK V2", | ||
"language": "python", | ||
"name": "python310-sdkv2" | ||
}, | ||
"language_info": { | ||
"name": "python", | ||
"version": "3.7.10" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "6aeff17a1aa7735c2f7cb3a6d691fe1b4d4c3b8d2d650f644ad0f24e1b8e3f3f" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters