Skip to content

Commit

Permalink
Enable automation test for featurestore samples (#2585)
Browse files Browse the repository at this point in the history
* Enable automation test for featurestore samples

* Update setup resources

* Fix typo and reformat the notebook

* Add python package to cluster

* Add import package

* Run script sequentially

* Put script run in the same process

* Update to python output command

* Replace output command in all notebooks during test

* Update mounted path

* Update the relative path

* Restructure the file hierarchy

* Regenerate feature store notebook workflow

* Update relative path

* List folder in current path

* Update relative folder path

* Update storage account name and user object id

* Get signed in user id

* Update user object id

* Update user id

* Enable notebook 1 first
  • Loading branch information
fredms authored Sep 7, 2023
1 parent 4c1c62f commit a3be24d
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: sdk-featurestore_sample-test_featurestore_sdk_samples
# This file is created by sdk/python/readme.py.
# Please do not edit directly.
on:
  workflow_dispatch:
  schedule:
    # minute 22 at hours 2 and 14 UTC ("2/12" = start at 2, step every 12 hours)
    - cron: "22 2/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - sdk/python/featurestore_sample/**
      - .github/workflows/sdk-featurestore_sample-test_featurestore_sdk_samples.yml
      - sdk/python/dev-requirements.txt
      - infra/bootstrapping/**
      - sdk/python/setup.sh
# Cancel any in-flight run for the same PR/branch when a newer run starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - name: check out repo
      uses: actions/checkout@v2
    - name: setup python
      uses: actions/setup-python@v2
      with:
        python-version: "3.8"
    - name: pip install notebook reqs
      run: pip install -r sdk/python/dev-requirements.txt
    - name: azure login
      uses: azure/login@v1
      with:
        creds: ${{secrets.AZUREML_CREDENTIALS}}
    - name: bootstrap resources
      run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
      working-directory: infra/bootstrapping
      continue-on-error: false
    # The setup steps below are best-effort (continue-on-error: true); only
    # bootstrap and the notebook run step are allowed to fail the workflow.
    - name: setup SDK
      run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
      working-directory: sdk/python
      continue-on-error: true
    - name: setup-cli
      run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
      working-directory: cli
      continue-on-error: true
    - name: setup feature-store resources
      run: |
          bash -x setup-resources.sh test_featurestore_sdk_samples.ipynb
      working-directory: sdk/python/featurestore_sample
      continue-on-error: true
    - name: run featurestore_sample/test_featurestore_sdk_samples.ipynb
      run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "test_featurestore_sdk_samples.ipynb";
          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
          papermill -k python test_featurestore_sdk_samples.ipynb test_featurestore_sdk_samples.output.ipynb
      working-directory: sdk/python/featurestore_sample
    # Always publish the working folder so failed runs can be debugged from
    # the papermill output notebook.
    - name: upload notebook's working folder as an artifact
      if: ${{ always() }}
      uses: actions/upload-artifact@v2
      with:
        name: test_featurestore_sdk_samples
        path: sdk/python/featurestore_sample
26 changes: 26 additions & 0 deletions sdk/python/featurestore_sample/featurestore_sdk_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Driver script for the feature-store SDK samples, submitted as a Spark job.

setup-resources.sh converts each sample notebook to a plain ``.py`` script
(via jupytext) before this job runs; the scripts are then exec'd here
sequentially so that all notebooks execute in a single driver process.
"""
from pyspark.sql import SparkSession

# Create (or attach to) the Spark session up front.
# NOTE(review): the exec'd notebook code presumably picks up this `spark`
# global -- confirm before renaming or removing it.
spark = SparkSession.builder.appName("AccessData").getOrCreate()

print("=======Test Notebook 1============")
with open(
    "notebooks/sdk_only/1. Develop a feature set and register with managed feature store.py"
) as f:
    # exec of repo-local, trusted sample code only; never use this pattern
    # on untrusted input.
    exec(f.read())

## Enable test for notebook 1 first
# print("=======Test Notebook 2============")
# with open(
#     "notebooks/sdk_only/2. Enable materialization and backfill feature data.py"
# ) as f:
#     exec(f.read())

# print("=======Test Notebook 3============")
# with open("notebooks/sdk_only/3. Experiment and train models using features.py") as f:
#     exec(f.read())

# print("=======Test Notebook 4============")
# with open(
#     "notebooks/sdk_only/4. Enable recurrent materialization and run batch inference.py"
# ) as f:
#     exec(f.read())
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
"\n",
"# Please update your alias below (or any custom directory you uploaded the samples to).\n",
"# You can find the name from the directory structure in the left nav\n",
"root_dir = \"./Users/<your user alias>/featurestore_sample\"\n",
"root_dir = \"./Users/<your_user_alias>/featurestore_sample\"\n",
"\n",
"if os.path.isdir(root_dir):\n",
" print(\"The folder exists.\")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
"\n",
"# please update the dir to ./Users/{your-alias} (or any custom directory you uploaded the samples to).\n",
"# You can find the name from the directory structure in the left nav\n",
"root_dir = \"./Users/<your user alias>/featurestore_sample\"\n",
"root_dir = \"./Users/<your_user_alias>/featurestore_sample\"\n",
"\n",
"if os.path.isdir(root_dir):\n",
" print(\"The folder exists.\")\n",
Expand Down Expand Up @@ -419,7 +419,7 @@
"# storage\n",
"storage_subscription_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"storage_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"storage_account_name = \"fstorestorage\"\n",
"storage_account_name = \"<FEATURE_STORAGE_ACCOUNT_NAME>\"\n",
"storage_location = ws_location\n",
"storage_file_system_name = \"offlinestore\""
]
Expand Down Expand Up @@ -759,7 +759,7 @@
"outputs": [],
"source": [
"# This utility function is created for ease of use in the docs tutorials. It uses standard azure API's. You can optionally inspect it `featurestore/setup/setup_storage_uai.py`\n",
"your_aad_objectid = \"<your_aad_objectId>\"\n",
"your_aad_objectid = \"<USER_AAD_OBJECTID>\"\n",
"\n",
"grant_user_aad_storage_data_reader_role(\n",
" AzureMLOnBehalfOfCredential(),\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
"\n",
"# please update the dir to ./Users/{your-alias} (or any custom directory you uploaded the samples to).\n",
"# You can find the name from the directory structure in the left nav\n",
"root_dir = \"./Users/<your user alias>/featurestore_sample\"\n",
"root_dir = \"./Users/<your_user_alias>/featurestore_sample\"\n",
"\n",
"if os.path.isdir(root_dir):\n",
" print(\"The folder exists.\")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
"\n",
"# please update the dir to ./Users/{your-alias} (or any custom directory you uploaded the samples to).\n",
"# You can find the name from the directory structure in the left nav\n",
"root_dir = \"./Users/<your user alias>/featurestore_sample\"\n",
"root_dir = \"./Users/<your_user_alias>/featurestore_sample\"\n",
"\n",
"if os.path.isdir(root_dir):\n",
" print(\"The folder exists.\")\n",
Expand Down
36 changes: 36 additions & 0 deletions sdk/python/featurestore_sample/setup-resources.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Set up the feature-store sample for the automation test.
# Usage: bash setup-resources.sh <driver-notebook>.ipynb
#   $1 - notebook whose <SUBSCRIPTION_ID>/<RESOURCE_GROUP>/<AML_WORKSPACE_NAME>
#        placeholders are replaced in place.
pip install --upgrade jupytext

# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# NOTE(review): LOCATION is assigned but not used below -- confirm whether it
# can be removed or is consumed by a sourced environment.
LOCATION=$(az ml workspace show --query location -o tsv)
RESOURCE_GROUP=$(az group show --query name -o tsv)
AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
# The notebooks call display(); substitute plain print so output shows up in
# the Spark job logs.
OUTPUT_COMMAND="print"
FEATURE_STORAGE_ACCOUNT_NAME=${RESOURCE_GROUP}fs
# NOTE(review): hard-coded AAD object id of the signed-in CI identity --
# confirm it matches the identity used by the test subscription.
USER_ID="36b5b70a-a2b2-45e6-a496-df3c2ffde085"

# </create_variables>

# <convert_notebook_to_py>
NOTEBOOK_1="notebooks/sdk_only/1. Develop a feature set and register with managed feature store"
NOTEBOOK_2="notebooks/sdk_only/2. Enable materialization and backfill feature data"
NOTEBOOK_3="notebooks/sdk_only/3. Experiment and train models using features"
NOTEBOOK_4="notebooks/sdk_only/4. Enable recurrent materialization and run batch inference"
jupytext --to py "${NOTEBOOK_1}.ipynb"
jupytext --to py "${NOTEBOOK_2}.ipynb"
jupytext --to py "${NOTEBOOK_3}.ipynb"
jupytext --to py "${NOTEBOOK_4}.ipynb"
# </convert_notebook_to_py>

#<replace_template_values>
# Fill in the workspace placeholders in the driver notebook ($1).
sed -i "s/<SUBSCRIPTION_ID>/$SUBSCRIPTION_ID/g;
s/<RESOURCE_GROUP>/$RESOURCE_GROUP/g;
s/<AML_WORKSPACE_NAME>/$AML_WORKSPACE_NAME/g;" $1

#</replace_template_values>
# Rewrite the converted notebook scripts so they run in CI:
# display -> print, sample root dir -> ./, and (notebook 2 only) storage
# account + user object id placeholders -> real values.
sed -i "s/display/$OUTPUT_COMMAND/g;s/.\/Users\/<your_user_alias>\/featurestore_sample/.\//g;" "${NOTEBOOK_1}.py"
sed -i "s/display/$OUTPUT_COMMAND/g;s/.\/Users\/<your_user_alias>\/featurestore_sample/.\//g;
s/<FEATURE_STORAGE_ACCOUNT_NAME>/$FEATURE_STORAGE_ACCOUNT_NAME/g;
s/<USER_AAD_OBJECTID>/$USER_ID/g;" "${NOTEBOOK_2}.py"
sed -i "s/display/$OUTPUT_COMMAND/g;s/.\/Users\/<your_user_alias>\/featurestore_sample/.\//g;" "${NOTEBOOK_3}.py"
sed -i "s/display/$OUTPUT_COMMAND/g;s/.\/Users\/<your_user_alias>\/featurestore_sample/.\//g;" "${NOTEBOOK_4}.py"
88 changes: 88 additions & 0 deletions sdk/python/featurestore_sample/test_featurestore_sdk_samples.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use a serverless Spark compute"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You should have an attached Synapse Spark pool available in your workspace. Please see documentation page: [Attach and manage a Synapse Spark pool in Azure Machine Learning (preview)](https://learn.microsoft.com/azure/machine-learning/how-to-manage-synapse-spark-pool) for more details.\n",
"\n",
"**Note** - To ensure successful execution of Spark job, the identity being used for the Spark job should be assigned **Contributor** and **Storage Blob Data Contributor** roles on the Azure storage account used for data input and output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azure.ai.ml import MLClient, spark, Input, Output\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.ai.ml.entities import Environment\n",
"\n",
"subscription_id = \"<SUBSCRIPTION_ID>\"\n",
"resource_group = \"<RESOURCE_GROUP>\"\n",
"workspace = \"<AML_WORKSPACE_NAME>\"\n",
"ml_client = MLClient(\n",
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spark_job = spark(\n",
" display_name=\"featurestore_sample_test\",\n",
" code=\"./\",\n",
" entry={\"file\": \"featurestore_sdk_job.py\"},\n",
" driver_cores=1,\n",
" driver_memory=\"1g\",\n",
" executor_cores=1,\n",
" executor_memory=\"1g\",\n",
" executor_instances=1,\n",
" resources={\n",
" \"instance_type\": \"Standard_E8S_V3\",\n",
" \"runtime_version\": \"3.2.0\",\n",
" },\n",
" environment=Environment(conda_file=\"project/env/conda.yml\"),\n",
")\n",
"\n",
"returned_spark_job = ml_client.jobs.create_or_update(spark_job)\n",
"\n",
"print(returned_spark_job.id)\n",
"# Wait until the job completes\n",
"ml_client.jobs.stream(returned_spark_job.name)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10 - SDK V2",
"language": "python",
"name": "python310-sdkv2"
},
"language_info": {
"name": "python",
"version": "3.7.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "6aeff17a1aa7735c2f7cb3a6d691fe1b4d4c3b8d2d650f644ad0f24e1b8e3f3f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
13 changes: 13 additions & 0 deletions sdk/python/readme.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def write_notebook_workflow(
"assets-component" in classification
)
is_spark_notebook_sample = ("jobs-spark" in classification) or ("_spark_" in name)
is_featurestore_sample = "featurestore_sample" in classification
creds = "${{secrets.AZUREML_CREDENTIALS}}"
# Duplicate name in working directory during checkout
# https://github.com/actions/checkout/issues/739
Expand Down Expand Up @@ -278,6 +279,8 @@ def write_notebook_workflow(
continue-on-error: true\n"""
if is_spark_notebook_sample:
workflow_yaml += get_spark_config_workflow(posix_folder, name)
if is_featurestore_sample:
workflow_yaml += get_featurestore_config_workflow(posix_folder, name)
workflow_yaml += f""" - name: run {posix_notebook}
run: |
source "{github_workspace}/infra/bootstrapping/sdk_helpers.sh";
Expand Down Expand Up @@ -468,6 +471,16 @@ def get_spark_config_workflow(folder_name, file_name):
return workflow


def get_featurestore_config_workflow(folder_name: str, file_name: str) -> str:
    """Return the workflow YAML step that provisions feature-store resources.

    The returned snippet is spliced into a generated GitHub Actions workflow
    and runs ``setup-resources.sh`` before the sample notebook executes.

    Args:
        folder_name: unused here (the working directory is fixed); kept for
            signature parity with get_spark_config_workflow.
        file_name: notebook name without extension, passed to
            setup-resources.sh as ``{file_name}.ipynb``.

    Returns:
        A YAML fragment (string) ending in a newline.
    """
    # The leading spaces inside the f-string are the YAML indentation of the
    # generated workflow's `steps:` list -- do not re-wrap or re-indent.
    workflow = f"""    - name: setup feature-store resources
      run: |
          bash -x setup-resources.sh {file_name}.ipynb
      working-directory: sdk/python/featurestore_sample
      continue-on-error: true\n"""

    return workflow


@contextlib.contextmanager
def change_working_dir(path):
"""Context manager for changing the current working directory"""
Expand Down

0 comments on commit a3be24d

Please sign in to comment.