Batch inference sample scripts for foundation models (#2367)
* fill-mask, qna, summarization

* Add all tasks

* black formatting

* Make delete compute step optional

* Fix wording

---------

Co-authored-by: Sumadhva Sridhar <[email protected]>
sumadhva30 authored Jul 26, 2023
1 parent a484911 commit 33394b7
Showing 17 changed files with 925 additions and 0 deletions.
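Each task folder in this change follows the same pattern: a batch-deploy.yml with the deployment settings and a shell script that provisions compute, deploys the model, scores a batch, and cleans up. A minimal sketch of how one of the scripts would typically be run; the .sh file paths are not shown in this view, so the folder and file name below are placeholders:

# hypothetical path and file name; only cli/foundation-models/system/inference/fill-mask/batch-deploy.yml is named in this diff
cd cli/foundation-models/system/inference/fill-mask
# first edit the placeholders at the top of the script:
#   subscription_id="<SUBSCRIPTION_ID>", resource_group_name="<RESOURCE_GROUP>", workspace_name="<WORKSPACE_NAME>"
bash batch-score.sh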
@@ -0,0 +1,96 @@
set -x
# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-asr

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="openai-whisper-large"

# Validate the existence of the model in the registry and get the latest version
model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1)
if [[ ${model_list} == *"[]"* ]]; then
echo "Model doesn't exist in registry. Check the model list and try again."; exit 1;
fi
version_temp=${model_list#*\"version\": \"}
model_version=${version_temp%%\"*}

version=$(date +%s)
endpoint_name="asr-$version"
job_name="asr-job-$version"

# todo: fetch compute_sku from the min_inference_sku tag of the model
compute_sku="Standard_DS5_v2"

# 1. Set up prerequisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
[ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
then
echo "Model $model_name:$model_version does not exist in registry $registry_name"
exit 1
fi

# Prepare the input data for the batch endpoint
inputs_dir="./input"
wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/librispeech-dataset/batch/batch_input.csv -P $inputs_dir || {
echo "prepare batch inputs failed"; exit 1;
}

# Create an AML compute for the batch deployment
az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || {
echo "compute create failed"; exit 1;
}

# 3. Deploy the model to an endpoint
# create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
echo "endpoint create failed"; exit 1;
}

# deploy model from registry to endpoint in workspace
az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \
endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || {
echo "deployment create failed"; exit 1;
}

# 4. Invoke a job on the batch endpoint
invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || {
echo "endpoint invoke failed"; exit 1;
}
invoke_temp=${invoke_output#*\"name\": \"}
job_name=${invoke_temp%%\"*}

# 5. Stream the job logs
az ml job stream --name $job_name $workspace_info || {
echo "job stream-logs failed"; exit 1;
}

# 6. Download the job output
az ml job download --name $job_name --download-path ./output $workspace_info || {
echo "job output download failed"; exit 1;
}

# 7. Delete the endpoint
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
echo "endpoint delete failed"; exit 1;
}

# 8. Delete the compute cluster (uncomment the lines below to delete the created cluster)
# az ml compute delete --name cpu-cluster $workspace_info --yes || {
# echo "compute delete failed"; exit 1;
# }

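The script above recovers the job name by string-slicing the JSON printed by az ml batch-endpoint invoke. A possible alternative sketch that leans on the CLI's built-in JMESPath support instead (same variables as in the script; not part of this change):

# invoke the endpoint and capture only the job name
job_name=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info --query name -o tsv) || {
echo "endpoint invoke failed"; exit 1;
}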
@@ -0,0 +1,13 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
compute: cpu-cluster
resources:
  instance_count: 1
error_threshold: 0
logging_level: info
max_concurrency_per_instance: 2
mini_batch_size: 10
output_file_name: predictions.csv
retry_settings:
  max_retries: 3
  timeout: 300
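Note that the deployment YAML above carries no endpoint_name or model; the scoring scripts inject both at creation time via --set. One way to confirm what a script actually deployed, as a sketch (assumes the script's variables are still set in the shell):

az ml batch-deployment show --name demo --endpoint-name $endpoint_name $workspace_info -o table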
13 changes: 13 additions & 0 deletions cli/foundation-models/system/inference/fill-mask/batch-deploy.yml
@@ -0,0 +1,13 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
compute: cpu-cluster
resources:
  instance_count: 1
error_threshold: 0
logging_level: info
max_concurrency_per_instance: 2
mini_batch_size: 10
output_file_name: predictions.csv
retry_settings:
  max_retries: 3
  timeout: 300
@@ -0,0 +1,96 @@
set -x
# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-fill-mask

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="bert-base-uncased"

# Validate the existence of the model in the registry and get the latest version
model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1)
if [[ ${model_list} == *"[]"* ]]; then
echo "Model doesn't exist in registry. Check the model list and try again."; exit 1;
fi
version_temp=${model_list#*\"version\": \"}
model_version=${version_temp%%\"*}

version=$(date +%s)
endpoint_name="fill-mask-$version"
job_name="fill-mask-job-$version"

# todo: fetch compute_sku from the min_inference_sku tag of the model
compute_sku="Standard_DS3_v2"

# 1. Set up prerequisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
[ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
then
echo "Model $model_name:$model_version does not exist in registry $registry_name"
exit 1
fi

# Prepare the input data for the batch endpoint
inputs_dir="./batch/inputs"
python prepare-batch-dataset.py --model_name $model_name || {
echo "prepare batch inputs failed"; exit 1;
}

# Create an AML compute for the batch deployment
az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || {
echo "compute create failed"; exit 1;
}

# 3. Deploy the model to an endpoint
# create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
echo "endpoint create failed"; exit 1;
}

# deploy model from registry to endpoint in workspace
az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \
endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || {
echo "deployment create failed"; exit 1;
}

# 4. Invoke a job on the batch endpoint
invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || {
echo "endpoint invoke failed"; exit 1;
}
invoke_temp=${invoke_output#*\"name\": \"}
job_name=${invoke_temp%%\"*}

# 5. Stream the job logs
az ml job stream --name $job_name $workspace_info || {
echo "job stream-logs failed"; exit 1;
}

# 6. Download the job output
az ml job download --name $job_name --download-path ./output $workspace_info || {
echo "job output download failed"; exit 1;
}

# 7. Delete the endpoint
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
echo "endpoint delete failed"; exit 1;
}

# 8. Delete the compute cluster (uncomment the lines below to delete the created cluster)
# az ml compute delete --name cpu-cluster $workspace_info --yes || {
# echo "compute delete failed"; exit 1;
# }

@@ -0,0 +1,53 @@
import os
import csv
import json
import random
import urllib.request
import argparse
import datasets
import pandas as pd

# Get the model name from argument
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="bert-base-uncased")
args = parser.parse_args()

# Define directories and filenames as variables
dataset_dir = "dataset"
test_datafile = "test_100.csv"

batch_dir = "batch"
batch_inputs_dir = os.path.join(batch_dir, "inputs")
batch_input_file = "batch_input.csv"
os.makedirs(dataset_dir, exist_ok=True)
os.makedirs(batch_dir, exist_ok=True)
os.makedirs(batch_inputs_dir, exist_ok=True)

testdata = datasets.load_dataset("bookcorpus", split="train", streaming=True)

test_df = pd.DataFrame(data=testdata.take(100))

# Get the right mask token from huggingface
with urllib.request.urlopen(
    f"https://huggingface.co/api/models/{args.model_name}"
) as url:
    data = json.load(url)
    mask_token = data["mask_token"]

# Take the value of the "text" column, replace a random word with the mask token, and save the result in the "masked_text" column
test_df["masked_text"] = test_df["text"].apply(
lambda x: x.replace(random.choice(x.split()), mask_token, 1)
)

# Save the test_df dataframe to a csv file in the ./dataset folder
test_df.to_csv(os.path.join(".", dataset_dir, test_datafile), index=False)

batch_df = test_df[["masked_text"]].rename(columns={"masked_text": "input_string"})

# Divide this into files of 10 rows each
batch_size_per_predict = 10
for i in range(0, len(batch_df), batch_size_per_predict):
    j = i + batch_size_per_predict
    batch_df[i:j].to_csv(
        os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL
    )
@@ -0,0 +1,13 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: demo
compute: cpu-cluster
resources:
  instance_count: 1
error_threshold: 0
logging_level: info
max_concurrency_per_instance: 2
mini_batch_size: 10
output_file_name: predictions.csv
retry_settings:
  max_retries: 3
  timeout: 300
@@ -0,0 +1,96 @@
set -x
# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-question-answering

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

# This is the model from the system registry that needs to be deployed
model_name="deepset-minilm-uncased-squad2"

# Validate the existence of the model in the registry and get the latest version
model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1)
if [[ ${model_list} == *"[]"* ]]; then
echo "Model doesn't exist in registry. Check the model list and try again."; exit 1;
fi
version_temp=${model_list#*\"version\": \"}
model_version=${version_temp%%\"*}

version=$(date +%s)
endpoint_name="question-answering-$version"
job_name="question-answering-job-$version"

# todo: fetch compute_sku from the min_inference_sku tag of the model
compute_sku="Standard_DS3_v2"

# 1. Set up prerequisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
[ "$resource_group_name" = "<RESOURCE_GROUP>" ] || \
[ "$workspace_name" = "<WORKSPACE_NAME>" ]; then
echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
exit 1
fi

az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# 2. Check if the model exists in the registry
# need to confirm model show command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
then
echo "Model $model_name:$model_version does not exist in registry $registry_name"
exit 1
fi

# Prepare the input data for the batch endpoint
inputs_dir="./input"
wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/squad-dataset/batch/batch_input.csv -P $inputs_dir || {
echo "prepare batch inputs failed"; exit 1;
}

# Create an AML compute for the batch deployment
az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || {
echo "compute create failed"; exit 1;
}

# 3. Deploy the model to an endpoint
# create batch endpoint
az ml batch-endpoint create --name $endpoint_name $workspace_info || {
echo "endpoint create failed"; exit 1;
}

# deploy model from registry to endpoint in workspace
az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \
endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || {
echo "deployment create failed"; exit 1;
}

# 4. Invoke a job on the batch endpoint
invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || {
echo "endpoint invoke failed"; exit 1;
}
invoke_temp=${invoke_output#*\"name\": \"}
job_name=${invoke_temp%%\"*}

# 5. Stream the job logs
az ml job stream --name $job_name $workspace_info || {
echo "job stream-logs failed"; exit 1;
}

# 6. Download the job output
az ml job download --name $job_name --download-path ./output $workspace_info || {
echo "job output download failed"; exit 1;
}

# 7. Delete the endpoint
az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
echo "endpoint delete failed"; exit 1;
}

# 8. Delete the compute cluster (uncomment the lines below to delete the created cluster)
# az ml compute delete --name cpu-cluster $workspace_info --yes || {
# echo "compute delete failed"; exit 1;
# }

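Each script downloads the scored results in step 6 before tearing the endpoint down. A small sketch for inspecting them afterwards; predictions.csv is the output_file_name set in batch-deploy.yml, and the job decides the sub-folder layout under ./output:

find ./output -name predictions.csv -exec head -n 5 {} \;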