From 33394b700e58f9305c2103e7b2ea92176d03cc01 Mon Sep 17 00:00:00 2001 From: Sumadhva Sridhar <109793745+sumadhva30@users.noreply.github.com> Date: Wed, 26 Jul 2023 13:48:05 +0530 Subject: [PATCH] Batch inference sample scripts for foundation models (#2367) * fill-mask, qna, summarization * Add all tasks * black formatting * Make delete compute step optional * Fix wording --------- Co-authored-by: Sumadhva Sridhar <109793745+susridhar@users.noreply.github.com> --- .../asr-batch-endpoint.sh | 96 +++++++++++++++++++ .../batch-deploy.yml | 13 +++ .../inference/fill-mask/batch-deploy.yml | 13 +++ .../fill-mask/fill-mask-batch-endpoint.sh | 96 +++++++++++++++++++ .../fill-mask/prepare-batch-dataset.py | 53 ++++++++++ .../question-answering/batch-deploy.yml | 13 +++ .../question-answering-batch-endpoint.sh | 96 +++++++++++++++++++ .../inference/summarization/batch-deploy.yml | 13 +++ .../summarization-batch-endpoint.sh | 96 +++++++++++++++++++ .../text-classification/batch-deploy.yml | 13 +++ .../text-classification-batch-endpoint.sh | 96 +++++++++++++++++++ .../text-generation/batch-deploy.yml | 13 +++ .../text-generation-batch-endpoint.sh | 96 +++++++++++++++++++ .../token-classification/batch-deploy.yml | 13 +++ .../token-classification-batch-endpoint.sh | 96 +++++++++++++++++++ .../inference/translation/batch-deploy.yml | 13 +++ .../translation/translation-batch-endpoint.sh | 96 +++++++++++++++++++ 17 files changed, 925 insertions(+) create mode 100644 cli/foundation-models/system/inference/automatic-speech-recognition/asr-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/automatic-speech-recognition/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/fill-mask/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/fill-mask/prepare-batch-dataset.py create mode 100644 
cli/foundation-models/system/inference/question-answering/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/summarization/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/summarization/summarization-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/text-classification/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/text-classification/text-classification-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/text-generation/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/token-classification/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.sh create mode 100644 cli/foundation-models/system/inference/translation/batch-deploy.yml create mode 100644 cli/foundation-models/system/inference/translation/translation-batch-endpoint.sh diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/asr-batch-endpoint.sh b/cli/foundation-models/system/inference/automatic-speech-recognition/asr-batch-endpoint.sh new file mode 100644 index 0000000000..e121f839be --- /dev/null +++ b/cli/foundation-models/system/inference/automatic-speech-recognition/asr-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-asr + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="openai-whisper-large" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model 
list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="asr-$version" +job_name="asr-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS5_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./input" +wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/librispeech-dataset/batch/batch_input.csv -P $inputs_dir || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. 
Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/batch-deploy.yml b/cli/foundation-models/system/inference/automatic-speech-recognition/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/automatic-speech-recognition/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/fill-mask/batch-deploy.yml b/cli/foundation-models/system/inference/fill-mask/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/fill-mask/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.sh b/cli/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.sh new file mode 100644 index 0000000000..217039be0c --- /dev/null +++ b/cli/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: 
https://aka.ms/azureml-infer-batch-sdk-fill-mask + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="bert-base-uncased" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="fill-mask-$version" +job_name="fill-mask-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS3_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! 
az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./batch/inputs" +python prepare-batch-dataset.py --model_name $model_name || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/fill-mask/prepare-batch-dataset.py b/cli/foundation-models/system/inference/fill-mask/prepare-batch-dataset.py new file mode 100644 index 0000000000..2131700967 --- /dev/null +++ b/cli/foundation-models/system/inference/fill-mask/prepare-batch-dataset.py @@ -0,0 +1,53 @@ +import os +import csv +import json +import random +import urllib +import argparse +import datasets +import pandas as pd + +# Get the model name from argument +parser = argparse.ArgumentParser() +parser.add_argument("--model_name", type=str, default="bert-base-uncased") +args = parser.parse_args() + +# Define directories and filenames as variables +dataset_dir = "dataset" +test_datafile = "test_100.csv" + +batch_dir = "batch" +batch_inputs_dir = os.path.join(batch_dir, "inputs") +batch_input_file = "batch_input.csv" +os.makedirs(dataset_dir, exist_ok=True) +os.makedirs(batch_dir, exist_ok=True) +os.makedirs(batch_inputs_dir, exist_ok=True) + +testdata = datasets.load_dataset("bookcorpus", split="train", streaming=True) + +test_df = pd.DataFrame(data=testdata.take(100)) + +# Get the right mask token from huggingface +with urllib.request.urlopen( + f"https://huggingface.co/api/models/{args.model_name}" +) as url: + data = json.load(url) + mask_token = data["mask_token"] + +# Take the value of the "text" column, replace a random word with the mask token, and save the result in the "masked_text" column +test_df["masked_text"] = test_df["text"].apply( + lambda x: x.replace(random.choice(x.split()), mask_token, 1) +) + +# Save the test_df dataframe to a csv file in the ./bookcorpus-dataset folder +test_df.to_csv(os.path.join(".", dataset_dir, test_datafile), index=False) + +batch_df = test_df[["masked_text"]].rename(columns={"masked_text": 
"input_string"}) + +# Divide this into files of 10 rows each +batch_size_per_predict = 10 +for i in range(0, len(batch_df), batch_size_per_predict): + j = i + batch_size_per_predict + batch_df[i:j].to_csv( + os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL + ) diff --git a/cli/foundation-models/system/inference/question-answering/batch-deploy.yml b/cli/foundation-models/system/inference/question-answering/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/question-answering/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.sh b/cli/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.sh new file mode 100644 index 0000000000..138210f520 --- /dev/null +++ b/cli/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-question-answering + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="deepset-minilm-uncased-squad2" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="question-answering-$version" +job_name="question-answering-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS3_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./input" +wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/squad-dataset/batch/batch_input.csv -P $inputs_dir || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. 
Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/summarization/batch-deploy.yml b/cli/foundation-models/system/inference/summarization/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/summarization/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/summarization/summarization-batch-endpoint.sh b/cli/foundation-models/system/inference/summarization/summarization-batch-endpoint.sh new file mode 100644 index 0000000000..b667341064 --- /dev/null +++ b/cli/foundation-models/system/inference/summarization/summarization-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-summarization + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="sshleifer-distilbart-cnn-12-6" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="summarization-$version" +job_name="summarization-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS5_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./input" +wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/news-dataset/batch/batch_input.csv -P $inputs_dir || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. 
Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/text-classification/batch-deploy.yml b/cli/foundation-models/system/inference/text-classification/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/text-classification/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/text-classification/text-classification-batch-endpoint.sh b/cli/foundation-models/system/inference/text-classification/text-classification-batch-endpoint.sh new file mode 100644 index 0000000000..a1e31a8bb6 --- /dev/null +++ b/cli/foundation-models/system/inference/text-classification/text-classification-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-text-classification + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="microsoft-deberta-base-mnli" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="text-classification-$version" +job_name="text-classification-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS3_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./input" +wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/glue-mnli-dataset/batch/batch_input.csv -P $inputs_dir || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. 
Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/text-generation/batch-deploy.yml b/cli/foundation-models/system/inference/text-generation/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/text-generation/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.sh b/cli/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.sh new file mode 100644 index 0000000000..0d46a1c8ee --- /dev/null +++ b/cli/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-text-generation + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="gpt2" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1; +fi +version_temp=${model_list#*\"version\": \"} +model_version=${version_temp%%\"*} + +version=$(date +%s) +endpoint_name="text-generation-$version" +job_name="text-generation-job-$version" + +# todo: fetch compute_sku from the min_inference_sku tag of the model +compute_sku="Standard_DS3_v2" + +# 1. Setup pre-requisites +if [ "$subscription_id" = "" ] || \ + [ "$resource_group_name" = "" ] || \ + [ "$workspace_name" = "" ]; then + echo "Please update the script with the subscription_id, resource_group_name and workspace_name" + exit 1 +fi + +az account set -s $subscription_id +workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" + +# 2. Check if the model exists in the registry +# need to confirm model show command works for registries outside the tenant (aka system registry) +if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +then + echo "Model $model_name:$model_version does not exist in registry $registry_name" + exit 1 +fi + +# Prepare the input data for the batch endpoint +inputs_dir="./input" +wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/bookcorpus-dataset/batch_generation/batch_input.csv -P $inputs_dir || { + echo "prepare batch inputs failed"; exit 1; +} + +# Create an AML compute for the batch deployment +az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || { + echo "compute create failed"; exit 1; +} + +# 3. 
Deploy the model to an endpoint +# create batch endpoint +az ml batch-endpoint create --name $endpoint_name $workspace_info || { + echo "endpoint create failed"; exit 1; +} + +# deploy model from registry to endpoint in workspace +az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \ + endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || { + echo "deployment create failed"; exit 1; +} + +# 4. Invoke a job on the batch endpoint +invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || { + echo "endpoint invoke failed"; exit 1; +} +invoke_temp=${invoke_output#*\"name\": \"} +job_name=${invoke_temp%%\"*} + +# 5. Stream the job logs +az ml job stream --name $job_name $workspace_info || { + echo "job stream-logs failed"; exit 1; +} + +# 6. Download the job output +az ml job download --name $job_name --download-path ./output $workspace_info || { + echo "job output download failed"; exit 1; +} + +# 5. Delete the endpoint +az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || { + echo "endpoint delete failed"; exit 1; +} + +# 6. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/token-classification/batch-deploy.yml b/cli/foundation-models/system/inference/token-classification/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/token-classification/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.sh b/cli/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.sh new file mode 100644 index 0000000000..c35b17412f --- /dev/null +++ b/cli/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-token-classification + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="Jean-Baptiste-camembert-ner" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1;
+fi
+version_temp=${model_list#*\"version\": \"}
+model_version=${version_temp%%\"*}
+
+version=$(date +%s)
+endpoint_name="token-classification-$version"
+job_name="token-classification-job-$version"
+
+# todo: fetch compute_sku from the min_inference_sku tag of the model
+compute_sku="Standard_DS3_v2"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "" ] || \
+   [ "$resource_group_name" = "" ] || \
+   [ "$workspace_name" = "" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# Prepare the input data for the batch endpoint
+inputs_dir="./input"
+wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/Jean-Baptiste-wikiner_fr/batch/batch_input.csv -P $inputs_dir || {
+    echo "prepare batch inputs failed"; exit 1;
+}
+
+# Create an AML compute for the batch deployment
+az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || {
+    echo "compute create failed"; exit 1;
+}
+
+# 3. 
Deploy the model to an endpoint
+# create batch endpoint
+az ml batch-endpoint create --name $endpoint_name $workspace_info || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace
+az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 4. Invoke a job on the batch endpoint
+invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || {
+    echo "endpoint invoke failed"; exit 1;
+}
+invoke_temp=${invoke_output#*\"name\": \"}
+job_name=${invoke_temp%%\"*}
+
+# 5. Stream the job logs
+az ml job stream --name $job_name $workspace_info || {
+    echo "job stream-logs failed"; exit 1;
+}
+
+# 6. Download the job output
+az ml job download --name $job_name --download-path ./output $workspace_info || {
+    echo "job output download failed"; exit 1;
+}
+
+# 7. Delete the endpoint
+az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+# 8. 
Delete the compute cluster (Uncomment the below lines to delete the created cluster) +# az ml compute delete --name cpu-cluster $workspace_info --yes || { +# echo "compute delete failed"; exit 1; +# } + diff --git a/cli/foundation-models/system/inference/translation/batch-deploy.yml b/cli/foundation-models/system/inference/translation/batch-deploy.yml new file mode 100644 index 0000000000..2f66fbc630 --- /dev/null +++ b/cli/foundation-models/system/inference/translation/batch-deploy.yml @@ -0,0 +1,13 @@ +$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json +name: demo +compute: cpu-cluster +resources: + instance_count: 1 +error_threshold: 0 +logging_level: info +max_concurrency_per_instance: 2 +mini_batch_size: 10 +output_file_name: predictions.csv +retry_settings: + max_retries: 3 + timeout: 300 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/translation/translation-batch-endpoint.sh b/cli/foundation-models/system/inference/translation/translation-batch-endpoint.sh new file mode 100644 index 0000000000..105cb829ca --- /dev/null +++ b/cli/foundation-models/system/inference/translation/translation-batch-endpoint.sh @@ -0,0 +1,96 @@ +set -x +# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-batch-sdk-translation + +# script inputs +registry_name="azureml" +subscription_id="" +resource_group_name="" +workspace_name="" + +# This is the model from system registry that needs to be deployed +model_name="t5-small" + +# Validate the existence of the model in the registry and get the latest version +model_list=$(az ml model list --name ${model_name} --registry-name ${registry_name} 2>&1) +if [[ ${model_list} == *"[]"* ]]; then + echo "Model doesn't exist in registry. 
Check the model list and try again."; exit 1;
+fi
+version_temp=${model_list#*\"version\": \"}
+model_version=${version_temp%%\"*}
+
+version=$(date +%s)
+endpoint_name="translation-$version"
+job_name="translation-job-$version"
+
+# todo: fetch compute_sku from the min_inference_sku tag of the model
+compute_sku="Standard_DS3_v2"
+
+# 1. Setup pre-requisites
+if [ "$subscription_id" = "" ] || \
+   [ "$resource_group_name" = "" ] || \
+   [ "$workspace_name" = "" ]; then
+    echo "Please update the script with the subscription_id, resource_group_name and workspace_name"
+    exit 1
+fi
+
+az account set -s $subscription_id
+workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"
+
+# 2. Check if the model exists in the registry
+# need to confirm model show command works for registries outside the tenant (aka system registry)
+if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
+then
+    echo "Model $model_name:$model_version does not exist in registry $registry_name"
+    exit 1
+fi
+
+# Prepare the input data for the batch endpoint
+inputs_dir="./input"
+wget https://foundationmodelsamples.blob.core.windows.net/batch-inference-datasets/wmt16-de-en-dataset/batch/batch_input.csv -P $inputs_dir || {
+    echo "prepare batch inputs failed"; exit 1;
+}
+
+# Create an AML compute for the batch deployment
+az ml compute create --name cpu-cluster --type AmlCompute --min-instances 0 --max-instances 3 --size $compute_sku $workspace_info || {
+    echo "compute create failed"; exit 1;
+}
+
+# 3. 
Deploy the model to an endpoint
+# create batch endpoint
+az ml batch-endpoint create --name $endpoint_name $workspace_info || {
+    echo "endpoint create failed"; exit 1;
+}
+
+# deploy model from registry to endpoint in workspace
+az ml batch-deployment create --file batch-deploy.yml --set-default $workspace_info --set \
+  endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version || {
+    echo "deployment create failed"; exit 1;
+}
+
+# 4. Invoke a job on the batch endpoint
+invoke_output=$(az ml batch-endpoint invoke --name $endpoint_name --input $inputs_dir $workspace_info 2>&1) || {
+    echo "endpoint invoke failed"; exit 1;
+}
+invoke_temp=${invoke_output#*\"name\": \"}
+job_name=${invoke_temp%%\"*}
+
+# 5. Stream the job logs
+az ml job stream --name $job_name $workspace_info || {
+    echo "job stream-logs failed"; exit 1;
+}
+
+# 6. Download the job output
+az ml job download --name $job_name --download-path ./output $workspace_info || {
+    echo "job output download failed"; exit 1;
+}
+
+# 7. Delete the endpoint
+az ml batch-endpoint delete --name $endpoint_name $workspace_info --yes || {
+    echo "endpoint delete failed"; exit 1;
+}
+
+# 8. Delete the compute cluster (Uncomment the below lines to delete the created cluster)
+# az ml compute delete --name cpu-cluster $workspace_info --yes || {
+#     echo "compute delete failed"; exit 1;
+# }
+