From b42558885227089813c85bf789fdc2b7e7615f3c Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 19:16:36 +0530 Subject: [PATCH 1/9] setting location as ncus --- ...nai-oai-v2-openai_completions_finetune.yml | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml index 6974181132..7979082d07 100644 --- a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml +++ b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml @@ -26,31 +26,32 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: check out repo - uses: actions/checkout@v2 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | + - name: check out repo + uses: actions/checkout@v2 + - name: azure login + uses: azure/login@v1 + with: + creds: ${{secrets.AZUREML_CREDENTIALS}} + - name: bootstrap resources + run: | echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; bash bootstrap.sh - working-directory: infra/bootstrapping - continue-on-error: false - - name: setup-cli - run: | + working-directory: infra/bootstrapping + continue-on-error: false + - name: setup-cli + run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; bash setup.sh - working-directory: cli - continue-on-error: true - - name: run job - run: | + working-directory: cli + continue-on-error: true + - name: run job + run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + export LOCATION="northcentralus"; bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; bash -x ../dataset-create.sh bash -x ../../../../run-job.sh openai_completions_finetune_pipeline_spec.yaml - working-directory: cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline + working-directory: cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline From 567a0daa93381fb115c1a76121beb19d50d0fcfa Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 19:23:52 +0530 Subject: [PATCH 2/9] set in setup-cli step --- ...n-models-azure_openai-oai-v2-openai_completions_finetune.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml index 7979082d07..b981b92d0a 100644 --- a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml +++ b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml @@ -42,6 +42,7 @@ jobs: run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + export LOCATION="northcentralus"; bash setup.sh working-directory: cli continue-on-error: true @@ -49,7 +50,6 @@ jobs: run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; - export LOCATION="northcentralus"; bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; bash -x ../dataset-create.sh From 863929245261e2acf2de5dd8f45e391279d90964 Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 19:34:02 +0530 Subject: [PATCH 3/9] new init and setup script for oai v2 --- ...nai-oai-v2-openai_completions_finetune.yml | 7 +- cli/setup_oai_v2.sh | 47 ++++++ .../bootstrapping/init_environment_oai_v2.sh | 147 ++++++++++++++++++ 3 files changed, 197 insertions(+), 4 deletions(-) create mode 100644 cli/setup_oai_v2.sh create mode 100644 infra/bootstrapping/init_environment_oai_v2.sh diff --git a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml index b981b92d0a..c480b31a16 100644 --- a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml +++ b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml @@ -41,15 +41,14 @@ jobs: - name: setup-cli run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; - export LOCATION="northcentralus"; - bash setup.sh + source "${{ github.workspace }}/infra/bootstrapping/init_environment_oai_v2.sh"; + bash setup_oai_v2.sh working-directory: cli continue-on-error: true - name: run job run: | source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh"; + source "${{ github.workspace }}/infra/bootstrapping/init_environment_oai_v2.sh"; bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; bash -x ../dataset-create.sh diff --git a/cli/setup_oai_v2.sh b/cli/setup_oai_v2.sh new file mode 100644 index 0000000000..0f548da389 --- /dev/null +++ b/cli/setup_oai_v2.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +### If installing a release candidate: +### * Update the "$wheel_url" +### * Uncomment the following block surrounded by {} +### * Comment the ml extension install within + +# { +# wheel_url='https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.9.0-py3-none-any.whl' +# +# az extension remove -n ml +# if ! az extension add --yes --upgrade --source "$wheel_url"; then +# +# echo "Error: Failed to install release candidate" +# exit 1 +# fi +# az version +# unset wheel_url +# } + + +# +az extension add -n ml -y +# + +## For backward compatibility - running on old subscription +# +GROUP="azureml-examples" +LOCATION="northcentralus" +WORKSPACE="main" +# + +# If RESOURCE_GROUP_NAME is empty, the az configure is pending. +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:-} +if [[ -z "$RESOURCE_GROUP_NAME" ]] +then + echo "No resource group name [RESOURCE_GROUP_NAME] specified, defaulting to ${GROUP}." + # Installing extension temporarily assuming the run is on old subscription + # without bootstrap script. + + # + az configure --defaults group=$GROUP workspace=$WORKSPACE location=$LOCATION + # + echo "Default resource group set to $GROUP" +else + echo "Workflows are using the new subscription." +fi \ No newline at end of file diff --git a/infra/bootstrapping/init_environment_oai_v2.sh b/infra/bootstrapping/init_environment_oai_v2.sh new file mode 100644 index 0000000000..b834a47f5d --- /dev/null +++ b/infra/bootstrapping/init_environment_oai_v2.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +################### +set -o errexit +set -o pipefail +set -o nounset +# set -o xtrace # For debugging + + +################### +# REQUIRED ENVIRONMENT VARIABLES: +# +# PREFIX +# SUFFIX +# DATE_ONLY + +############### + +################### +# OPTIONAL ENVIRONMENT VARIABLES: +# +# RESOURCE_GROUP_NAME +# WORKSPACE_NAME +# SUBSCRIPTION_ID +# CPU_COMPUTE_NAME +# GPU_COMPUTE_NAME + +############### + +################### +# Names of parameters +################### + +# Global variables +export MAX_RETRIES=60 +export SLEEP_SECONDS=20 + +# default values for script invocation parameter +# export RUN_DEBUG=false # -x +# export CONTINUE_ON_ERR=true # -E - true or false + +# let "DATE_ONLY=`date +'%y%m%d'`" +# let "DATE_ONLY=$(date +'%y%m%U')" +# Add 10# to the front of variables to avoid the "Value too great for base" error when value has leading zeros. +# Ref: https://stackoverflow.com/questions/21049822/value-too-great-for-base-error-token-is-09 +let "DATE_ONLY=10#$(date -d '+2 days' +'%y%m')" +let "REGISTRY_TODAY=10#$(date +'%m%d')" +let "REGISTRY_TOMORROW=10#$(date -d '+1 days' +'%m%d')" + + +export PREFIX=aml +export SUFFIX=sdkv202 +export APP_NAME="github-sp-amlsdkv2-gh-2" +export timestamp=$(date +%s) +# export RESOURCE_GROUP_NAME=test-data-rg +# export WORKSPACE_NAME=${PREFIX}${SUFFIX}${DATE_ONLY}-ws +# export SUBSCRIPTION_ID=test +# export AZURE_SERVICE_PRINCIPAL="github-sp-${PREFIX}${SUFFIX}" + +# check if the required variables are specified. + +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:-} +if [[ -z "$RESOURCE_GROUP_NAME" ]] +then + export RESOURCE_GROUP_NAME="${PREFIX}${SUFFIX}${DATE_ONLY}" + echo_warning "No resource group name [RESOURCE_GROUP_NAME] specified, defaulting to ${RESOURCE_GROUP_NAME}." +fi + +WORKSPACE_NAME=${WORKSPACE_NAME:-} +if [[ -z "$WORKSPACE_NAME" ]] +then + export WORKSPACE_NAME="${PREFIX}${SUFFIX}${DATE_ONLY}-ws" + echo_warning "No workspace name [WORKSPACE_NAME] specified, defaulting to ${WORKSPACE_NAME}." +fi + +if [[ -z "$LOCATION" ]] +then + export LOCATION="northcentralus" + echo_warning "No resource group location [LOCATION] specified, defaulting to ${LOCATION}." +fi + +REGISTRY_NAME=${REGISTRY_NAME:-} +if [[ -z "$REGISTRY_NAME" ]] +then + export REGISTRY_NAME="DemoRegistry${REGISTRY_TODAY}" + echo_warning "No registry name [REGISTRY_NAME] specified, defaulting to ${REGISTRY_NAME}." +fi +export REGISTRY_NAME_TOMORROW="DemoRegistry${REGISTRY_TOMORROW}" + +# Check if user is logged in +[[ -n $(az account show 2> /dev/null) ]] || { echo_warning "Please login via the Azure CLI."; az login; } + +# ACR name must contain only small caps +export MOE_ACR_NAME="sdk${PREFIX}${SUFFIX}${DATE_ONLY}acr" + +SUBSCRIPTION_ID=${SUBSCRIPTION_ID:-} +if [ -z "$SUBSCRIPTION_ID" ] +then + # Grab the Azure subscription ID + subscriptionId=$(az account show --output tsv --query id) + # bash substitution to strip \r + subscriptionId="${subscriptionId%%[[:cntrl:]]}" + [[ -z "${subscriptionId}" ]] && echo_warning "Not logged into Azure as expected." + export SUBSCRIPTION_ID=${subscriptionId} + echo_warning "No Azure subscription id [SUBSCRIPTION_ID] specified. Using default subscription id." +fi + +#login to azure using your credentials +az account show 1> /dev/null +if [ $? != 0 ]; +then + az login +fi + +echo_title "RESOURCE_GROUP_NAME = \"${RESOURCE_GROUP_NAME}\" & LOCATION=\"${LOCATION}\" set as defaults. " +az configure --defaults group="${RESOURCE_GROUP_NAME}" workspace="${WORKSPACE_NAME}" location="${LOCATION}" # for subsequent commands. +az account set -s "${SUBSCRIPTION_ID}" || exit 1 + +# AKS +# export AKS_CLUSTER_PREFIX="${AKS_CLUSTER_PREFIX:-amlarc-aks}" +export VM_SKU="${VM_SKU:-Standard_D4s_v3}" +export MIN_COUNT="${MIN_COUNT:-3}" +export MAX_COUNT="${MAX_COUNT:-8}" + +# Extension +export EXT_AUTO_UPGRADE='false' +export RELEASE_TRAIN="${RELEASE_TRAIN:-staging}" +export RELEASE_NAMESPACE="${RELEASE_NAMESPACE:-azureml}" +export EXTENSION_NAME="${EXTENSION_NAME:-amlarc-extension}" +export EXTENSION_TYPE="${EXTENSION_TYPE:-Microsoft.AzureML.Kubernetes}" +export EXTENSION_SETTINGS="${EXTENSION_SETTINGS:-enableTraining=True enableInference=True allowInsecureConnections=True inferenceRouterServiceType=loadBalancer}" +export CLUSTER_TYPE="${CLUSTER_TYPE:-connectedClusters}" # or managedClusters + + +# ARC Compute +# export WORKSPACE="${WORKSPACE:-amlarc-githubtest-ws}" # $((1 + $RANDOM % 100)) +export ARC_CLUSTER_NAME="${ARC_CLUSTER_NAME:-amlarc-inference}" +export ARC_COMPUTE_NAME="${ARC_COMPUTE_NAME:-inferencecompute}" +export INSTANCE_TYPE_NAME="${INSTANCE_TYPE_NAME:-defaultinstancetype}" +export CPU="${CPU:-1}" +export MEMORY="${MEMORY:-4Gi}" +export GPU="${GPU:-null}" +export CPU_INSTANCE_TYPE="2 4Gi" +export GPU_INSTANCE_TYPE="4 40Gi 2" + +export VNET_CIDR="${VNET_CIDR:-10.0.0.0/8}" +export MASTER_SUBNET="${MASTER_SUBNET:-10.0.0.0/23}" \ No newline at end of file From c643baec87d892438ec585384dd4502388503578 Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 19:43:49 +0530 Subject: [PATCH 4/9] correcting syntax for init sh --- infra/bootstrapping/init_environment_oai_v2.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/bootstrapping/init_environment_oai_v2.sh b/infra/bootstrapping/init_environment_oai_v2.sh index b834a47f5d..41b7fa24bf 100644 --- a/infra/bootstrapping/init_environment_oai_v2.sh +++ b/infra/bootstrapping/init_environment_oai_v2.sh @@ -73,6 +73,7 @@ then echo_warning "No workspace name [WORKSPACE_NAME] specified, defaulting to ${WORKSPACE_NAME}." fi +LOCATION=${LOCATION:-} if [[ -z "$LOCATION" ]] then export LOCATION="northcentralus" From b209feec320f4e6f6cfe97ebb6172f485e2081bd Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 19:54:42 +0530 Subject: [PATCH 5/9] fix init oai v2 script --- infra/bootstrapping/init_environment_oai_v2.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/infra/bootstrapping/init_environment_oai_v2.sh b/infra/bootstrapping/init_environment_oai_v2.sh index 41b7fa24bf..3592afa234 100644 --- a/infra/bootstrapping/init_environment_oai_v2.sh +++ b/infra/bootstrapping/init_environment_oai_v2.sh @@ -73,12 +73,12 @@ then echo_warning "No workspace name [WORKSPACE_NAME] specified, defaulting to ${WORKSPACE_NAME}." fi -LOCATION=${LOCATION:-} -if [[ -z "$LOCATION" ]] -then - export LOCATION="northcentralus" - echo_warning "No resource group location [LOCATION] specified, defaulting to ${LOCATION}." -fi +# LOCATION=${LOCATION:-} +# if [[ -z "$LOCATION" ]] +# then +export LOCATION="northcentralus" +echo_warning "No resource group location [LOCATION] specified, defaulting to ${LOCATION}." +# fi REGISTRY_NAME=${REGISTRY_NAME:-} if [[ -z "$REGISTRY_NAME" ]] From 5cd36fef1dd7d846b7687e761ff05623cfa18590 Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 22:38:39 +0530 Subject: [PATCH 6/9] fix --- infra/bootstrapping/init_environment_oai_v2.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/infra/bootstrapping/init_environment_oai_v2.sh b/infra/bootstrapping/init_environment_oai_v2.sh index 3592afa234..1e44c55f0b 100644 --- a/infra/bootstrapping/init_environment_oai_v2.sh +++ b/infra/bootstrapping/init_environment_oai_v2.sh @@ -73,12 +73,8 @@ then echo_warning "No workspace name [WORKSPACE_NAME] specified, defaulting to ${WORKSPACE_NAME}." fi -# LOCATION=${LOCATION:-} -# if [[ -z "$LOCATION" ]] -# then -export LOCATION="northcentralus" +LOCATION=${LOCATION:-"northcentralus"} echo_warning "No resource group location [LOCATION] specified, defaulting to ${LOCATION}." -# fi REGISTRY_NAME=${REGISTRY_NAME:-} if [[ -z "$REGISTRY_NAME" ]] From 75c4f618ed4f46ea066e61e9443756a2d7c4d350 Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Mon, 9 Oct 2023 22:59:26 +0530 Subject: [PATCH 7/9] fix 1 --- ...nai-oai-v2-openai_completions_finetune.yml | 2 +- infra/bootstrapping/bootstrap_oai_v2.sh | 166 ++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 infra/bootstrapping/bootstrap_oai_v2.sh diff --git a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml index c480b31a16..2f45cea76b 100644 --- a/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml +++ b/.github/workflows/cli-foundation-models-azure_openai-oai-v2-openai_completions_finetune.yml @@ -35,7 +35,7 @@ jobs: - name: bootstrap resources run: | echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh + bash bootstrap_oai_v2.sh working-directory: infra/bootstrapping continue-on-error: false - name: setup-cli diff --git a/infra/bootstrapping/bootstrap_oai_v2.sh b/infra/bootstrapping/bootstrap_oai_v2.sh new file mode 100644 index 0000000000..e096ddfa05 --- /dev/null +++ b/infra/bootstrapping/bootstrap_oai_v2.sh @@ -0,0 +1,166 @@ +#!/bin/bash +# set -xe +# Strict mode, fail on any error +set -euo pipefail + +set -o errexit +set -o pipefail +set -o nounset +# set -o xtrace # For debugging + +# set -Eeuo pipefail # https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ +# set -o xtrace # For debugging + +### Usage bash ./infra/bootstrapping/bootstrap.sh +### Bootstrapping script that creates Resource group and Workspace +### This assumes you have performed az login and have sufficient permissions + +# The filename of this script for help messages +SCRIPT_PATH="${BASH_SOURCE[0]:-$0}" +SCRIPT_DIR="$( cd "$( dirname "${SCRIPT_PATH}" )" && pwd )" + +################### +# REQUIRED ENVIRONMENT VARIABLES: +# +# RESOURCE_GROUP_NAME +# WORKSPACE_NAME +# LOCATION +# SUBSCRIPTION_ID + +############### + +# update directory with full permissions +if [ -d "$SCRIPT_DIR" ]; then + sudo chmod -R 777 "$SCRIPT_DIR" +fi + +if [ -f "$SCRIPT_DIR"/sdk_helpers.sh ]; then + source "$SCRIPT_DIR"/sdk_helpers.sh; +else + echo "---------------------------------------------------------" + echo -e "ERROR: sdk_helpers.sh not found." + echo "---------------------------------------------------------" +fi + +if [ -f "$SCRIPT_DIR"/init_environment_oai_v2.sh ]; then + source "$SCRIPT_DIR"/init_environment_oai_v2.sh; +else + echo "---------------------------------------------------------" + echo -e "ERROR: init_environment_oai_v2.sh not found." + echo "---------------------------------------------------------" +fi + +echo_title "Installing tools" +"$SCRIPT_DIR"/sdk_helpers.sh install_tools + +################### +# validate dependencies if the required utilities are installed +################### + +"$SCRIPT_DIR"/sdk_helpers.sh validate_tool az || exit 1 +"$SCRIPT_DIR"/sdk_helpers.sh validate_tool sed || exit 1 + +#login to azure using your credentials +az account show 1> /dev/null +if [[ $? != 0 ]]; +then + az login +fi + +echo_title "RESOURCE_GROUP_NAME = \"${RESOURCE_GROUP_NAME}\" & LOCATION=\"${LOCATION}\" set as defaults. " +az configure --defaults group="${RESOURCE_GROUP_NAME}" workspace="${WORKSPACE_NAME}" location="${LOCATION}" # for subsequent commands. +az account set -s "${SUBSCRIPTION_ID}" || exit 1 + + +# RUN_BOOTSTRAP=1 +if [[ ! -z "${RUN_BOOTSTRAP:-}" ]]; then + + echo_title "Ensuring Resource group" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_resourcegroup + echo_title "Ensuring Workspace" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_ml_workspace "${WORKSPACE_NAME}" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_ml_workspace "mlw-mevnet" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_vnet "vnet-mevnet" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_subnet "vnet-mevnet" "snet-scoring" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_identity "uaimevnet" + "$SCRIPT_DIR"/sdk_helpers.sh grant_permission_identity_on_acr "uaimevnet" + + echo_title "Ensuring Permissions on RG" + "$SCRIPT_DIR"/sdk_helpers.sh grant_permission_app_id_on_rg "${APP_NAME}" + + echo_title "Ensuring Registry ${REGISTRY_NAME}" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_registry "${REGISTRY_NAME}" + echo_title "Ensuring Registry of tomorrow ${REGISTRY_NAME_TOMORROW}" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_registry "${REGISTRY_NAME_TOMORROW}" + + echo_title "Ensuring CPU compute" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster" 0 20 "Standard_DS3_v2" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-cpu-cluster" 0 4 "Standard_DS3_v2" + # Larger CPU cluster for Dask and Spark examples + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster-lg" 0 4 "Standard_DS15_v2" + + echo_title "Ensuring GPU compute" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC6s_v3" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC6s_v3" + # v100 single GPU cluster for pytorch 2.0 based notebooks + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "Standard_NC6s_v3" + # v100 GPU cluster for deepspeed cli examples + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-cluster" 0 2 "Standard_ND40rs_v2" + + echo_title "Running prerequisites" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_prerequisites_in_workspace + "$SCRIPT_DIR"/sdk_helpers.sh update_dataset + "$SCRIPT_DIR"/sdk_helpers.sh ensure_prerequisites_in_registry + + "$SCRIPT_DIR"/sdk_helpers.sh register_providers + + echo_title "Creating AKS clusters." + configure_aks_cluster=( + aks-cpu-is + aks-cpu-ml + aks-cpu-od + aks-cpu-mc + scoring-explain + ) + for aks_compute in "${configure_aks_cluster[@]}"; do + ( + echo_info "Creating AKS cluster: '$aks_compute'" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aks_compute "${aks_compute}" 1 3 "STANDARD_D3_V2" + ) & + done + wait # until all AKS are created + for aks_compute in "${configure_aks_cluster[@]}"; do + ( + echo_info "Attaching AKS cluster: '$aks_compute'" + "$SCRIPT_DIR"/sdk_helpers.sh install_k8s_extension "${aks_compute}" "managedClusters" "Microsoft.ContainerService/managedClusters" + "$SCRIPT_DIR"/sdk_helpers.sh setup_compute "${aks_compute}" "${aks_compute}" "managedClusters" "azureml" + ) + done + echo_info ">>> Done creating AKS clusters" + + # Arc cluster configuration + configure_arc_cluster=( + ${ARC_CLUSTER_NAME} + ) + for arc_compute in "${configure_arc_cluster[@]}"; do + ( + echo_info "Creating amlarc cluster: '$arc_compute'" + "$SCRIPT_DIR"/sdk_helpers.sh ensure_aks_compute "${arc_compute}" 1 3 "STANDARD_D3_V2" + "$SCRIPT_DIR"/sdk_helpers.sh install_k8s_extension "${arc_compute}" "connectedClusters" "Microsoft.Kubernetes/connectedClusters" + "$SCRIPT_DIR"/sdk_helpers.sh setup_compute "${arc_compute}-arc" "${ARC_COMPUTE_NAME}" "connectedClusters" "azureml" + "$SCRIPT_DIR"/sdk_helpers.sh setup_instance_type_aml_arc "${arc_compute}" + ) + done + echo_info ">>> Done creating amlarc clusters" + "$SCRIPT_DIR"/sdk_helpers.sh vmss_upgrade_policy_all_rg + # echo_title "Copying data" + # "$SCRIPT_DIR"/sdk_helpers.sh install_azcopy + # "$SCRIPT_DIR"/sdk_helpers.sh copy_dataset + +else + "$SCRIPT_DIR"/sdk_helpers.sh update_dataset + echo_info "Skipping Bootstrapping. Set the RUN_BOOTSTRAP environment variable to enable bootstrapping." +fi + +echo_title "✅ Resource provisioning completed..." + From aaefd71aeceda7677469775118fac780d3dd7dd7 Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Tue, 10 Oct 2023 00:05:08 +0530 Subject: [PATCH 8/9] fix 2 --- infra/bootstrapping/init_environment_oai_v2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/bootstrapping/init_environment_oai_v2.sh b/infra/bootstrapping/init_environment_oai_v2.sh index 1e44c55f0b..144522fa45 100644 --- a/infra/bootstrapping/init_environment_oai_v2.sh +++ b/infra/bootstrapping/init_environment_oai_v2.sh @@ -69,7 +69,7 @@ fi WORKSPACE_NAME=${WORKSPACE_NAME:-} if [[ -z "$WORKSPACE_NAME" ]] then - export WORKSPACE_NAME="${PREFIX}${SUFFIX}${DATE_ONLY}-ws" + export WORKSPACE_NAME="${PREFIX}${SUFFIX}${DATE_ONLY}-dv3-ws" echo_warning "No workspace name [WORKSPACE_NAME] specified, defaulting to ${WORKSPACE_NAME}." fi From 8c37ef2739d7212070de5202f4e8790791b4577a Mon Sep 17 00:00:00 2001 From: vvatsalya Date: Tue, 10 Oct 2023 00:28:58 +0530 Subject: [PATCH 9/9] change training dataset name in cli oai v2 example --- .../openai_completions_finetune_pipeline_spec.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline/openai_completions_finetune_pipeline_spec.yaml b/cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline/openai_completions_finetune_pipeline_spec.yaml index 41c4ada58f..7f509f4af8 100644 --- a/cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline/openai_completions_finetune_pipeline_spec.yaml +++ b/cli/foundation-models/azure_openai/oai-v2/openai_completions_finetune_pipeline/openai_completions_finetune_pipeline_spec.yaml @@ -18,15 +18,15 @@ jobs: type: pipeline component: azureml://registries/azureml/components/openai_completions_finetune_pipeline/versions/0.0.9 inputs: - train_dataset: + train_dataset: type: uri_folder - path: azureml:identity-train-chat:1 - validation_dataset: + path: azureml:oai_sample_training_data:1 + validation_dataset: type: uri_folder - path: azureml:identity-train-chat:1 + path: azureml:oai_sample_training_data:1 registered_model_name: ${{parent.inputs.registered_model_name}} model: ${{parent.inputs.model}} task_type: ${{parent.inputs.task_type}} n_epochs: ${{parent.inputs.n_epochs}} learning_rate_multiplier: ${{parent.inputs.learning_rate_multiplier}} - batch_size: ${{parent.inputs.batch_size}} \ No newline at end of file + batch_size: ${{parent.inputs.batch_size}}