-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Vvatsalya/fix cli oai v2 workflow (#2717)
* setting location as ncus * set in setup-cli step * new init and setup script for oai v2 * correcting syntax for init sh * fix init oai v2 script * fix * fix 1 * fix 2 * change training dataset name in cli oai v2 example
- Loading branch information
Showing
5 changed files
with
383 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/bin/bash | ||
|
||
### If installing a release candidate: | ||
### * Update the "$wheel_url" | ||
### * Uncomment the following block surrounded by {} | ||
### * Comment out the ml extension install within <az_ml_install> | ||
|
||
# { | ||
# wheel_url='https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.9.0-py3-none-any.whl' | ||
# | ||
# az extension remove -n ml | ||
# if ! az extension add --yes --upgrade --source "$wheel_url"; then | ||
# | ||
# echo "Error: Failed to install release candidate" | ||
# exit 1 | ||
# fi | ||
# az version | ||
# unset wheel_url | ||
# } | ||
|
||
|
||
# Install the Azure CLI "ml" extension (-y answers the confirmation prompt). | ||
# <az_ml_install> | ||
az extension add -n ml -y | ||
# </az_ml_install> | ||
|
||
## For backward compatibility - running on old subscription | ||
# Fallback defaults used only when RESOURCE_GROUP_NAME is not provided. | ||
# <set_variables> | ||
GROUP="azureml-examples" | ||
LOCATION="northcentralus" | ||
WORKSPACE="main" | ||
# </set_variables> | ||
|
||
# If RESOURCE_GROUP_NAME is empty, the az configure is pending. | ||
# The ":-" default keeps this line safe when the variable is unset and the | ||
# caller runs with "set -o nounset". | ||
RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:-} | ||
if [[ -z "$RESOURCE_GROUP_NAME" ]]; then | ||
  echo "No resource group name [RESOURCE_GROUP_NAME] specified, defaulting to ${GROUP}." | ||
  # Assume the run is on the old subscription without the bootstrap script, | ||
  # so fall back to the hard-coded defaults defined above. | ||
|
||
  # Expansions are quoted so each key=value pair stays a single argument. | ||
  # <az_configure_defaults> | ||
  az configure --defaults group="$GROUP" workspace="$WORKSPACE" location="$LOCATION" | ||
  # </az_configure_defaults> | ||
  echo "Default resource group set to $GROUP" | ||
else | ||
  echo "Workflows are using the new subscription." | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/bin/bash | ||
# Strict mode: abort on any failing command (errexit), on expansion of an | ||
# unset variable (nounset), and when any stage of a pipeline fails (pipefail). | ||
set -o errexit -o nounset -o pipefail | ||
|
||
# Debugging aids, disabled by default: | ||
# set -xe | ||
# set -o xtrace # For debugging | ||
|
||
# set -Eeuo pipefail # https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ | ||
|
||
### Usage bash ./infra/bootstrapping/bootstrap.sh | ||
### Bootstrapping script that creates Resource group and Workspace | ||
### This assumes you have performed az login and have sufficient permissions | ||
|
||
# Path of this script (BASH_SOURCE when available, otherwise $0), used for help | ||
# messages, and the absolute directory that contains it. | ||
SCRIPT_PATH="${BASH_SOURCE[0]:-$0}" | ||
SCRIPT_DIR="$(cd "$(dirname "${SCRIPT_PATH}")" && pwd)" | ||
|
||
################### | ||
# REQUIRED ENVIRONMENT VARIABLES: | ||
# | ||
# RESOURCE_GROUP_NAME | ||
# WORKSPACE_NAME | ||
# LOCATION | ||
# SUBSCRIPTION_ID | ||
|
||
############### | ||
|
||
# Recursively open up permissions on the script directory. | ||
# NOTE(review): mode 777 makes every file under it world-writable - confirm | ||
# that this is really required. | ||
if [[ -d "${SCRIPT_DIR}" ]]; then | ||
  sudo chmod -R 777 "${SCRIPT_DIR}" | ||
fi | ||
|
||
# Source the helper library and the environment initialisation script that sit | ||
# next to this script, in that order. A missing file is fatal: later steps call | ||
# helpers such as echo_title that this script does not define itself, so | ||
# carrying on would only fail further down with a less helpful message. | ||
for required_script in sdk_helpers.sh init_environment_oai_v2.sh; do | ||
  if [[ ! -f "${SCRIPT_DIR}/${required_script}" ]]; then | ||
    # Diagnostics go to stderr so they are not mixed into regular output. | ||
    echo "---------------------------------------------------------" >&2 | ||
    echo "ERROR: ${required_script} not found." >&2 | ||
    echo "---------------------------------------------------------" >&2 | ||
    exit 1 | ||
  fi | ||
  # shellcheck source=/dev/null | ||
  source "${SCRIPT_DIR}/${required_script}" | ||
done | ||
|
||
echo_title "Installing tools" | ||
"${SCRIPT_DIR}/sdk_helpers.sh" install_tools | ||
|
||
################### | ||
# Dependency check: stop with status 1 unless validate_tool succeeds for | ||
# every required command-line utility. | ||
################### | ||
|
||
for required_tool in az sed; do | ||
  "${SCRIPT_DIR}/sdk_helpers.sh" validate_tool "${required_tool}" || exit 1 | ||
done | ||
|
||
#login to azure using your credentials | ||
az account show 1> /dev/null | ||
if [[ $? != 0 ]]; | ||
then | ||
az login | ||
fi | ||
|
||
echo_title "RESOURCE_GROUP_NAME = \"${RESOURCE_GROUP_NAME}\" & LOCATION=\"${LOCATION}\" set as defaults. " | ||
# Store group, workspace and location as CLI defaults for subsequent commands. | ||
az configure --defaults group="${RESOURCE_GROUP_NAME}" workspace="${WORKSPACE_NAME}" location="${LOCATION}" | ||
# Select the target subscription; stop with status 1 if that fails. | ||
if ! az account set -s "${SUBSCRIPTION_ID}"; then | ||
  exit 1 | ||
fi | ||
|
||
|
||
# Full provisioning runs only when RUN_BOOTSTRAP is set to a non-empty value | ||
# (for example RUN_BOOTSTRAP=1); otherwise only the dataset update in the | ||
# else branch is performed. | ||
# RUN_BOOTSTRAP=1 | ||
if [[ -n "${RUN_BOOTSTRAP:-}" ]]; then | ||
|
||
  echo_title "Ensuring Resource group" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_resourcegroup | ||
  echo_title "Ensuring Workspace" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_ml_workspace "${WORKSPACE_NAME}" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_ml_workspace "mlw-mevnet" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_vnet "vnet-mevnet" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_subnet "vnet-mevnet" "snet-scoring" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_identity "uaimevnet" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh grant_permission_identity_on_acr "uaimevnet" | ||
|
||
  echo_title "Ensuring Permissions on RG" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh grant_permission_app_id_on_rg "${APP_NAME}" | ||
|
||
  echo_title "Ensuring Registry ${REGISTRY_NAME}" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_registry "${REGISTRY_NAME}" | ||
  echo_title "Ensuring Registry of tomorrow ${REGISTRY_NAME_TOMORROW}" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_registry "${REGISTRY_NAME_TOMORROW}" | ||
|
||
  # ensure_aml_compute arguments are presumably <name> <min nodes> <max nodes> | ||
  # <VM size> - TODO confirm against sdk_helpers.sh. | ||
  echo_title "Ensuring CPU compute" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster" 0 20 "Standard_DS3_v2" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-cpu-cluster" 0 4 "Standard_DS3_v2" | ||
  # Larger CPU cluster for Dask and Spark examples | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster-lg" 0 4 "Standard_DS15_v2" | ||
|
||
  echo_title "Ensuring GPU compute" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC6s_v3" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC6s_v3" | ||
  # v100 single GPU cluster for pytorch 2.0 based notebooks | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "Standard_NC6s_v3" | ||
  # v100 GPU cluster for deepspeed cli examples | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-cluster" 0 2 "Standard_ND40rs_v2" | ||
|
||
  echo_title "Running prerequisites" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_prerequisites_in_workspace | ||
  "$SCRIPT_DIR"/sdk_helpers.sh update_dataset | ||
  "$SCRIPT_DIR"/sdk_helpers.sh ensure_prerequisites_in_registry | ||
|
||
  "$SCRIPT_DIR"/sdk_helpers.sh register_providers | ||
|
||
  echo_title "Creating AKS clusters." | ||
  configure_aks_cluster=( | ||
    aks-cpu-is | ||
    aks-cpu-ml | ||
    aks-cpu-od | ||
    aks-cpu-mc | ||
    scoring-explain | ||
  ) | ||
  # Create the clusters in parallel and remember each background PID. A bare | ||
  # `wait` always reports success, which would hide a failed creation. | ||
  aks_create_pids=() | ||
  for aks_compute in "${configure_aks_cluster[@]}"; do | ||
    ( | ||
      echo_info "Creating AKS cluster: '$aks_compute'" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh ensure_aks_compute "${aks_compute}" 1 3 "STANDARD_D3_V2" | ||
    ) & | ||
    aks_create_pids+=("$!") | ||
  done | ||
  # Wait for every job before judging the outcome, so that no creation is | ||
  # still running in the background when the script stops. | ||
  aks_create_failed=0 | ||
  for aks_create_pid in "${aks_create_pids[@]}"; do | ||
    wait "${aks_create_pid}" || aks_create_failed=1 | ||
  done | ||
  if (( aks_create_failed )); then | ||
    echo "ERROR: creation of one or more AKS clusters failed." >&2 | ||
    exit 1 | ||
  fi | ||
  # Attach the clusters one at a time, in list order. | ||
  for aks_compute in "${configure_aks_cluster[@]}"; do | ||
    ( | ||
      echo_info "Attaching AKS cluster: '$aks_compute'" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh install_k8s_extension "${aks_compute}" "managedClusters" "Microsoft.ContainerService/managedClusters" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh setup_compute "${aks_compute}" "${aks_compute}" "managedClusters" "azureml" | ||
    ) | ||
  done | ||
  echo_info ">>> Done creating AKS clusters" | ||
|
||
  # Arc cluster configuration | ||
  # NOTE(review): ARC_CLUSTER_NAME is deliberately left unquoted here, so its | ||
  # value is word-split into array elements; quoting it would turn an empty | ||
  # value into one empty cluster name. Confirm the intended format first. | ||
  configure_arc_cluster=( | ||
    ${ARC_CLUSTER_NAME} | ||
  ) | ||
  for arc_compute in "${configure_arc_cluster[@]}"; do | ||
    ( | ||
      echo_info "Creating amlarc cluster: '$arc_compute'" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh ensure_aks_compute "${arc_compute}" 1 3 "STANDARD_D3_V2" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh install_k8s_extension "${arc_compute}" "connectedClusters" "Microsoft.Kubernetes/connectedClusters" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh setup_compute "${arc_compute}-arc" "${ARC_COMPUTE_NAME}" "connectedClusters" "azureml" | ||
      "$SCRIPT_DIR"/sdk_helpers.sh setup_instance_type_aml_arc "${arc_compute}" | ||
    ) | ||
  done | ||
  echo_info ">>> Done creating amlarc clusters" | ||
  "$SCRIPT_DIR"/sdk_helpers.sh vmss_upgrade_policy_all_rg | ||
  # echo_title "Copying data" | ||
  # "$SCRIPT_DIR"/sdk_helpers.sh install_azcopy | ||
  # "$SCRIPT_DIR"/sdk_helpers.sh copy_dataset | ||
|
||
else | ||
  "$SCRIPT_DIR"/sdk_helpers.sh update_dataset | ||
  echo_info "Skipping Bootstrapping. Set the RUN_BOOTSTRAP environment variable to enable bootstrapping." | ||
fi | ||
|
||
echo_title "✅ Resource provisioning completed..." | ||
|
Oops, something went wrong.