From 5fb2810b83d1559966852fcfbad094c8c88956ec Mon Sep 17 00:00:00 2001 From: Rupal jain Date: Tue, 29 Aug 2023 15:27:29 +0530 Subject: [PATCH] Update import model for MMDetection Models (#2586) --- ...stem-import-import_model_into_registry.yml | 3 + ...t-import_model_into_registry_new_model.yml | 2 + ...l_into_registry_new_model_image_tasks.yml} | 6 +- ...port_mmdetection_model_into_registry.ipynb | 560 ++++++++++++++++++ .../import/import_model_into_registry.ipynb | 146 +++-- 5 files changed, 663 insertions(+), 54 deletions(-) rename .github/workflows/{sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification.yml => sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks.yml} (94%) create mode 100644 sdk/python/foundation-models/system/import/import_mmdetection_model_into_registry.ipynb diff --git a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml index 53cae6dbc6..d1bf2daac1 100644 --- a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml +++ b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry.yml @@ -19,6 +19,9 @@ on: - sdk/python/dev-requirements.txt - infra/bootstrapping/** - sdk/python/setup.sh +env: + MMDETECTION_MODEL_ID: None + MMDETECTION_TASK_NAME: image-object-detection concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true diff --git a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model.yml b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model.yml index 42fa800652..5549122363 100644 --- a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model.yml +++ b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model.yml @@ -17,6 +17,8 @@ on: env: MODEL_ID: ai4bharat/indic-bert TASK_NAME: fill-mask + MMDETECTION_MODEL_ID: None + MMDETECTION_TASK_NAME: image-object-detection concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true diff --git a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification.yml b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks.yml similarity index 94% rename from .github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification.yml rename to .github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks.yml index 41eba4d334..4ddadfc61c 100644 --- a/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification.yml +++ b/.github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks.yml @@ -1,4 +1,4 @@ -name: sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification +name: sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks # This file is created by sdk/python/readme.py. # Please do not edit directly. on: @@ -10,13 +10,15 @@ on: - main paths: - sdk/python/foundation-models/system/import/** - - .github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_classification.yml + - .github/workflows/sdk-foundation-models-system-import-import_model_into_registry_new_model_image_tasks.yml - sdk/python/dev-requirements.txt - infra/bootstrapping/** - sdk/python/setup.sh env: MODEL_ID: microsoft/resnet-18 TASK_NAME: image-classification + MMDETECTION_MODEL_ID: faster_rcnn_r50_fpn_1x_coco + MMDETECTION_TASK_NAME: image-object-detection concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true diff --git a/sdk/python/foundation-models/system/import/import_mmdetection_model_into_registry.ipynb b/sdk/python/foundation-models/system/import/import_mmdetection_model_into_registry.ipynb new file mode 100644 index 0000000000..533a599518 --- /dev/null +++ b/sdk/python/foundation-models/system/import/import_mmdetection_model_into_registry.ipynb @@ -0,0 +1,560 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "db3fa6e6", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## Import models from MMDetection hub\n", + "This sample shows how to import and register `image-object-detection` and `image-instance-segmentation`, models from MMDetection.\n", + "\n", + "### How does import work?\n", + "The import process runs as a job in your AzureML workspace using components from the `azureml` system registry. The models are downloaded and converted to MLflow packaged format. You can then register the models to your AzureML Workspace or Registry that makes them available for inference or fine tuning. \n", + "\n", + "### What models are supported for import?\n", + "Any object detection and instance segmentation model from MMDetection can be imported. Only the following set of tasks are supported for MLflow conversion:\n", + "* image-object-detection\n", + "* image-instance-segmentation\n", + "\n", + "\n", + "### Why convert to MLflow?\n", + "MLflow is AzureML's recommended model packaging format. \n", + "* **Evaluation benefits**: Foundation models imported and converted to MLflow format can be Evaluated using AzureML's Evaluation pipelines. You can use the no-code UI wizards, or the code-based job submission with the SDK or CLI/YAML. AzureML's Evaluation pipelines are built using components. This gives you the flexibility to compose your own Evaluation pipelines using AzureML Evaluation Components and evaluate your Model's performance on standard or custom dataset(s) to take informed decision on whether to deploy the base model directly or further fine-tune it before deployment. Learn more about AzureML Evaluation pipelines using [SDK](https://github.com/Azure/azureml-examples/tree/mabables/foundation-models/sdk/python/foundation-models/system/evaluation) or [CLI](https://github.com/Azure/azureml-examples/tree/mabables/foundation-models/cli/foundation-models/system/evaluation).\n", + "* **Inference benefits**: AzureML supports no-code-deployment for models packaged as MLflow that enables a seamless inference experience for the models. Learn more about [MLflow model packaging](https://learn.microsoft.com/en-us/azure/machine-learning/concept-mlflow-models) and [no-code-deployment](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models-online-endpoints?tabs=sdk). \n", + "* **Fine tuning benefits**: Foundation models imported and converted to MLflow format can be fine tuned using AzureML's fine tuning pipelines. You can use the no-code UI wizards, or the code based job submission with the SDK or CLI/YAML. AzureML's fine tuning pipelines are built using components. This gives you the flexibility to compose your own fine tuning pipelines containing your own jobs for data transformation, post processing and the AzureML fine tuning components. Learn more about pipelines using [sdk](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipeline-python) or [CLI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipelines-cli).\n", + "\n", + "### What happens if I just download model and register models without converting to MLflow? That's because the task of the model I'm interested in is not among the supported list of tasks.\n", + "You can still download and register the model using the outputs of the `download_model` job. You need to [write your own inference code](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-online-endpoints?tabs=python) in this case. It also means that fine tuning is not yet supported if the task type of the model you are interested in is not in the supported list.\n", + "\n", + "### Outline\n", + "1. Connect to Azure Machine Learning Workspace\n", + "2. Create a pipeline job using components for MMDetection models\n", + "3. Get the registered model\n", + "\n", + "\n", + "**Requirements** - In order to benefit from this tutorial, you will need:\n", + "- A basic understanding of Machine Learning\n", + "- An Azure account with an active subscription - [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n", + "- An Azure ML workspace with computer cluster - [Configure workspace](https://aka.ms/azureml-workspace-configuration)\n", + "- A python environment\n", + "- Installed Azure Machine Learning Python SDK v2 - [install instructions](https://aka.ms/azureml-sdkv2-install) - check the getting started section\n", + "\n", + "\n", + "**Motivations** - This notebook explains how to create model importing/publishing pipeline job in workspace using pipeline component registered in a registry" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "88ba6a51", + "metadata": {}, + "source": [ + "## 1. Connect to Azure Machine Learning Workspace\n", + "\n", + "The [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. In this section, we will connect to the workspace in which the job will be run.\n", + "\n", + "### 1.1 Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a85623da", + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "from azure.ai.ml import MLClient, UserIdentityConfiguration\n", + "from azure.identity import (\n", + " DefaultAzureCredential,\n", + " InteractiveBrowserCredential,\n", + ")\n", + "from azure.ai.ml.dsl import pipeline" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8911a778", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "### 1.2 Configure credential\n", + "\n", + "We are using `DefaultAzureCredential` to get access to the workspace. \n", + "`DefaultAzureCredential` should be capable of handling most Azure SDK authentication scenarios. \n", + "\n", + "Reference for more available credentials if it does not work for you: [configure credential example](https://aka.ms/azureml-workspace-configuration), [azure-identity reference doc](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity?view=azure-python)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8503c314", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " credential = DefaultAzureCredential()\n", + " # Check if given credential can get token successfully.\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work\n", + " credential = InteractiveBrowserCredential()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6cab6857", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "### 1.3 Get a handle to the workspace and the registry\n", + "\n", + "We use the config file to connect to a workspace. The Azure ML workspace should be configured with a computer cluster. [Check this notebook for configure a workspace](https://aka.ms/azureml-workspace-configuration)\n", + "\n", + "If config file is not available user can update following parameters in place holders\n", + "- SUBSCRIPTION_ID\n", + "- RESOURCE_GROUP\n", + "- WORKSPACE_NAME" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b7159db", + "metadata": {}, + "outputs": [], + "source": [ + "# Get a handle to workspace\n", + "try:\n", + " ml_client_ws = MLClient.from_config(credential=credential)\n", + "except:\n", + " ml_client_ws = MLClient(\n", + " credential,\n", + " subscription_id=\"\",\n", + " resource_group_name=\"\",\n", + " workspace_name=\"\",\n", + " )\n", + "\n", + "ml_client_registry = MLClient(credential, registry_name=\"azureml\")\n", + "\n", + "experiment_name = f\"Import Model Pipeline\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d6c2cede", + "metadata": {}, + "source": [ + "### 1.4 Compute target setup\n", + "\n", + "#### Create or Attach existing AmlCompute\n", + "A compute target is required to execute the Automated ML run. In this tutorial, you create AmlCompute as your training compute resource.\n", + "\n", + "#### Creation of AmlCompute takes approximately 5 minutes. \n", + "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5002db87", + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "from azure.core.exceptions import ResourceNotFoundError\n", + "\n", + "compute_name = \"model-import-cluster\"\n", + "\n", + "try:\n", + " _ = ml_client_ws.compute.get(compute_name)\n", + " print(\"Found existing compute target.\")\n", + "except ResourceNotFoundError:\n", + " print(\"Creating a new compute target...\")\n", + " compute_config = AmlCompute(\n", + " name=compute_name,\n", + " type=\"amlcompute\",\n", + " size=\"STANDARD_NC6s_v3\",\n", + " idle_time_before_scale_down=120,\n", + " min_instances=0,\n", + " max_instances=6,\n", + " )\n", + " ml_client_ws.begin_create_or_update(compute_config).result()" + ] + }, + { + "cell_type": "markdown", + "id": "924d5ea1", + "metadata": {}, + "source": [ + "## 2. Create a pipeline job using components for MMDetection models" + ] + }, + { + "cell_type": "markdown", + "id": "c8b0e42e", + "metadata": {}, + "source": [ + "### 2.1 Load components from the registry to create a pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5005fd22", + "metadata": {}, + "outputs": [], + "source": [ + "mmdetection_import_model_component = ml_client_registry.components.get(\n", + " name=\"mmdetection_image_objectdetection_instancesegmentation_model_import\",\n", + " label=\"latest\",\n", + ")\n", + "convert_model_to_mlflow_component = ml_client_registry.components.get(\n", + " name=\"convert_model_to_mlflow\", label=\"latest\"\n", + ")\n", + "register_model_component = ml_client_registry.components.get(\n", + " name=\"register_model\", label=\"latest\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "55a45e75", + "metadata": {}, + "source": [ + "### 2.2 Necessary parameters\n" + ] + }, + { + "cell_type": "markdown", + "id": "c0662cd4", + "metadata": {}, + "source": [ + "__NOTE: Models from MMDetection are subject to third party license terms available on the MMDetection model details page respectively. It is your responsibility to comply with the model's license terms.__\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21983a23", + "metadata": {}, + "outputs": [], + "source": [ + "MMDETECTION_MODEL_ID = \"faster_rcnn_r50_fpn_1x_coco\"\n", + "MMDETECTION_TASK_NAME = \"image-object-detection\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e6eb7dc", + "metadata": {}, + "outputs": [], + "source": [ + "COMPUTE = compute_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c5ae8c3", + "metadata": {}, + "outputs": [], + "source": [ + "mmdetection_model_exists_in_registry = False\n", + "try:\n", + " REG_MMDETECTION_MODEL_ID = MMDETECTION_MODEL_ID.replace(\n", + " \"/\", \"-\"\n", + " ) # model name in registry doesn't contain '/'\n", + " models = ml_client_registry.models.list(name=REG_MMDETECTION_MODEL_ID)\n", + " if models:\n", + " max_version = (max(models, key=lambda x: int(x.version))).version\n", + " model_version = str(int(max_version))\n", + " print(\n", + " f\"Model already exists in azureml with name {REG_MMDETECTION_MODEL_ID} and version {model_version}\"\n", + " )\n", + " mmdetection_model_exists_in_registry = True\n", + "except:\n", + " print(\n", + " f\"Model {MMDETECTION_MODEL_ID} has not been imported into the registry. Please continue importing the model.\"\n", + " )\n", + " model_version = \"1\"" + ] + }, + { + "cell_type": "markdown", + "id": "29e44d38", + "metadata": {}, + "source": [ + "### 2.3 Create pipeline using components" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29f472cf", + "metadata": {}, + "outputs": [], + "source": [ + "@pipeline\n", + "def mmdetection_model_import_pipeline(model_id, model_version, task_name, compute):\n", + " \"\"\"\n", + " Create model import pipeline using pipeline component.\n", + "\n", + " Parameters\n", + " ----------\n", + " model_id : str\n", + " model_version: str\n", + " task_name: str\n", + " compute : str\n", + "\n", + " Returns\n", + " -------\n", + " model_registration_details : dict\n", + " \"\"\"\n", + " model_selection_args = {\n", + " # model_selection_args\n", + " \"model_family\": \"MmDetectionImage\",\n", + " \"model_name\": model_id,\n", + " }\n", + " mmdetection_import_model = mmdetection_import_model_component(\n", + " **model_selection_args\n", + " )\n", + " mmdetection_import_model.code = None\n", + " mmdetection_import_model.compute = compute\n", + " mmdetection_import_model.experiment_name = experiment_name\n", + "\n", + " convert_model_to_mlflow = convert_model_to_mlflow_component(\n", + " model_path=mmdetection_import_model.outputs.output_dir,\n", + " mlflow_flavor=\"mmlab_pyfunc\",\n", + " task_name=task_name,\n", + " )\n", + " convert_model_to_mlflow.code = None\n", + " convert_model_to_mlflow.compute = compute\n", + " convert_model_to_mlflow.experiment_name = experiment_name\n", + "\n", + " register_model = register_model_component(\n", + " model_path=convert_model_to_mlflow.outputs.mlflow_model_folder,\n", + " model_name=model_id,\n", + " model_version=model_version,\n", + " model_import_job_path=convert_model_to_mlflow.outputs.model_import_job_path,\n", + " )\n", + " register_model.code = None\n", + " register_model.compute = compute\n", + " register_model.experiment_name = experiment_name\n", + "\n", + " return {\n", + " \"model_registration_details\": register_model.outputs.registration_details_folder\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "23ba545c", + "metadata": {}, + "source": [ + "### 2.4 Create pipeline object\n", + "Assign User Identity Configuration to pipeline object, so that individual pipeline components can get identity credentials if required. \n", + "Click [here](https://learn.microsoft.com/en-us/samples/azure/azureml-examples/azureml---on-behalf-of-feature/) to know more about OBO credentials." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08dc83c4", + "metadata": {}, + "outputs": [], + "source": [ + "mmdetection_pipeline_object = mmdetection_model_import_pipeline(\n", + " model_id=MMDETECTION_MODEL_ID,\n", + " model_version=model_version,\n", + " compute=COMPUTE,\n", + " task_name=MMDETECTION_TASK_NAME,\n", + ")\n", + "mmdetection_pipeline_object.identity = UserIdentityConfiguration()\n", + "\n", + "mmdetection_pipeline_object.settings.force_rerun = True\n", + "\n", + "# Set job to not continue on failure\n", + "mmdetection_pipeline_object.settings.continue_on_step_failure = False\n", + "\n", + "mmdetection_pipeline_object.settings.default_compute = COMPUTE" + ] + }, + { + "cell_type": "markdown", + "id": "c8429231", + "metadata": {}, + "source": [ + "### 2.5 Submit model importing pipeline job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "044e2643", + "metadata": {}, + "outputs": [], + "source": [ + "schedule_mmdetection_model_import = (\n", + " not mmdetection_model_exists_in_registry\n", + " and MMDETECTION_MODEL_ID not in [None, \"None\"]\n", + " and len(MMDETECTION_MODEL_ID) > 1\n", + ")\n", + "print(\n", + " f\"Need to schedule run for importing {MMDETECTION_MODEL_ID}: {schedule_mmdetection_model_import}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e42587a", + "metadata": {}, + "outputs": [], + "source": [ + "mmdetection_pipeline_job = None\n", + "if schedule_mmdetection_model_import:\n", + " # submit the pipeline job\n", + " mmdetection_pipeline_job = ml_client_ws.jobs.create_or_update(\n", + " mmdetection_pipeline_object, experiment_name=experiment_name\n", + " )\n", + "\n", + " # wait for the pipeline job to complete\n", + " ml_client_ws.jobs.stream(mmdetection_pipeline_job.name)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f33d5c80", + "metadata": {}, + "source": [ + "## 3. Get the registered model" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "dda1f809", + "metadata": {}, + "source": [ + "### 3.1 Download model registration details in local file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0086f32e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "\n", + "download_path = \"./pipeline_outputs/\"\n", + "\n", + "# delete the folder if already exists\n", + "if os.path.exists(download_path):\n", + " shutil.rmtree(download_path)\n", + "\n", + "# if pipeline job was not scheduled, skip\n", + "if mmdetection_pipeline_job is not None:\n", + "\n", + " print(\"Pipeline job: \" + mmdetection_pipeline_job.name)\n", + " print(\"Downloading pipeline job output: model_registration_details\")\n", + "\n", + " pipeline_download_path = os.path.join(download_path, mmdetection_pipeline_job.name)\n", + " os.makedirs(pipeline_download_path, exist_ok=True)\n", + "\n", + " ml_client_ws.jobs.download(\n", + " name=mmdetection_pipeline_job.name,\n", + " download_path=pipeline_download_path,\n", + " output_name=\"model_registration_details\",\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e5504e6f", + "metadata": {}, + "source": [ + "### 3.2 Read registration details and get model from registry/workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e9e9ac0", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# if pipeline job was not scheduled, skip\n", + "if mmdetection_pipeline_job is not None:\n", + "\n", + " with open(\n", + " f\"./pipeline_outputs/{mmdetection_pipeline_job.name}/named-outputs/model_registration_details/model_registration_details.json\",\n", + " \"r\",\n", + " ) as f:\n", + " registration_details = json.load(f)\n", + "\n", + " model_name = registration_details[\"name\"]\n", + " model_version = registration_details[\"version\"]\n", + "\n", + " # Get the model object from workspace\n", + " model = ml_client_ws.models.get(name=model_name, version=model_version)\n", + " print(f\"\\n{model_name}\")\n", + " print(model.__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "890037d5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "Python 3.10 - SDK V2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sdk/python/foundation-models/system/import/import_model_into_registry.ipynb b/sdk/python/foundation-models/system/import/import_model_into_registry.ipynb index c11af9dabd..753fb5c7b0 100644 --- a/sdk/python/foundation-models/system/import/import_model_into_registry.ipynb +++ b/sdk/python/foundation-models/system/import/import_model_into_registry.ipynb @@ -10,8 +10,10 @@ } }, "source": [ - "## Import models from Hugging Face hub\n", - "This sample shows how to import and register models from [HuggingFace hub](https://huggingface.co/models). \n", + "## Import models from Hugging Face and MMDetection hub\n", + "This sample shows how to import and register models from [HuggingFace hub](https://huggingface.co/models).\n", + "\n", + "For `image-object-detection` and `image-instance-segmentation`, models from MMDetection are supported. Please refer to [import_mmdetection_model_into_registry.ipynb](./import_mmdetection_model_into_registry.ipynb) to import these.\n", "\n", "### How does import work?\n", "The import process runs as a job in your AzureML workspace using components from the `azureml` system registry. The models are downloaded and converted to MLflow packaged format. You can then register the models to your AzureML Workspace or Registry that makes them available for inference or fine tuning. \n", @@ -27,6 +29,8 @@ "* translation\n", "* image-classification\n", "\n", + "Please note that, we also support `image-object-detection` and `image-instance-segmentation` models from MMDetection. To import these, refer to [import_mmdetection_model_into_registry.ipynb](./import_mmdetection_model_into_registry.ipynb).\n", + "\n", "### Limitations of Model Import component where MLFlow conversion of model(s) would fail: \n", "1. If you attempt to download a model that has a task type other than the above with error - `Exception: Unsupported task {task name}`. \n", "2. If Hugging Face model is on transformers version > 4.28.1. \n", @@ -44,11 +48,9 @@ "You can still download and register the model using the outputs of the `download_model` job. You need to [write your own inference code](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-online-endpoints?tabs=python) in this case. It also means that fine tuning is not yet supported if the task type of the model you are interested in is not in the supported list.\n", "\n", "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to import.\n", - "* Configure the import job.\n", - "* Run the fine tuning job.\n", - "* Register the fine tuned model. \n", + "1. Connect to Azure Machine Learning Workspace\n", + "2. Create a pipeline job using the pipeline component for HuggingFace models\n", + "3. Get the registered model\n", "\n", "\n", "**Requirements** - In order to benefit from this tutorial, you will need:\n", @@ -163,7 +165,9 @@ " workspace_name=\"\",\n", " )\n", "\n", - "ml_client_registry = MLClient(credential, registry_name=\"azureml\")" + "ml_client_registry = MLClient(credential, registry_name=\"azureml\")\n", + "\n", + "experiment_name = f\"Import Model Pipeline\"" ] }, { @@ -213,41 +217,39 @@ { "attachments": {}, "cell_type": "markdown", - "id": "bb705c33", + "id": "54e53eb8", + "metadata": {}, + "source": [ + "## 2. Create a pipeline job using the pipeline component for HuggingFace models" + ] + }, + { + "cell_type": "markdown", + "id": "534f1f1f", "metadata": {}, "source": [ - "## 2. Load pipeline component from the registry to create a pipeline\n", + "### 2.1 Load pipeline component from the registry to create a pipeline\n", "\n", - "#### Component\n", "- import_model - Pipeline component which downloads the model, converts it into mlflow, validates locally and then register it.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "694f479f", + "id": "fc04dcbd", "metadata": {}, "outputs": [], "source": [ "import_model = ml_client_registry.components.get(name=\"import_model\", label=\"latest\")" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "54e53eb8", - "metadata": {}, - "source": [ - "## 3. Create a pipeline job using the pipeline component" - ] - }, { "attachments": {}, "cell_type": "markdown", "id": "f9c1c389", "metadata": {}, "source": [ - "### 3.1 Create pipeline object using necessary parameters\n" + "### 2.2 Create pipeline object using necessary parameters\n" ] }, { @@ -279,7 +281,7 @@ "id": "9fb68542", "metadata": {}, "source": [ - "__NOTE: Models from Hugging Face are subject to third party license terms available on the Hugging Face model details page. It is your responsibility to comply with the model's license terms.__\n", + "__NOTE: Models from Hugging Face or MMDetection are subject to third party license terms available on the Hugging Face model details page or MMDetection model details page respectively. It is your responsibility to comply with the model's license terms.__\n", "#### Set parameter values\n" ] }, @@ -324,6 +326,7 @@ "metadata": {}, "outputs": [], "source": [ + "huggingface_model_exists_in_registry = False\n", "try:\n", " REG_MODEL_ID = MODEL_ID.replace(\n", " \"/\", \"-\"\n", @@ -335,6 +338,7 @@ " print(\n", " f\"Model already exists in azureml with name {REG_MODEL_ID} and version {model_version}\"\n", " )\n", + " huggingface_model_exists_in_registry = True\n", "except:\n", " print(\n", " f\"Model {MODEL_ID} has not been imported into the registry. Please continue importing the model.\"\n", @@ -347,7 +351,7 @@ "id": "ecd925e0", "metadata": {}, "source": [ - "### 3.2 Create pipeline using pipeline component" + "### 2.3 Create pipeline using pipeline component" ] }, { @@ -389,7 +393,7 @@ "id": "c94c3616", "metadata": {}, "source": [ - "### 3.3 Create pipeline object\n", + "### 2.4 Create pipeline object\n", "Assign User Identity Configuration to pipeline object, so that individual pipeline components can get identity credentials if required. \n", "Click [here](https://learn.microsoft.com/en-us/samples/azure/azureml-examples/azureml---on-behalf-of-feature/) to know more about OBO credentials." ] @@ -402,10 +406,16 @@ "outputs": [], "source": [ "pipeline_object = model_import_pipeline(\n", - " model_id=MODEL_ID, compute=COMPUTE, task_name=TASK_NAME\n", + " model_id=MODEL_ID,\n", + " compute=COMPUTE,\n", + " task_name=TASK_NAME,\n", ")\n", "pipeline_object.identity = UserIdentityConfiguration()\n", - "pipeline_object.settings.force_rerun = True" + "\n", + "pipeline_object.settings.force_rerun = True\n", + "\n", + "\n", + "pipeline_object.settings.default_compute = COMPUTE" ] }, { @@ -414,22 +424,41 @@ "id": "03f5265f", "metadata": {}, "source": [ - "### 3.4 Submit model importing pipeline job" + "### 2.5 Submit model importing pipeline job" ] }, { "cell_type": "code", "execution_count": null, - "id": "8c1732c9", + "id": "47762852", "metadata": {}, "outputs": [], "source": [ - "# submit the pipeline job\n", - "pipeline_job = ml_client_ws.jobs.create_or_update(\n", - " pipeline_object, experiment_name=f\"Import Model Pipeline\"\n", + "schedule_huggingface_model_import = (\n", + " not huggingface_model_exists_in_registry\n", + " and MODEL_ID not in [None, \"None\"]\n", + " and len(MODEL_ID) > 1\n", ")\n", - "# wait for the pipeline job to complete\n", - "ml_client_ws.jobs.stream(pipeline_job.name)" + "print(\n", + " f\"Need to schedule run for importing {MODEL_ID}: {schedule_huggingface_model_import}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c1732c9", + "metadata": {}, + "outputs": [], + "source": [ + "huggingface_pipeline_job = None\n", + "if schedule_huggingface_model_import:\n", + " # submit the pipeline job\n", + " huggingface_pipeline_job = ml_client_ws.jobs.create_or_update(\n", + " pipeline_object, experiment_name=experiment_name\n", + " )\n", + " # wait for the pipeline job to complete\n", + " ml_client_ws.jobs.stream(huggingface_pipeline_job.name)" ] }, { @@ -438,7 +467,7 @@ "id": "f33d5c80", "metadata": {}, "source": [ - "## 4. Get the registered model" + "## 3. Get the registered model" ] }, { @@ -447,7 +476,7 @@ "id": "dda1f809", "metadata": {}, "source": [ - "### 4.1 Download model registration details in local file" + "### 3.1 Download model registration details in local file" ] }, { @@ -466,11 +495,20 @@ "if os.path.exists(download_path):\n", " shutil.rmtree(download_path)\n", "\n", - "ml_client_ws.jobs.download(\n", - " name=pipeline_job.name,\n", - " download_path=download_path,\n", - " output_name=\"model_registration_details\",\n", - ")" + "# if pipeline job was not scheduled, skip\n", + "if huggingface_pipeline_job is not None:\n", + "\n", + " print(\"Pipeline job: \" + huggingface_pipeline_job.name)\n", + " print(\"Downloading pipeline job output: model_registration_details\")\n", + "\n", + " pipeline_download_path = os.path.join(download_path, huggingface_pipeline_job.name)\n", + " os.makedirs(pipeline_download_path, exist_ok=True)\n", + "\n", + " ml_client_ws.jobs.download(\n", + " name=huggingface_pipeline_job.name,\n", + " download_path=pipeline_download_path,\n", + " output_name=\"model_registration_details\",\n", + " )" ] }, { @@ -479,7 +517,7 @@ "id": "e5504e6f", "metadata": {}, "source": [ - "### 4.2 Read registration details and get model from registry/workspace" + "### 3.2 Read registration details and get model from registry/workspace" ] }, { @@ -491,18 +529,22 @@ "source": [ "import json\n", "\n", - "with open(\n", - " \"./pipeline_outputs/named-outputs/model_registration_details/model_registration_details.json\",\n", - " \"r\",\n", - ") as f:\n", - " registration_details = json.load(f)\n", + "# if pipeline job was not scheduled, skip\n", + "if huggingface_pipeline_job is not None:\n", + "\n", + " with open(\n", + " f\"./pipeline_outputs/{huggingface_pipeline_job.name}/named-outputs/model_registration_details/model_registration_details.json\",\n", + " \"r\",\n", + " ) as f:\n", + " registration_details = json.load(f)\n", "\n", - "model_name = registration_details[\"name\"]\n", - "model_version = registration_details[\"version\"]\n", + " model_name = registration_details[\"name\"]\n", + " model_version = registration_details[\"version\"]\n", "\n", - "# Get the model object from workspace\n", - "model = ml_client_ws.models.get(name=model_name, version=model_version)\n", - "model.__dict__" + " # Get the model object from workspace\n", + " model = ml_client_ws.models.get(name=model_name, version=model_version)\n", + " print(f\"\\n{model_name}\")\n", + " print(model.__dict__)" ] } ],