mdc/monitoring cli samples (#2479)

* add data collector cli samples

* add custom monitoring signal samples

* add relevant supporting files and additional samples

* remove data from this PR, update custom samples

* remove model from this PR

* update email

ahughes-msft authored Jul 26, 2023
1 parent 7626e1a commit f19b04d
Showing 16 changed files with 597 additions and 0 deletions.
7 changes: 7 additions & 0 deletions cli/endpoints/online/data-collector/README.md
# Azure Model Data Collector (MDC) Examples

This directory contains examples of how to use **AzureML Model Data Collector (MDC)**. The feature collects production inference data and writes it to a Blob storage container of your choice; the data can then be used for model monitoring. Documentation for the feature is available [here](https://learn.microsoft.com/en-us/azure/machine-learning/concept-data-collection?view=azureml-api-2).

In this directory there are two sub-folders: (1) `workspace-blob-storage` and (2) `custom-blob-storage`. These folders refer to the data sink options within the data collector configuration. If you are interested in sending the data to the default sink (the workspace Blob storage), see the examples in the `workspace-blob-storage` folder. Otherwise, if you want to use a custom Blob storage container as the sink, see the examples in the `custom-blob-storage` folder.

**AzureML Model Data Collector (MDC)** supports data logging for online endpoints (managed and Kubernetes).
11 changes: 11 additions & 0 deletions cli/endpoints/online/data-collector/custom-blob-storage/README.md
# Collect data to custom Blob storage

This directory contains YAML configuration samples for **AzureML Model Data Collector (MDC)** that log production inference data to a Blob storage container of your choice.

Before creating your deployment with these configuration YAMLs, follow the steps in [the documentation](https://learn.microsoft.com/en-us/azure/machine-learning/concept-data-collection?view=azureml-api-2) to ensure your endpoint has sufficient permissions to write to the Blob storage container of your choice.
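For orientation, here is a minimal sketch of that permission setup, assuming your endpoint uses a system-assigned managed identity and that `<storage-account>` hosts your custom container; treat the linked documentation as authoritative:

```bash
# Sketch only: grant the endpoint's system-assigned identity write access
# to the custom Blob storage account. All bracketed values are placeholders.
principal_id=$(az ml online-endpoint show --name my_endpoint \
  --resource-group <resource_group> --workspace-name <workspace> \
  --query identity.principal_id -o tsv)
az role assignment create \
  --assignee-object-id "$principal_id" \
  --assignee-principal-type ServicePrincipal \
  --role "Storage Blob Data Contributor" \
  --scope "/subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.Storage/storageAccounts/<storage-account>"
```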

## Contents

file|description
-|-
`custom-blob-storage.yaml`|Collect data to custom Blob storage sinks
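
As a usage sketch (assuming the endpoint `my_endpoint` already exists and the referenced model and environment are registered; bracketed values are placeholders), the deployment can be created from the sample YAML with the CLI:

```bash
# Sketch: create an online deployment with data collection to a custom sink.
az ml online-deployment create -f custom-blob-storage.yaml \
  --endpoint-name my_endpoint \
  --resource-group <resource_group> --workspace-name <workspace>
```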
18 changes: 18 additions & 0 deletions cli/endpoints/online/data-collector/custom-blob-storage/custom-blob-storage.yaml
endpoint_name: my_endpoint
name: blue
model: azureml:my-model-m1:1
environment: azureml:env-m1:1
data_collector:
  collections:
    model_inputs:
      enabled: 'True'
      data:
        name: my_model_inputs_data_asset
        path: azureml://datastores/workspaceblobstore/paths/modelDataCollector/my_endpoint/blue/model_inputs
        version: 1
    model_outputs:
      enabled: 'True'
      data:
        name: my_model_outputs_data_asset
        path: azureml://datastores/workspaceblobstore/paths/modelDataCollector/my_endpoint/blue/model_outputs
        version: 1
12 changes: 12 additions & 0 deletions cli/endpoints/online/data-collector/workspace-blob-storage/README.md
# Collect data to workspace Blob storage

This directory contains YAML configuration samples for **AzureML Model Data Collector (MDC)** that use your AzureML workspace Blob storage as the data sink.

## Contents

file|description
-|-
`workspace-blob-storage-custom1.yaml`|Collect custom logging data (model_inputs, model_outputs)
`workspace-blob-storage-custom2.yaml`|Collect both payload data (request and response) and custom logging data (model_inputs, model_outputs); adjust rolling_rate and sampling_rate
`workspace-blob-storage-payload1.yaml`|Collect payload data (request and response)
`workspace-blob-storage-payload2.yaml`|Collect payload data (request and response); adjust rolling_rate and sampling_rate
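
A usage sketch, assuming an existing endpoint and a registered model and environment; pick whichever sample YAML matches your scenario (bracketed values are placeholders):

```bash
# Sketch: deploy with payload logging to the workspace Blob storage sink.
az ml online-deployment create -f workspace-blob-storage-payload1.yaml \
  --endpoint-name my_endpoint \
  --resource-group <resource_group> --workspace-name <workspace>
```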
10 changes: 10 additions & 0 deletions cli/endpoints/online/data-collector/workspace-blob-storage/workspace-blob-storage-custom1.yaml
endpoint_name: my_endpoint
name: blue
model: azureml:my-model-m1:1
environment: azureml:env-m1:1
data_collector:
  collections:
    model_inputs:
      enabled: 'True' # <'True', 'False'>
    model_outputs:
      enabled: 'True' # <'True', 'False'>
16 changes: 16 additions & 0 deletions cli/endpoints/online/data-collector/workspace-blob-storage/workspace-blob-storage-custom2.yaml
endpoint_name: my_endpoint
name: blue
model: azureml:my-model-m1:1
environment: azureml:env-m1:1
data_collector:
  collections:
    request:
      enabled: 'True' # <'True', 'False'>
    response:
      enabled: 'True' # <'True', 'False'>
    model_inputs:
      enabled: 'True' # <'True', 'False'>
    model_outputs:
      enabled: 'True' # <'True', 'False'>
  rolling_rate: hour # optional
  sampling_rate: 1.0 # optional; fraction of traffic to randomly sample, 0.0-1.0
10 changes: 10 additions & 0 deletions cli/endpoints/online/data-collector/workspace-blob-storage/workspace-blob-storage-payload1.yaml
endpoint_name: my_endpoint
name: blue
model: azureml:my-model-m1:1
environment: azureml:env-m1:1
data_collector:
  collections:
    request:
      enabled: 'True' # <'True', 'False'>
    response:
      enabled: 'True' # <'True', 'False'>
12 changes: 12 additions & 0 deletions cli/endpoints/online/data-collector/workspace-blob-storage/workspace-blob-storage-payload2.yaml
endpoint_name: my_endpoint
name: blue
model: azureml:my-model-m1:1
environment: azureml:env-m1:1
data_collector:
  collections:
    request:
      enabled: 'True' # <'True', 'False'>
    response:
      enabled: 'True' # <'True', 'False'>
  rolling_rate: hour # optional
  sampling_rate: 1.0 # optional; fraction of traffic to randomly sample, 0.0-1.0
29 changes: 29 additions & 0 deletions cli/monitoring/README.md
# AzureML Model Monitoring

AzureML model monitoring enables you to track the performance of your model in production from a data science perspective. This directory contains samples for the different scenarios you may encounter when monitoring your models. Comprehensive documentation on model monitoring can be found [here](https://learn.microsoft.com/en-us/azure/machine-learning/concept-model-monitoring?view=azureml-api-2).

## Scenario coverage

### 1. Deploy model with AzureML online endpoints; out-of-box configuration

In this scenario, you have deployed your model to an AzureML online endpoint (managed or Kubernetes) and enabled production inference data collection for your deployment ([documentation](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-collect-production-data?view=azureml-api-2&tabs=azure-cli)). With `out-of-box-monitoring.yaml`, you can create a model monitor with the default signals (data drift, prediction drift, data quality), metrics, and thresholds, all of which can be adjusted later.

Schedule your model monitor with this command: `az ml schedule create -f out-of-box-monitoring.yaml`
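
After creation, you can verify the schedule with the CLI (a sketch; `<schedule_name>` is whatever `name` is set to inside the YAML, and bracketed values are placeholders):

```bash
# Sketch: confirm the monitoring schedule was created and is enabled.
az ml schedule list --resource-group <resource_group> --workspace-name <workspace> -o table
az ml schedule show --name <schedule_name> \
  --resource-group <resource_group> --workspace-name <workspace>
```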

### 2. Deploy model with AzureML online endpoints; advanced configuration with feature importance

In this scenario, you have deployed your model to an AzureML online endpoint (managed or Kubernetes) and enabled production inference data collection for your deployment ([documentation](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-collect-production-data?view=azureml-api-2&tabs=azure-cli)). With `advanced-monitoring.yaml`, you can create a model monitor with configurable signals, metrics, and thresholds. The provided sample also determines the most important features and computes metrics only for those features.

Schedule your model monitor with this command: `az ml schedule create -f advanced-monitoring.yaml`

### 3. Deploy model with AzureML batch endpoints, AKS, or outside of AzureML

In this scenario, you bring your own production data to use as input to your monitoring job. When you bring your own data, you need to provide a custom preprocessing component that gets the data into MLTable format for the monitoring job to use. An example custom preprocessing component can be found in the `components/custom_preprocessing` directory. You will need to register this component: from that directory, run `az ml component create -f spec.yaml --subscription <subscription_id> --workspace-name <workspace> --resource-group <resource_group>`. Then, schedule your monitoring job (found in the main `monitoring/` directory) with `az ml schedule create -f custom-monitoring.yaml --subscription <subscription_id> --workspace-name <workspace> --resource-group <resource_group>`.

**Note**: The `custom-monitoring.yaml` configuration file is configured to use both a custom preprocessing component and a custom monitoring signal. If you only want to use a custom preprocessing component to bring your own data, replace the custom signal with the built-in signals described in the documentation.

### 4. Create your own custom monitoring signal

In this scenario, you create your own custom monitoring signal. The custom signal component can be found in the `components/custom_signal/` directory. You will need to register this component: from that directory, run `az ml component create -f spec.yaml --subscription <subscription_id> --workspace-name <workspace> --resource-group <resource_group>`. Then, schedule your monitoring job (found in the main `monitoring/` directory) with `az ml schedule create -f custom-monitoring.yaml --subscription <subscription_id> --workspace-name <workspace> --resource-group <resource_group>`.

**Note**: The `custom-monitoring.yaml` configuration file is configured to use both a custom preprocessing component and a custom monitoring signal. If you only want to use a custom signal, you can remove the custom preprocessing component line. A combined end-to-end sketch for scenarios 3 and 4 follows.
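
A minimal end-to-end sketch combining scenarios 3 and 4; the component paths match this directory, the `spec.yaml` file name for the custom signal component is assumed to mirror the preprocessing component, and the subscription, workspace, and resource group values are placeholders:

```bash
# Sketch: register both custom components, then schedule the monitor that uses them.
az ml component create -f components/custom_preprocessing/spec.yaml \
  --subscription <subscription_id> --resource-group <resource_group> --workspace-name <workspace>
az ml component create -f components/custom_signal/spec.yaml \
  --subscription <subscription_id> --resource-group <resource_group> --workspace-name <workspace>
az ml schedule create -f custom-monitoring.yaml \
  --subscription <subscription_id> --resource-group <resource_group> --workspace-name <workspace>
```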
82 changes: 82 additions & 0 deletions cli/monitoring/advanced-monitoring.yaml
$schema: http://azureml/sdk-2-0/Schedule.json
name: fraud_detection_model_monitoring
display_name: Fraud detection model monitoring
description: Fraud detection model monitoring with advanced configurations

trigger:
  # perform model monitoring activity daily at 3:15am
  type: recurrence
  frequency: day # can be minute, hour, day, week, month
  interval: 1 # every day
  schedule:
    hours: 3 # at 3am
    minutes: 15 # at 15 mins after 3am

create_monitor:
  compute:
    instance_type: standard_e4s_v3
    runtime_version: 3.2
  monitoring_target:
    endpoint_deployment_id: azureml:fraud-detection-endpoint:fraud-detection-deployment

  monitoring_signals:
    advanced_data_drift: # monitoring signal name, any user-defined name works
      type: data_drift
      # target_dataset is optional. By default, the target dataset is the production inference data associated with the Azure Machine Learning online endpoint
      baseline_dataset:
        input_dataset:
          path: azureml:my_model_training_data:1 # use training data as comparison baseline
          type: mltable
        dataset_context: training
      target_column_name: fraud_detected
      features:
        top_n_feature_importance: 20 # monitor drift for top 20 features
      metric_thresholds:
        - applicable_feature_type: numerical
          metric_name: jensen_shannon_distance
          threshold: 0.01
        - applicable_feature_type: categorical
          metric_name: pearsons_chi_squared_test
          threshold: 0.02
    advanced_data_quality:
      type: data_quality
      # target_dataset is optional. By default, the target dataset is the production inference data associated with the Azure Machine Learning online endpoint
      baseline_dataset:
        input_dataset:
          path: azureml:my_model_training_data:1
          type: mltable
        dataset_context: training
      features: # monitor data quality for 3 individual features only
        - feature_A
        - feature_B
        - feature_C
      metric_thresholds:
        - applicable_feature_type: numerical
          metric_name: null_value_rate
          # use default threshold from training data baseline
        - applicable_feature_type: categorical
          metric_name: out_of_bounds_rate
          # use default threshold from training data baseline
    feature_attribution_drift_signal:
      type: feature_attribution_drift
      target_dataset:
        dataset:
          input_dataset:
            path: azureml:my_model_production_data:1
            type: uri_folder
          dataset_context: model_inputs_outputs
      baseline_dataset:
        input_dataset:
          path: azureml:my_model_training_data:1
          type: mltable
        dataset_context: training
      target_column_name: fraud_detected
      model_type: classification
      # if no metric_thresholds are defined, the default metric_thresholds are used
      metric_thresholds:
        threshold: 0.9

  alert_notification:
    emails:
      - [email protected]
      - [email protected]
53 changes: 53 additions & 0 deletions cli/monitoring/components/custom_preprocessing/spec.yaml
$schema: http://azureml/sdk-2-0/SparkComponent.json
type: spark

name: custom_preprocessor
display_name: My Custom Preprocessor
description: Filters the data based on the window provided.
version: 1.0.0
is_deterministic: true

code: ./src
entry:
  file: ./run.py

inputs:
  data_window_end:
    type: string
  data_window_start:
    type: string
  input_data:
    type: uri_folder
    mode: direct
outputs:
  preprocessed_input_data:
    type: mltable
    mode: direct
conf:
  spark.driver.cores: 1
  spark.driver.memory: 2g
  spark.executor.cores: 2
  spark.executor.memory: 2g
  spark.executor.instances: 1
  spark.dynamicAllocation.enabled: True
  spark.dynamicAllocation.minExecutors: 1
  spark.dynamicAllocation.maxExecutors: 4
  spark.synapse.library.python.env: |
    channels:
      - conda-forge
    dependencies:
      - python=3.8
      - pip:
        - scipy~=1.10.0
        - numpy~=1.21.0
        - pandas~=1.4.3
        - azureml-mlflow~=1.49.0
        - mltable~=1.3.0
        - azureml-fsspec
        - fsspec~=2023.4.0
    name: momo-base-spark
args: >-
  --data_window_end ${{inputs.data_window_end}}
  --data_window_start ${{inputs.data_window_start}}
  --input_data ${{inputs.input_data}}
  --preprocessed_input_data ${{outputs.preprocessed_input_data}}