# pipeline.yml
# Azure ML pipeline job: preprocess MLTable data, train an AutoML
# multilabel text classifier on it, then register the resulting model.
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

description: Pipeline using AutoML Text Classification Multilabel task

display_name: pipeline-with-text-classification-multilabel
experiment_name: pipeline-with-automl

settings:
  # No job below sets its own compute, so all of them use this one.
  default_compute: azureml:gpu-cluster

# Pipeline-level inputs; paths are relative to this file.
inputs:
  text_classification_multilabel_training_data:
    type: mltable
    path: ./training-mltable-folder
  text_classification_multilabel_validation_data:
    type: mltable
    path: ./validation-mltable-folder

jobs:
  # Step 1: feeds both pipeline inputs through the preprocessing component.
  preprocessing_node:
    type: command
    component: file:./components/component_preprocessing.yaml
    inputs:
      train_data: ${{parent.inputs.text_classification_multilabel_training_data}}
      validation_data: ${{parent.inputs.text_classification_multilabel_validation_data}}
    outputs:
      preprocessed_train_data:
        type: mltable
      preprocessed_validation_data:
        type: mltable

  # Step 2: AutoML training on the outputs of preprocessing_node.
  text_classification_multilabel_node:
    type: automl
    task: text_classification_multilabel
    log_verbosity: info
    primary_metric: accuracy
    limits:
      max_trials: 1
      timeout_minutes: 60
    target_column_name: terms
    training_data: ${{parent.jobs.preprocessing_node.outputs.preprocessed_train_data}}
    validation_data: ${{parent.jobs.preprocessing_node.outputs.preprocessed_validation_data}}
    # The "mlflow_model" output currently has to be declared explicitly
    # so that the following nodes can reference it.
    outputs:
      best_model:
        type: mlflow_model

  # Step 3: passes the best AutoML model to the registration component.
  register_model_node:
    type: command
    component: file:./components/component_register_model.yaml
    inputs:
      model_input_path: ${{parent.jobs.text_classification_multilabel_node.outputs.best_model}}
      model_base_name: paper_categorization_model