-
Notifications
You must be signed in to change notification settings - Fork 18
/
model_training.yaml
143 lines (133 loc) · 5.54 KB
/
model_training.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# train: (optional)
# trainable: (optional) Indicate the model is trainable. Default: False
# tested_platforms: (Optional list) Platforms on which this model can be trained (current options: wml, ffdl, kubeflow)
# model_source: (Required for trainable)
# initial_model: (Required for trainable)
# data_store: (Required) datastore for the model code source
# bucket: (Required) Bucket that has the model code source
# path: (Required) Bucket path that has the model code source
# url: (Optional) Link to the model
# initial_model_local: (Optional)
# path: (Optional) Initial model code in the user local machine
# model_training_results: (Required for trainable)
# trained_model: (Required for trainable)
# data_store: (Required) datastore for the training result source
# bucket: (Required) Bucket that has the training result source
# path: (Required) Bucket path that has the training result source
# url: (Optional) Link to the model
# trained_model_local: (Optional)
# path: (Optional) Path to pull trained model in the user local machine
# data_source: (Optional)
# training_data: (Required for trainable)
# data_store: (Required) datastore for the model data source
# bucket: (Required) Bucket that has the model data source
# path: (Required) Bucket path that has the model data source
# training_data_url: (Optional) Link to the training data
# training_data_local: (Optional)
# path: (Optional) Initial data files in the user local machine
# mount_type: (Optional) object storage mount type
# evaluation_metrics: (optional) Define the metrics for the training job.
# type: (Required) evaluation_metrics type
# in: (Required) Path to store the evaluation_metrics
# training_container_image: (Optional)
# container_image_url: (Optional) Custom training container image url
# container_store: (Optional) container_store for the custom training image
# execution: (Required for trainable)
# command: (Required) Entrypoint commands to execute model code
# compute_configuration: Compute resources for the training job (parent of name and nodes)
# name: (Required) T-shirt size for training on Watson Machine Learning
# nodes: (Required) Number of nodes needed for this training job. Default: 1
# training_params: (Optional) list of hyperparameters for the training model
# - (optional) list of key(param name):value(param value)
# Example training configuration for the facial-age-estimator model.
# NOTE(review): every line of this block sits at column 0 in this view, so the
# nesting is not visible here. Restore the indentation from the authoritative
# copy of the file before using it; do not infer it from the key order alone.
train:
trainable: true
tested_platforms:
- wml
- ffdl
# The three data_store values below use the same name as the single entry
# declared under data_stores (age_datastore).
model_source:
initial_model:
data_store: age_datastore
bucket: facial-age-estimator
path: 1.0/assets/
url: ""
initial_model_local:
path: /local/1.0/assets/
model_training_results:
trained_model:
data_store: age_datastore
bucket: facial-age-estimator
path: 1.0/assets/
url: ""
trained_model_local:
path: /local/1.0/assets/
data_source:
training_data:
data_store: age_datastore
bucket: facial-age-estimator
path: 1.0/assets/
# Left empty (parses as null), unlike the url: "" entries above. The schema
# comment calls this field url, while the key here is training_data_url.
training_data_url:
training_data_local:
path: /local/1.0/assets/
mount_type: mount_cos
evaluation_metrics:
type: tensorboard
in: "$JOB_STATE_DIR/logs/tb/test"
training_container_image:
# NOTE(review): the image tag starts with "latest", which looks like a moving
# tag; consider pinning a fixed version for reproducible training runs.
container_image_url: tensorflow/tensorflow:latest-gpu-py3
# Same name as the entry declared under container_stores.
container_store: container_store
execution:
# The command value is a plain scalar that continues over the next two lines.
# It also hard-codes --learningRate and --trainingIters on the command line.
command: python3 convolutional_network.py --trainImagesFile ${DATA_DIR}/train-images-idx3-ubyte.gz
--trainLabelsFile ${DATA_DIR}/train-labels-idx1-ubyte.gz --testImagesFile ${DATA_DIR}/t10k-images-idx3-ubyte.gz
--testLabelsFile ${DATA_DIR}/t10k-labels-idx1-ubyte.gz --learningRate 0.001 --trainingIters 20000
# compute_configuration is not listed in the schema comment above; its name and
# nodes children correspond to the "T-shirt size" and "Number of nodes" entries.
compute_configuration:
name: k80
nodes: 1
# Hyperparameter placeholders: every entry is a key with an empty value.
# NOTE(review): xxx / yyy are presumably placeholder items belonging to
# optimizer - confirm against the original indentation.
training_params:
- learning_rate:
- loss:
- batch_size:
- epoch:
- optimizer:
- xxx
- yyy
- train_op:
# process: (Optional)
# - name: (Required) Script Process name. Can mix any kind of process here
# params: (Optional) Free flowing list of key:value pairs
# staging_dir: (Optional) Staging directory within the local machine
# trained_model_path: (Optional) trained model path within the object storage bucket
# Script processes; this example declares a single post-training step.
process:
  - name: training_post_process
    # Free-form key/value pairs for the process.
    params:
      key: value
    # Staging directory within the local machine.
    staging_dir: training_output/
    # No path configured; explicit null parses the same as an empty value.
    trained_model_path: null
# data_stores: (Required for trainable)
# - name: (Required) name of the data_stores
# connection:
# endpoint: (Required) Object Storage endpoint URL or public Object Storage key link.
# access_key_id: (Required) Object Storage access_key_id
# secret_access_key: (Required) Object secret_access_key
# Object-storage connections. The entry name (age_datastore) is the value used
# by the data_store fields in the train section.
data_stores:
  - name: age_datastore
    type: s3
    connection:
      endpoint: https://s3-api.us-geo.objectstorage.softlayer.net
      # Placeholder credentials, quoted so they always load as strings.
      # Do not commit real keys to version control.
      access_key_id: 'xxxxxxxxxx'
      secret_access_key: 'xxxxxxxxxxxxx'
# data_store_file_paths: (Optional) Use when there are multiple files; otherwise the path field shown above is sufficient.
# - name: (Optional) name of any additional training file paths to assign. Should be referenced from a datastore if needed
# key: value
# Additional named training file paths: one entry holding a name plus
# free-form key/path pairs.
data_store_file_paths:
  - name: training_file_paths
    training_file_one: 2.0/assets/training_file_one
    training_file_two: 2.0/assets/training_file_two
# container_stores: (Optional)
# - name: (Required) name of the container_store
# connection:
# container_registry: (Required) container registry for this container_store
# container_registry_token: (Required if container registry is private) container registry token
# Container registries for custom training images. The entry name
# (container_store) is the value used by train's training_container_image.
container_stores:
  - name: container_store
    connection:
      container_registry: docker.io
      # Empty token; the schema comment requires one only for private registries.
      container_registry_token: ""