# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import numpy as np
import os, sys
import argparse
import yaml # pip install pyyaml
import easydict # pip install easydict
import cntk
from cntk import Trainer, UnitType, load_model, Axis, input_variable, parameter, times, combine, \
softmax, roipooling, plus, element_times, CloneMethod, alias, Communicator, reduce_sum
from cntk.core import Value
from cntk.io import MinibatchData
from cntk.initializer import normal
from cntk.layers import placeholder, Constant, Sequential
from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_schedule
from cntk.logging import log_number_of_parameters, ProgressPrinter
from cntk.logging.graph import find_by_name, plot
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error
from _cntk_py import force_deterministic_algorithms
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, ".."))
from utils.rpn.rpn_helpers import create_rpn, create_proposal_target_layer
from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
from utils.map.map_helpers import evaluate_detections
from utils.annotations.annotations_helper import parse_class_map_file
from config import cfg
from od_mb_source import ObjectDetectionMinibatchSource
from cntk_helpers import regress_rois
###############################################################
###############################################################
mb_size = cfg["CNTK"].MB_SIZE
image_width = cfg["CNTK"].IMAGE_WIDTH
image_height = cfg["CNTK"].IMAGE_HEIGHT
num_channels = cfg["CNTK"].NUM_CHANNELS
# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
dims_input_const = MinibatchData(Value(batch=np.asarray(
[image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False)
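# Since the minibatch source scales and pads every image to a fixed
# IMAGE_WIDTH x IMAGE_HEIGHT, the padded, scaled and original dimensions all
# coincide in this constant.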
# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])
globalvars = {}
globalvars['output_path'] = os.path.join(abs_path, "Output")
# dataset specific parameters
map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
num_test_images = cfg["CNTK"].NUM_TEST_IMAGES
# model specific parameters
model_folder = os.path.join(abs_path, "..", "..", "PretrainedModels")
base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
pool_node_name = cfg["CNTK"].POOL_NODE_NAME
last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
roi_dim = cfg["CNTK"].ROI_DIM
###############################################################
###############################################################
def set_global_vars(use_arg_parser = True):
data_path = map_file_path
# set and overwrite learning parameters
globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
globalvars['rnd_seed'] = cfg.RNG_SEED
globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E
if use_arg_parser:
parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir', help='Data directory where the training and test data is located',
                            required=False, default=data_path)
parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
required=False, default=None)
parser.add_argument('-logdir', '--logdir', help='Log file',
required=False, default=None)
parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
required=False, default=mb_size)
parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
required=False, default=epoch_size)
parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int,
                            required=False, default=32)
parser.add_argument('-r', '--restart',
help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
action='store_true')
parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
required=False, default=None)
parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False)
parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False)
parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False)
parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False)
parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False)
args = vars(parser.parse_args())
if args['rpnLrFactor'] is not None:
globalvars['rpn_lr_factor'] = args['rpnLrFactor']
if args['frcnLrFactor'] is not None:
globalvars['frcn_lr_factor'] = args['frcnLrFactor']
if args['e2eLrFactor'] is not None:
globalvars['e2e_lr_factor'] = args['e2eLrFactor']
if args['momentumPerMb'] is not None:
globalvars['momentum_per_mb'] = args['momentumPerMb']
if args['e2eEpochs'] is not None:
globalvars['e2e_epochs'] = args['e2eEpochs']
if args['rpnEpochs'] is not None:
globalvars['rpn_epochs'] = args['rpnEpochs']
if args['frcnEpochs'] is not None:
globalvars['frcn_epochs'] = args['frcnEpochs']
if args['rndSeed'] is not None:
globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = (args['trainConv'] == 1)
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = (args['trainE2E'] == 1)
if args['outputdir'] is not None:
globalvars['output_path'] = args['outputdir']
if args['logdir'] is not None:
log_dir = args['logdir']
if args['device'] is not None:
# Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
if Communicator.rank() == 0:
cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
else:
cntk.device.try_set_default_device(cntk.device.cpu())
if args['datadir'] is not None:
data_path = args['datadir']
if not os.path.isdir(data_path):
raise RuntimeError("Directory %s does not exist" % data_path)
globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])
if cfg["CNTK"].FORCE_DETERMINISTIC:
force_deterministic_algorithms()
np.random.seed(seed=globalvars['rnd_seed'])
globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
globalvars['num_classes'] = len(globalvars['classes'])
if cfg["CNTK"].DEBUG_OUTPUT:
# report args
print("Using the following parameters:")
print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED))
print("Train conv layers: {}".format(globalvars['train_conv']))
print("Random seed : {}".format(globalvars['rnd_seed']))
print("Momentum per MB : {}".format(globalvars['momentum_per_mb']))
if globalvars['train_e2e']:
print("E2E epochs : {}".format(globalvars['e2e_epochs']))
else:
print("RPN lr factor : {}".format(globalvars['rpn_lr_factor']))
print("RPN epochs : {}".format(globalvars['rpn_epochs']))
print("FRCN lr factor : {}".format(globalvars['frcn_lr_factor']))
print("FRCN epochs : {}".format(globalvars['frcn_epochs']))
###############################################################
###############################################################
def clone_model(base_model, from_node_names, to_node_names, clone_method):
from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
if None in from_nodes:
print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
.format(from_node_names, from_nodes))
to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
if None in to_nodes:
print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
.format(to_node_names, to_nodes))
input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
return cloned_net
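# Example (hypothetical node names): freeze everything from the input node up
# to the last conv node of a loaded base model:
#   conv = clone_model(base_model, ["data"], ["conv5_3"], CloneMethod.freeze)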
def clone_conv_layers(base_model):
if not globalvars['train_conv']:
conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
elif feature_node_name == start_train_conv_node_name:
conv_layers = clone_model(base_model, [feature_node_name], [last_conv_node_name], CloneMethod.clone)
else:
fixed_conv_layers = clone_model(base_model, [feature_node_name], [start_train_conv_node_name],
CloneMethod.freeze)
train_conv_layers = clone_model(base_model, [start_train_conv_node_name], [last_conv_node_name],
CloneMethod.clone)
conv_layers = Sequential([fixed_conv_layers, train_conv_layers])
return conv_layers
# Please keep in sync with Readme.md
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
# RCNN
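    # spatial_scale maps absolute pixel coordinates of the ROIs onto the conv
    # feature map; 1/16 matches the accumulated stride of the last conv layer
    # in VGG16/AlexNet-style base models.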
roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
fc_out = fc_layers(roi_out)
# prediction head
W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')
# regression head
W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')
return cls_score, bbox_pred
# Please keep in sync with Readme.md
# Defines the Faster R-CNN network model for detecting objects in images
def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input):
# Load the pre-trained classification net and clone layers
base_model = load_model(base_model_file_name)
conv_layers = clone_conv_layers(base_model)
fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone)
# Normalization and conv layers
feat_norm = features - normalization_const
conv_out = conv_layers(feat_norm)
# RPN and prediction targets
rpn_rois, rpn_losses = \
create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
rois, label_targets, bbox_targets, bbox_inside_weights = \
create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes'])
# Fast RCNN and losses
cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
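    # The total objective is the sum of the RPN losses (objectness score +
    # anchor box regression) and the detection losses (classification +
    # bounding box regression).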
loss = rpn_losses + detection_losses
pred_error = classification_error(cls_score, label_targets, axis=1)
return loss, pred_error
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
# classification loss
cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)
p_cls_loss = placeholder()
p_rois = placeholder()
# The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois
roi_indicator = reduce_sum(p_rois, axis=1)
cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
cls_normalization_factor = 1.0 / cls_num_terms
normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor
reduced_cls_loss = cntk.as_block(normalized_cls_loss,
[(p_cls_loss, cls_loss), (p_rois, rois)],
'Normalize', 'norm_cls_loss')
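    # as_block wraps the normalization into a single named block node
    # ('norm_cls_loss') and binds the placeholders to the actual cls loss and
    # ROI inputs, so the whole term appears as one op in the graph.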
# regression loss
p_bbox_pred = placeholder()
p_bbox_targets = placeholder()
p_bbox_inside_weights = placeholder()
bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
# The bbox loss is normalized by the batch size
bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor
reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
[(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
'SmoothL1Loss', 'norm_bbox_loss')
detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")
return detection_losses
def create_eval_model(model, image_input, dims_input, rpn_model=None):
print("creating eval model")
conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
model_with_rpn = model if rpn_model is None else rpn_model
rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
rpn_rois = rpn(conv_out, dims_input)
roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
pred_net = roi_fc_layers(conv_out, rpn_rois)
cls_score = pred_net.outputs[0]
bbox_regr = pred_net.outputs[1]
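    # If the regression targets were normalized during training, the predicted
    # deltas have to be de-normalized at eval time (multiply by the stds, add
    # the means) before they are applied to the proposals.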
if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
num_boxes = int(bbox_regr.shape[1] / 4)
bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')
cls_pred = softmax(cls_score, axis=1, name='cls_pred')
eval_model = combine([cls_pred, rpn_rois, bbox_regr])
return eval_model
def train_model(image_input, roi_input, dims_input, loss, pred_error,
lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
rpn_rois_input=None, buffered_rpn_proposals=None):
if isinstance(loss, cntk.Variable):
loss = combine([loss])
params = loss.parameters
biases = [p for p in params if '.b' in p.name or 'b' == p.name]
others = [p for p in params if not p in biases]
bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
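    # Biases are trained by a separate learner whose learning rate is scaled
    # by BIAS_LR_MULT (commonly 2 in Caffe-style training recipes).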
if cfg["CNTK"].DEBUG_OUTPUT:
print("biases")
for p in biases: print(p)
print("others")
for p in others: print(p)
print("bias_lr_mult: {}".format(bias_lr_mult))
# Instantiate the learners and the trainer object
lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
unit_gain=False, use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])
# Get minibatches of images and perform model training
print("Training model for %s epochs." % epochs_to_train)
log_number_of_parameters(loss)
# Create the minibatch source
od_minibatch_source = ObjectDetectionMinibatchSource(
globalvars['train_map_file'], globalvars['train_roi_file'],
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
buffered_rpn_proposals=buffered_rpn_proposals)
# define mapping from reader streams to network inputs
input_map = {
od_minibatch_source.image_si: image_input,
od_minibatch_source.roi_si: roi_input,
od_minibatch_source.dims_si: dims_input
}
use_buffered_proposals = buffered_rpn_proposals is not None
progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
for epoch in range(epochs_to_train): # loop over epochs
sample_count = 0
while sample_count < epoch_size: # loop over minibatches in the epoch
data, proposals = od_minibatch_source.next_minibatch_with_proposals(min(mb_size, epoch_size-sample_count), input_map=input_map)
if use_buffered_proposals:
data[rpn_rois_input] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, False)
                # remove the dims input (the only stream whose shape string
                # contains '[6]'), since the buffered-proposal networks have no
                # RPN and would otherwise warn about the unused input
                del data[[k for k in data if '[6]' in str(k)][0]]
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
if sample_count % 100 == 0:
print("Processed {} samples".format(sample_count))
progress_printer.epoch_summary(with_metric=True)
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
# Create the minibatch source
od_minibatch_source = ObjectDetectionMinibatchSource(
globalvars['train_map_file'], globalvars['train_roi_file'],
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
max_images=num_images,
randomize=False, use_flipping=False)
# define mapping from reader streams to network inputs
input_map = {
od_minibatch_source.image_si: image_input,
od_minibatch_source.roi_si: roi_input,
od_minibatch_source.dims_si: dims_input
}
# setting pre- and post-nms top N to training values since buffered proposals are used for further training
test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N
buffered_proposals = [None for _ in range(num_images)]
sample_count = 0
while sample_count < num_images:
data = od_minibatch_source.next_minibatch(1, input_map=input_map)
output = rpn_model.eval(data)
out_dict = dict([(k.name, k) for k in output])
out_rpn_rois = output[out_dict['rpn_rois']][0]
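        # round to pixel coordinates and store as int16 to keep the proposal
        # buffer for the entire training set small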
buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
sample_count += 1
if sample_count % 500 == 0:
print("Buffered proposals for {} samples".format(sample_count))
# resetting config values to original test values
cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
cfg["TEST"].RPN_POST_NMS_TOP_N = test_post
return buffered_proposals
# Trains a Faster R-CNN model end-to-end
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
# Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
dims_node = alias(dims_input, name='dims_input')
# Instantiate the Faster R-CNN prediction model and loss function
loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node)
if debug_output:
print("Storing graphs and models to %s." % globalvars['output_path'])
plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))
# Set learning parameters
e2e_lr_factor = globalvars['e2e_lr_factor']
e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))
train_model(image_input, roi_input, dims_input, loss, pred_error,
e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs'])
return create_eval_model(loss, image_input, dims_input)
# Trains a Faster R-CNN model using 4-stage alternating training
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    # Step 1: Create the initial network with only the RPN, without the detection network
    #          --> train only the RPN (and conv3_1 and up for VGG16)
    # Buffer region proposals from the RPN
    # Step 2: Create the full network, initialize conv layers from ImageNet, use the buffered proposals
    #          --> train only the detection network (and conv3_1 and up for VGG16)
    # Step 3: Keep the conv weights from the detection network and fix them
    #          --> train only the RPN
    # Buffer region proposals from the RPN
    # Step 4: Keep the conv and RPN weights from step 3 and fix them
    #          --> train only the detection network
    '''
# Learning parameters
rpn_lr_factor = globalvars['rpn_lr_factor']
rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
frcn_lr_factor = globalvars['frcn_lr_factor']
frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]
l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
rpn_epochs = globalvars['rpn_epochs']
frcn_epochs = globalvars['frcn_epochs']
print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
if debug_output:
print("Storing graphs and models to %s." % globalvars['output_path'])
# Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()],
name=feature_node_name)
feat_norm = image_input - normalization_const
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
scaled_gt_boxes = alias(roi_input, name='roi_input')
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
dims_node = alias(dims_input, name='dims_input')
rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')
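    # rpn_rois_input feeds the proposals buffered in stages 1a/2a back into the
    # network during stages 1b/2b; the alias exposes them under the node name
    # 'rpn_rois' so the cloned detection network can bind to them.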
# base image classification model (e.g. VGG16 or AlexNet)
base_model = load_model(base_model_file_name)
print("stage 1a - rpn")
if True:
        # Create initial network, only RPN, without detection network
        #         initial weights      train?
        # conv:   base_model           only conv3_1 and up
        # rpn:    new (random init)    yes
        # frcn:   -                    -
# conv layers
conv_layers = clone_conv_layers(base_model)
conv_out = conv_layers(feat_norm)
# RPN and losses
rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
stage1_rpn_network = combine([rpn_rois, rpn_losses])
# train
if debug_output: plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)
print("stage 1a - buffering rpn proposals")
buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)
print("stage 1b - frcn")
if True:
        # Create full network, initialize conv layers with ImageNet, fix RPN weights
        #         initial weights      train?
        # conv:   base_model           only conv3_1 and up
        # rpn:    stage1a rpn model    no --> use buffered proposals
        # frcn:   base_model + new     yes
# conv_layers
conv_layers = clone_conv_layers(base_model)
conv_out = conv_layers(feat_norm)
# use buffered proposals in target layer
rois, label_targets, bbox_targets, bbox_inside_weights = \
create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])
# Fast RCNN and losses
fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights)
pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])
# train
if debug_output: plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
buffered_proposals_s1 = None
print("stage 2a - rpn")
if True:
        # Keep conv weights from detection network and fix them
        #         initial weights      train?
        # conv:   stage1b frcn model   no
        # rpn:    stage1a rpn model    yes
        # frcn:   -                    -
# conv_layers
conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
# RPN and losses
rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone)
rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
rpn_rois = rpn_net.outputs[0]
rpn_losses = rpn_net.outputs[1]
stage2_rpn_network = combine([rpn_rois, rpn_losses])
# train
if debug_output: plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)
print("stage 2a - buffering rpn proposals")
buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)
print("stage 2b - frcn")
if True:
        # Keep conv and RPN weights from step 3 and fix them
        #         initial weights      train?
        # conv:   stage2a rpn model    no
        # rpn:    stage2a rpn model    no --> use buffered proposals
        # frcn:   stage1b frcn model   yes
# conv_layers
conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
conv_out = conv_layers(image_input)
# Fast RCNN and losses
frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone)
stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
detection_losses = stage2_frcn_network.outputs[3]
pred_error = stage2_frcn_network.outputs[4]
# train
if debug_output: plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
buffered_proposals_s2 = None
return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)
def eval_faster_rcnn_mAP(eval_model):
img_map_file = globalvars['test_map_file']
roi_map_file = globalvars['test_roi_file']
classes = globalvars['classes']
image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
frcn_eval = eval_model(image_input, dims_input)
# Create the minibatch source
minibatch_source = ObjectDetectionMinibatchSource(
img_map_file, roi_map_file,
max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
randomize=False, use_flipping=False,
max_images=cfg["CNTK"].NUM_TEST_IMAGES)
# define mapping from reader streams to network inputs
input_map = {
minibatch_source.image_si: image_input,
minibatch_source.roi_si: roi_input,
minibatch_source.dims_si: dims_input
}
# all detections are collected into:
# all_boxes[cls][image] = N x 5 array of detections in
# (x1, y1, x2, y2, score)
all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])]
    # evaluate test images and write network output to file
print("Evaluating Faster R-CNN model for %s images." % num_test_images)
all_gt_infos = {key: [] for key in classes}
for img_i in range(0, num_test_images):
mb_data = minibatch_source.next_minibatch(1, input_map=input_map)
gt_row = mb_data[roi_input].asarray()
gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)]
for cls_index, cls_name in enumerate(classes):
if cls_index == 0: continue
cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)]
all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
'difficult': [False] * len(cls_gt_boxes),
'det': [False] * len(cls_gt_boxes)})
output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
out_dict = dict([(k.name, k) for k in output])
out_cls_pred = output[out_dict['cls_pred']][0]
out_rpn_rois = output[out_dict['rpn_rois']][0]
out_bbox_regr = output[out_dict['bbox_regr']][0]
labels = out_cls_pred.argmax(axis=1)
scores = out_cls_pred.max(axis=1)
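        # apply the class-specific regression deltas of the predicted label to
        # each proposal (see regress_rois in cntk_helpers)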
regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())
labels.shape = labels.shape + (1,)
scores.shape = scores.shape + (1,)
coords_score_label = np.hstack((regressed_rois, scores, labels))
# shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
for cls_j in range(1, globalvars['num_classes']):
coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)]
all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False)
if (img_i+1) % 100 == 0:
print("Processed {} samples".format(img_i+1))
# calculate mAP
aps = evaluate_detections(all_boxes, all_gt_infos, classes,
nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                              conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)
ap_list = []
for class_name in aps:
ap_list += [aps[class_name]]
print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
meanAP = np.nanmean(ap_list)
print('Mean AP = {:.4f}'.format(meanAP))
return meanAP
# The main method trains and evaluates a Faster R-CNN model.
# If a trained model is already available, it is loaded and no training is performed (if MAKE_MODE=True).
if __name__ == '__main__':
running_locally = os.path.exists(map_file_path)
if running_locally:
os.chdir(map_file_path)
if not os.path.exists(os.path.join(abs_path, "Output")):
os.makedirs(os.path.join(abs_path, "Output"))
if not os.path.exists(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET)):
os.makedirs(os.path.join(abs_path, "Output", cfg["CNTK"].DATASET))
else:
# disable debug and plot outputs when running on GPU cluster
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
set_global_vars()
model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model"
.format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage"))
# Train only if no model exists yet
if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
print("Loading existing model from %s" % model_path)
eval_model = load_model(model_path)
else:
if globalvars['train_e2e']:
eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
else:
eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT)
eval_model.save(model_path)
if cfg["CNTK"].DEBUG_OUTPUT:
plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}"
.format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE)))
print("Stored eval model at %s" % model_path)
# Compute mean average precision on test set
eval_faster_rcnn_mAP(eval_model)
# Plot results on test set
if cfg["CNTK"].VISUALIZE_RESULTS:
from plot_helpers import eval_and_plot_faster_rcnn
num_eval = min(num_test_images, 100)
img_shape = (num_channels, image_height, image_width)
results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET)
eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape,
results_folder, feature_node_name, globalvars['classes'],
drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)