-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathyolo_head.py
72 lines (51 loc) · 2.33 KB
/
yolo_head.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
from scipy.special import expit
def yolo_head(predictions, num_classes, input_dims, anchors=None):
    """
    YOLO Head to process predictions from Darknet

    Each detection scale is decoded separately and the resulting boxes are
    joined along the box axis.

    :param predictions: A list of arrays, one per detection scale, each shaped
        (N, G, G, num_anchors * (num_classes + 5)), e.g. (N, 19, 19, 255),
        (N, 38, 38, 255) and (N, 76, 76, 255). The i-th array is decoded with
        the i-th anchor group.
    :param num_classes: Total number of classes
    :param input_dims: Input dimensions of the image
    :param anchors: Optional list of anchor groups, one group per detection
        scale, each a list of [width, height] pairs. Defaults to the three
        built-in groups below.
    :return: An array with the shape (N, num_boxes, num_classes + 5)
    :raises ValueError: If predictions is empty
    :raises IndexError: If there are more predictions than anchor groups
    """
    if anchors is None:
        anchors = [
            [[116, 90], [156, 198], [373, 326]],
            [[30, 61], [62, 45], [59, 119]],
            [[10, 13], [16, 30], [33, 23]],
        ]

    results = [
        _yolo_head(prediction, num_classes, anchors[i], input_dims)
        for i, prediction in enumerate(predictions)
    ]
    if not results:
        # Fail with a clear message instead of numpy's generic concatenate error.
        raise ValueError("predictions must contain at least one detection scale")

    return np.concatenate(results, axis=1)
def _yolo_head(prediction, num_classes, anchors, input_dims):
    """
    Decode the raw output of one detection scale into bounding boxes.

    :param prediction: Array shaped (N, G, G, num_anchors * (num_classes + 5)).
        The grid is treated as square: only the size of axis 1 is read.
    :param num_classes: Total number of classes
    :param anchors: Sequence of (width, height) anchor pairs for this scale,
        expressed in the same units as input_dims
    :param input_dims: Input dimensions of the image; only input_dims[0] is used
    :return: Array shaped (N, G * G * num_anchors, num_classes + 5). Each row is
        [x_top_left, y_top_left, width, height, objectness, class scores...]
        scaled back to the input image size.
    """
    batch_size = np.shape(prediction)[0]
    # Take the grid size from the feature map itself. Re-deriving it as
    # input_dims[0] // stride gives a different number whenever the input size
    # is not an exact multiple of the grid size, which made the reshape fail.
    grid_size = np.shape(prediction)[1]
    stride = input_dims[0] // grid_size
    num_anchors = len(anchors)

    # One row per (cell, anchor) pair: cells in row-major order, anchors
    # varying fastest within a cell.
    prediction = np.reshape(
        prediction,
        (batch_size, num_anchors * grid_size * grid_size, num_classes + 5))

    box_xy = expit(prediction[:, :, :2])  # t_x, t_y (offset inside the cell)
    # Slicing with 4:5 keeps the trailing axis so it can be concatenated later.
    objectness = expit(prediction[:, :, 4:5])  # p_o (objectness score)

    # Build the (x, y) index of every cell, repeated once per anchor so the
    # rows line up with the flattened prediction.
    grid = np.arange(grid_size)
    cols, rows = np.meshgrid(grid, grid)
    cell_offsets = np.stack((cols.ravel(), rows.ravel()), axis=1)
    cell_offsets = np.repeat(cell_offsets, num_anchors, axis=0)
    box_xy += np.expand_dims(cell_offsets, 0)

    # Log space transform of the height and width, with the anchors expressed
    # in grid units and cycled once per cell.
    scaled_anchors = [(a[0] / stride, a[1] / stride) for a in anchors]
    scaled_anchors = np.tile(scaled_anchors, (grid_size * grid_size, 1))
    box_wh = np.exp(prediction[:, :, 2:4]) * np.expand_dims(scaled_anchors, 0)

    # Sigmoid class scores
    class_scores = expit(prediction[:, :, 5:])

    # Resize detection map back to the input image size
    box_xy *= stride
    box_wh *= stride

    # Convert centroids to top left coordinates
    box_xy -= box_wh / 2

    return np.concatenate([box_xy, box_wh, objectness, class_scores], axis=2)