# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import zipfile
import cv2  # pip install opencv-python
import numpy as np
import os
import sys
import pdb

DEBUG = False
if DEBUG:
    import matplotlib.pyplot as mp
class ObjectDetectionReader:
    def __init__(self, img_map_file, roi_map_file, max_annotations_per_image,
                 pad_width, pad_height, pad_value, randomize, use_flipping,
                 max_images=None, buffered_rpn_proposals=None):
        self._pad_width = pad_width
        self._pad_height = pad_height
        self._pad_value = pad_value
        self._randomize = randomize
        self._use_flipping = use_flipping
        self._flip_image = True  # will be set to False in the first call to _reset_reading_order
        self._buffered_rpn_proposals = buffered_rpn_proposals
        self._img_file_paths = []
        self._gt_annotations = []
        self._num_images = self._parse_map_files(img_map_file, roi_map_file, max_annotations_per_image, max_images)
        self._img_stats = [None for _ in range(self._num_images)]
        self._reading_order = None
        self._reading_index = -1
    def get_next_input(self):
        '''
        Reads image data and returns image, annotations and shape information.
        :return:
            img_data - The image data in CNTK format. The image is scaled to fit into the size given in the constructor, centered and padded.
            roi_data - The ground truth annotations as a numpy array of shape (max_annotations_per_image, 5), i.e. 4 coords + label per roi.
            img_dims - (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
        '''
        index = self._get_next_image_index()
        roi_data = self._get_gt_annotations(index)
        if DEBUG:
            img_data, img_dims, resized_with_pad = self._load_resize_and_pad_image(index)
            self._debug_plot(resized_with_pad, roi_data)
        else:
            img_data, img_dims = self._load_resize_and_pad_image(index)
        buffered_proposals = self._get_buffered_proposals(index)
        return img_data, roi_data, img_dims, buffered_proposals
    def sweep_end(self):
        return self._reading_index >= self._num_images

    def _debug_plot(self, img_data, roi_data):
        color = (0, 255, 0)
        thickness = 2
        for rect in roi_data:
            pt1 = tuple([int(float(x)) for x in rect[0:2]])
            pt2 = tuple([int(float(x)) for x in rect[2:4]])
            try:
                cv2.rectangle(img_data, pt1, pt2, color, thickness)
            except Exception:
                pdb.set_trace()
                print("Unexpected error:", sys.exc_info()[0])

        mp.imshow(img_data)
        mp.plot()
        mp.show()
    def _parse_map_files(self, img_map_file, roi_map_file, max_annotations_per_image, max_images):
        # read image map file and buffer sequence numbers
        with open(img_map_file) as f:
            img_map_lines = f.readlines()
        img_map_lines = [line for line in img_map_lines if len(line.strip()) > 0]  # skip empty lines
        if max_images is not None:
            img_map_lines = img_map_lines[:max_images]
        img_sequence_numbers = [int(x.split('\t')[0]) for x in img_map_lines]
        img_base_path = os.path.dirname(os.path.abspath(img_map_file))
        self._img_file_paths = [os.path.join(img_base_path, x.split('\t')[1]) for x in img_map_lines]

        # read roi map file
        with open(roi_map_file) as f:
            roi_map_lines = f.readlines()
        roi_map_lines = [line for line in roi_map_lines if len(line.strip()) > 0]  # skip empty lines
        if max_images is not None:
            roi_map_lines = roi_map_lines[:max_images]
        roi_sequence_numbers = []
        for roi_line in roi_map_lines:
            roi_sequence_numbers.append(int(roi_line[:roi_line.find(' ')]))
            rest = roi_line[roi_line.find(' ') + 1:]
            bbox_input = rest[rest.find(' ') + 1:]
            bbox_floats = np.fromstring(bbox_input, dtype=np.float32, sep=' ')
            num_floats = len(bbox_floats)
            assert num_floats % 5 == 0, "Ground truth annotation file is corrupt. Lines must contain 4 coordinates and a label per roi."
            annotations = np.zeros((max_annotations_per_image, 5))
            num_annotations = int(num_floats / 5)
            if num_annotations > max_annotations_per_image:
                print('Warning: The number of ground truth annotations ({}) is larger than the provided maximum number ({}).'
                      .format(num_annotations, max_annotations_per_image))
                bbox_floats = bbox_floats[:(max_annotations_per_image * 5)]
                num_annotations = max_annotations_per_image
            annotations[:num_annotations, :] = np.array(bbox_floats).reshape((num_annotations, 5))
            self._gt_annotations.append(annotations)

        # make sure sequence numbers match
        assert len(img_sequence_numbers) == len(roi_sequence_numbers), "number of images and annotation lines do not match"
        assert np.allclose(img_sequence_numbers, roi_sequence_numbers, 0, 0), "the sequence numbers in image and roi map files do not match"
        return len(img_sequence_numbers)
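
    # A minimal sketch of the map-file layouts as inferred from the parsing
    # above; the file contents below are hypothetical examples, not taken
    # from this repository:
    #
    #   image map file (tab-separated: sequence number, relative image path, dummy label):
    #       0<TAB>images/img_001.jpg<TAB>0
    #       1<TAB>images/img_002.jpg<TAB>0
    #
    #   roi map file (space-separated: sequence number, one tag token such as
    #   |roiAndLabel which the parser skips, then groups of 4 coords + 1 label):
    #       0 |roiAndLabel 10 20 110 220 1 50 60 150 160 2
    #       1 |roiAndLabel 30 40 130 140 1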
    def _reset_reading_order(self):
        self._reading_order = np.arange(self._num_images)
        if self._randomize:
            np.random.shuffle(self._reading_order)
        # if flipping should be used then we alternate between epochs from flipped to non-flipped
        self._flip_image = not self._flip_image if self._use_flipping else False
        self._reading_index = 0
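
    # With use_flipping = True the toggle above yields: sweep 1 unflipped
    # (_flip_image starts at True and is negated on the first reset),
    # sweep 2 flipped, sweep 3 unflipped, and so on.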
    def _read_image(self, image_path):
        if "@" in image_path:
            # image lives inside a zip archive; everything before the '@' is
            # the archive path, the rest (after the separator) the member name
            at = image_path.find('@')
            zip_file = image_path[:at]
            img_name = image_path[(at + 2):]
            archive = zipfile.ZipFile(zip_file, 'r')
            imgdata = archive.read(img_name)
            imgnp = np.array(bytearray(imgdata), dtype=np.uint8)
            img = cv2.imdecode(imgnp, 1)
        else:
            img = cv2.imread(image_path)
        return img
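
    # Example of the in-archive path convention handled above, assuming a
    # hypothetical archive "images.zip" and an "@/" separator (the parser
    # skips exactly two characters after the archive path):
    #
    #   images.zip@/img_001.jpg  ->  member "img_001.jpg" of images.zip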
    def _prepare_annotations_and_image_stats(self, index, img_width, img_height):
        annotations = self._gt_annotations[index]
        do_scale_w = img_width > img_height
        target_w = self._pad_width
        target_h = self._pad_height
        if do_scale_w:
            scale_factor = float(self._pad_width) / float(img_width)
            target_h = int(np.round(img_height * scale_factor))
        else:
            scale_factor = float(self._pad_height) / float(img_height)
            target_w = int(np.round(img_width * scale_factor))
        top = int(max(0, np.round((self._pad_height - target_h) / 2)))
        left = int(max(0, np.round((self._pad_width - target_w) / 2)))
        bottom = self._pad_height - top - target_h
        right = self._pad_width - left - target_w
        # scale and shift the boxes in place: xyxy is a view into annotations,
        # so no write-back (annotations[:, :4] = xyxy) is needed
        xyxy = annotations[:, :4]
        xyxy *= scale_factor
        xyxy += (left, top, left, top)
        # TODO: do we need to round/floor/ceil xyxy coords?
        annotations[:, 0] = np.round(annotations[:, 0])
        annotations[:, 1] = np.round(annotations[:, 1])
        annotations[:, 2] = np.round(annotations[:, 2])
        annotations[:, 3] = np.round(annotations[:, 3])
        # keep image stats for scaling and padding images later
        img_stats = [target_w, target_h, img_width, img_height, top, bottom, left, right]
        self._img_stats[index] = img_stats
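
    # Worked example for the scaling math above, assuming hypothetical values
    # pad_width = pad_height = 850 and an original image of 1000 x 600:
    #   scale_factor = 850 / 1000 = 0.85
    #   target_w = 850, target_h = round(600 * 0.85) = 510
    #   top = round((850 - 510) / 2) = 170, bottom = 170, left = right = 0
    #   a ground truth box (100, 200, 300, 400) scales to (85, 170, 255, 340)
    #   and shifts by (left, top) to (85, 340, 255, 510)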
    def _get_next_image_index(self):
        if self._reading_index < 0 or self._reading_index >= self._num_images:
            self._reset_reading_order()
        next_image_index = self._reading_order[self._reading_index]
        self._reading_index += 1
        return next_image_index
    def _load_resize_and_pad_image(self, index):
        image_path = self._img_file_paths[index]
        img = self._read_image(image_path)
        if self._img_stats[index] is None:
            img_height, img_width = img.shape[:2]
            self._prepare_annotations_and_image_stats(index, img_width, img_height)
        target_w, target_h, img_width, img_height, top, bottom, left, right = self._img_stats[index]
        resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST)
        resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                              value=self._pad_value)
        if self._flip_image:
            resized_with_pad = cv2.flip(resized_with_pad, 1)
        # transpose(2, 0, 1) converts the image from HWC to the CHW format which CNTK accepts
        model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))
        # dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height
        dims = (self._pad_width, self._pad_height, target_w, target_h, img_width, img_height)
        if DEBUG:
            return model_arg_rep, dims, resized_with_pad
        return model_arg_rep, dims
    def _get_gt_annotations(self, index):
        annotations = self._gt_annotations[index]
        if self._flip_image:
            # mirror the x coordinates; x1 and x2 swap roles so x1 <= x2 still holds
            flipped_annotations = np.array(annotations)
            flipped_annotations[:, 0] = self._pad_width - annotations[:, 2] - 1
            flipped_annotations[:, 2] = self._pad_width - annotations[:, 0] - 1
            return flipped_annotations
        return annotations
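
    # Worked example for the horizontal flip above, assuming the hypothetical
    # pad_width = 850: a box with x1 = 85, x2 = 255 maps to
    #   x1' = 850 - 255 - 1 = 594, x2' = 850 - 85 - 1 = 764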
    def _get_buffered_proposals(self, index):
        if self._buffered_rpn_proposals is None:
            return None
        buffered_proposals = self._buffered_rpn_proposals[index]
        if self._flip_image:
            flipped_proposals = np.array(buffered_proposals, dtype=np.float32)
            flipped_proposals[:, 0] = self._pad_width - buffered_proposals[:, 2] - 1
            flipped_proposals[:, 2] = self._pad_width - buffered_proposals[:, 0] - 1
            return flipped_proposals
        return buffered_proposals
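

# A minimal usage sketch (not part of the original module); the map file
# names and padding parameters below are hypothetical examples:
if __name__ == '__main__':
    reader = ObjectDetectionReader(img_map_file='train_img_map.txt',
                                   roi_map_file='train_roi_map.txt',
                                   max_annotations_per_image=50,
                                   pad_width=850, pad_height=850, pad_value=114,
                                   randomize=True, use_flipping=True)
    # read one full sweep over the data set
    while True:
        img_data, roi_data, img_dims, proposals = reader.get_next_input()
        print('image CHW shape: {}, dims: {}'.format(img_data.shape, img_dims))
        if reader.sweep_end():
            break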