diff --git a/docs/structure.py b/docs/structure.py index 83cf83a92..a11113c2c 100644 --- a/docs/structure.py +++ b/docs/structure.py @@ -41,7 +41,7 @@ boxes.to_center_form, boxes.to_one_hot, boxes.to_normalized_coordinates, - boxes.to_point_form + boxes.to_corner_form ], }, diff --git a/examples/object_detection/boxes.py b/examples/object_detection/boxes.py new file mode 100644 index 000000000..b5ae1c449 --- /dev/null +++ b/examples/object_detection/boxes.py @@ -0,0 +1,40 @@ +import numpy as np +from paz.backend.boxes import compute_ious, to_corner_form + + +def match(boxes, prior_boxes, iou_threshold=0.5): + """Matches each prior box with a ground truth box (box from `boxes`). + It then selects which matched box will be considered positive e.g. iou > .5 + and returns for each prior box a ground truth box that is either positive + (with a class argument different than 0) or negative. + + # Arguments + boxes: Numpy array of shape `(num_ground_truh_boxes, 4 + 1)`, + where the first the first four coordinates correspond to + box coordinates and the last coordinates is the class + argument. This boxes should be the ground truth boxes. + prior_boxes: Numpy array of shape `(num_prior_boxes, 4)`. + where the four coordinates are in center form coordinates. + iou_threshold: Float between [0, 1]. Intersection over union + used to determine which box is considered a positive box. + + # Returns + numpy array of shape `(num_prior_boxes, 4 + 1)`. + where the first the first four coordinates correspond to point + form box coordinates and the last coordinates is the class + argument. + """ + ious = compute_ious(boxes, to_corner_form(np.float32(prior_boxes))) + per_prior_which_box_iou = np.max(ious, axis=0) + per_prior_which_box_arg = np.argmax(ious, 0) + + # overwriting per_prior_which_box_arg if they are the best prior box + per_box_which_prior_arg = np.argmax(ious, 1) + per_prior_which_box_iou[per_box_which_prior_arg] = 2 + for box_arg in range(len(per_box_which_prior_arg)): + best_prior_box_arg = per_box_which_prior_arg[box_arg] + per_prior_which_box_arg[best_prior_box_arg] = box_arg + + matches = boxes[per_prior_which_box_arg] + matches[per_prior_which_box_iou < iou_threshold, 4] = 0 + return matches diff --git a/examples/object_detection/debugger.py b/examples/object_detection/debugger.py index da322c553..7269c8a79 100644 --- a/examples/object_detection/debugger.py +++ b/examples/object_detection/debugger.py @@ -1,12 +1,20 @@ +import tensorflow as tf +gpus = tf.config.experimental.list_physical_devices('GPU') +tf.config.experimental.set_memory_growth(gpus[0], True) + + import numpy as np from paz.models import SSD300 from paz.datasets import VOC from paz.abstract import Processor, SequentialProcessor from paz import processors as pr -from paz.pipelines import AugmentDetection +from detection import AugmentDetection +# from paz.pipelines import AugmentDetection + class ShowBoxes(Processor): - def __init__(self, class_names, prior_boxes, variances=[.1, .2]): + def __init__(self, class_names, prior_boxes, + variances=[0.1, 0.1, 0.2, 0.2]): super(ShowBoxes, self).__init__() self.deprocess_boxes = SequentialProcessor([ pr.DecodeBoxes(prior_boxes, variances), @@ -15,8 +23,10 @@ def __init__(self, class_names, prior_boxes, variances=[.1, .2]): self.denormalize_boxes2D = pr.DenormalizeBoxes2D() self.draw_boxes2D = pr.DrawBoxes2D(class_names) self.show_image = pr.ShowImage() + self.resize_image = pr.ResizeImage((600, 600)) def call(self, image, boxes): + image = self.resize_image(image) boxes2D = self.deprocess_boxes(boxes) boxes2D = self.denormalize_boxes2D(image, boxes2D) image = self.draw_boxes2D(image, boxes2D) @@ -35,7 +45,8 @@ def call(self, image, boxes): data = data_manager.load_data() class_names = data_manager.class_names -model = SSD300(base_weights='VGG', head_weights=None) +# model = SSD300(base_weights='VGG', head_weights=None) +model = SSD300() prior_boxes = model.prior_boxes testor_encoder = AugmentDetection(prior_boxes) diff --git a/examples/object_detection/demo.py b/examples/object_detection/demo.py index 851a9f5bc..5787f463d 100644 --- a/examples/object_detection/demo.py +++ b/examples/object_detection/demo.py @@ -2,6 +2,11 @@ from paz.pipelines import SSD300FAT, SSD300VOC, SSD512COCO, SSD512YCBVideo from paz.backend.camera import VideoPlayer, Camera +import tensorflow as tf +gpus = tf.config.experimental.list_physical_devices('GPU') +tf.config.experimental.set_memory_growth(gpus[0], True) + + parser = argparse.ArgumentParser(description='SSD object detection demo') diff --git a/examples/object_detection/detection.py b/examples/object_detection/detection.py new file mode 100644 index 000000000..745da9078 --- /dev/null +++ b/examples/object_detection/detection.py @@ -0,0 +1,113 @@ + +from paz import processors as pr +from paz.abstract import SequentialProcessor +from processors import MatchBoxes + + +class AugmentImage(SequentialProcessor): + """Augments an RGB image by randomly changing contrast, brightness + saturation and hue. + """ + def __init__(self): + super(AugmentImage, self).__init__() + self.add(pr.RandomContrast()) + self.add(pr.RandomBrightness()) + self.add(pr.RandomSaturation(0.7)) + self.add(pr.RandomHue()) + + +class PreprocessImage(SequentialProcessor): + """Preprocess RGB image by resizing it to the given ``shape``. If a + ``mean`` is given it is substracted from image and it not the image gets + normalized. + + # Arguments + shape: List of two Ints. + mean: List of three Ints indicating the per-channel mean to be + subtracted. + """ + def __init__(self, shape, mean=pr.BGR_IMAGENET_MEAN): + super(PreprocessImage, self).__init__() + self.add(pr.ResizeImage(shape)) + self.add(pr.CastImage(float)) + if mean is None: + self.add(pr.NormalizeImage()) + else: + self.add(pr.SubtractMeanImage(mean)) + + +class AugmentBoxes(SequentialProcessor): + """Perform data augmentation with bounding boxes. + + # Arguments + mean: List of three elements used to fill empty image spaces. + """ + def __init__(self, mean=pr.BGR_IMAGENET_MEAN): + super(AugmentBoxes, self).__init__() + self.add(pr.ToImageBoxCoordinates()) + self.add(pr.Expand(mean=mean)) + # RandomSampleCrop was commented out + self.add(pr.RandomSampleCrop()) + self.add(pr.RandomFlipBoxesLeftRight()) + self.add(pr.ToNormalizedBoxCoordinates()) + + +class PreprocessBoxes(SequentialProcessor): + """Preprocess bounding boxes + + # Arguments + num_classes: Int. + prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing + prior/default bounding boxes. + IOU: Float. Intersection over union used to match boxes. + variances: List of two floats indicating variances to be encoded + for encoding bounding boxes. + """ + def __init__(self, num_classes, prior_boxes, IOU, variances): + super(PreprocessBoxes, self).__init__() + self.add(MatchBoxes(prior_boxes, IOU),) + self.add(pr.EncodeBoxes(prior_boxes, variances)) + self.add(pr.BoxClassToOneHotVector(num_classes)) + + +class AugmentDetection(SequentialProcessor): + """Augment boxes and images for object detection. + + # Arguments + prior_boxes: Numpy array of shape ``[num_boxes, 4]`` containing + prior/default bounding boxes. + split: Flag from `paz.processors.TRAIN`, ``paz.processors.VAL`` + or ``paz.processors.TEST``. Certain transformations would take + place depending on the flag. + num_classes: Int. + size: Int. Image size. + mean: List of three elements indicating the per channel mean. + IOU: Float. Intersection over union used to match boxes. + variances: List of two floats indicating variances to be encoded + for encoding bounding boxes. + """ + def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300, + mean=pr.BGR_IMAGENET_MEAN, IOU=.5, + variances=[0.1, 0.1, 0.2, 0.2]): + super(AugmentDetection, self).__init__() + # image processors + self.augment_image = AugmentImage() + self.augment_image.add(pr.ConvertColorSpace(pr.RGB2BGR)) + self.preprocess_image = PreprocessImage((size, size), mean) + + # box processors + self.augment_boxes = AugmentBoxes() + args = (num_classes, prior_boxes, IOU, variances) + self.preprocess_boxes = PreprocessBoxes(*args) + + # pipeline + self.add(pr.UnpackDictionary(['image', 'boxes'])) + self.add(pr.ControlMap(pr.LoadImage(), [0], [0])) + if split == pr.TRAIN: + self.add(pr.ControlMap(self.augment_image, [0], [0])) + self.add(pr.ControlMap(self.augment_boxes, [0, 1], [0, 1])) + self.add(pr.ControlMap(self.preprocess_image, [0], [0])) + self.add(pr.ControlMap(self.preprocess_boxes, [1], [1])) + self.add(pr.SequenceWrapper( + {0: {'image': [size, size, 3]}}, + {1: {'boxes': [len(prior_boxes), 4 + num_classes]}})) diff --git a/examples/object_detection/processors.py b/examples/object_detection/processors.py new file mode 100644 index 000000000..58d98fda2 --- /dev/null +++ b/examples/object_detection/processors.py @@ -0,0 +1,22 @@ +from paz.abstract import Processor +from boxes import match + + +class MatchBoxes(Processor): + """Match prior boxes with ground truth boxes. + + # Arguments + prior_boxes: Numpy array of shape (num_boxes, 4). + iou: Float in [0, 1]. Intersection over union in which prior boxes + will be considered positive. A positive box is box with a class + different than `background`. + variance: List of two floats. + """ + def __init__(self, prior_boxes, iou=.5): + self.prior_boxes = prior_boxes + self.iou = iou + super(MatchBoxes, self).__init__() + + def call(self, boxes): + boxes = match(boxes, self.prior_boxes, self.iou) + return boxes diff --git a/examples/object_detection/train.py b/examples/object_detection/train.py index b0b98d07b..239ff87d7 100644 --- a/examples/object_detection/train.py +++ b/examples/object_detection/train.py @@ -1,10 +1,17 @@ import os import argparse +import tensorflow as tf +gpus = tf.config.experimental.list_physical_devices('GPU') +tf.config.experimental.set_memory_growth(gpus[0], True) +# from tensorflow.python.framework.ops import disable_eager_execution +# disable_eager_execution() +# import tensorflow as tf +# tf.compat.v1.experimental.output_all_intermediates(True) from tensorflow.keras.optimizers import SGD from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint from paz.optimization.callbacks import LearningRateScheduler -from paz.pipelines import AugmentDetection +from detection import AugmentDetection from paz.models import SSD300 from paz.datasets import VOC from paz.optimization import MultiBoxLoss @@ -17,9 +24,7 @@ parser = argparse.ArgumentParser(description=description) parser.add_argument('-bs', '--batch_size', default=32, type=int, help='Batch size for training') -parser.add_argument('-st', '--steps_per_epoch', default=1000, type=int, - help='Batch size for training') -parser.add_argument('-et', '--evaluation_period', default=1, type=int, +parser.add_argument('-et', '--evaluation_period', default=10, type=int, help='evaluation frequency') parser.add_argument('-lr', '--learning_rate', default=0.001, type=float, help='Initial learning rate for SGD') @@ -27,7 +32,7 @@ help='Momentum for SGD') parser.add_argument('-g', '--gamma_decay', default=0.1, type=float, help='Gamma decay for learning rate scheduler') -parser.add_argument('-e', '--num_epochs', default=120, type=int, +parser.add_argument('-e', '--num_epochs', default=240, type=int, help='Maximum number of epochs before finishing') parser.add_argument('-iou', '--AP_IOU', default=0.5, type=float, help='Average precision IOU used for evaluation') @@ -36,7 +41,7 @@ parser.add_argument('-dp', '--data_path', default='VOCdevkit/', type=str, help='Path for writing model weights and logs') parser.add_argument('-se', '--scheduled_epochs', nargs='+', type=int, - default=[55, 76], help='Epochs for reducing learning rate') + default=[110, 152], help='Epoch learning rate reduction') parser.add_argument('-mp', '--multiprocessing', default=False, type=bool, help='Select True for multiprocessing') parser.add_argument('-w', '--workers', default=1, type=int, @@ -100,9 +105,8 @@ args.AP_IOU) # training -model.fit_generator( +model.fit( sequencers[0], - steps_per_epoch=args.steps_per_epoch, epochs=args.num_epochs, verbose=1, callbacks=[checkpoint, log, schedule, evaluate], diff --git a/examples/tutorials/object_detection_pipeline.py b/examples/tutorials/object_detection_pipeline.py index a3486edbc..82b6625bf 100644 --- a/examples/tutorials/object_detection_pipeline.py +++ b/examples/tutorials/object_detection_pipeline.py @@ -102,7 +102,8 @@ def __init__(self, num_classes, prior_boxes, IOU, variances): # Putting everything together in a single processor: class AugmentDetection(SequentialProcessor): def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300, - mean=pr.BGR_IMAGENET_MEAN, IOU=.5, variances=[.1, .2]): + mean=pr.BGR_IMAGENET_MEAN, IOU=.5, + variances=[0.1, 0.1, 0.2, 0.2]): super(AugmentDetection, self).__init__() # image processors diff --git a/paz/backend/boxes.py b/paz/backend/boxes.py index e8c069976..51da143f0 100644 --- a/paz/backend/boxes.py +++ b/paz/backend/boxes.py @@ -1,57 +1,8 @@ import numpy as np -def compute_iou(box, boxes): - """Calculates the intersection over union between 'box' and all 'boxes'. - Both `box` and `boxes` are in corner coordinates. - - # Arguments - box: Numpy array with length at least of 4. - boxes: Numpy array with shape `(num_boxes, 4)`. - - # Returns - Numpy array of shape `(num_boxes, 1)`. - """ - - x_min_A, y_min_A, x_max_A, y_max_A = box[:4] - x_min_B, y_min_B = boxes[:, 0], boxes[:, 1] - x_max_B, y_max_B = boxes[:, 2], boxes[:, 3] - # calculating the intersection - inner_x_min = np.maximum(x_min_B, x_min_A) - inner_y_min = np.maximum(y_min_B, y_min_A) - inner_x_max = np.minimum(x_max_B, x_max_A) - inner_y_max = np.minimum(y_max_B, y_max_A) - inner_w = np.maximum((inner_x_max - inner_x_min), 0) - inner_h = np.maximum((inner_y_max - inner_y_min), 0) - intersection_area = inner_w * inner_h - # calculating the union - box_area_B = (x_max_B - x_min_B) * (y_max_B - y_min_B) - box_area_A = (x_max_A - x_min_A) * (y_max_A - y_min_A) - union_area = box_area_A + box_area_B - intersection_area - intersection_over_union = intersection_area / union_area - return intersection_over_union - - -def compute_ious(boxes_A, boxes_B): - """Calculates the intersection over union between `boxes_A` and `boxes_B`. - For each box present in the rows of `boxes_A` it calculates - the intersection over union with respect to all boxes in `boxes_B`. - The variables `boxes_A` and `boxes_B` contain the corner coordinates - of the left-top corner `(x_min, y_min)` and the right-bottom - `(x_max, y_max)` corner. - - # Arguments - boxes_A: Numpy array with shape `(num_boxes_A, 4)`. - boxes_B: Numpy array with shape `(num_boxes_B, 4)`. - - # Returns - Numpy array of shape `(num_boxes_A, num_boxes_B)`. - """ - return np.apply_along_axis(compute_iou, 1, boxes_A, boxes_B) - - -def to_point_form(boxes): - """Transform from center coordinates to corner coordinates. +def to_center_form(boxes): + """Transform from corner coordinates to center coordinates. # Arguments boxes: Numpy array with shape `(num_boxes, 4)`. @@ -59,18 +10,17 @@ def to_point_form(boxes): # Returns Numpy array with shape `(num_boxes, 4)`. """ - center_x, center_y = boxes[:, 0], boxes[:, 1] - width, height = boxes[:, 2], boxes[:, 3] - x_min = center_x - (width / 2.0) - x_max = center_x + (width / 2.0) - y_min = center_y - (height / 2.0) - y_max = center_y + (height / 2.0) - return np.concatenate([x_min[:, None], y_min[:, None], - x_max[:, None], y_max[:, None]], axis=1) + x_min, y_min = boxes[:, 0:1], boxes[:, 1:2] + x_max, y_max = boxes[:, 2:3], boxes[:, 3:4] + center_x = (x_max + x_min) / 2.0 + center_y = (y_max + y_min) / 2.0 + W = x_max - x_min + H = y_max - y_min + return np.concatenate([center_x, center_y, W, H], axis=1) -def to_center_form(boxes): - """Transform from corner coordinates to center coordinates. +def to_corner_form(boxes): + """Transform from center coordinates to corner coordinates. # Arguments boxes: Numpy array with shape `(num_boxes, 4)`. @@ -78,17 +28,16 @@ def to_center_form(boxes): # Returns Numpy array with shape `(num_boxes, 4)`. """ - x_min, y_min = boxes[:, 0], boxes[:, 1] - x_max, y_max = boxes[:, 2], boxes[:, 3] - center_x = (x_max + x_min) / 2. - center_y = (y_max + y_min) / 2. - width = x_max - x_min - height = y_max - y_min - return np.concatenate([center_x[:, None], center_y[:, None], - width[:, None], height[:, None]], axis=1) + center_x, center_y = boxes[:, 0:1], boxes[:, 1:2] + W, H = boxes[:, 2:3], boxes[:, 3:4] + x_min = center_x - (W / 2.0) + x_max = center_x + (W / 2.0) + y_min = center_y - (H / 2.0) + y_max = center_y + (H / 2.0) + return np.concatenate([x_min, y_min, x_max, y_max], axis=1) -def encode(matched, priors, variances): +def encode(matched, priors, variances=[0.1, 0.1, 0.2, 0.2]): """Encode the variances from the priorbox layers into the ground truth boxes we have matched (based on jaccard overlap) with the prior boxes. @@ -102,19 +51,23 @@ def encode(matched, priors, variances): # Returns encoded boxes: Numpy array of shape `(num_priors, 4)`. """ - - # dist b/t match center and prior's center - g_cxcy = (matched[:, :2] + matched[:, 2:4]) / 2.0 - priors[:, :2] - # encode variance - g_cxcy /= (variances[0] * priors[:, 2:4]) - # match wh / prior wh - g_wh = (matched[:, 2:4] - matched[:, :2]) / priors[:, 2:4] - g_wh = np.log(np.abs(g_wh) + 1e-4) / variances[1] - # return target for smooth_l1_loss - return np.concatenate([g_cxcy, g_wh, matched[:, 4:]], 1) # [num_priors,4] - - -def decode(predictions, priors, variances): + boxes = matched[:, :4] + boxes = to_center_form(boxes) + center_difference_x = boxes[:, 0:1] - priors[:, 0:1] + encoded_center_x = center_difference_x / priors[:, 2:3] + center_difference_y = boxes[:, 1:2] - priors[:, 1:2] + encoded_center_y = center_difference_y / priors[:, 3:4] + encoded_center_x = encoded_center_x / variances[0] + encoded_center_y = encoded_center_y / variances[1] + encoded_W = np.log((boxes[:, 2:3] / priors[:, 2:3]) + 1e-8) + encoded_H = np.log((boxes[:, 3:4] / priors[:, 3:4]) + 1e-8) + encoded_W = encoded_W / variances[2] + encoded_H = encoded_H / variances[3] + encoded_boxes = [encoded_center_x, encoded_center_y, encoded_W, encoded_H] + return np.concatenate(encoded_boxes + [matched[:, 4:]], axis=1) + + +def decode(predictions, priors, variances=[0.1, 0.1, 0.2, 0.2]): """Decode default boxes into the ground truth boxes # Arguments @@ -125,33 +78,106 @@ def decode(predictions, priors, variances): # Returns decoded boxes: Numpy array of shape `(num_priors, 4)`. """ - - boxes = np.concatenate(( - priors[:, :2] + predictions[:, :2] * variances[0] * priors[:, 2:4], - priors[:, 2:4] * np.exp(predictions[:, 2:4] * variances[1])), 1) - boxes[:, :2] = boxes[:, :2] - (boxes[:, 2:4] / 2.0) - boxes[:, 2:4] = boxes[:, 2:4] + boxes[:, :2] + center_x = predictions[:, 0:1] * priors[:, 2:3] * variances[0] + center_x = center_x + priors[:, 0:1] + center_y = predictions[:, 1:2] * priors[:, 3:4] * variances[1] + center_y = center_y + priors[:, 1:2] + W = priors[:, 2:3] * np.exp(predictions[:, 2:3] * variances[2]) + H = priors[:, 3:4] * np.exp(predictions[:, 3:4] * variances[3]) + boxes = np.concatenate([center_x, center_y, W, H], axis=1) + boxes = to_corner_form(boxes) return np.concatenate([boxes, predictions[:, 4:]], 1) + + +def compute_ious(boxes_A, boxes_B): + """Calculates the intersection over union between `boxes_A` and `boxes_B`. + For each box present in the rows of `boxes_A` it calculates + the intersection over union with respect to all boxes in `boxes_B`. + The variables `boxes_A` and `boxes_B` contain the corner coordinates + of the left-top corner `(x_min, y_min)` and the right-bottom + `(x_max, y_max)` corner. + + # Arguments + boxes_A: Numpy array with shape `(num_boxes_A, 4)`. + boxes_B: Numpy array with shape `(num_boxes_B, 4)`. + + # Returns + Numpy array of shape `(num_boxes_A, num_boxes_B)`. + """ + xy_min = np.maximum(boxes_A[:, None, 0:2], boxes_B[:, 0:2]) + xy_max = np.minimum(boxes_A[:, None, 2:4], boxes_B[:, 2:4]) + intersection = np.maximum(0.0, xy_max - xy_min) + intersection_area = intersection[:, :, 0] * intersection[:, :, 1] + areas_A = (boxes_A[:, 2] - boxes_A[:, 0]) * (boxes_A[:, 3] - boxes_A[:, 1]) + areas_B = (boxes_B[:, 2] - boxes_B[:, 0]) * (boxes_B[:, 3] - boxes_B[:, 1]) + # broadcasting for outer sum i.e. a sum of all possible combinations + union_area = (areas_A[:, np.newaxis] + areas_B) - intersection_area + union_area = np.maximum(union_area, 1e-8) + return np.clip(intersection_area / union_area, 0.0, 1.0) + + +def compute_max_matches(boxes, prior_boxes): + iou_matrix = compute_ious(prior_boxes, boxes) + per_prior_which_box_iou = np.max(iou_matrix, axis=1) + per_prior_which_box_arg = np.argmax(iou_matrix, axis=1) + return per_prior_which_box_iou, per_prior_which_box_arg + + +def get_matches_masks(boxes, prior_boxes, positive_iou=0.5, negative_iou=0.4): + prior_boxes = to_corner_form(prior_boxes) + max_matches = compute_max_matches(boxes, prior_boxes) + per_prior_which_box_iou, per_prior_which_box_arg = max_matches + positive_mask = np.greater_equal(per_prior_which_box_iou, positive_iou) + negative_mask = np.less(per_prior_which_box_iou, negative_iou) + not_ignoring_mask = np.logical_or(positive_mask, negative_mask) + # ignoring mask are all masks not positive or negative + ignoring_mask = np.logical_not(not_ignoring_mask) + return per_prior_which_box_arg, positive_mask, ignoring_mask + + +def mask_classes(boxes, positive_mask, ignoring_mask): + class_indices = boxes[:, 4] + negative_mask = np.not_equal(positive_mask, 1.0) + class_indices = np.where(negative_mask, 0.0, class_indices) + # ignoring_mask = np.equal(ignoring_mask, 1.0) + # class_indices = np.where(ignoring_mask, -1.0, class_indices) + class_indices = np.expand_dims(class_indices, axis=-1) + boxes[:, 4:5] = class_indices return boxes -def reversed_argmax(array, axis): - """Copycat of function torch.max(). In case of multiple occurrences of - the maximum values, the indices corresponding to the last - occurrence are returned. +def match(boxes, prior_boxes, positive_iou=0.5, negative_iou=0.0): + """Matches each prior box with a ground truth box (box from `boxes`). + It then selects which matched box will be considered positive e.g. iou > .5 + and returns for each prior box a ground truth box that is either positive + (with a class argument different than 0) or negative. # Arguments - array: Numpy array. - axis: int, argmax operation along this specified axis. + boxes: Numpy array of shape `(num_ground_truh_boxes, 4 + 1)`, + where the first the first four coordinates correspond to + box coordinates and the last coordinates is the class + argument. This boxes should be the ground truth boxes. + prior_boxes: Numpy array of shape `(num_prior_boxes, 4)`. + where the four coordinates are in center form coordinates. + positive_iou: Float between [0, 1]. Intersection over union + used to determine which box is considered a positive box. + negative_iou: Float between [0, 1]. Intersection over union + used to determine which box is considered a negative box. # Returns - index_array : Numpy array of ints. + numpy array of shape `(num_prior_boxes, 4 + 1)`. + where the first the first four coordinates correspond to point + form box coordinates and the last coordinates is the class + argument. """ - array_flip = np.flip(array, axis=axis) - return array.shape[axis] - np.argmax(array_flip, axis=axis) - 1 + matches = get_matches_masks(boxes, prior_boxes, positive_iou, negative_iou) + per_prior_box_which_box_arg, positive_mask, ignoring_mask = matches + matched_boxes = np.take(boxes, per_prior_box_which_box_arg, axis=0) + matched_boxes = mask_classes(matched_boxes, positive_mask, ignoring_mask) + return matched_boxes -def match(boxes, prior_boxes, iou_threshold=0.5): +def match2(boxes, prior_boxes, iou_threshold=0.5): """Matches each prior box with a ground truth box (box from `boxes`). It then selects which matched box will be considered positive e.g. iou > .5 and returns for each prior box a ground truth box that is either positive @@ -173,23 +199,53 @@ def match(boxes, prior_boxes, iou_threshold=0.5): form box coordinates and the last coordinates is the class argument. """ - ious = compute_ious(boxes, to_point_form(np.float32(prior_boxes))) - best_box_iou_per_prior_box = np.max(ious, axis=0) - - best_box_arg_per_prior_box = reversed_argmax(ious, 0) - best_prior_box_arg_per_box = reversed_argmax(ious, 1) - - best_box_iou_per_prior_box[best_prior_box_arg_per_box] = 2 - # overwriting best_box_arg_per_prior_box if they are the best prior box - for box_arg in range(len(best_prior_box_arg_per_box)): - best_prior_box_arg = best_prior_box_arg_per_box[box_arg] - best_box_arg_per_prior_box[best_prior_box_arg] = box_arg - matches = boxes[best_box_arg_per_prior_box] - # setting class value to 0 (background argument) - matches[best_box_iou_per_prior_box < iou_threshold, 4] = 0 + ious = compute_ious(boxes, to_corner_form(np.float32(prior_boxes))) + per_prior_which_box_iou = np.max(ious, axis=0) + per_prior_which_box_arg = np.argmax(ious, 0) + + # overwriting per_prior_which_box_arg if they are the best prior box + per_box_which_prior_arg = np.argmax(ious, 1) + per_prior_which_box_iou[per_box_which_prior_arg] = 2 + for box_arg in range(len(per_box_which_prior_arg)): + best_prior_box_arg = per_box_which_prior_arg[box_arg] + per_prior_which_box_arg[best_prior_box_arg] = box_arg + + matches = boxes[per_prior_which_box_arg] + matches[per_prior_which_box_iou < iou_threshold, 4] = 0 return matches +def compute_iou(box, boxes): + """Calculates the intersection over union between 'box' and all 'boxes'. + Both `box` and `boxes` are in corner coordinates. + + # Arguments + box: Numpy array with length at least of 4. + boxes: Numpy array with shape `(num_boxes, 4)`. + + # Returns + Numpy array of shape `(num_boxes, 1)`. + """ + + x_min_A, y_min_A, x_max_A, y_max_A = box[:4] + x_min_B, y_min_B = boxes[:, 0], boxes[:, 1] + x_max_B, y_max_B = boxes[:, 2], boxes[:, 3] + # calculating the intersection + inner_x_min = np.maximum(x_min_B, x_min_A) + inner_y_min = np.maximum(y_min_B, y_min_A) + inner_x_max = np.minimum(x_max_B, x_max_A) + inner_y_max = np.minimum(y_max_B, y_max_A) + inner_w = np.maximum((inner_x_max - inner_x_min), 0) + inner_h = np.maximum((inner_y_max - inner_y_min), 0) + intersection_area = inner_w * inner_h + # calculating the union + box_area_B = (x_max_B - x_min_B) * (y_max_B - y_min_B) + box_area_A = (x_max_A - x_min_A) * (y_max_A - y_min_A) + union_area = box_area_A + box_area_B - intersection_area + intersection_over_union = intersection_area / union_area + return intersection_over_union + + def apply_non_max_suppression(boxes, scores, iou_thresh=.45, top_k=200): """Apply non maximum suppression. diff --git a/paz/pipelines/detection.py b/paz/pipelines/detection.py index 98a917296..aa81bfff2 100644 --- a/paz/pipelines/detection.py +++ b/paz/pipelines/detection.py @@ -60,7 +60,8 @@ class AugmentDetection(SequentialProcessor): for encoding bounding boxes. """ def __init__(self, prior_boxes, split=pr.TRAIN, num_classes=21, size=300, - mean=pr.BGR_IMAGENET_MEAN, IOU=.5, variances=[.1, .2]): + mean=pr.BGR_IMAGENET_MEAN, IOU=.5, + variances=[0.1, 0.1, 0.2, 0.2]): super(AugmentDetection, self).__init__() # image processors self.augment_image = AugmentImage() @@ -97,11 +98,13 @@ class DetectSingleShot(Processor): draw: Boolean. If ``True`` prediction are drawn in the returned image. """ def __init__(self, model, class_names, score_thresh, nms_thresh, - mean=pr.BGR_IMAGENET_MEAN, draw=True): + mean=pr.BGR_IMAGENET_MEAN, variances=[0.1, 0.1, 0.2, 0.2], + draw=True): self.model = model self.class_names = class_names self.score_thresh = score_thresh self.nms_thresh = nms_thresh + self.variances = variances self.draw = draw super(DetectSingleShot, self).__init__() @@ -113,7 +116,7 @@ def __init__(self, model, class_names, score_thresh, nms_thresh, pr.ExpandDims(axis=0)]) postprocessing = SequentialProcessor( [pr.Squeeze(axis=None), - pr.DecodeBoxes(self.model.prior_boxes, variances=[.1, .2]), + pr.DecodeBoxes(self.model.prior_boxes, self.variances), pr.NonMaximumSuppressionPerClass(self.nms_thresh), pr.FilterBoxes(self.class_names, self.score_thresh)]) self.predict = pr.Predict(self.model, preprocessing, postprocessing) diff --git a/paz/processors/detection.py b/paz/processors/detection.py index f2ce0f559..1eaf19b3f 100644 --- a/paz/processors/detection.py +++ b/paz/processors/detection.py @@ -169,7 +169,7 @@ class EncodeBoxes(Processor): prior_boxes: Numpy array of shape (num_boxes, 4). variances: List of two float values. """ - def __init__(self, prior_boxes, variances=[.1, .2]): + def __init__(self, prior_boxes, variances=[0.1, 0.1, 0.2, 0.2]): self.prior_boxes = prior_boxes self.variances = variances super(EncodeBoxes, self).__init__() @@ -186,7 +186,7 @@ class DecodeBoxes(Processor): prior_boxes: Numpy array of shape (num_boxes, 4). variances: List of two float values. """ - def __init__(self, prior_boxes, variances=[.1, .2]): + def __init__(self, prior_boxes, variances=[0.1, 0.1, 0.2, 0.2]): self.prior_boxes = prior_boxes self.variances = variances super(DecodeBoxes, self).__init__() diff --git a/tests/paz/backend/numpy_ops_test.py b/tests/paz/backend/boxes_test.py similarity index 93% rename from tests/paz/backend/numpy_ops_test.py rename to tests/paz/backend/boxes_test.py index 814155534..d2673d948 100644 --- a/tests/paz/backend/numpy_ops_test.py +++ b/tests/paz/backend/boxes_test.py @@ -4,7 +4,7 @@ from paz.backend.boxes import compute_iou from paz.backend.boxes import compute_ious from paz.backend.boxes import denormalize_box -from paz.backend.boxes import to_point_form +from paz.backend.boxes import to_corner_form from paz.backend.boxes import to_center_form from paz.backend.quaternion import rotation_vector_to_quaternion from paz.backend.boxes import encode @@ -53,7 +53,8 @@ def boxes_with_label(): @pytest.fixture def target_unique_matches(): - return np.array([[238., 155., 306., 204.]]) + # return np.array([[238., 155., 306., 204.]]) + return np.array([[47.0, 239.0, 194.0, 370.0]]) @pytest.fixture @@ -120,18 +121,18 @@ def test_denormalize_box(box): def test_to_center_form_inverse(boxes): box_A = boxes[0] - assert np.all(to_point_form(to_center_form(box_A)) == box_A) + assert np.all(to_corner_form(to_center_form(box_A)) == box_A) -def test_to_point_form_inverse(boxes): +def test_to_corner_form_inverse(boxes): box_A = boxes[0] - assert np.all(to_point_form(to_center_form(box_A)) == box_A) + assert np.all(to_corner_form(to_center_form(box_A)) == box_A) def test_to_center_form(boxes): box_A = boxes[0] boxes = to_center_form(box_A) - boxes_A_result = to_point_form(boxes) + boxes_A_result = to_corner_form(boxes) assert(boxes_A_result.all() == box_A.all()) @@ -149,7 +150,7 @@ def test_match_box(boxes_with_label, target_unique_matches): def test_to_encode(boxes_with_label): priors = create_prior_boxes('VOC') matches = match(boxes_with_label, priors) - variances = [.1, .2] + variances = [0.1, 0.1, 0.2, 0.2] encoded_boxes = encode(matches, priors, variances) decoded_boxes = decode(encoded_boxes, priors, variances) assert np.all(np.round(decoded_boxes) == matches) @@ -158,7 +159,7 @@ def test_to_encode(boxes_with_label): def test_to_decode(boxes_with_label): priors = create_prior_boxes('VOC') matches = match(boxes_with_label, priors) - variances = [.1, .2] + variances = [0.1, 0.1, 0.2, 0.2] encoded_boxes = encode(matches, priors, variances) decoded_boxes = decode(encoded_boxes, priors, variances) assert np.all(np.round(decoded_boxes) == matches)