def generate_anchors(image_shape, scales=(1/2, 1, 2), ratios=(1/2, 1, 2), stride=32, base_size=64):
    '''
    Build the grid of anchor boxes for an image, in XYXY format.

    One set of len(scales) * len(ratios) anchors is placed every `stride`
    pixels, centred on (column * stride, row * stride). For a given scale and
    ratio the anchor has area base_size**2 * scale and width / height == ratio.

    image_shape: sequence whose first two entries are (height, width)
    scales:      multipliers applied to the base anchor *area*
    ratios:      width / height aspect ratios
    stride:      distance in pixels between neighbouring anchor centres
    base_size:   side length of the scale == 1, ratio == 1 anchor

    return shape: (height // stride, width // stride, len(scales) * len(ratios), 4)
    Anchors are ordered scale-major along axis 2: the anchor for
    (scale_idx, ratio_idx) lives at index scale_idx * len(ratios) + ratio_idx.
    '''
    height, width = image_shape[0], image_shape[1]
    rows, cols = height // stride, width // stride

    scales = np.asarray(scales, dtype=float)
    ratios = np.asarray(ratios, dtype=float)

    # (num_scales, num_ratios) tables of anchor sizes, flattened scale-major.
    # The original indexed slots with len(scales) * scale_idx, which collides
    # or overflows whenever len(scales) != len(ratios).
    areas = base_size * base_size * scales
    widths = np.sqrt(ratios[np.newaxis, :] * areas[:, np.newaxis])
    heights = areas[:, np.newaxis] / widths
    half_widths = widths.ravel() / 2
    half_heights = heights.ravel() / 2

    # Anchor centres, broadcast against the per-anchor half extents.
    centers_x = (np.arange(cols) * stride)[np.newaxis, :, np.newaxis]
    centers_y = (np.arange(rows) * stride)[:, np.newaxis, np.newaxis]

    anchors = np.empty((rows, cols, half_widths.size, 4))
    anchors[..., 0] = centers_x - half_widths
    anchors[..., 1] = centers_y - half_heights
    anchors[..., 2] = centers_x + half_widths
    anchors[..., 3] = centers_y + half_heights
    return anchors
def classify_anchors(ground_truths, anchors, positive_iou_threshold=0.7, negative_iou_threshold=0.3):
    '''
    Label every anchor as positive, negative or ignored.

    ground_truths: np array, XYXY format, shape (n, 4)
    anchors: np array, XYXY format, shape (num_anchors, 4)

    An anchor is positive when
      (i)  it is the best-overlapping anchor of some ground-truth box, or
      (ii) its IoU with any ground-truth box is >= positive_iou_threshold.
    Otherwise it is negative when its best IoU is <= negative_iou_threshold,
    and ignored in between. With no ground-truth boxes every anchor is
    treated as having IoU 0.

    returns: float array of shape (num_anchors,) holding
        -1 for negative
         0 for ignored
         1 for positive
    '''
    num_anchors = anchors.shape[0]
    anchor_classes = np.zeros((num_anchors,))

    if len(ground_truths) == 0 or num_anchors == 0:
        # max/argmax over an empty axis raises, so spell the result out.
        best_iou_per_anchor = np.zeros((num_anchors,))
        best_anchor_per_ground_truth = np.zeros((0,), dtype=int)
    else:
        ious = IoUTensorial(ground_truths, anchors, format='XYXY')  # (n, num_anchors)
        best_iou_per_anchor = ious.max(axis=0)
        best_anchor_per_ground_truth = ious.argmax(axis=1)

    # Order matters: the positive rules override the negative label.
    anchor_classes[best_iou_per_anchor <= negative_iou_threshold] = -1
    # (i)
    anchor_classes[best_anchor_per_ground_truth] = 1
    # (ii)
    anchor_classes[best_iou_per_anchor >= positive_iou_threshold] = 1
    return anchor_classes


def generate_minibatch_mask(anchors, ground_truths, batch_size=256, positives_ratio=0.5, positive_iou_threshold=0.7, negative_iou_threshold=0.3):
    '''
    Randomly sample a training minibatch of anchors.

    anchors: np array, XYXY format, shape (feature_map_height, feature_map_width, num_anchors, 4)
    ground_truths: np array, XYXY format, shape (n, 4)

    At most batch_size * positives_ratio positive anchors are drawn; the rest
    of the batch is filled with negative anchors (fewer if not enough exist).

    returns: (mask, positive_indices, negative_indices)
        mask: array shaped like anchors without the last axis;
              1 for sampled positives, -1 for sampled negatives, 0 elsewhere
        positive_indices / negative_indices: (k, 3) integer index arrays of
              the sampled anchors
    '''
    grid_shape = anchors.shape[:3]
    anchor_classes = classify_anchors(
        ground_truths, anchors.reshape(-1, 4), positive_iou_threshold, negative_iou_threshold
    ).reshape(grid_shape)

    n_positives = int(min((anchor_classes == 1).sum(), batch_size * positives_ratio))
    n_negatives = batch_size - n_positives

    positives_indices = np.argwhere(anchor_classes == 1)
    np.random.shuffle(positives_indices)
    positives_indices_batch = positives_indices[:n_positives]

    negatives_indices = np.argwhere(anchor_classes == -1)
    np.random.shuffle(negatives_indices)
    negatives_indices_batch = negatives_indices[:n_negatives]

    anchors_batch_indices = np.zeros(grid_shape)
    # A multi-dimensional index must be a tuple of per-axis index arrays;
    # recent NumPy treats a list as one array index instead.
    anchors_batch_indices[tuple(positives_indices_batch.T)] = 1
    anchors_batch_indices[tuple(negatives_indices_batch.T)] = -1
    return anchors_batch_indices, positives_indices_batch, negatives_indices_batch
class AnchorsTest(unittest.TestCase):
    '''
    Tests for anchor generation, classification and minibatch sampling.

    Every test makes plain assertions. Set the ANCHORS_TESTS_DEBUG environment
    variable to a non-empty value other than "0" to additionally draw the
    anchors in an OpenCV window (blocks until a key is pressed).
    '''

    # Visual inspection is opt-in so automated runs never block on cv2.waitKey.
    DEBUG = os.environ.get('ANCHORS_TESTS_DEBUG', '') not in ('', '0')

    @staticmethod
    def _make_scene():
        # White 500x500 image with two black 60x60 ground-truth boxes (XYXY).
        image = np.ones((500, 500, 3))
        box_size = 60
        bounding_boxes = np.array([
            [100, 100, 100 + box_size, 100 + box_size],
            [300, 300, 300 + box_size, 300 + box_size],
        ])
        for box in bounding_boxes:
            image[box[1]:box[3], box[0]:box[2]] = 0
        return image, bounding_boxes

    @staticmethod
    def _draw_boxes(image, boxes, color):
        # Draw each XYXY box as a 1px rectangle outline.
        for box in boxes:
            cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 1)

    @staticmethod
    def _show(image):
        # Display the image and wait for a key press.
        cv2.imshow('anchors', image)
        cv2.waitKey(0)

    def test_generate_anchors(self):
        scales = [1/2, 2]
        anchors = generate_anchors((500, 500, 3), scales=scales, base_size=32, stride=32)
        # generate_anchors' default `ratios` has three entries.
        self.assertEqual(anchors.shape, (500 // 32, 500 // 32, len(scales) * 3, 4))

        if self.DEBUG:
            image = cv2.imread(os.path.join('images', '1.jpg'))
            anchors = generate_anchors(image.shape, scales=scales, base_size=32, stride=32)
            print(anchors.shape)
            self._draw_boxes(image, anchors.reshape(-1, 4), (0, 0, 255))
            self._show(image)

    def test_classify_anchors(self):
        image, bounding_boxes = self._make_scene()
        anchors = generate_anchors(image.shape).reshape(-1, 4)
        anchors_classes = classify_anchors(bounding_boxes, anchors)

        self.assertEqual(anchors_classes.shape, (anchors.shape[0],))
        self.assertTrue(np.isin(anchors_classes, [-1, 0, 1]).all())
        # The best-overlapping anchor of a ground-truth box is always positive.
        self.assertGreaterEqual((anchors_classes == 1).sum(), 1)

        if self.DEBUG:
            self._draw_boxes(image, anchors[anchors_classes == -1], (0, 0, 255))
            self._draw_boxes(image, anchors[anchors_classes == 0], (255, 0, 0))
            self._draw_boxes(image, anchors[anchors_classes == 1], (0, 255, 0))
            self._show(image)

    def test_generate_minibatch(self):
        batch_size = 64
        image, bounding_boxes = self._make_scene()
        anchors = generate_anchors(image.shape)
        anchors_batch_indices, positives, negatives = generate_minibatch_mask(
            anchors, bounding_boxes, batch_size=batch_size)

        self.assertEqual(anchors_batch_indices.shape, anchors.shape[:3])
        # Default positives_ratio is 0.5.
        self.assertLessEqual(len(positives), batch_size // 2)
        self.assertLessEqual(len(positives) + len(negatives), batch_size)
        self.assertEqual((anchors_batch_indices == 1).sum(), len(positives))
        self.assertEqual((anchors_batch_indices == -1).sum(), len(negatives))

        if self.DEBUG:
            anchors = anchors.reshape(-1, 4)
            anchors_indices = anchors_batch_indices.reshape(-1,)
            self._draw_boxes(image, anchors[anchors_indices == -1, :], (0, 0, 255))
            self._draw_boxes(image, anchors[anchors_indices == 1, :], (0, 255, 0))
            self._show(image)
def IoUTensorial(A: np.ndarray, B: np.ndarray, format: str = 'XYWH') -> np.ndarray:
    '''
    Computes the Intersection over Union (IoU) of the rectangles in A vs those in B.

    Rectangles are all in format (left, top, width, height) ('XYWH') or all in
    format (left, top, right, bottom) ('XYXY'). Coordinates are inclusive pixel
    indices, so an XYXY box (0, 0, 9, 9) is 10 pixels wide and has area 100.

    A - array of shape (|A|, 4) containing rectangles
    B - array of shape (|B|, 4) containing rectangles
    format - 'XYWH' or 'XYXY'

    Returns a float array of shape (|A|, |B|) containing the IoU of each
    rectangle pair (a, b), where a is in A and b is in B. The inputs are not
    modified. If A or B is empty the result is an all-zero array of that shape.
    '''
    if A.shape[0] == 0 or B.shape[0] == 0:
        # np.float was removed from NumPy; float64 is the same dtype.
        return np.zeros((A.shape[0], B.shape[0]), dtype=np.float64)

    assert A.shape[1] == 4 and B.shape[1] == 4
    assert format in ['XYWH', 'XYXY']

    # Work on float copies so the caller's data is never altered.
    A = np.array(A, dtype=np.float64)
    B = np.array(B, dtype=np.float64)

    if format == 'XYWH':
        # Areas come straight from width and height ...
        A_areas = A[:, 2] * A[:, 3]
        B_areas = B[:, 2] * B[:, 3]
        # ... then convert to inclusive (left, top, right, bottom).
        A[:, 2:4] = A[:, 0:2] + A[:, 2:4] - 1
        B[:, 2:4] = B[:, 0:2] + B[:, 2:4] - 1
    else:
        A_areas = (A[:, 2] - A[:, 0] + 1) * (A[:, 3] - A[:, 1] + 1)
        B_areas = (B[:, 2] - B[:, 0] + 1) * (B[:, 3] - B[:, 1] + 1)

    # Broadcast every box of A against every box of B: shape (|A|, |B|, 2).
    upper_left = np.maximum(A[:, np.newaxis, 0:2], B[np.newaxis, :, 0:2])
    bottom_right = np.minimum(A[:, np.newaxis, 2:4], B[np.newaxis, :, 2:4])

    # Width/height of the intersection; disjoint boxes clamp to zero.
    extent = np.maximum(bottom_right - upper_left + 1, 0)
    intersection = extent[:, :, 0] * extent[:, :, 1]
    union = A_areas[:, np.newaxis] + B_areas[np.newaxis, :] - intersection
    return intersection / union
class RegionProposalNetwork(keras.Model):
    '''
    Region proposal head on top of a convolutional backbone.

    The backbone's feature map goes through a 3x3 convolution and then two
    1x1 convolutions: a 4-channel box regression and a 1-channel objectness
    score.
    '''

    def __init__(self, backbone, scales, ratios):
        '''
        backbone: callable (e.g. a keras model) mapping an image batch to a feature map
        scales:   anchor scales, forwarded to generate_anchors
        ratios:   anchor aspect ratios, forwarded to generate_anchors
        '''
        super(RegionProposalNetwork, self).__init__()
        # hard-coded parameters (for now)
        self.stride = 32
        self.base_anchor_size = 64
        self.positive_iou_threshold = 0.7
        self.negative_iou_threshold = 0.3
        self.batch_size = 256
        self.positives_ratio = 0.5
        self.max_number_of_predictions = 400
        # parameters
        self.backbone = backbone
        self.scales = scales
        self.ratios = ratios

        # layers
        self.image = tf.placeholder(dtype=float, shape=(None, None, None, 3), name='image')
        self.ground_truth_boxes = tf.placeholder(dtype=float, shape=(None, None, 4), name='ground_truth_boxes')

        self.conv1 = keras.layers.Conv2D(filters=256, kernel_size=3, activation='relu')
        # NOTE(review): both heads emit one box/score per location, while
        # generate_anchors yields len(scales) * len(ratios) anchors per
        # location - confirm the intended output depth.
        self.box_regression = keras.layers.Conv2D(4, 1)
        # Softmax over a single channel is constantly 1.0; a one-channel
        # objectness score needs a sigmoid.
        self.box_classification = keras.layers.Conv2D(1, 1, activation='sigmoid')

    def call(self, input, training=False):
        '''
        Run the backbone and both heads.

        returns: (output_regression, output_classification)
        '''
        x = self.backbone(input)
        x = self.conv1(x)
        output_regression = self.box_regression(x)
        output_classification = self.box_classification(x)

        return output_regression, output_classification

    def build_loss(self, ground_truths, predictions):
        '''
        Not implemented yet; currently returns None.

        Planned steps:
          - generate anchors
          - assign anchors to predictions
          - build minibatch
          - apply loss to minibatch
        '''
        pass

    def generate_minibatch(self, image=None, ground_truths=None):
        '''
        Sample a training minibatch of anchors for one image.

        image:         array with a .shape of (height, width, ...)
        ground_truths: np array of ground-truth boxes, XYXY format, shape (n, 4)

        returns: the (mask, positive_indices, negative_indices) tuple produced
                 by generate_minibatch_mask
        raises:  ValueError if image or ground_truths is not supplied
        '''
        if image is None or ground_truths is None:
            raise ValueError('generate_minibatch requires both image and ground_truths')
        anchors = generate_anchors(
            image.shape,
            scales=self.scales,
            ratios=self.ratios,
            stride=self.stride,
            base_size=self.base_anchor_size,
        )
        return generate_minibatch_mask(
            anchors,
            ground_truths,
            batch_size=self.batch_size,
            positives_ratio=self.positives_ratio,
            positive_iou_threshold=self.positive_iou_threshold,
            negative_iou_threshold=self.negative_iou_threshold,
        )
class RpnTest(unittest.TestCase):
    '''Smoke test: a RegionProposalNetwork can be built on a frozen ResNet50.'''

    def test_rpn(self):
        # Building ResNet50 is heavy, so the body stays behind a switch.
        DEBUG = True
        if not DEBUG:
            return

        # NOTE(review): rpn_builder defines RegionProposalNetwork, not
        # build_rpn, and anchors defines generate_minibatch_mask, not
        # generate_minibatch - the module-level imports need the same fix.
        from rpn_builder import RegionProposalNetwork

        scales = [1]
        ratios = [1]

        image_shape = (512, 512, 3)
        input_tensor = tf.placeholder(dtype=float, shape=(None,) + image_shape)
        base_model = keras.applications.ResNet50(input_tensor=input_tensor, include_top=False)
        base_model.trainable = False

        rpn = RegionProposalNetwork(base_model, scales, ratios)
        self.assertIsInstance(rpn, keras.Model)