diff --git a/mrcnn/utils.py b/mrcnn/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5195ae0852affdd38ae4194e15e48aa1f9adcf14
--- /dev/null
+++ b/mrcnn/utils.py
@@ -0,0 +1,927 @@
+"""
+MRCNN Particle Detection
+Common utility functions and classes.
+
+The source code of "MRCNN Particle Detection" (https://git.rwth-aachen.de/avt-fvt/private/mrcnn-particle-detection)
+is based on the source code of "Mask R-CNN" (https://github.com/matterport/Mask_RCNN).
+
+The source code of "Mask R-CNN" is licensed under the MIT License (MIT).
+Copyright (c) 2017 Matterport, Inc.
+Written by Waleed Abdulla
+
+All source code modifications to the source code of "Mask R-CNN" in "MRCNN Particle Detection"
+are licensed under the Eclipse Public License v2.0 (EPL 2.0).
+Copyright (c) 2022-2023 Fluid Process Engineering (AVT.FVT), RWTH Aachen University
+Edited by Stepan Sibirtsev, Mathias Neufang & Jakob Seiler
+
+The copyrights and license terms are given in LICENSE.
+
+Ideas and small code snippets were adapted from this source:
+https://github.com/mat02/Mask_RCNN
+"""
+
+import sys
+import os
+import logging
+import math
+import random
+import numpy as np
+import tensorflow as tf
+import scipy
+import skimage.color
+import skimage.io
+import skimage.transform
+import urllib.request
+import shutil
+import warnings
+from distutils.version import LooseVersion
+
+# URL from which to download the latest COCO trained weights
+COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
+
+
+############################################################
+# Bounding Boxes
+############################################################
+
+def extract_bboxes(mask):
+    """Compute bounding boxes from masks.
+    mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
+
+    Returns: bbox array [num_instances, (y1, x1, y2, x2)].
+    """
+    boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
+    for i in range(mask.shape[-1]):
+        m = mask[:, :, i]
+        # Bounding box.
+        horizontal_indices = np.where(np.any(m, axis=0))[0]
+        vertical_indices = np.where(np.any(m, axis=1))[0]
+        if horizontal_indices.shape[0]:
+            x1, x2 = horizontal_indices[[0, -1]]
+            y1, y2 = vertical_indices[[0, -1]]
+            # x2 and y2 should not be part of the box. Increment by 1.
+            x2 += 1
+            y2 += 1
+        else:
+            # No mask for this instance. Might happen due to
+            # resizing or cropping. Set bbox to zeros.
+            x1, x2, y1, y2 = 0, 0, 0, 0
+        boxes[i] = np.array([y1, x1, y2, x2])
+    return boxes.astype(np.int32)
+
+
+def compute_iou(box, boxes, box_area, boxes_area):
+    """Calculates IoU of the given box with the array of the given boxes.
+    box: 1D vector [y1, x1, y2, x2]
+    boxes: [boxes_count, (y1, x1, y2, x2)]
+    box_area: float. the area of 'box'
+    boxes_area: array of length boxes_count.
+
+    Note: the areas are passed in rather than calculated here for
+    efficiency. Calculate once in the caller to avoid duplicate work.
+    """
+    # Calculate intersection areas
+    y1 = np.maximum(box[0], boxes[:, 0])
+    y2 = np.minimum(box[2], boxes[:, 2])
+    x1 = np.maximum(box[1], boxes[:, 1])
+    x2 = np.minimum(box[3], boxes[:, 3])
+    intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
+    union = box_area + boxes_area - intersection
+    iou = intersection / union
+    return iou
+
+
+def compute_overlaps(boxes1, boxes2):
+    """Computes IoU overlaps between two sets of boxes.
+    boxes1, boxes2: [N, (y1, x1, y2, x2)].
+
+    For better performance, pass the largest set first and the smaller second.
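+
+    Illustrative example (values assumed here for clarity, not from the
+    original code):
+        boxes1 = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
+        boxes2 = np.array([[0, 0, 10, 10]])
+        compute_overlaps(boxes1, boxes2)
+        # -> [[1.0], [~0.143]]; the second box intersects boxes2[0] in a
+        #    5x5 patch, so IoU = 25 / (100 + 100 - 25).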
+ """ + # Areas of anchors and GT boxes + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + + # Compute overlaps to generate matrix [boxes1 count, boxes2 count] + # Each cell contains the IoU value. + overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) + for i in range(overlaps.shape[1]): + box2 = boxes2[i] + overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) + return overlaps + + +def compute_overlaps_masks(masks1, masks2): + """Computes IoU overlaps between two sets of masks. + masks1, masks2: [Height, Width, instances] + """ + + # If either set of masks is empty return empty result + if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: + return np.zeros((masks1.shape[-1], masks2.shape[-1])) + # flatten masks and compute their areas + masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) + masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) + area1 = np.sum(masks1, axis=0) + area2 = np.sum(masks2, axis=0) + + # intersections and union + intersections = np.dot(masks1.T, masks2) + union = area1[:, None] + area2[None, :] - intersections + overlaps = intersections / union + + return overlaps + + +def non_max_suppression(boxes, scores, threshold): + """Performs non-maximum suppression and returns indices of kept boxes. + boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. + scores: 1-D array of box scores. + threshold: Float. IoU threshold to use for filtering. + """ + assert boxes.shape[0] > 0 + if boxes.dtype.kind != "f": + boxes = boxes.astype(np.float32) + + # Compute box areas + y1 = boxes[:, 0] + x1 = boxes[:, 1] + y2 = boxes[:, 2] + x2 = boxes[:, 3] + area = (y2 - y1) * (x2 - x1) + + # Get indicies of boxes sorted by scores (highest first) + ixs = scores.argsort()[::-1] + + pick = [] + while len(ixs) > 0: + # Pick top box and add its index to the list + i = ixs[0] + pick.append(i) + # Compute IoU of the picked box with the rest + iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) + # Identify boxes with IoU over the threshold. This + # returns indices into ixs[1:], so add 1 to get + # indices into ixs. + remove_ixs = np.where(iou > threshold)[0] + 1 + # Remove indices of the picked and overlapped boxes. + ixs = np.delete(ixs, remove_ixs) + ixs = np.delete(ixs, 0) + return np.array(pick, dtype=np.int32) + + +def apply_box_deltas(boxes, deltas): + """Applies the given deltas to the given boxes. + boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. + deltas: [N, (dy, dx, log(dh), log(dw))] + """ + boxes = boxes.astype(np.float32) + # Convert to y, x, h, w + height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + center_y = boxes[:, 0] + 0.5 * height + center_x = boxes[:, 1] + 0.5 * width + # Apply deltas + center_y += deltas[:, 0] * height + center_x += deltas[:, 1] * width + height *= np.exp(deltas[:, 2]) + width *= np.exp(deltas[:, 3]) + # Convert back to y1, x1, y2, x2 + y1 = center_y - 0.5 * height + x1 = center_x - 0.5 * width + y2 = y1 + height + x2 = x1 + width + return np.stack([y1, x1, y2, x2], axis=1) + + +def box_refinement_graph(box, gt_box): + """Compute refinement needed to transform box to gt_box. 
+    box and gt_box are [N, (y1, x1, y2, x2)]
+    """
+    box = tf.cast(box, tf.float32)
+    gt_box = tf.cast(gt_box, tf.float32)
+
+    height = box[:, 2] - box[:, 0]
+    width = box[:, 3] - box[:, 1]
+    center_y = box[:, 0] + 0.5 * height
+    center_x = box[:, 1] + 0.5 * width
+
+    gt_height = gt_box[:, 2] - gt_box[:, 0]
+    gt_width = gt_box[:, 3] - gt_box[:, 1]
+    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
+    gt_center_x = gt_box[:, 1] + 0.5 * gt_width
+
+    dy = (gt_center_y - center_y) / height
+    dx = (gt_center_x - center_x) / width
+    dh = tf.log(gt_height / height)
+    dw = tf.log(gt_width / width)
+
+    result = tf.stack([dy, dx, dh, dw], axis=1)
+    return result
+
+
+def box_refinement(box, gt_box):
+    """Compute the refinement needed to transform box to gt_box.
+    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
+    assumed to be outside the box.
+    """
+    box = box.astype(np.float32)
+    gt_box = gt_box.astype(np.float32)
+
+    height = box[:, 2] - box[:, 0]
+    width = box[:, 3] - box[:, 1]
+    center_y = box[:, 0] + 0.5 * height
+    center_x = box[:, 1] + 0.5 * width
+
+    gt_height = gt_box[:, 2] - gt_box[:, 0]
+    gt_width = gt_box[:, 3] - gt_box[:, 1]
+    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
+    gt_center_x = gt_box[:, 1] + 0.5 * gt_width
+
+    dy = (gt_center_y - center_y) / height
+    dx = (gt_center_x - center_x) / width
+    dh = np.log(gt_height / height)
+    dw = np.log(gt_width / width)
+
+    return np.stack([dy, dx, dh, dw], axis=1)
+
+
+############################################################
+# Dataset
+############################################################
+
+class Dataset(object):
+    """The base class for dataset classes.
+    To use it, create a new class that adds functions specific to the dataset
+    you want to use. For example:
+
+    class CatsAndDogsDataset(Dataset):
+        def load_cats_and_dogs(self):
+            ...
+        def load_mask(self, image_id):
+            ...
+        def image_reference(self, image_id):
+            ...
+
+    See COCODataset and ShapesDataset as examples.
+    """
+
+    def __init__(self, class_map=None):
+        self._image_ids = []
+        self.image_info = []
+        # Background is always the first class
+        self.class_info = [{"source": "", "id": 0, "name": "BG"}]
+        self.source_class_ids = {}
+
+    def add_class(self, source, class_id, class_name):
+        assert "." not in source, "Source name cannot contain a dot"
+        # Does the class exist already?
+        for info in self.class_info:
+            if info['source'] == source and info["id"] == class_id:
+                # source.class_id combination already available, skip
+                return
+        # Add the class
+        self.class_info.append({
+            "source": source,
+            "id": class_id,
+            "name": class_name,
+        })
+
+    def add_image(self, source, image_id, path, **kwargs):
+        image_info = {
+            "id": image_id,
+            "source": source,
+            "path": path,
+        }
+        image_info.update(kwargs)
+        self.image_info.append(image_info)
+
+    def image_reference(self, image_id):
+        """Return a link to the image in its source website, or details about
+        the image that help in looking it up or debugging it.
+
+        Override for your dataset, but pass to this function
+        if you encounter images not in your dataset.
+        """
+        return ""
+
+    def prepare(self, class_map=None):
+        """Prepares the Dataset class for use.
+
+        TODO: class map is not supported yet. When done, it should handle mapping
+        classes from different datasets to the same class ID.
+        """
+
+        def clean_name(name):
+            """Returns a shorter version of object names for cleaner display."""
+            return ",".join(name.split(",")[:1])
+
+        # Build (or rebuild) everything else from the info dicts.
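+        # For example, a class registered with add_class("coco", 12, "cat")
+        # ("cat" is just an illustrative name) becomes reachable as
+        # self.class_from_source_map["coco.12"] once prepare() has run.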
+        self.num_classes = len(self.class_info)
+        self.class_ids = np.arange(self.num_classes)
+        self.class_names = [clean_name(c["name"]) for c in self.class_info]
+        self.num_images = len(self.image_info)
+        self._image_ids = np.arange(self.num_images)
+
+        # Mapping from source class and image IDs to internal IDs
+        self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
+                                      for info, id in zip(self.class_info, self.class_ids)}
+        self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
+                                      for info, id in zip(self.image_info, self.image_ids)}
+
+        # Map sources to class_ids they support
+        self.sources = list(set([i['source'] for i in self.class_info]))
+        self.source_class_ids = {}
+        # Loop over datasets
+        for source in self.sources:
+            self.source_class_ids[source] = []
+            # Find classes that belong to this dataset
+            for i, info in enumerate(self.class_info):
+                # Include BG class in all datasets
+                if i == 0 or source == info['source']:
+                    self.source_class_ids[source].append(i)
+
+    def map_source_class_id(self, source_class_id):
+        """Takes a source class ID and returns the int class ID assigned to it.
+
+        For example:
+        dataset.map_source_class_id("coco.12") -> 23
+        """
+        return self.class_from_source_map[source_class_id]
+
+    def get_source_class_id(self, class_id, source):
+        """Map an internal class ID to the corresponding class ID in the source dataset."""
+        info = self.class_info[class_id]
+        assert info['source'] == source
+        return info['id']
+
+    @property
+    def image_ids(self):
+        return self._image_ids
+
+    def source_image_link(self, image_id):
+        """Returns the path or URL to the image.
+        Override this to return a URL to the image if it's available online for easy
+        debugging.
+        """
+        return self.image_info[image_id]["path"]
+
+    def load_image(self, image_id):
+        """Load the specified image and return a [H,W,3] Numpy array.
+        """
+        # Load image
+        image = skimage.io.imread(self.image_info[image_id]['path'])
+        # If grayscale, convert to RGB for consistency.
+        if image.ndim != 3:
+            image = skimage.color.gray2rgb(image)
+        # If the image has an alpha channel, remove it for consistency
+        if image.shape[-1] == 4:
+            image = image[..., :3]
+        return image
+
+    def load_mask(self, image_id):
+        """Load instance masks for the given image.
+
+        Different datasets use different ways to store masks. Override this
+        method to load instance masks and return them in the form of an
+        array of binary masks of shape [height, width, instances].
+
+        Returns:
+            masks: A bool array of shape [height, width, instance count] with
+                a binary mask per instance.
+            class_ids: a 1D array of class IDs of the instance masks.
+        """
+        # Override this function to load a mask from your dataset.
+        # Otherwise, it returns an empty mask.
+        logging.warning("You are using the default load_mask(); you may need to define your own.")
+        mask = np.empty([0, 0, 0])
+        class_ids = np.empty([0], np.int32)
+        return mask, class_ids
+
+
+def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
+    """Resizes an image keeping the aspect ratio unchanged.
+
+    min_dim: if provided, resizes the image such that its smaller
+        dimension == min_dim
+    max_dim: if provided, ensures that the image's longest side doesn't
+        exceed this value.
+    min_scale: if provided, ensures that the image is scaled up by at least
+        this percent even if min_dim doesn't require it.
+    mode: Resizing mode.
+        none: No resizing. Return the image unchanged.
+ square: Resize and pad with zeros to get a square image + of size [max_dim, max_dim]. + pad64: Pads width and height with zeros to make them multiples of 64. + If min_dim or min_scale are provided, it scales the image up + before padding. max_dim is ignored in this mode. + The multiple of 64 is needed to ensure smooth scaling of feature + maps up and down the 6 levels of the FPN pyramid (2**6=64). + crop: Picks random crops from the image. First, scales the image based + on min_dim and min_scale, then picks a random crop of + size min_dim x min_dim. Can be used in training only. + max_dim is not used in this mode. + + Returns: + image: the resized image + window: (y1, x1, y2, x2). If max_dim is provided, padding might + be inserted in the returned image. If so, this window is the + coordinates of the image part of the full image (excluding + the padding). The x2, y2 pixels are not included. + scale: The scale factor used to resize the image + padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] + """ + # Keep track of image dtype and return results in the same dtype + image_dtype = image.dtype + # Default window (y1, x1, y2, x2) and default scale == 1. + h, w = image.shape[:2] + window = (0, 0, h, w) + scale = 1 + padding = [(0, 0), (0, 0), (0, 0)] + crop = None + + if mode == "none": + return image, window, scale, padding, crop + + # Scale? + if min_dim: + # Scale up but not down + scale = max(1, min_dim / min(h, w)) + if min_scale and scale < min_scale: + scale = min_scale + + # Does it exceed max dim? + if max_dim and mode == "square": + image_max = max(h, w) + if round(image_max * scale) > max_dim: + scale = max_dim / image_max + + # Resize image using bilinear interpolation + if scale != 1: + image = resize(image, (round(h * scale), round(w * scale)), + preserve_range=True) + + # Need padding or cropping? + if mode == "square": + # Get new height and width + h, w = image.shape[:2] + top_pad = (max_dim - h) // 2 + bottom_pad = max_dim - h - top_pad + left_pad = (max_dim - w) // 2 + right_pad = max_dim - w - left_pad + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "pad64": + h, w = image.shape[:2] + # Both sides must be divisible by 64 + assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" + # Height + if h % 64 > 0: + max_h = h - (h % 64) + 64 + top_pad = (max_h - h) // 2 + bottom_pad = max_h - h - top_pad + else: + top_pad = bottom_pad = 0 + # Width + if w % 64 > 0: + max_w = w - (w % 64) + 64 + left_pad = (max_w - w) // 2 + right_pad = max_w - w - left_pad + else: + left_pad = right_pad = 0 + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "crop": + # Pick a random crop + h, w = image.shape[:2] + y = random.randint(0, (h - min_dim)) + x = random.randint(0, (w - min_dim)) + crop = (y, x, min_dim, min_dim) + image = image[y:y + min_dim, x:x + min_dim] + window = (0, 0, min_dim, min_dim) + else: + raise Exception("Mode {} not supported".format(mode)) + return image.astype(image_dtype), window, scale, padding, crop + + +def resize_mask(mask, scale, padding, crop=None): + """Resizes a mask using the given scale and padding. 
+    Typically, you get the scale and padding from resize_image() to
+    ensure both the image and the mask are resized consistently.
+
+    scale: mask scaling factor
+    padding: Padding to add to the mask in the form
+            [(top, bottom), (left, right), (0, 0)]
+    """
+    # Suppress warning from scipy 0.13.0, the output shape of zoom() is
+    # calculated with round() instead of int()
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
+    if crop is not None:
+        y, x, h, w = crop
+        mask = mask[y:y + h, x:x + w]
+    else:
+        mask = np.pad(mask, padding, mode='constant', constant_values=0)
+    return mask
+
+
+def minimize_mask(bbox, mask, mini_shape):
+    """Resize masks to a smaller version to reduce memory load.
+    Mini-masks can be resized back to image scale using expand_masks()
+
+    See inspect_data.ipynb notebook for more details.
+    """
+    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
+    for i in range(mask.shape[-1]):
+        # Pick slice and cast to bool in case load_mask() returned wrong dtype
+        m = mask[:, :, i].astype(bool)
+        y1, x1, y2, x2 = bbox[i][:4]
+        m = m[y1:y2, x1:x2]
+        if m.size == 0:
+            raise Exception("Invalid bounding box with area of zero")
+        # Resize with bilinear interpolation
+        m = resize(m, mini_shape)
+        mini_mask[:, :, i] = np.around(m).astype(bool)
+    return mini_mask
+
+
+def expand_mask(bbox, mini_mask, image_shape):
+    """Resizes mini masks back to image size. Reverses the change
+    of minimize_mask().
+
+    See inspect_data.ipynb notebook for more details.
+    """
+    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
+    for i in range(mask.shape[-1]):
+        m = mini_mask[:, :, i]
+        y1, x1, y2, x2 = bbox[i][:4]
+        h = y2 - y1
+        w = x2 - x1
+        # Resize with bilinear interpolation
+        m = resize(m, (h, w))
+        mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)
+    return mask
+
+
+# TODO: Build and use this function to reduce code duplication
+def mold_mask(mask, config):
+    pass
+
+
+def unmold_mask(mask, bbox, image_shape):
+    """Converts a mask generated by the neural network to a format similar
+    to its original shape.
+    mask: [height, width] of type float. A small, typically 28x28 mask.
+    bbox: [y1, x1, y2, x2]. The box to fit the mask in.
+
+    Returns a binary mask with the same size as the original image.
+    """
+    threshold = 0.5
+    y1, x1, y2, x2 = bbox
+    mask = resize(mask, (y2 - y1, x2 - x1))
+    mask = np.where(mask >= threshold, 1, 0).astype(bool)
+
+    # Put the mask in the right location.
+    full_mask = np.zeros(image_shape[:2], dtype=bool)
+    full_mask[y1:y2, x1:x2] = mask
+    return full_mask
+
+
+############################################################
+# Anchors
+############################################################
+
+def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
+    """
+    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
+    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
+    shape: [height, width] spatial shape of the feature map over which
+           to generate anchors.
+    feature_stride: Stride of the feature map relative to the image in pixels.
+    anchor_stride: Stride of anchors on the feature map. For example, if the
+        value is 2 then generate anchors for every other feature map pixel.
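+
+    Illustrative example (values assumed here, not from the original code):
+    scales=[32], ratios=[0.5, 1, 2], shape=[2, 2], feature_stride=16, and
+    anchor_stride=1 produce 2 * 2 * 3 = 12 anchors, returned as a
+    [12, (y1, x1, y2, x2)] array in pixel coordinates.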
+ """ + # Get all combinations of scales and ratios + scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) + scales = scales.flatten() + ratios = ratios.flatten() + + # Enumerate heights and widths from scales and ratios + heights = scales / np.sqrt(ratios) + widths = scales * np.sqrt(ratios) + + # Enumerate shifts in feature space + shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride + shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride + shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) + + # Enumerate combinations of shifts, widths, and heights + box_widths, box_centers_x = np.meshgrid(widths, shifts_x) + box_heights, box_centers_y = np.meshgrid(heights, shifts_y) + + # Reshape to get a list of (y, x) and a list of (h, w) + box_centers = np.stack( + [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) + box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) + + # Convert to corner coordinates (y1, x1, y2, x2) + boxes = np.concatenate([box_centers - 0.5 * box_sizes, + box_centers + 0.5 * box_sizes], axis=1) + return boxes + + +def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, + anchor_stride): + """Generate anchors at different levels of a feature pyramid. Each scale + is associated with a level of the pyramid, but each ratio is used in + all levels of the pyramid. + + Returns: + anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted + with the same order of the given scales. So, anchors of scale[0] come + first, then anchors of scale[1], and so on. + """ + # Anchors + # [anchor_count, (y1, x1, y2, x2)] + anchors = [] + for i in range(len(scales)): + anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], + feature_strides[i], anchor_stride)) + return np.concatenate(anchors, axis=0) + + +############################################################ +# Miscellaneous +############################################################ + +def trim_zeros(x): + """It's common to have tensors larger than the available data and + pad with zeros. This function removes rows that are all zeros. + + x: [rows, columns]. + """ + assert len(x.shape) == 2 + return x[~np.all(x == 0, axis=1)] + + +def compute_matches(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5, score_threshold=0.0): + """Finds matches between prediction and ground truth instances. + + Returns: + gt_match: 1-D array. For each GT box it has the index of the matched + predicted box. + pred_match: 1-D array. For each predicted box, it has the index of + the matched ground truth box. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
+ """ + # Trim zero padding + # TODO: cleaner to do zero unpadding upstream + gt_boxes = trim_zeros(gt_boxes) + gt_masks = gt_masks[..., :gt_boxes.shape[0]] + pred_boxes = trim_zeros(pred_boxes) + pred_scores = pred_scores[:pred_boxes.shape[0]] + # Sort predictions by score from high to low + indices = np.argsort(pred_scores)[::-1] + pred_boxes = pred_boxes[indices] + pred_class_ids = pred_class_ids[indices] + pred_scores = pred_scores[indices] + + if pred_masks: + pred_masks = pred_masks[..., indices] + # Compute IoU overlaps [pred_masks, gt_masks] + overlaps = compute_overlaps_masks(pred_masks, gt_masks) + else: + # Compute overlaps [pred_boxes, gt_boxes] + overlaps = compute_overlaps(pred_boxes, gt_boxes) + + + # Loop through predictions and find matching ground truth boxes + match_count = 0 + pred_match = -1 * np.ones([pred_boxes.shape[0]]) + gt_match = -1 * np.ones([gt_boxes.shape[0]]) + for i in range(len(pred_boxes)): + # Find best matching ground truth box + # 1. Sort matches by score + sorted_ixs = np.argsort(overlaps[i])[::-1] + # 2. Remove low scores + low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] + if low_score_idx.size > 0: + sorted_ixs = sorted_ixs[:low_score_idx[0]] + # 3. Find the match + for j in sorted_ixs: + # If ground truth box is already matched, go to next one + if gt_match[j] > -1: + continue + # If we reach IoU smaller than the threshold, end the loop + iou = overlaps[i, j] + if iou < iou_threshold: + break + # Do we have a match? + if pred_class_ids[i] == gt_class_ids[j]: + match_count += 1 + gt_match[j] = i + pred_match[i] = j + break + + return gt_match, pred_match, overlaps + + +def compute_ap(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5): + """Compute Average Precision at a set IoU threshold (default 0.5). + + Returns: + mAP: Mean Average Precision + precisions: List of precisions at different class score thresholds. + recalls: List of recall values at different class score thresholds. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. + """ + # Get matches and overlaps + gt_match, pred_match, overlaps = compute_matches( + gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold) + + # Compute precision and recall at each prediction box step + precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) + recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) + + # Pad with start and end values to simplify the math + precisions = np.concatenate([[0], precisions, [0]]) + recalls = np.concatenate([[0], recalls, [1]]) + + # Ensure precision values decrease but don't increase. This way, the + # precision value at each recall threshold is the maximum it can be + # for all following recall thresholds, as specified by the VOC paper. + for i in range(len(precisions) - 2, -1, -1): + precisions[i] = np.maximum(precisions[i], precisions[i + 1]) + + # Compute mean AP over recall range + indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 + mAP = np.sum((recalls[indices] - recalls[indices - 1]) * + precisions[indices]) + + return mAP, precisions, recalls, overlaps + + +def compute_ap_range(gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_thresholds=None, verbose=1): + """Compute AP over a range or IoU thresholds. 
+    Default range is 0.5 to 0.95 with increments of 0.05."""
+    # Default is 0.5 to 0.95 with increments of 0.05
+    iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05)
+
+    # Compute AP over the range of IoU thresholds
+    AP = []
+    for iou_threshold in iou_thresholds:
+        ap, precisions, recalls, overlaps =\
+            compute_ap(gt_box, gt_class_id, gt_mask,
+                       pred_box, pred_class_id, pred_score, pred_mask,
+                       iou_threshold=iou_threshold)
+        if verbose:
+            print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
+        AP.append(ap)
+    AP = np.array(AP).mean()
+    if verbose:
+        print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
+            iou_thresholds[0], iou_thresholds[-1], AP))
+    return AP
+
+
+def compute_recall(pred_boxes, gt_boxes, iou):
+    """Compute the recall at the given IoU threshold. It's an indication
+    of how many GT boxes were found by the given prediction boxes.
+
+    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+    """
+    # Measure overlaps
+    overlaps = compute_overlaps(pred_boxes, gt_boxes)
+    iou_max = np.max(overlaps, axis=1)
+    iou_argmax = np.argmax(overlaps, axis=1)
+    positive_ids = np.where(iou_max >= iou)[0]
+    matched_gt_boxes = iou_argmax[positive_ids]
+
+    recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
+    return recall, positive_ids
+
+
+# ## Batch Slicing
+# Some custom layers support a batch size of 1 only, and require a lot of work
+# to support batches greater than 1. This function slices an input tensor
+# across the batch dimension and feeds batches of size 1. Effectively, it's
+# an easy way to support batches > 1 quickly with little code modification.
+# In the long run, it's more efficient to modify the code to support large
+# batches and get rid of this function. Consider this a temporary solution.
+def batch_slice(inputs, graph_fn, batch_size, names=None):
+    """Splits inputs into slices and feeds each slice to a copy of the given
+    computation graph and then combines the results. It allows you to run a
+    graph on a batch of inputs even if the graph is written to support one
+    instance only.
+
+    inputs: list of tensors. All must have the same first dimension length
+    graph_fn: A function that returns a TF tensor that's part of a graph.
+    batch_size: number of slices to divide the data into.
+    names: If provided, assigns names to the resulting tensors.
+    """
+    if not isinstance(inputs, list):
+        inputs = [inputs]
+
+    outputs = []
+    for i in range(batch_size):
+        inputs_slice = [x[i] for x in inputs]
+        output_slice = graph_fn(*inputs_slice)
+        if not isinstance(output_slice, (tuple, list)):
+            output_slice = [output_slice]
+        outputs.append(output_slice)
+    # Change outputs from a list of slices where each is
+    # a list of outputs to a list of outputs and each has
+    # a list of slices
+    outputs = list(zip(*outputs))
+
+    if names is None:
+        names = [None] * len(outputs)
+
+    result = [tf.stack(o, axis=0, name=n)
+              for o, n in zip(outputs, names)]
+    if len(result) == 1:
+        result = result[0]
+
+    return result
+
+
+def download_trained_weights(coco_model_path, verbose=1):
+    """Download COCO trained weights from Releases.
+
+    coco_model_path: local path of COCO trained weights
+    """
+    if verbose > 0:
+        print("Downloading pretrained model to " + coco_model_path + " ...")
+    with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
+        shutil.copyfileobj(resp, out)
+    if verbose > 0:
+        print("... done downloading pretrained model!")
+
+
+def norm_boxes(boxes, shape):
+    """Converts boxes from pixel coordinates to normalized coordinates.
+    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
+    shape: [..., (height, width)] in pixels
+
+    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
+    coordinates it's inside the box.
+
+    Returns:
+        [N, (y1, x1, y2, x2)] in normalized coordinates
+    """
+    h, w = shape
+    scale = np.array([h - 1, w - 1, h - 1, w - 1])
+    shift = np.array([0, 0, 1, 1])
+    return np.divide((boxes - shift), scale).astype(np.float32)
+
+
+def denorm_boxes(boxes, shape):
+    """Converts boxes from normalized coordinates to pixel coordinates.
+    boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
+    shape: [..., (height, width)] in pixels
+
+    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
+    coordinates it's inside the box.
+
+    Returns:
+        [N, (y1, x1, y2, x2)] in pixel coordinates
+    """
+    h, w = shape
+    scale = np.array([h - 1, w - 1, h - 1, w - 1])
+    shift = np.array([0, 0, 1, 1])
+    return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
+
+
+def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
+           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
+    """A wrapper for Scikit-Image resize().
+
+    Scikit-Image generates warnings on every call to resize() if it doesn't
+    receive the right parameters. The right parameters depend on the version
+    of skimage. This solves the problem by using different parameters per
+    version. And it provides a central place to control resizing defaults.
+    """
+    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
+        # New in 0.14: anti_aliasing. Default it to False for backward
+        # compatibility with skimage 0.13.
+        return skimage.transform.resize(
+            image, output_shape,
+            order=order, mode=mode, cval=cval, clip=clip,
+            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
+            anti_aliasing_sigma=anti_aliasing_sigma)
+    else:
+        return skimage.transform.resize(
+            image, output_shape,
+            order=order, mode=mode, cval=cval, clip=clip,
+            preserve_range=preserve_range)
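+
+
+# Minimal round-trip sketch for the coordinate helpers above (illustrative
+# only, kept as a comment so importing this module has no side effects):
+#
+#   boxes = np.array([[0, 0, 100, 200]])
+#   normalized = norm_boxes(boxes, shape=(480, 640))         # float32 in [0, 1]
+#   recovered = denorm_boxes(normalized, shape=(480, 640))   # back to pixels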