# cropmask/__init__.py
"""AI for Earth project to map center pivots and monitor crop water use."""
__version__ = '0.1'


# cropmask/datasets.py
from cropmask.mrcnn import utils
import os
import pandas as pd
import skimage.io as skio
import numpy as np
import yaml


class ImageDataset(utils.Dataset):
    """Generates the imagery dataset used by mrcnn."""

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,N] Numpy array.
        Channels are ordered [B, G, R, ...]. This is called by the Keras
        data_generator function.
        """
        # Load image
        image = skio.imread(self.image_info[image_id]["path"])
        assert image.ndim == 3
        return image

    def load_imagery(self, dataset_dir, subset, image_source, class_name, train_test_split_dir):
        """Load a subset of the fields dataset.

        dataset_dir: Root directory of the dataset
        subset: Subset to load.
            * train: training images/masks excluding testing
            * test: testing images moved by the train/test split func
        image_source: string identifier for imagery. "wv2" or "planet" or "landsat"
        class_name: string name for the class. "agriculture" or another name depending
            on the labels. Use self.add_class for a multi-class model.
        train_test_split_dir: the directory holding the train_ids and test_ids written
            by PreprocessWorkflow
        """
        # Add classes here
        self.add_class(image_source, 1, class_name)

        assert subset in ["train", "test"]
        dataset_dir = os.path.join(dataset_dir, subset)

        train_ids = pd.read_csv(os.path.join(train_test_split_dir, "train_ids.csv"))
        train_list = list(train_ids["train"])
        test_ids = pd.read_csv(os.path.join(train_test_split_dir, "test_ids.csv"))
        test_list = list(test_ids["test"])
        if subset == "test":
            image_ids = test_list
        else:
            image_ids = train_list

        # Add images
        for image_id in image_ids:
            self.add_image(
                image_source,
                image_id=image_id,
                path=os.path.join(
                    dataset_dir, str(image_id), "image/{}.tif".format(str(image_id))
                ),
            )

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        # Get mask directory from image path
        mask_dir = os.path.join(os.path.dirname(os.path.dirname(info["path"])), "mask")
        # Read mask files from image
        tile_folder_path = '/'.join(self.image_info[image_id]['path'].split('/')[:-2])
        mask_name = os.listdir(os.path.join(tile_folder_path, 'mask'))[0]
        m = skio.imread(os.path.join(tile_folder_path, 'mask', mask_name)).astype(bool)
        # This conditional has had to be placed throughout to deal with images without
        # labels. Need a better, less fragile way; could do this in the preprocess step.
        if len(m.shape) < 3:
            m = np.expand_dims(m, 2)
        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID, we return an array of ones.
        return m, np.ones([m.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "field":
            return info["id"]
        else:
            super(self.__class__, self).image_reference(image_id)


# cropmask/grid_pooled.py
import os
from itertools import product

import rasterio
from rasterio import windows
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing import Pool, cpu_count


def get_tiles_for_threaded_map(ds, width, height):
    """Returns a list of tuples where each tuple is the window and transform information for the image chip.
    Args:
        ds (rasterio dataset): A rasterio object read with open()
        width (int): the width of a tile/window/chip
        height (int): height of the tile/window/chip

    Returns:
        a list of tuples, where the first element of each tuple is a window and the next is the transform
    """
    nols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, nols, width), range(0, nrows, height))
    big_window = windows.Window(col_off=0, row_off=0, width=nols, height=nrows)
    chip_list = []

    def get_win(ds, col_off, row_off, width, height, big_window):
        """Helper func to get the window and transform for a particular section of an image.

        Args:
            ds (rasterio dataset): A rasterio object read with rasterio.open()
            col_off (int): the column of the window, the upper left corner
            row_off (int): the row of the window, the upper left corner
            width (int): the width of a tile/window/chip
            height (int): height of the tile/window/chip
            big_window (rasterio.windows.Window): used to deal with windows that extend beyond the source image

        Returns:
            Returns the bounds of each image chip/tile as a rasterio window object, as well as
            the transform, as a tuple like (rasterio.windows.Window, transform)
        """
        window = windows.Window(
            col_off=col_off, row_off=row_off, width=width, height=height
        ).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        return (window, transform)

    chip_list = list(map(lambda x: get_win(ds, x[0], x[1], width, height, big_window), offsets))
    return chip_list


def chunk_chips(l, CHUNK_SIZE=10):
    """Takes a list and returns a list of lists where each sub-list is a chunk to be passed to
    the write_by_window function to be written sequentially in a single thread.
    A chunk is a list of chips; a chip is a (window, transform) tuple.
    """
    return [l[i:i+CHUNK_SIZE] for i in range(0, len(l), CHUNK_SIZE)]


def write_by_window(in_path, out_dir, output_name_template, chip_list):
    """Writes out a window of a larger image given a window and transform.
Args: ds (rasterio dataset): A rasterio object read with open() out_dir (str): the output directory for the image chip output_name_template (str): string with curly braces for naming tiles by indices for uniquiness meta (dict): meta data of the ds window (rasterio.windows.Window): the window to read and write transform (rasterio transform object): the affine transformation for the window Returns: Returns the outpath of the window that has been written as a tile """ # open the part we need right here outpaths = [] with rasterio.open(in_path, shared=False) as ds: for chip in chip_list: window, transform = chip ds.meta['transform'] = transform ds.meta['width'], ds.meta['height'] = window.width, window.height outpath = os.path.join(out_dir,output_name_template.format(int(window.col_off), int(window.row_off))) with rasterio.open(outpath, 'w', **ds.meta, shared=False) as outds: outds.write(ds.read(window=window)) outpaths.append(outpath) return outpaths def map_threads(func, sequence, MAX_THREADS=10): """ Set MAX_THREADS in preprocess_config.yaml """ threads = min(len(sequence), MAX_THREADS) pool = ThreadPool(threads) results = pool.map(func, sequence) pool.close() pool.join() return results def map_processes(func, args_list, MAX_PROCESSES): """ Set MAX_PROCESSES in preprocess_config.yaml args_sequence is a list of lists of args """ processes = min(cpu_count(), MAX_PROCESSES) pool = Pool(processes) results = pool.starmap(func, args_list) pool.close() pool.join() return results def grid_images_rasterio_controlled_threads(in_path, out_dir, output_name_template='tile_{}-{}.tif', MAX_THREADS=10, CHUNK_SIZE=10, grid_size=512): """Combines get_tiles_for_threaded_map, map_threads, and write_by_window to write out tiles of an image Args: in_path (str): Path to a raster for which to read with raterio.open() out_dir (str): the output directory for the image chip output_name_template (str): string with curly braces for naming tiles by indices for uniquiness grid_size (int): length in pixels of a side of a single window/tile/chip Returns: Returns the outpaths of the tiles. """ with rasterio.open(in_path, shared=False) as src: all_chip_list = get_tiles_for_threaded_map(src, width=grid_size, height=grid_size) chunk_list = chunk_chips(all_chip_list, CHUNK_SIZE) # a chunk is a list of chips return list(map_threads(lambda x: write_by_window(in_path, out_dir, output_name_template, x), chunk_list, MAX_THREADS=MAX_THREADS)) def grid_images_rasterio_controlled_processes(in_path, out_dir, output_name_template='tile_{}-{}.tif', MAX_PROCESSES=4, grid_size=512): """Combines get_tiles_for_threaded_map, map_threads, and write_by_window to write out tiles of an image Args: in_path (str): Path to a raster for which to read with raterio.open() out_dir (str): the output directory for the image chip output_name_template (str): string with curly braces for naming tiles by indices for uniquiness grid_size (int): length in pixels of a side of a single window/tile/chip Returns: Returns the outpaths of the tiles. 
""" with rasterio.open(in_path, shared=False) as src: all_chip_list = get_tiles_for_threaded_map(src, width=grid_size, height=grid_size) processes = min(cpu_count(), MAX_PROCESSES) chunk_list = chunk_chips(all_chip_list, len(all_chip_list)//processes) # a chunk is a list of chips, we want a chunk for each process print(len(chunk_list)) args_list = [[in_path, out_dir, output_name_template]+[chunk] for chunk in chunk_list] print(len(args_list)) return list(map_processes(write_by_window, args_list, MAX_PROCESSES=MAX_PROCESSES)) PKaN57cropmask/io_utils.pyimport xarray as xr import rioxarray import glob import os import numpy as np import matplotlib.pyplot as plt import cartopy.crs as ccrs def open_rasterio_lsr(path): """Reads in a Landsat surface reflectance band and correctly assigns the band metadata. Args: path (str): Path of form .../LT05_L1TP_032031_20050406_20160912_01_T1_sr_band5.tif. Returns: bool: Returns an xarray data array """ band_int = int(os.path.splitext(path)[0][-1]) data_array = xr.open_rasterio(path, chunks={'band': 1}) #chunks makes i lazyily executed band_val = data_array['band'] band_val.data = np.array((band_int, )) return data_array def read_bands_lsr(path_list): """ Concatenates a list of landsat paths into a single data array. Args: path_list (str): Paths of form .../LT05_L1TP_032031_20050406_20160912_01_T1_sr_band5.tif. in a list Returns: bool: Returns an xarray data array """ band_arrs = [open_rasterio_lsr(path) for path in path_list] return xr.concat(band_arrs, dim="band") def write_xarray_lsr(xr_arr, fpath): xr_arr.rio.to_raster(fpath) def read_scenes(scenes_folder_pattern): """ Reads in mu.tiple Landsat surface reflectance scenes given a regex pattern. Args: path (str): Path of form "../*". Returns: bool: Returns an xarray data array with dimensions for x, y, band, and time. """ scene_folders = glob.glob(scenes_folder_pattern) # only select files that contain a band sr_paths = [glob.glob(scene_folder+'/*band*') for scene_folder in scene_folders] xr_arrs = [read_bands_lsr(paths) for paths in sr_paths] return xr.concat(xr_arrs, dim="time")PKaNl1cropmask/label_prep.pyimport geopandas as gpd import rasterio from shapely.geometry import Polygon import skimage.io as skio import numpy as np import os from skimage import measure def get_epsg(tif_path): "Gets the epsg code of the tif." with rasterio.open(tif_path) as src: meta = src.meta.copy() epsg_dict = meta['crs'].to_dict() return epsg_dict def rio_bbox_to_polygon(tif_bounds): """Converts rasterio Bounding Box to a shapely geometry. access rasterio bounding box with rasterio.open and then src.bounds""" return Polygon([[tif_bounds.left, tif_bounds.bottom],[tif_bounds.left, tif_bounds.top], [tif_bounds.right,tif_bounds.top],[tif_bounds.right,tif_bounds.bottom]]) def connected_components(arr): """ Extracts individual instances into their own tif files. Saves them in each folder ID in train folder. If an image has no instances, saves it with a empty mask. In this function geometry info is discarded, need to address. """ unique_vals = np.unique(arr) # for imgs with no instances, create empty mask if len(unique_vals) == 1: return np.zeros_like(arr) else: # only run connected comp if there is at least one instance blob_labels = measure.label(arr, background=0) return blob_labels def extract_labels(blob_labels): """ Takes the output of connected_componenets and returns a list of arrays where each array is 1 where the instance is present and 0 where it is not. 
""" blob_vals = np.unique(blob_labels) label_list = [] for blob_val in blob_vals[blob_vals != 0]: labels_copy = blob_labels.copy() labels_copy[blob_labels != blob_val] = 0 labels_copy[blob_labels == blob_val] = 1 label_list.append(labels_copy) return label_listPKaNa!xxcropmask/misc.py### currently random, useful functions from skimage import exposure import numpy as np import yaml import shutil import os def percentile_rescale(arr): """ Rescales and applies other exposure functions to improve image vis. http://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.rescale_intensity """ rescaled_arr = np.zeros_like(arr) for i in range(0, arr.shape[-1]): val_range = (np.percentile(arr[:, :, i], 1), np.percentile(arr[:, :, i], 99)) rescaled_channel = exposure.rescale_intensity(arr[:, :, i], val_range) rescaled_arr[:, :, i] = rescaled_channel # rescaled_arr= exposure.adjust_gamma(rescaled_arr, gamma=1) #adjust from 1 either way # rescaled_arr= exposure.adjust_sigmoid(rescaled_arr, cutoff=.50) #adjust from .5 either way return rescaled_arr def remove_dirs(directory_list): """ Removes all files and sub-folders in each folder. """ for f in directory_list: if os.path.exists(f): shutil.rmtree(f) def max_normalize(arr): arr *= 255.0 / arr.max() return arr def parse_yaml(input_file): """Parse yaml file of configuration parameters.""" with open(input_file, "r") as yaml_file: params = yaml.safe_load(yaml_file) return params def make_dirs(directory_list): # Make directory and subdirectories for d in directory_list: try: os.mkdir(d) except: print("Whole directory list: ", directory_list) print("The directory "+d+" exists already. Check it and maybe delete it or change config.") raise FileExistsError PKaN%%cropmask/model_configs.pyfrom cropmask.mrcnn.config import Config import numpy as np ############################################################ # Configurations ############################################################ class LandsatConfig(Config): """Configuration for training on landsat imagery. Overrides values specific to Landsat center pivot imagery. Descriptive documentation for each attribute is at https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/config.py There are many more hyperparameters to edit than are set in this subclass""" def __init__(self, N): """Set values of computed attributes. Channel dimension is overriden, replaced 3 with N as per this guideline: https://github.com/matterport/Mask_RCNN/issues/314 THERE MAY BE OTHER CODE CHANGES TO ACCOUNT FOR 3 vs N channels. See other comments.""" # https://github.com/matterport/Mask_RCNN/wiki helpful for N channels # Effective batch size self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT # Input image size if self.IMAGE_RESIZE_MODE == "crop": self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, N]) else: self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, N]) # Image meta data length # See compose_image_meta() for details self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES self.CHANNELS_NUM = N LEARNING_RATE = 0.0003 # Image mean from inspect_data ipynb (preprocess.py differs for some reason, only slightly by 1os of digits or 1s of digits) MEAN_PIXEL = np.array([711.1, 995.51, 1097.56]) # Give the configuration a recognizable name NAME = "landsat-512-cp" # Batch size is 4 (GPUs * images/GPU). 
# Keras 2.1.6 works for multi-gpu but takes longer than single GPU currently GPU_COUNT = 1 IMAGES_PER_GPU = 3 # Number of classes (including background) NUM_CLASSES = 1 + 1 # background + ag # Don't exclude based on confidence. Since we have two classes # then 0.5 is the minimum anyway as it picks between nucleus and BG DETECTION_MIN_CONFIDENCE = 0.7 # Use small images for faster training. Determines the image shape. # From build() in model.py # Exception("Image size must be dividable by 2 at least 6 times " # "to avoid fractions when downscaling and upscaling." # "For example, use 256, 320, 384, 448, 512, ... etc. " IMAGE_RESIZE_MODE = "square" IMAGE_MIN_DIM = 512 IMAGE_MAX_DIM = 512 # IMAGE_MIN_SCALE = 2.0 # anchor side in pixels, determined using inspect_crop_data.ipynb. can specify more or less scales RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) # for cp # Aim to allow ROI sampling to pick 33% positive ROIs. This is always 33% in inspect_data nb, unsure if that is accurate. TRAIN_ROIS_PER_IMAGE = 600 # ROIs kept after non-maximum supression (training and inference) POST_NMS_ROIS_TRAINING = 1000 POST_NMS_ROIS_INFERENCE = 2000 # Non-max suppression threshold to filter RPN proposals. # You can increase this during training to generate more propsals. RPN_NMS_THRESHOLD = 0.7 # How many anchors per image to use for RPN training RPN_TRAIN_ANCHORS_PER_IMAGE = 128 # 64 # Unsure what best step size is but nucleus used 100 STEPS_PER_EPOCH = 100 # reduces the max number of field instances # MAX_GT_INSTANCES = 29 # for smallholder determined using inspect_crop_data.ipynb MAX_GT_INSTANCES = 195 # for cp determined using inspect_crop_data.ipynb # Max number of final detections per image DETECTION_MAX_INSTANCES = 400 # use small validation steps since the epoch is small VALIDATION_STEPS = 50 # Backbone network architecture # Supported values are: resnet50, resnet101. # You can also provide a callable that should have the signature # of model.resnet_graph. If you do so, you need to supply a callable # to COMPUTE_BACKBONE_SHAPE as well BACKBONE = "resnet50" # If enabled, resizes instance masks to a smaller size to reduce # memory load. Recommended when using high-resolution images. USE_MINI_MASK = False MINI_MASK_SHAPE = (28, 28) # (height, width) of the mini-mask # Loss weights for more precise optimization. It has been suggested that mrcnn_mask_loss should be weighted higher # Can be used for R-CNN training setup. LOSS_WEIGHTS = { "rpn_class_loss": 1.0, "rpn_bbox_loss": 1.0, "mrcnn_class_loss": 1.0, "mrcnn_bbox_loss": 1.0, "mrcnn_mask_loss": 3.0, } class LandsatInferenceConfig(LandsatConfig): # Set batch size to 1 to run one image at a time GPU_COUNT = 1 IMAGES_PER_GPU = 1 # Don't resize imagery for inferencing IMAGE_RESIZE_MODE = "pad64" # Non-max suppression threshold to filter RPN proposals. # You can increase this during training to generate more propsals. RPN_NMS_THRESHOLD = 0.7 #must be set to what pretrained resnet model expects, see https://github.com/matterport/Mask_RCNN/issues/1291 RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) class WV2Config(Config): """Configuration for training on worldview-2 imagery. Overrides values specific to WV2. Descriptive documentation for each attribute is at https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/config.py There are many more hyperparameters to edit than are set in this subclass""" def __init__(self, N): """Set values of computed attributes. 
Channel dimension is overriden, replaced 3 with N as per this guideline: https://github.com/matterport/Mask_RCNN/issues/314 THERE MAY BE OTHER CODE CHANGES TO ACCOUNT FOR 3 vs N channels. See other comments.""" # https://github.com/matterport/Mask_RCNN/wiki helpful for N channels # Effective batch size self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT # Input image size if self.IMAGE_RESIZE_MODE == "crop": self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, N]) else: self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, N]) # Image meta data length # See compose_image_meta() for details self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES self.CHANNELS_NUM = N LEARNING_RATE = 0.00005 # Image mean (RGBN RGBN) from WV2_MRCNN_PRE.ipynb # filling with N values, need to compute mean of each channel # values are for gridded wv2 no partial grids MEAN_PIXEL = np.array([225.25, 308.74, 184.93]) # Give the configuration a recognizable name NAME = "wv2-1024-cp" # Batch size is 4 (GPUs * images/GPU). # Keras 2.1.6 works for multi-gpu but takes longer than single GPU currently GPU_COUNT = 1 IMAGES_PER_GPU = 2 # Number of classes (including background) NUM_CLASSES = 1 + 1 # background + ag # Use small images for faster training. Determines the image shape. # From build() in model.py # Exception("Image size must be dividable by 2 at least 6 times " # "to avoid fractions when downscaling and upscaling." # "For example, use 256, 320, 384, 448, 512, ... etc. " IMAGE_RESIZE_MODE = "square" IMAGE_MIN_DIM = 1024 IMAGE_MAX_DIM = 1024 # anchor side in pixels, determined using inspect_crop_data.ipynb. can specify more or less scales RPN_ANCHOR_SCALES = (100, 150, 250, 375) # for cp # RPN_ANCHOR_SCALES = (20, 60, 100, 140) # for smallholder # Aim to allow ROI sampling to pick 33% positive ROIs. This is always 33% in inspect_data nb, unsure if that is accurate. TRAIN_ROIS_PER_IMAGE = 300 # Unsure what best step size is but nucleus used 100. Doubling because smallholder is more complex STEPS_PER_EPOCH = 400 # reduces the max number of field instances # MAX_GT_INSTANCES = 29 # for smallholder determined using inspect_crop_data.ipynb MAX_GT_INSTANCES = 7 # for cp determined using inspect_crop_data.ipynb # use small validation steps since the epoch is small VALIDATION_STEPS = 100 # Backbone network architecture # Supported values are: resnet50, resnet101. # You can also provide a callable that should have the signature # of model.resnet_graph. If you do so, you need to supply a callable # to COMPUTE_BACKBONE_SHAPE as well BACKBONE = "resnet50" # If enabled, resizes instance masks to a smaller size to reduce # memory load. Recommended when using high-resolution images. USE_MINI_MASK = False MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask # Loss weights for more precise optimization. It has been suggested that mrcnn_mask_loss should be weighted higher # Can be used for R-CNN training setup. LOSS_WEIGHTS = { "rpn_class_loss": 1.0, "rpn_bbox_loss": 1.0, "mrcnn_class_loss": 1.0, "mrcnn_bbox_loss": 1.0, "mrcnn_mask_loss": 3.0, } class WV2InferenceConfig(WV2Config): # Set batch size to 1 to run one image at a time GPU_COUNT = 1 IMAGES_PER_GPU = 1 # Don't resize imagery for inferencing IMAGE_RESIZE_MODE = "pad64" # Non-max suppression threshold to filter RPN proposals. # You can increase this during training to generate more propsals. 
RPN_NMS_THRESHOLD = 0.7 PKaNIDDcropmask/preprocess.pyimport random import os import shutil import copy import skimage.io as skio import warnings import pandas as pd import numpy as np import geopandas as gpd from rasterio import features, coords from rasterio.plot import reshape_as_raster import rasterio from shapely.geometry import shape from osgeo import gdal from itertools import product from rasterio import windows from cropmask.label_prep import rio_bbox_to_polygon from cropmask.misc import parse_yaml, make_dirs from cropmask import sequential_grid, label_prep from cropmask import io_utils random.seed(42) class PreprocessWorkflow(): """ Worflow for loading and gridding a single satellite image and reference dataset of the same extent. """ def __init__(self, param_path, scene_dir_path='nopath/', source_label_path='nopath/'): params = parse_yaml(param_path) self.params = params self.source_label_path = source_label_path # if there is a referenc label self.scene_dir_path = scene_dir_path # path to the unpacked tar archive on azure storage self.scene_id = self.scene_dir_path.split("/")[-2] # gets the name of the folder the bands are in, the scene_id # the folder structure for the unique run self.ROOT = params['dirs']["root"] self.TMP = params['dirs']["tmp"] self.DATASET = os.path.join(self.ROOT, params['dirs']["dataset"]) self.SCENE = os.path.join(self.DATASET, params['dirs']["scene"]) self.TRAIN = os.path.join(self.DATASET, params['dirs']["train"]) self.TEST = os.path.join(self.DATASET, params['dirs']["test"]) self.GRIDDED_IMGS = os.path.join(self.TMP, params['dirs']["gridded_imgs"]) self.GRIDDED_LABELS = os.path.join(self.TMP, params['dirs']["gridded_labels"]) self.NEG_BUFFERED = os.path.join(self.DATASET, params['dirs']["neg_buffered_labels"]) self.RESULTS = os.path.join(self.ROOT, params['dirs']["results"], params['dirs']["dataset"]) # scene specific paths and variables self.rasterized_label_path = '' self.band_list = [] # the band indices self.meta = {} # meta data for the scene self.chip_ids = [] # list of chip ids of form [scene_id]_[random number] self.small_area_filter = params['label_vals']['small_area_filter'] self.neg_buffer = params['label_vals']['neg_buffer'] self.ag_class_int = params['label_vals']['ag_class_int'] # TO DO, not implemented but needs to be for multi class self.dataset_name = params['image_vals']['dataset_name'] self.grid_size = params['image_vals']['grid_size'] self.usable_threshold = params['image_vals']['usable_thresh'] self.split = params['image_vals']['split'] self.MAX_THREADS = params['processing']['MAX_THREADS'] self.CHUNK_SIZE = params['processing']['CHUNK_SIZE'] self.MAX_PROCESSES = params['processing']['MAX_PROCESSES'] def yaml_to_band_index(self): """Parses config booleans to a list of band indexes to be stacked. For example, Landsat 5 has 6 bands (7 if you count band 6, thermal) that we can use for masking. Args: params (dict): The configuration dictionary that is read with yaml. Returns: list: A list of strings for the band numbers, starting from 1. For Landsat 5 1 would represent the blue band, 2 green, and so on. For Landsat 8, band 1 would be coastal blue, band 2 would be blue, and so on. See https://landsat.usgs.gov/what-are-band-designations-landsat-satellites .. 
_PEP 484: https://www.python.org/dev/peps/pep-0484/ """ if self.params["image_vals"]["dataset_name"] == "landsat-5": bands = self.params["landsat_bands_to_include"] for i, band in enumerate(bands): if list(band.values())[0] == True: self.band_list.append(str(i+1)) return self.band_list def setup_dirs(self): """ This folder structure is used for each unique pre processing and modeling workflow and is made unique by specifying a unique DATASET name or ROOT path (if working on a different container.). ROOT should be the path to the azure container mounted with blobfuse, and should already exist. The RESULTS folder should be created in a folder named from param["results"], and this should also already exist. """ directory_list = [ self.TMP, self.DATASET, self.SCENE, self.TRAIN, self.TEST, self.GRIDDED_IMGS, self.GRIDDED_LABELS, self.NEG_BUFFERED, self.RESULTS, ] if os.path.exists(os.path.join(self.ROOT, self.params['dirs']["results"])) == False: os.mkdir(os.path.join(self.ROOT, self.params['dirs']["results"])) make_dirs(directory_list) return directory_list def get_product_paths(self, band_list): # Load image product_list = os.listdir(self.scene_dir_path) # below works because only products that are bands have a int in the 5th to last position filtered_product_list = [band for band in product_list if band[-5] in band_list and 'band' in band] filtered_product_list = sorted(filtered_product_list) filtered_product_paths = [os.path.join(self.scene_dir_path, fname) for fname in filtered_product_list] return filtered_product_paths def load_meta_and_bounds(self, product_paths): # get metadata and edit meta obj for stacked raster with rasterio.open(product_paths[0]) as rast: meta = rast.meta.copy() meta.update(compress="lzw") meta["count"] = len(product_paths) self.meta=meta self.bounds = rast.bounds return self.meta, self.bounds def load_single_scene(self, product_paths): return io_utils.read_bands_lsr(product_paths) def stack_and_save_bands(self): """Load the landsat bands specified by yaml_to_band_index and returns a [H,W,N] Numpy array for a single scene, where N is the number of bands and H and W are the height and width of the original band arrays. Channels are ordered in band order. Args: scene_dir_path (str): The path to the scene directory. The dir name should be the standard scene id that is the same as as the blob name of the folder that has the landsat product bands downloaded using lsru or download_utils. band_list (str): a list of band indices to include Returns: ndarray:k .. 
_PEP 484:k https://www.python.org/dev/peps/pep-0484/ """ product_paths = self.get_product_paths(self.band_list) scene_arr = self.load_single_scene(product_paths) scene_name = os.path.basename(product_paths[0])[:-10] + ".tif" scene_path = os.path.join(self.SCENE, scene_name) self.scene_path = scene_path io_utils.write_xarray_lsr(scene_arr, scene_path) def preprocess_labels(self): """For preprcoessing reference dataset""" shp_frame = gpd.read_file(self.source_label_path) # keeps the class of interest if it is there and the polygon of raster extent shp_frame = shp_frame.to_crs(self.meta['crs'].to_dict()) # reprojects to landsat's utm zone tif_polygon = rio_bbox_to_polygon(self.bounds) shp_series = shp_frame.intersection(tif_polygon) # clips by landsat's bounds including nodata corners shp_series = shp_series.loc[shp_series.area > self.small_area_filter] shp_series = shp_series.buffer(self.neg_buffer) return shp_series.loc[shp_series.is_empty==False] def negative_buffer_and_small_filter(self, neg_buffer, small_area_filter): """ Applies a negative buffer to labels since some are too close together and produce conjoined instances when connected components is run (even after erosion/dilation). This may not get rid of all conjoinments and should be adjusted. It relies too on the source projection of the label file to calculate distances for the negative buffer. It's assumed that the projection is in meters and that a negative buffer in meter units will work with this projection. Args: source_label_path (str): the path to the reference shapefile dataset. Should be the same extent as a Landsat scene neg_buffer (float): The distance in meters to use for the negative buffer. Should at least be 1 pixel width. small_area_filter (float): The area thershold to remove spurious small fields. Particularly useful to remove fields to small to be commercial agriculture Returns rasterized labels that are ready to be gridded """ shp_series = self.preprocess_labels() meta = self.meta.copy() meta.update({'count':1}) tifname = os.path.splitext(os.path.basename(self.source_label_path))[0] + ".tif" self.rasterized_label_path = os.path.join(self.NEG_BUFFERED, tifname) with rasterio.open(self.rasterized_label_path, "w+", **meta) as out: out_arr = out.read(1) # https://gis.stackexchange.com/questions/151339/rasterize-a-shapefile-with-geopandas-or-fiona-python#151861 shapes = shp_series.values.tolist() burned = features.rasterize( shapes=shapes, fill=0, out_shape=(meta['height'],meta['width']), transform=out.transform, default_value=1, ) burned[burned < 0] = 0 burned[burned > 0] = 1 burned = burned.astype(np.int16, copy=False) out.write(burned, 1) print( "Done applying negbuff of {negbuff} and filtering small labels of area less than {area}".format( negbuff=self.neg_buffer, area=self.small_area_filter ) ) def grid_images(self): """ Grids up imagery to a variable size. Filters out imagery with too little usable data. appends a random unique id to each tif and label pair, appending string 'label' to the mask. 
""" chip_img_paths = sequential_grid.grid_images_rasterio_sequential(self.scene_path, self.GRIDDED_IMGS, output_name_template='tile_{}-{}.tif', grid_size=self.grid_size) chip_label_paths = sequential_grid.grid_images_rasterio_sequential(self.rasterized_label_path, self.GRIDDED_LABELS, output_name_template='tile_{}-{}_label.tif', grid_size=self.grid_size) return (chip_img_paths, chip_label_paths) def rm_mostly_empty(self, scene_path, label_path): """ Removes a grid that is emptier than the usable data threshold and corrects bad no data value to 0. Ignore the User Warning, unsure why it pops up but doesn't seem to impact the array shape. Used because very empty grid chips seem to throw off the model by increasing detections at the edges between good data and nodata. """ arr = skio.imread(scene_path) arr[arr < 0] = 0 skio.imsave(scene_path, arr) pixel_count = arr.shape[0] * arr.shape[1] nodata_pixel_count = (arr == 0).sum() if nodata_pixel_count / pixel_count > self.usable_threshold: os.remove(scene_path) os.remove(label_path) print("removed scene and label, {}% bad data".format(self.usable_threshold)) def remove_from_gridded(self, chip_img_paths, chip_label_paths): def get_chip_id(string,ignore_string): return string.replace(ignore_string, '') for img, label in zip(sorted(chip_img_paths, key=lambda x: get_chip_id(x,'.tif')), sorted(chip_label_paths, key=lambda x: get_chip_id(x,'_label.tif'))): self.rm_mostly_empty(img,label) def move_chips_to_folder(self): """Moves a file with identifier pattern 760165086.tif to a folder path ZA0165086/image/ZA0165086.tif """ for chip_id in os.listdir(self.GRIDDED_IMGS): chip_id = os.path.splitext(chip_id)[0] chip_folder_path = os.path.join(self.TRAIN, chip_id) if os.path.exists(chip_folder_path) == False: os.mkdir(chip_folder_path) else: raise Exception('{} should not exist prior to being created in this function, it has not been deleted properly prior to a new run'.format(folder_path)) new_chip_path = os.path.join(chip_folder_path, "image") mask_path = os.path.join(chip_folder_path, "mask") os.mkdir(new_chip_path) os.mkdir(mask_path) old_chip_path = os.path.join(self.GRIDDED_IMGS, chip_id+'.tif') shutil.copyfile(old_chip_path, os.path.join(new_chip_path, chip_id + ".tif")) # moves the chips os.remove(old_chip_path) def connected_components(self): """ Extracts individual instances into their own tif files. Saves them in each folder ID in train folder. If an image has no instances, saves it with a empty mask. """ for chip_id in os.listdir(self.TRAIN): chip_label_path = os.path.join(self.GRIDDED_LABELS, chip_id+"_label.tif") arr = skio.imread(chip_label_path) blob_labels = label_prep.connected_components(arr) # for imgs with no instances, create empty mask if len(np.unique(blob_labels)) == 1: mask_folder = os.path.join(self.TRAIN, chip_id, "mask") skio.imsave(os.path.join(mask_folder, chip_id + ".tif"), blob_labels) else: # only run connected comp if there is at least one instance label_list = label_prep.extract_labels(blob_labels) label_arrs = np.stack(label_list, axis=-1) label_name = chip_id + "_labels.tif" mask_path = os.path.join(self.TRAIN, chip_id, "mask") label_path = os.path.join(mask_path, label_name) skio.imsave(label_path, label_arrs) def train_test_split(self): """Takes a sample of folder ids and copies them to a test directory from a directory with all folder ids. 
        Each sample folder contains an image folder and a corresponding mask folder."""
        sample_list = next(os.walk(self.TRAIN))[1]
        k = round(self.split * len(sample_list))
        test_list = random.sample(sample_list, k)
        for test_sample in test_list:
            os.rename(
                os.path.join(self.TRAIN, test_sample), os.path.join(self.TEST, test_sample)
            )
        train_list = list(set(next(os.walk(self.TRAIN))[1]) - set(next(os.walk(self.TEST))[1]))
        train_df = pd.DataFrame({"train": train_list})
        test_df = pd.DataFrame({"test": test_list})
        # used for reading in the training and testing ids in the modeling step
        train_df.to_csv(os.path.join(self.RESULTS, "train_ids.csv"))
        test_df.to_csv(os.path.join(self.RESULTS, "test_ids.csv"))

    def get_arr_channel_mean(self, channel):
        """Calculate the mean of a given channel across all training samples."""
        means = []
        train_list = next(os.walk(self.TRAIN))[1]
        for i, fid in enumerate(train_list):
            im_folder = os.path.join(self.TRAIN, fid, "image")
            im_path = os.path.join(im_folder, os.listdir(im_folder)[0])
            arr = skio.imread(im_path)
            # cast added because no data values differ for wv2 and landsat and need to be excluded from the mean
            arr = arr.astype(np.float32, copy=False)
            # best to do no data masking up front and set bad qa bands to 0 rather than assuming 0 is no data.
            # This is assumed from looking at no data values at corners being equal to 0.
            nodata_value = 0
            arr[arr == nodata_value] = np.nan
            means.append(np.nanmean(arr[:, :, channel]))
        return np.mean(means)

    def run_single_scene(self):
        import time  # only used here for simple timing of the preprocessing run
        start = time.time()
        self.setup_dirs()
        band_list = self.yaml_to_band_index()
        product_list = self.get_product_paths(band_list)
        meta, bounds = self.load_meta_and_bounds(product_list)
        scene = self.load_single_scene(product_list)
        self.stack_and_save_bands()
        self.negative_buffer_and_small_filter(-31, 100)
        img_paths, label_paths = self.grid_images()
        self.remove_from_gridded(img_paths, label_paths)
        self.move_chips_to_folder()
        self.connected_components()
        self.train_test_split()
        print("channel means, put these in the model_configs.py subclass")
        for i in band_list:
            print("Band index {} mean for normalization: ".format(i), self.get_arr_channel_mean(int(i) - 1))
        print("preprocessing complete, ready to run model.")
        stop = time.time()
        print(stop - start, " seconds")


# cropmask/preprocess_config.yaml
dirs:
  root: /permmnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/
  tmp: /home/ryan/work/tmp/ # rasterio write operations to vm disk are an order of magnitude faster than writing to mounted Azure File Store
  dataset: landsat-1024-xarray # rename this based on the imagery and labels
  scene: scene
  train: train
  test: test
  gridded_imgs: landsat_grid_imgs
  gridded_labels: landsat_grid_labels
  results: results
  region_name: western_nebraska # should be the same as azure_configs used for downloading the order, where source image folders (unpacked tars) are located
  region_labels: north_platte_landsat_tile_labels
  neg_buffered_labels: neg_buffered_labels
landsat_bands_to_include:
  - blue: true
  - green: true
  - red: true
  - near-IR: false
image_vals:
  dataset_name: "landsat-5"
  grid_size: 512 # in pixel units
  usable_thresh: .02 # an image chip may have no more than this fraction of values that are zero from either clouds or no_data (tile edges)
  split: .10 # the proportion of samples that will be used for testing
label_vals:
  ag_class_int: 3 # TO DO # if there is more than one class, ag_class_int is the integer label for the class that will be negative buffered and size filtered
  neg_buffer: -30 # in meters, applied on vectors before they are rasterized to reduce spurious instance overlap
  small_area_filter: 3600 # in square meters, removes fields with area smaller than this from the labels
  kernel: 5 # square kernel size for erosion then dilation of rasterized instance labels to reduce overlap
processing:
  CHUNK_SIZE: 10
  MAX_PROCESSES: 100
  MAX_THREADS: 10


# cropmask/run_cropmask.py
"""
Original nucleus.py example written by Waleed Abdulla at
https://github.com/matterport/Mask_RCNN/blob/master/samples/nucleus/nucleus.py
------------------------------------------------------------
Usage: import the module (see Jupyter notebooks for examples), or run from
the command line as such:

# Train a new model starting from ImageNet weights
python3 crop_mask.py train --dataset=data/wv2 --subset=train --weights=imagenet

# Train a new model starting from specific weights file
python3 crop_mask.py train --dataset=data/wv2 --subset=train --weights=/path/to/weights.h5

# Resume training a model that you had trained earlier
python3 crop_mask.py train --dataset=data/wv2 --subset=train --weights=last

# Generate submission file
python3 crop_mask.py detect --dataset=data/wv2 --subset=train --weights=
"""

############################################################
#  Pre-processing and train/test split
############################################################

# Set matplotlib backend
# This has to be done before other imports that might
# set it, but only if we're running in script mode
# rather than being imported.
if __name__ == "__main__":
    import matplotlib

    # Agg backend runs without a display
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

############################################################
#  Set model paths and imports
############################################################

import sys
import os
import datetime
from imgaug import augmenters as iaa

# Import cropmask and mrcnn
from cropmask.preprocess import PreprocessWorkflow
from cropmask import datasets, model_configs
from cropmask.mrcnn import model as modellib
from cropmask.mrcnn import visualize
import numpy as np

# Path to trained weights file
ROOT_DIR = "/home/ryan/work/CropMask_RCNN"
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "models/mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

# contains paths as instance attributes
wflow = PreprocessWorkflow(os.path.join(ROOT_DIR, "cropmask/preprocess_config.yaml"))

############################################################
#  Training
############################################################

def train(model, dataset_dir, subset, config):
    """Train the model."""
    # Training dataset.
    dataset_train = datasets.ImageDataset()
    dataset_train.load_imagery(
        dataset_dir,
        "train",
        image_source="landsat",
        class_name="agriculture",
        train_test_split_dir=wflow.RESULTS,  # directory holding train_ids.csv/test_ids.csv, as in detect()
    )
    dataset_train.prepare()

    # Validation dataset
    dataset_val = datasets.ImageDataset()
    dataset_val.load_imagery(
        dataset_dir,
        "test",
        image_source="landsat",
        class_name="agriculture",
        train_test_split_dir=wflow.RESULTS,
    )
    dataset_val.prepare()

    # Image augmentation
    # http://imgaug.readthedocs.io/en/latest/source/augmenters.html
    augmentation = iaa.SomeOf(
        (0, 2),
        [
            iaa.Fliplr(0.5),
            iaa.Flipud(0.5),
            iaa.OneOf(
                [iaa.Affine(rotate=90), iaa.Affine(rotate=180), iaa.Affine(rotate=270)]
            ),
        ],
    )

    # *** This training schedule is an example.
Update to your needs *** print("Train all layers") model.train( dataset_train, dataset_val, learning_rate=config.LEARNING_RATE, epochs=100, augmentation=augmentation, layers="all", ) ############################################################ # RLE Encoding ############################################################ def rle_encode(mask): """Encodes a mask in Run Length Encoding (RLE). Returns a string of space-separated values. """ assert mask.ndim == 2, "Mask must be of shape [Height, Width]" # Flatten it column wise m = mask.T.flatten() # Compute gradient. Equals 1 or -1 at transition points g = np.diff(np.concatenate([[0], m, [0]]), n=1) # 1-based indicies of transition points (where gradient != 0) rle = np.where(g != 0)[0].reshape([-1, 2]) + 1 # Convert second index in each pair to lenth rle[:, 1] = rle[:, 1] - rle[:, 0] return " ".join(map(str, rle.flatten())) def rle_decode(rle, shape): """Decodes an RLE encoded list of space separated numbers and returns a binary mask.""" rle = list(map(int, rle.split())) rle = np.array(rle, dtype=np.int32).reshape([-1, 2]) rle[:, 1] += rle[:, 0] rle -= 1 mask = np.zeros([shape[0] * shape[1]], np.bool) for s, e in rle: assert 0 <= s < mask.shape[0] assert 1 <= e <= mask.shape[0], "shape: {} s {} e {}".format(shape, s, e) mask[s:e] = 1 # Reshape and transpose mask = mask.reshape([shape[1], shape[0]]).T return mask def mask_to_rle(image_id, mask, scores): "Encodes instance masks to submission format." assert mask.ndim == 3, "Mask must be [H, W, count]" # If mask is empty, return line with image ID only if mask.shape[-1] == 0: return "{},".format(image_id) # Remove mask overlaps # Multiply each instance mask by its score order # then take the maximum across the last dimension order = np.argsort(scores)[::-1] + 1 # 1-based descending mask = np.max(mask * np.reshape(order, [1, 1, -1]), -1) # Loop over instance masks lines = [] for o in order: m = np.where(mask == o, 1, 0) # Skip if empty if m.sum() == 0.0: continue rle = rle_encode(m) lines.append("{}, {}".format(image_id, rle)) return "\n".join(lines) ############################################################ # Detection ############################################################ def detect(model, dataset_dir, subset, wflow): """Run detection on images in the given directory.""" print("Running on {}".format(dataset_dir)) # Create directory if not os.path.exists(wflow.RESULTS): os.makedirs(wflow.RESULTS) submit_dir = "submit_{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()) submit_dir = os.path.join(wflow.RESULTS, submit_dir) os.makedirs(submit_dir) # Read dataset dataset = datasets.ImageDataset(3) dataset.load_imagery( dataset_dir, subset, image_source="landsat", class_name="agriculture", train_test_split_dir=wflow.RESULTS ) dataset.prepare() # Load over images submission = [] for image_id in dataset.image_ids: # Load image and run detection image = dataset.load_image(image_id) # Detect objects r = model.detect([image], verbose=0)[0] # Encode image to RLE. 
Returns a string of multiple lines source_id = dataset.image_info[image_id]["id"] rle = mask_to_rle(source_id, r["masks"], r["scores"]) submission.append(rle) # Save image with masks # commented out lines to only return output of detection, no viz or saves for api calls # visualize.display_instances( # image, # r["rois"], # r["masks"], # r["class_ids"], # dataset.class_names, # r["scores"], # show_bbox=False, # show_mask=False, # title="Predictions", # ) # plt.savefig("{}/{}.png".format(submit_dir, dataset.image_info[image_id]["id"])) # Save to csv file submission = "ImageId,EncodedPixels\n" + "\n".join(submission) # file_path = os.path.join(submit_dir, "submit.csv") # with open(file_path, "w") as f: # f.write(submission) # print("Saved to ", submit_dir) return submission ############################################################ # Command Line ############################################################ if __name__ == "__main__": import argparse # Parse command line arguments parser = argparse.ArgumentParser( description="Mask R-CNN for fields counting and segmentation" ) parser.add_argument( "command", metavar="", help="'preprocess' or 'train' or 'detect. preprocess takes no arguments.'", ) parser.add_argument( "--dataset", required=False, metavar="/path/to/dataset/", help="Root directory of the dataset", ) parser.add_argument( "--weights", required=False, metavar="/path/to/weights.h5", help="Path to weights .h5 file or 'coco'", ) parser.add_argument( "--logs", required=False, default=DEFAULT_LOGS_DIR, metavar="/path/to/logs/", help="Logs and checkpoints directory (default=logs/)", ) parser.add_argument( "--subset", required=False, metavar="Dataset sub-directory", help="Subset of dataset to run prediction on", ) args = parser.parse_args() if args.command == "preprocess": wflow = PreprocessWorkflow() wflow.run_single_scene() else: # Validate arguments if args.command == "train": assert args.dataset, "Argument --dataset is required for training" elif args.command == "detect": assert args.subset, "Provide --subset to run prediction on" print("Weights: ", args.weights) print("Dataset: ", args.dataset) if args.subset: print("Subset: ", args.subset) print("Logs: ", args.logs) # Configurations if args.command == "train": config = model_configs.LandsatConfig(3) else: config = model_configs.LandsatInferenceConfig(3) config.display() # Create model if args.command == "train": model = modellib.MaskRCNN( mode="training", config=config, model_dir=args.logs ) else: model = modellib.MaskRCNN( mode="inference", config=config, model_dir=args.logs ) if args.weights is not None: # Select weights file to load if args.weights.lower() == "coco": weights_path = COCO_WEIGHTS_PATH # Download weights file if not os.path.exists(weights_path): utils.download_trained_weights(weights_path) elif args.weights.lower() == "last": # Find last trained weights weights_path = model.find_last() elif args.weights.lower() == "imagenet": # Start from ImageNet trained weights weights_path = model.get_imagenet_weights() else: weights_path = args.weights # Load weights print("Loading weights ", weights_path) if args.weights.lower() == "coco": # Exclude the last layers because they require a matching # number of classes model.load_weights( weights_path, by_name=True, exclude=[ "mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask", ], ) else: model.load_weights(weights_path, by_name=True) # Train or evaluate if args.command == "train": os.chdir(ROOT_DIR) print(os.getcwd(), "current working dir") train(model, 
args.dataset, args.subset, config) elif args.command == "detect": detect(model, args.dataset, args.subset, wflow) else: print( "'{}' is not recognized. " "Use 'train' or 'detect'".format(args.command) ) PKaNJcropmask/run_preprocess.pyfrom cropmask.preprocess import * import time wflow = PreprocessWorkflow("/home/ryan/work/CropMask_RCNN/cropmask/preprocess_config.yaml", "/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/unpacked_landsat_downloads/032031/LT050320312005082801T1-SC20190418222350/", "/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/external/nebraska_pivots_projected.geojson") if __name__ == "__main__": wflow.run_single_scene()PKaNC77cropmask/sequential_grid.py#sequential gridding of a single landsat scene is currently faster than threading or multiprocessing in grid.py import os from itertools import product import rasterio from rasterio import windows from multiprocessing.dummy import Pool as ThreadPool def get_tiles_for_threaded_map(ds, width, height): """ Returns a list of tuple where each tuple is the window and transform information for the image chip. Args: ds (rasterio dataset): A rasterio object read with open() width (int): the width of a tile/window/chip height (int): height of the tile/window/chip Returns: a list of tuples, where the first element of the tuple is a window and the next is the transform """ nols, nrows = ds.meta['width'], ds.meta['height'] offsets = product(range(0, nols, width), range(0, nrows, height)) big_window = windows.Window(col_off=0, row_off=0, width=nols, height=nrows) chip_list = [] def get_win(ds, col_off, row_off, width, height, big_window): """Helper func to get the window and transform for a particular section of an image Args: ds (rasterio dataset): A rasterio object read with rasterio.open() col_off (int): the column of the window, the upper left corner row_off (int): the row of the window, the upper left corner width (int): the width of a tile/window/chip height (int): height of the tile/window/chip big_window (rasterio.windows.Window): used to deal with windows that extend beyond the source image Returns: Returns the bounds of each image chip/tile as a rasterio window object as well as the transform as a tuple like (rasterio.windows.Window, transform) """ window =windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window) transform = windows.transform(window, ds.transform) return (window, transform) chip_list = list(map(lambda x: get_win(ds, x[0], x[1], width, height, big_window), offsets)) return chip_list def write_by_window(ds, out_dir, output_name_template, meta, window, transform): """Writes out a window of a larger image given a widnow and transform. 
Args: ds (rasterio dataset): A rasterio object read with open() out_dir (str): the output directory for the image chip output_name_template (str): string with curly braces for naming tiles by indices for uniquiness meta (dict): meta data of the ds window (rasterio.windows.Window): the window to read and write transform (rasterio transform object): the affine transformation for the window Returns: Returns the outpath of the window that has been written as a tile """ meta['transform'] = transform meta['width'], meta['height'] = window.width, window.height outpath = os.path.join(out_dir,output_name_template.format(int(window.col_off), int(window.row_off))) with rasterio.open(outpath, 'w', **meta) as outds: outds.write(ds.read(window=window)) return outpath def grid_images_rasterio_sequential(in_path, out_dir, output_name_template='tile_{}-{}.tif', grid_size=512): """Combines get_tiles_for_threaded_map, map_threads, and write_by_window to write out tiles of an image Args: in_path (str): Path to a raster for which to read with raterio.open() out_dir (str): the output directory for the image chip output_name_template (str): string with curly braces for naming tiles by indices for uniquiness grid_size (int): length in pixels of a side of a single window/tile/chip Returns: Returns the outpaths of the tiles. """ with rasterio.open(in_path) as src: meta = src.meta.copy() chip_list = get_tiles_for_threaded_map(src, width=grid_size, height=grid_size) out_paths = list(map(lambda x: write_by_window(src, out_dir, output_name_template, meta, x[0], x[1]), chip_list)) #change to map_threads for threading but currently fails partway return out_paths PKaNcropmask/azuresetup/__init__.pyPKaN9,,'cropmask/azuresetup/create_workspace.pyimport yaml from azureml.core import Workspace from azureml.core.authentication import ServicePrincipalAuthentication with open("/home/rave/azure_configs.yaml") as f: configs = yaml.safe_load(f) ws = Workspace.create( name=configs["account"]["workspace_name"], subscription_id=configs["account"]["subscription_id"], resource_group=configs["account"]["perm_resource_group"], # make sure this is different from terraform's resource group or terraform desstroy will delete it, very bad!!! location=configs["account"]["location"], auth=ServicePrincipalAuthentication( configs["account"]["tenant_id"], configs["account"]["app_id"], configs["account"]["app_key"], ), storage_account=configs["account"]['resource_id'] # get from properties of storage account ) PKaNfcropmask/azuresetup/get_SAS.pyhttps://docs.microsoft.com/en-us/azure/storage/common/storage-dotnet-shared-access-signature-part-1?toc=%2fazure%2fstorage%2fblobs%2ftoc.json https://github.com/Azure/azure-storage-python/blob/master/samples/blob/sas_usage.pyPKaN4 IMAGE_MAX_DIM. Then the image is # padded with zeros to make it a square so multiple images can be put # in one batch. # Available resizing modes: # none: No resizing or padding. Return the image unchanged. # square: Resize and pad with zeros to get a square image # of size [max_dim, max_dim]. # pad64: Pads width and height with zeros to make them multiples of 64. # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales # up before padding. IMAGE_MAX_DIM is ignored in this mode. # The multiple of 64 is needed to ensure smooth scaling of feature # maps up and down the 6 levels of the FPN pyramid (2**6=64). # crop: Picks random crops from the image. 
First, scales the image based # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. # IMAGE_MAX_DIM is not used in this mode. IMAGE_RESIZE_MODE = "square" IMAGE_MIN_DIM = 800 IMAGE_MAX_DIM = 1024 # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further # up scaling. For example, if set to 2 then images are scaled up to double # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. IMAGE_MIN_SCALE = 0 # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 # Changing this requires other changes in the code. See the WIKI for more # details: https://github.com/matterport/Mask_RCNN/wiki IMAGE_CHANNEL_COUNT = 3 # Image mean (RGB) MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) # Number of ROIs per image to feed to classifier/mask heads # The Mask RCNN paper uses 512 but often the RPN doesn't generate # enough positive proposals to fill this and keep a positive:negative # ratio of 1:3. You can increase the number of proposals by adjusting # the RPN NMS threshold. TRAIN_ROIS_PER_IMAGE = 200 # Percent of positive ROIs used to train classifier/mask heads ROI_POSITIVE_RATIO = 0.33 # Pooled ROIs POOL_SIZE = 7 MASK_POOL_SIZE = 14 # Shape of output mask # To change this you also need to change the neural network mask branch MASK_SHAPE = [28, 28] # Maximum number of ground truth instances to use in one image MAX_GT_INSTANCES = 100 # Bounding box refinement standard deviation for RPN and final detections. RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) # Max number of final detections DETECTION_MAX_INSTANCES = 100 # Minimum probability value to accept a detected instance # ROIs below this threshold are skipped DETECTION_MIN_CONFIDENCE = 0.7 # Non-maximum suppression threshold for detection DETECTION_NMS_THRESHOLD = 0.3 # Learning rate and momentum # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes # weights to explode. Likely due to differences in optimizer # implementation. LEARNING_RATE = 0.001 LEARNING_MOMENTUM = 0.9 # Weight decay regularization WEIGHT_DECAY = 0.0001 # Loss weights for more precise optimization. # Can be used for R-CNN training setup. LOSS_WEIGHTS = { "rpn_class_loss": 1.0, "rpn_bbox_loss": 1.0, "mrcnn_class_loss": 1.0, "mrcnn_bbox_loss": 1.0, "mrcnn_mask_loss": 1.0, } # Use RPN ROIs or externally generated ROIs for training # Keep this True for most situations. Set to False if you want to train # the head branches on ROI generated by code rather than the ROIs from # the RPN. For example, to debug the classifier head without having to # train the RPN. USE_RPN_ROIS = True # Train or freeze batch normalization layers # None: Train BN layers. This is the normal mode # False: Freeze BN layers. Good when using a small batch size # True: (don't use). 
Set layer in training mode even when predicting TRAIN_BN = False # Defaulting to False since batch size is often small # Gradient norm clipping GRADIENT_CLIP_NORM = 5.0 def __init__(self): """Set values of computed attributes.""" # Effective batch size self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT # Input image size if self.IMAGE_RESIZE_MODE == "crop": self.IMAGE_SHAPE = np.array( [self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, self.IMAGE_CHANNEL_COUNT] ) else: self.IMAGE_SHAPE = np.array( [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, self.IMAGE_CHANNEL_COUNT] ) # Image meta data length # See compose_image_meta() for details self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES def display(self): """Display Configuration values.""" print("\nConfigurations:") for a in dir(self): if not a.startswith("__") and not callable(getattr(self, a)): print("{:30} {}".format(a, getattr(self, a))) print("\n") PKaN cropmask/mrcnn/model.py""" Mask R-CNN The main Mask R-CNN model implementation. Copyright (c) 2017 Matterport, Inc. Licensed under the MIT License (see LICENSE for details) Written by Waleed Abdulla """ import os import random import datetime import re import math import logging from collections import OrderedDict import multiprocessing import numpy as np import tensorflow as tf import keras import keras.backend as K import keras.layers as KL import keras.engine as KE import keras.models as KM from cropmask.mrcnn import utils # Requires TensorFlow 1.3+ and Keras 2.0.8+. from distutils.version import LooseVersion assert LooseVersion(tf.__version__) >= LooseVersion("1.3") assert LooseVersion(keras.__version__) >= LooseVersion("2.0.8") ############################################################ # Utility Functions ############################################################ def log(text, array=None): """Prints a text message. And, optionally, if a Numpy array is provided it prints it's shape, min, and max values. """ if array is not None: text = text.ljust(25) text += "shape: {:20} ".format(str(array.shape)) if array.size: text += "min: {:10.5f} max: {:10.5f}".format(array.min(), array.max()) else: text += "min: {:10} max: {:10}".format("", "") text += " {}".format(array.dtype) print(text) class BatchNorm(KL.BatchNormalization): """Extends the Keras BatchNormalization class to allow a central place to make changes if needed. Batch normalization has a negative effect on training if batches are small so this layer is often frozen (via setting in Config class) and functions as linear layer. """ def call(self, inputs, training=None): """ Note about training values: None: Train BN layers. This is the normal mode False: Freeze BN layers. Good when batch size is small True: (don't use). Set layer in training mode even when making inferences """ return super(self.__class__, self).call(inputs, training=training) def compute_backbone_shapes(config, image_shape): """Computes the width and height of each stage of the backbone network. Returns: [N, (height, width)]. 
Where N is the number of stages """ if callable(config.BACKBONE): return config.COMPUTE_BACKBONE_SHAPE(image_shape) # Currently supports ResNet only assert config.BACKBONE in ["resnet50", "resnet101"] return np.array( [ [ int(math.ceil(image_shape[0] / stride)), int(math.ceil(image_shape[1] / stride)), ] for stride in config.BACKBONE_STRIDES ] ) ############################################################ # Resnet Graph ############################################################ # Code adopted from: # https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py def identity_block( input_tensor, kernel_size, filters, stage, block, use_bias=True, train_bn=True ): """The identity_block is the block that has no conv layer at shortcut # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the nb_filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names use_bias: Boolean. To use or not use a bias in conv layers. train_bn: Boolean. Train or freeze Batch Norm layers """ nb_filter1, nb_filter2, nb_filter3 = filters conv_name_base = "res" + str(stage) + block + "_branch" bn_name_base = "bn" + str(stage) + block + "_branch" x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + "2a", use_bias=use_bias)( input_tensor ) x = BatchNorm(name=bn_name_base + "2a")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.Conv2D( nb_filter2, (kernel_size, kernel_size), padding="same", name=conv_name_base + "2b", use_bias=use_bias, )(x) x = BatchNorm(name=bn_name_base + "2b")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + "2c", use_bias=use_bias)(x) x = BatchNorm(name=bn_name_base + "2c")(x, training=train_bn) x = KL.Add()([x, input_tensor]) x = KL.Activation("relu", name="res" + str(stage) + block + "_out")(x) return x def conv_block( input_tensor, kernel_size, filters, stage, block, strides=(2, 2), use_bias=True, train_bn=True, ): """conv_block is the block that has a conv layer at shortcut # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the nb_filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names use_bias: Boolean. To use or not use a bias in conv layers. train_bn: Boolean. 
Train or freeze Batch Norm layers Note that from stage 3, the first conv layer at main path is with subsample=(2,2) And the shortcut should have subsample=(2,2) as well """ nb_filter1, nb_filter2, nb_filter3 = filters conv_name_base = "res" + str(stage) + block + "_branch" bn_name_base = "bn" + str(stage) + block + "_branch" x = KL.Conv2D( nb_filter1, (1, 1), strides=strides, name=conv_name_base + "2a", use_bias=use_bias, )(input_tensor) x = BatchNorm(name=bn_name_base + "2a")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.Conv2D( nb_filter2, (kernel_size, kernel_size), padding="same", name=conv_name_base + "2b", use_bias=use_bias, )(x) x = BatchNorm(name=bn_name_base + "2b")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + "2c", use_bias=use_bias)(x) x = BatchNorm(name=bn_name_base + "2c")(x, training=train_bn) shortcut = KL.Conv2D( nb_filter3, (1, 1), strides=strides, name=conv_name_base + "1", use_bias=use_bias, )(input_tensor) shortcut = BatchNorm(name=bn_name_base + "1")(shortcut, training=train_bn) x = KL.Add()([x, shortcut]) x = KL.Activation("relu", name="res" + str(stage) + block + "_out")(x) return x def resnet_graph(input_image, architecture, stage5=False, train_bn=True): """Build a ResNet graph. architecture: Can be resnet50 or resnet101 stage5: Boolean. If False, stage5 of the network is not created train_bn: Boolean. Train or freeze Batch Norm layers """ assert architecture in ["resnet50", "resnet101"] # Stage 1 x = KL.ZeroPadding2D((3, 3))(input_image) x = KL.Conv2D(64, (7, 7), strides=(2, 2), name="conv1", use_bias=True)(x) x = BatchNorm(name="bn_conv1")(x, training=train_bn) x = KL.Activation("relu")(x) C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) # Stage 2 x = conv_block( x, 3, [64, 64, 256], stage=2, block="a", strides=(1, 1), train_bn=train_bn ) x = identity_block(x, 3, [64, 64, 256], stage=2, block="b", train_bn=train_bn) C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block="c", train_bn=train_bn) # Stage 3 x = conv_block(x, 3, [128, 128, 512], stage=3, block="a", train_bn=train_bn) x = identity_block(x, 3, [128, 128, 512], stage=3, block="b", train_bn=train_bn) x = identity_block(x, 3, [128, 128, 512], stage=3, block="c", train_bn=train_bn) C3 = x = identity_block( x, 3, [128, 128, 512], stage=3, block="d", train_bn=train_bn ) # Stage 4 x = conv_block(x, 3, [256, 256, 1024], stage=4, block="a", train_bn=train_bn) block_count = {"resnet50": 5, "resnet101": 22}[architecture] for i in range(block_count): x = identity_block( x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn ) C4 = x # Stage 5 if stage5: x = conv_block(x, 3, [512, 512, 2048], stage=5, block="a", train_bn=train_bn) x = identity_block( x, 3, [512, 512, 2048], stage=5, block="b", train_bn=train_bn ) C5 = x = identity_block( x, 3, [512, 512, 2048], stage=5, block="c", train_bn=train_bn ) else: C5 = None return [C1, C2, C3, C4, C5] ############################################################ # Proposal Layer ############################################################ def apply_box_deltas_graph(boxes, deltas): """Applies the given deltas to the given boxes. 
boxes: [N, (y1, x1, y2, x2)] boxes to update deltas: [N, (dy, dx, log(dh), log(dw))] refinements to apply """ # Convert to y, x, h, w height = boxes[:, 2] - boxes[:, 0] width = boxes[:, 3] - boxes[:, 1] center_y = boxes[:, 0] + 0.5 * height center_x = boxes[:, 1] + 0.5 * width # Apply deltas center_y += deltas[:, 0] * height center_x += deltas[:, 1] * width height *= tf.exp(deltas[:, 2]) width *= tf.exp(deltas[:, 3]) # Convert back to y1, x1, y2, x2 y1 = center_y - 0.5 * height x1 = center_x - 0.5 * width y2 = y1 + height x2 = x1 + width result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") return result def clip_boxes_graph(boxes, window): """ boxes: [N, (y1, x1, y2, x2)] window: [4] in the form y1, x1, y2, x2 """ # Split wy1, wx1, wy2, wx2 = tf.split(window, 4) y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) # Clip y1 = tf.maximum(tf.minimum(y1, wy2), wy1) x1 = tf.maximum(tf.minimum(x1, wx2), wx1) y2 = tf.maximum(tf.minimum(y2, wy2), wy1) x2 = tf.maximum(tf.minimum(x2, wx2), wx1) clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") clipped.set_shape((clipped.shape[0], 4)) return clipped class ProposalLayer(KE.Layer): """Receives anchor scores and selects a subset to pass as proposals to the second stage. Filtering is done based on anchor scores and non-max suppression to remove overlaps. It also applies bounding box refinement deltas to anchors. Inputs: rpn_probs: [batch, num_anchors, (bg prob, fg prob)] rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))] anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates Returns: Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] """ def __init__(self, proposal_count, nms_threshold, config=None, **kwargs): super(ProposalLayer, self).__init__(**kwargs) self.config = config self.proposal_count = proposal_count self.nms_threshold = nms_threshold def call(self, inputs): # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] scores = inputs[0][:, :, 1] # Box deltas [batch, num_rois, 4] deltas = inputs[1] deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) # Anchors anchors = inputs[2] # Improve performance by trimming to top anchors by score # and doing the rest on the smaller subset. pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, name="top_anchors").indices scores = utils.batch_slice( [scores, ix], lambda x, y: tf.gather(x, y), self.config.IMAGES_PER_GPU ) deltas = utils.batch_slice( [deltas, ix], lambda x, y: tf.gather(x, y), self.config.IMAGES_PER_GPU ) pre_nms_anchors = utils.batch_slice( [anchors, ix], lambda a, x: tf.gather(a, x), self.config.IMAGES_PER_GPU, names=["pre_nms_anchors"], ) # Apply deltas to anchors to get refined anchors. # [batch, N, (y1, x1, y2, x2)] boxes = utils.batch_slice( [pre_nms_anchors, deltas], lambda x, y: apply_box_deltas_graph(x, y), self.config.IMAGES_PER_GPU, names=["refined_anchors"], ) # Clip to image boundaries. Since we're in normalized coordinates, # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)] window = np.array([0, 0, 1, 1], dtype=np.float32) boxes = utils.batch_slice( boxes, lambda x: clip_boxes_graph(x, window), self.config.IMAGES_PER_GPU, names=["refined_anchors_clipped"], ) # Filter out small boxes # According to Xinlei Chen's paper, this reduces detection accuracy # for small objects, so we're skipping it. 
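        # Added note (illustrative; the exact proposal_count value is an
        # assumption): the per-image NMS below always returns exactly
        # `proposal_count` rows. For example, if proposal_count were 2000 and
        # only 1400 proposals survived NMS for one image, the remaining 600
        # rows would be zero-padded so every batch item yields a
        # [proposal_count, 4] tensor of normalized boxes.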
# Non-max suppression def nms(boxes, scores): indices = tf.image.non_max_suppression( boxes, scores, self.proposal_count, self.nms_threshold, name="rpn_non_max_suppression", ) proposals = tf.gather(boxes, indices) # Pad if needed padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) proposals = tf.pad(proposals, [(0, padding), (0, 0)]) return proposals proposals = utils.batch_slice([boxes, scores], nms, self.config.IMAGES_PER_GPU) return proposals def compute_output_shape(self, input_shape): return (None, self.proposal_count, 4) ############################################################ # ROIAlign Layer ############################################################ def log2_graph(x): """Implementation of Log2. TF doesn't have a native implementation.""" return tf.log(x) / tf.log(2.0) class PyramidROIAlign(KE.Layer): """Implements ROI Pooling on multiple levels of the feature pyramid. Params: - pool_shape: [pool_height, pool_width] of the output pooled regions. Usually [7, 7] Inputs: - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized coordinates. Possibly padded with zeros if not enough boxes to fill the array. - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - feature_maps: List of feature maps from different levels of the pyramid. Each is [batch, height, width, channels] Output: Pooled regions in the shape: [batch, num_boxes, pool_height, pool_width, channels]. The width and height are those specific in the pool_shape in the layer constructor. """ def __init__(self, pool_shape, **kwargs): super(PyramidROIAlign, self).__init__(**kwargs) self.pool_shape = tuple(pool_shape) def call(self, inputs): # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords boxes = inputs[0] # Image meta # Holds details about the image. See compose_image_meta() image_meta = inputs[1] # Feature Maps. List of feature maps from different level of the # feature pyramid. Each is [batch, height, width, channels] feature_maps = inputs[2:] # Assign each ROI to a level in the pyramid based on the ROI area. y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) h = y2 - y1 w = x2 - x1 # Use shape of first image. Images in a batch must have the same size. image_shape = parse_image_meta_graph(image_meta)["image_shape"][0] # Equation 1 in the Feature Pyramid Networks paper. Account for # the fact that our coordinates are normalized here. # e.g. a 224x224 ROI (in pixels) maps to P4 image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32) roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) roi_level = tf.minimum( 5, tf.maximum(2, 4 + tf.cast(tf.round(roi_level), tf.int32)) ) roi_level = tf.squeeze(roi_level, 2) # Loop through levels and apply ROI pooling to each. P2 to P5. pooled = [] box_to_level = [] for i, level in enumerate(range(2, 6)): ix = tf.where(tf.equal(roi_level, level)) level_boxes = tf.gather_nd(boxes, ix) # Box indices for crop_and_resize. box_indices = tf.cast(ix[:, 0], tf.int32) # Keep track of which box is mapped to which level box_to_level.append(ix) # Stop gradient propogation to ROI proposals level_boxes = tf.stop_gradient(level_boxes) box_indices = tf.stop_gradient(box_indices) # Crop and Resize # From Mask R-CNN paper: "We sample four regular locations, so # that we can evaluate either max or average pooling. In fact, # interpolating only a single value at each bin center (without # pooling) is nearly as effective." 
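            # Added worked example for the ROI-level assignment above
            # (illustrative; assumes a 1024x1024 input image): a 224x224-pixel
            # ROI gives log2(1) = 0 and maps to P4, a 112px ROI gives
            # log2(0.5) = -1 and maps to P3, and a 448px ROI maps to P5, with
            # the result clamped to the P2..P5 range used by the heads.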
# # Here we use the simplified approach of a single value per bin, # which is how it's done in tf.crop_and_resize() # Result: [batch * num_boxes, pool_height, pool_width, channels] pooled.append( tf.image.crop_and_resize( feature_maps[i], level_boxes, box_indices, self.pool_shape, method="bilinear", ) ) # Pack pooled features into one tensor pooled = tf.concat(pooled, axis=0) # Pack box_to_level mapping into one array and add another # column representing the order of pooled boxes box_to_level = tf.concat(box_to_level, axis=0) box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], axis=1) # Rearrange pooled features to match the order of the original boxes # Sort box_to_level by batch then box index # TF doesn't have a way to sort by two columns, so merge them and sort. sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] ix = tf.nn.top_k(sorting_tensor, k=tf.shape(box_to_level)[0]).indices[::-1] ix = tf.gather(box_to_level[:, 2], ix) pooled = tf.gather(pooled, ix) # Re-add the batch dimension shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0) pooled = tf.reshape(pooled, shape) return pooled def compute_output_shape(self, input_shape): return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1],) ############################################################ # Detection Target Layer ############################################################ def overlaps_graph(boxes1, boxes2): """Computes IoU overlaps between two sets of boxes. boxes1, boxes2: [N, (y1, x1, y2, x2)]. """ # 1. Tile boxes2 and repeat boxes1. This allows us to compare # every boxes1 against every boxes2 without loops. # TF doesn't have an equivalent to np.repeat() so simulate it # using tf.tile() and tf.reshape. b1 = tf.reshape( tf.tile(tf.expand_dims(boxes1, 1), [1, 1, tf.shape(boxes2)[0]]), [-1, 4] ) b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) # 2. Compute intersections b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) y1 = tf.maximum(b1_y1, b2_y1) x1 = tf.maximum(b1_x1, b2_x1) y2 = tf.minimum(b1_y2, b2_y2) x2 = tf.minimum(b1_x2, b2_x2) intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) # 3. Compute unions b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) union = b1_area + b2_area - intersection # 4. Compute IoU and reshape to [boxes1, boxes2] iou = intersection / union overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) return overlaps def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): """Generates detection targets for one image. Subsamples proposals and generates target class IDs, bounding box deltas, and masks for each. Inputs: proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might be zero padded if there are not enough proposals. gt_class_ids: [MAX_GT_INSTANCES] int class IDs gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks. rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))] masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox boundaries and resized to neural network output size. 
Note: Returned arrays might be zero padded if not enough target ROIs. """ # Assertions asserts = [ tf.Assert( tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion" ) ] with tf.control_dependencies(asserts): proposals = tf.identity(proposals) # Remove zero padding proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, name="trim_gt_class_ids") gt_masks = tf.gather( gt_masks, tf.where(non_zeros)[:, 0], axis=2, name="trim_gt_masks" ) # Handle COCO crowds # A crowd box in COCO is a bounding box around several instances. Exclude # them from training. A crowd box is given a negative class ID. crowd_ix = tf.where(gt_class_ids < 0)[:, 0] non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] crowd_boxes = tf.gather(gt_boxes, crowd_ix) gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) gt_boxes = tf.gather(gt_boxes, non_crowd_ix) gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) # Compute overlaps matrix [proposals, gt_boxes] overlaps = overlaps_graph(proposals, gt_boxes) # Compute overlaps with crowd boxes [proposals, crowd_boxes] crowd_overlaps = overlaps_graph(proposals, crowd_boxes) crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) no_crowd_bool = crowd_iou_max < 0.001 # Determine positive and negative ROIs roi_iou_max = tf.reduce_max(overlaps, axis=1) # 1. Positive ROIs are those with >= 0.5 IoU with a GT box positive_roi_bool = roi_iou_max >= 0.5 positive_indices = tf.where(positive_roi_bool)[:, 0] # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] # Subsample ROIs. Aim for 33% positive # Positive ROIs positive_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) positive_indices = tf.random_shuffle(positive_indices)[:positive_count] positive_count = tf.shape(positive_indices)[0] # Negative ROIs. Add enough to maintain positive:negative ratio. r = 1.0 / config.ROI_POSITIVE_RATIO negative_count = ( tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count ) negative_indices = tf.random_shuffle(negative_indices)[:negative_count] # Gather selected ROIs positive_rois = tf.gather(proposals, positive_indices) negative_rois = tf.gather(proposals, negative_indices) # Assign positive ROIs to GT boxes. positive_overlaps = tf.gather(overlaps, positive_indices) roi_gt_box_assignment = tf.cond( tf.greater(tf.shape(positive_overlaps)[1], 0), true_fn=lambda: tf.argmax(positive_overlaps, axis=1), false_fn=lambda: tf.cast(tf.constant([]), tf.int64), ) roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) # Compute bbox refinement for positive ROIs deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) deltas /= config.BBOX_STD_DEV # Assign positive ROIs to GT masks # Permute masks to [N, height, width, 1] transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) # Pick the right mask for each ROI roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) # Compute mask targets boxes = positive_rois if config.USE_MINI_MASK: # Transform ROI coordinates from normalized image space # to normalized mini-mask space. 
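        # Hypothetical worked example (not from the original source): if a GT
        # box spans [0.2, 0.2, 0.6, 0.6] and a positive ROI spans
        # [0.3, 0.3, 0.5, 0.5] in normalized image coordinates, then
        # gt_h = gt_w = 0.4 and the ROI becomes [0.25, 0.25, 0.75, 0.75] in
        # mini-mask coordinates, i.e. crop_and_resize below samples the middle
        # half of the stored mini mask.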
y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) gt_h = gt_y2 - gt_y1 gt_w = gt_x2 - gt_x1 y1 = (y1 - gt_y1) / gt_h x1 = (x1 - gt_x1) / gt_w y2 = (y2 - gt_y1) / gt_h x2 = (x2 - gt_x1) / gt_w boxes = tf.concat([y1, x1, y2, x2], 1) box_ids = tf.range(0, tf.shape(roi_masks)[0]) masks = tf.image.crop_and_resize( tf.cast(roi_masks, tf.float32), boxes, box_ids, config.MASK_SHAPE ) # Remove the extra dimension from masks. masks = tf.squeeze(masks, axis=3) # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with # binary cross entropy loss. masks = tf.round(masks) # Append negative ROIs and pad bbox deltas and masks that # are not used for negative ROIs with zeros. rois = tf.concat([positive_rois, negative_rois], axis=0) N = tf.shape(negative_rois)[0] P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) rois = tf.pad(rois, [(0, P), (0, 0)]) roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) return rois, roi_gt_class_ids, deltas, masks class DetectionTargetLayer(KE.Layer): """Subsamples proposals and generates target box refinement, class_ids, and masks for each. Inputs: proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might be zero padded if there are not enough proposals. gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks. rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)] target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width] Masks cropped to bbox boundaries and resized to neural network output size. Note: Returned arrays might be zero padded if not enough target ROIs. """ def __init__(self, config, **kwargs): super(DetectionTargetLayer, self).__init__(**kwargs) self.config = config def call(self, inputs): proposals = inputs[0] gt_class_ids = inputs[1] gt_boxes = inputs[2] gt_masks = inputs[3] # Slice the batch and run a graph for each slice # TODO: Rename target_bbox to target_deltas for clarity names = ["rois", "target_class_ids", "target_bbox", "target_mask"] outputs = utils.batch_slice( [proposals, gt_class_ids, gt_boxes, gt_masks], lambda w, x, y, z: detection_targets_graph(w, x, y, z, self.config), self.config.IMAGES_PER_GPU, names=names, ) return outputs def compute_output_shape(self, input_shape): return [ (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas ( None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], self.config.MASK_SHAPE[1], ), # masks ] def compute_mask(self, inputs, mask=None): return [None, None, None, None] ############################################################ # Detection Layer ############################################################ def refine_detections_graph(rois, probs, deltas, window, config): """Refine classified proposals and filter overlaps and return final detections. Inputs: rois: [N, (y1, x1, y2, x2)] in normalized coordinates probs: [N, num_classes]. 
Class probabilities. deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific bounding box deltas. window: (y1, x1, y2, x2) in normalized coordinates. The part of the image that contains the image excluding the padding. Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where coordinates are normalized. """ # Class IDs per ROI class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) # Class probability of the top class of each ROI indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) class_scores = tf.gather_nd(probs, indices) # Class-specific bounding box deltas deltas_specific = tf.gather_nd(deltas, indices) # Apply bounding box deltas # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates refined_rois = apply_box_deltas_graph(rois, deltas_specific * config.BBOX_STD_DEV) # Clip boxes to image window refined_rois = clip_boxes_graph(refined_rois, window) # TODO: Filter out boxes with zero area # Filter out background boxes keep = tf.where(class_ids > 0)[:, 0] # Filter out low confidence boxes if config.DETECTION_MIN_CONFIDENCE: conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] keep = tf.sets.set_intersection( tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0) ) keep = tf.sparse_tensor_to_dense(keep)[0] # Apply per-class NMS # 1. Prepare variables pre_nms_class_ids = tf.gather(class_ids, keep) pre_nms_scores = tf.gather(class_scores, keep) pre_nms_rois = tf.gather(refined_rois, keep) unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] def nms_keep_map(class_id): """Apply Non-Maximum Suppression on ROIs of the given class.""" # Indices of ROIs of the given class ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] # Apply NMS class_keep = tf.image.non_max_suppression( tf.gather(pre_nms_rois, ixs), tf.gather(pre_nms_scores, ixs), max_output_size=config.DETECTION_MAX_INSTANCES, iou_threshold=config.DETECTION_NMS_THRESHOLD, ) # Map indices class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) # Pad with -1 so returned tensors have the same shape gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] class_keep = tf.pad(class_keep, [(0, gap)], mode="CONSTANT", constant_values=-1) # Set shape so map_fn() can infer result shape class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) return class_keep # 2. Map over class IDs nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int64) # 3. Merge results into one list, and remove -1 padding nms_keep = tf.reshape(nms_keep, [-1]) nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) # 4. Compute intersection between keep and nms_keep keep = tf.sets.set_intersection( tf.expand_dims(keep, 0), tf.expand_dims(nms_keep, 0) ) keep = tf.sparse_tensor_to_dense(keep)[0] # Keep top detections roi_count = config.DETECTION_MAX_INSTANCES class_scores_keep = tf.gather(class_scores, keep) num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] keep = tf.gather(keep, top_ids) # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] # Coordinates are normalized. 
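    # Illustration (added note): with DETECTION_MAX_INSTANCES = 100 and, say,
    # 37 boxes surviving the confidence filter and per-class NMS, the tensor
    # built below is [100, 6]; its first 37 rows hold real detections and the
    # remaining 63 rows are zero padding appended at the end.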
detections = tf.concat( [ tf.gather(refined_rois, keep), tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], tf.gather(class_scores, keep)[..., tf.newaxis], ], axis=1, ) # Pad with zeros if detections < DETECTION_MAX_INSTANCES gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") return detections class DetectionLayer(KE.Layer): """Takes classified proposal boxes and their bounding box deltas and returns the final detection boxes. Returns: [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where coordinates are normalized. """ def __init__(self, config=None, **kwargs): super(DetectionLayer, self).__init__(**kwargs) self.config = config def call(self, inputs): rois = inputs[0] mrcnn_class = inputs[1] mrcnn_bbox = inputs[2] image_meta = inputs[3] # Get windows of images in normalized coordinates. Windows are the area # in the image that excludes the padding. # Use the shape of the first image in the batch to normalize the window # because we know that all images get resized to the same size. m = parse_image_meta_graph(image_meta) image_shape = m["image_shape"][0] window = norm_boxes_graph(m["window"], image_shape[:2]) # Run detection refinement graph on each item in the batch detections_batch = utils.batch_slice( [rois, mrcnn_class, mrcnn_bbox, window], lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), self.config.IMAGES_PER_GPU, ) # Reshape output # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in # normalized coordinates return tf.reshape( detections_batch, [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6], ) def compute_output_shape(self, input_shape): return (None, self.config.DETECTION_MAX_INSTANCES, 6) ############################################################ # Region Proposal Network (RPN) ############################################################ def rpn_graph(feature_map, anchors_per_location, anchor_stride): """Builds the computation graph of Region Proposal Network. feature_map: backbone features [batch, height, width, depth] anchors_per_location: number of anchors per pixel in the feature map anchor_stride: Controls the density of anchors. Typically 1 (anchors for every pixel in the feature map), or 2 (every other pixel). Returns: rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be applied to anchors. """ # TODO: check if stride of 2 causes alignment issues if the feature map # is not even. # Shared convolutional base of the RPN shared = KL.Conv2D( 512, (3, 3), padding="same", activation="relu", strides=anchor_stride, name="rpn_conv_shared", )(feature_map) # Anchor Score. [batch, height, width, anchors per location * 2]. x = KL.Conv2D( 2 * anchors_per_location, (1, 1), padding="valid", activation="linear", name="rpn_class_raw", )(shared) # Reshape to [batch, anchors, 2] rpn_class_logits = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) # Softmax on last dimension of BG/FG. rpn_probs = KL.Activation("softmax", name="rpn_class_xxx")(rpn_class_logits) # Bounding box refinement. 
[batch, H, W, anchors per location * depth] # where depth is [x, y, log(w), log(h)] x = KL.Conv2D( anchors_per_location * 4, (1, 1), padding="valid", activation="linear", name="rpn_bbox_pred", )(shared) # Reshape to [batch, anchors, 4] rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x) return [rpn_class_logits, rpn_probs, rpn_bbox] def build_rpn_model(anchor_stride, anchors_per_location, depth): """Builds a Keras model of the Region Proposal Network. It wraps the RPN graph so it can be used multiple times with shared weights. anchors_per_location: number of anchors per pixel in the feature map anchor_stride: Controls the density of anchors. Typically 1 (anchors for every pixel in the feature map), or 2 (every other pixel). depth: Depth of the backbone feature map. Returns a Keras Model object. The model outputs, when called, are: rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be applied to anchors. """ input_feature_map = KL.Input( shape=[None, None, depth], name="input_rpn_feature_map" ) outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride) return KM.Model([input_feature_map], outputs, name="rpn_model") ############################################################ # Feature Pyramid Network Heads ############################################################ def fpn_classifier_graph( rois, feature_maps, image_meta, pool_size, num_classes, train_bn=True, fc_layers_size=1024, ): """Builds the computation graph of the feature pyramid network classifier and regressor heads. rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized coordinates. feature_maps: List of feature maps from different layers of the pyramid, [P2, P3, P4, P5]. Each has a different resolution. image_meta: [batch, (meta data)] Image details. See compose_image_meta() pool_size: The width of the square feature map generated from ROI Pooling. num_classes: number of classes, which determines the depth of the results train_bn: Boolean. 
Train or freeze Batch Norm layers fc_layers_size: Size of the 2 FC layers Returns: logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) probs: [batch, num_rois, NUM_CLASSES] classifier probabilities bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes """ # ROI Pooling # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] x = PyramidROIAlign([pool_size, pool_size], name="roi_align_classifier")( [rois, image_meta] + feature_maps ) # Two 1024 FC layers (implemented with Conv2D for consistency) x = KL.TimeDistributed( KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), name="mrcnn_class_conv1", )(x) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_class_bn1")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), name="mrcnn_class_conv2")( x ) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_class_bn2")(x, training=train_bn) x = KL.Activation("relu")(x) shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), name="pool_squeeze")(x) # Classifier head mrcnn_class_logits = KL.TimeDistributed( KL.Dense(num_classes), name="mrcnn_class_logits" )(shared) mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), name="mrcnn_class")( mrcnn_class_logits ) # BBox head # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] x = KL.TimeDistributed( KL.Dense(num_classes * 4, activation="linear"), name="mrcnn_bbox_fc" )(shared) # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] s = K.int_shape(x) mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox def build_fpn_mask_graph( rois, feature_maps, image_meta, pool_size, num_classes, train_bn=True ): """Builds the computation graph of the mask head of Feature Pyramid Network. rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized coordinates. feature_maps: List of feature maps from different layers of the pyramid, [P2, P3, P4, P5]. Each has a different resolution. image_meta: [batch, (meta data)] Image details. See compose_image_meta() pool_size: The width of the square feature map generated from ROI Pooling. num_classes: number of classes, which determines the depth of the results train_bn: Boolean. 
Train or freeze Batch Norm layers Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES] """ # ROI Pooling # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] x = PyramidROIAlign([pool_size, pool_size], name="roi_align_mask")( [rois, image_meta] + feature_maps ) # Conv layers x = KL.TimeDistributed( KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv1" )(x) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_mask_bn1")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.TimeDistributed( KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv2" )(x) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_mask_bn2")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.TimeDistributed( KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv3" )(x) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_mask_bn3")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.TimeDistributed( KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv4" )(x) x = KL.TimeDistributed(BatchNorm(), name="mrcnn_mask_bn4")(x, training=train_bn) x = KL.Activation("relu")(x) x = KL.TimeDistributed( KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), name="mrcnn_mask_deconv", )(x) x = KL.TimeDistributed( KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), name="mrcnn_mask", )(x) return x ############################################################ # Loss Functions ############################################################ def smooth_l1_loss(y_true, y_pred): """Implements Smooth-L1 loss. y_true and y_pred are typically: [N, 4], but could be any shape. """ diff = K.abs(y_true - y_pred) less_than_one = K.cast(K.less(diff, 1.0), "float32") loss = (less_than_one * 0.5 * diff ** 2) + (1 - less_than_one) * (diff - 0.5) return loss def rpn_class_loss_graph(rpn_match, rpn_class_logits): """RPN anchor classifier loss. rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, -1=negative, 0=neutral anchor. rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG. """ # Squeeze last dim to simplify rpn_match = tf.squeeze(rpn_match, -1) # Get anchor classes. Convert the -1/+1 match to 0/1 values. anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) # Positive and Negative anchors contribute to the loss, # but neutral anchors (match value = 0) don't. indices = tf.where(K.not_equal(rpn_match, 0)) # Pick rows that contribute to the loss and filter out the rest. rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) anchor_class = tf.gather_nd(anchor_class, indices) # Cross entropy loss loss = K.sparse_categorical_crossentropy( target=anchor_class, output=rpn_class_logits, from_logits=True ) loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) return loss def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): """Return the RPN bounding box loss graph. config: the model config object. target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. Uses 0 padding to fill in unsed bbox deltas. rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, -1=negative, 0=neutral anchor. rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] """ # Positive anchors contribute to the loss, but negative and # neutral anchors (match value of 0 or -1) don't. rpn_match = K.squeeze(rpn_match, -1) indices = tf.where(K.equal(rpn_match, 1)) # Pick bbox deltas that contribute to the loss rpn_bbox = tf.gather_nd(rpn_bbox, indices) # Trim target bounding box deltas to the same length as rpn_bbox. 
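    # Added illustration (assumes the standard behavior of batch_pack_graph,
    # which is defined elsewhere in this module): if the first image in a
    # batch has 12 positive anchors and the second has 7, batch_counts is
    # [12, 7] and the zero-padded target_bbox rows are trimmed and
    # concatenated into a [19, 4] tensor that lines up with rpn_bbox above.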
batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) target_bbox = batch_pack_graph(target_bbox, batch_counts, config.IMAGES_PER_GPU) loss = smooth_l1_loss(target_bbox, rpn_bbox) loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) return loss def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, active_class_ids): """Loss for the classifier head of Mask RCNN. target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero padding to fill in the array. pred_class_logits: [batch, num_rois, num_classes] active_class_ids: [batch, num_classes]. Has a value of 1 for classes that are in the dataset of the image, and 0 for classes that are not in the dataset. """ # During model building, Keras calls this function with # target_class_ids of type float32. Unclear why. Cast it # to int to get around it. target_class_ids = tf.cast(target_class_ids, "int64") # Find predictions of classes that are not in the dataset. pred_class_ids = tf.argmax(pred_class_logits, axis=2) # TODO: Update this line to work with batch > 1. Right now it assumes all # images in a batch have the same active_class_ids pred_active = tf.gather(active_class_ids[0], pred_class_ids) # Loss loss = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=target_class_ids, logits=pred_class_logits ) # Erase losses of predictions of classes that are not in the active # classes of the image. loss = loss * pred_active # Computer loss mean. Use only predictions that contribute # to the loss to get a correct mean. loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) return loss def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): """Loss for Mask R-CNN bounding box refinement. target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] target_class_ids: [batch, num_rois]. Integer class IDs. pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] """ # Reshape to merge batch and roi dimensions for simplicity. target_class_ids = K.reshape(target_class_ids, (-1,)) target_bbox = K.reshape(target_bbox, (-1, 4)) pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) # Only positive ROIs contribute to the loss. And only # the right class_id of each ROI. Get their indices. positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] positive_roi_class_ids = tf.cast( tf.gather(target_class_ids, positive_roi_ix), tf.int64 ) indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) # Gather the deltas (predicted and true) that contribute to loss target_bbox = tf.gather(target_bbox, positive_roi_ix) pred_bbox = tf.gather_nd(pred_bbox, indices) # Smooth-L1 Loss loss = K.switch( tf.size(target_bbox) > 0, smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), tf.constant(0.0), ) loss = K.mean(loss) return loss def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): """Mask binary cross-entropy loss for the masks head. target_masks: [batch, num_rois, height, width]. A float32 tensor of values 0 or 1. Uses zero padding to fill array. target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded. pred_masks: [batch, proposals, height, width, num_classes] float32 tensor with values from 0 to 1. """ # Reshape for simplicity. Merge first two dimensions into one. 
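    # Shape illustration (added note): with MASK_SHAPE = [28, 28], target_masks
    # goes from [batch, num_rois, 28, 28] to [batch * num_rois, 28, 28] and
    # pred_masks goes from [batch, num_rois, 28, 28, num_classes] to
    # [batch * num_rois, 28, 28, num_classes] before the class dimension is
    # moved to axis 1 so the per-ROI class mask can be gathered.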
target_class_ids = K.reshape(target_class_ids, (-1,)) mask_shape = tf.shape(target_masks) target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) pred_shape = tf.shape(pred_masks) pred_masks = K.reshape( pred_masks, (-1, pred_shape[2], pred_shape[3], pred_shape[4]) ) # Permute predicted masks to [N, num_classes, height, width] pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) # Only positive ROIs contribute to the loss. And only # the class specific mask of each ROI. positive_ix = tf.where(target_class_ids > 0)[:, 0] positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix), tf.int64) indices = tf.stack([positive_ix, positive_class_ids], axis=1) # Gather the masks (predicted and true) that contribute to loss y_true = tf.gather(target_masks, positive_ix) y_pred = tf.gather_nd(pred_masks, indices) # Compute binary cross entropy. If no positive ROIs, then return 0. # shape: [batch, roi, num_classes] loss = K.switch( tf.size(y_true) > 0, K.binary_crossentropy(target=y_true, output=y_pred), tf.constant(0.0), ) loss = K.mean(loss) return loss ############################################################ # Data Generator ############################################################ def load_image_gt( dataset, config, image_id, augment=False, augmentation=None, use_mini_mask=False ): """Load and return ground truth data for an image (image, mask, bounding boxes). augment: (deprecated. Use augmentation instead). If true, apply random image augmentation. Currently, only horizontal flipping is offered. augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. For example, passing imgaug.augmenters.Fliplr(0.5) flips images right/left 50% of the time. use_mini_mask: If False, returns full-size masks that are the same height and width as the original image. These can be big, for example 1024x1024x100 (for 100 instances). Mini masks are smaller, typically, 224x224 and are generated by extracting the bounding box of the object and resizing it to MINI_MASK_SHAPE. Returns: image: [height, width, 3] shape: the original shape of the image before resizing and cropping. class_ids: [instance_count] Integer class IDs bbox: [instance_count, (y1, x1, y2, x2)] mask: [height, width, instance_count]. The height and width are those of the image unless use_mini_mask is True, in which case they are defined in MINI_MASK_SHAPE. """ # Load image and mask image = dataset.load_image(image_id) mask, class_ids = dataset.load_mask(image_id) original_shape = image.shape image, window, scale, padding, crop = utils.resize_image( image, min_dim=config.IMAGE_MIN_DIM, min_scale=config.IMAGE_MIN_SCALE, max_dim=config.IMAGE_MAX_DIM, mode=config.IMAGE_RESIZE_MODE, ) if len(mask.shape) < 3: print("Added axis to shape of: ", mask.shape) mask = np.expand_dims(mask,2) mask = utils.resize_mask(mask, scale, padding, crop) # Random horizontal flips. # TODO: will be removed in a future update in favor of augmentation if augment: logging.warning("'augment' is deprecated. 
Use 'augmentation' instead.") if random.randint(0, 1): image = np.fliplr(image) mask = np.fliplr(mask) # Augmentation # This requires the imgaug lib (https://github.com/aleju/imgaug) if augmentation: import imgaug # Augmenters that are safe to apply to masks # Some, such as Affine, have settings that make them unsafe, so always # test your augmentation on masks MASK_AUGMENTERS = [ "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud", "CropAndPad", "Affine", "PiecewiseAffine", ] def hook(images, augmenter, parents, default): """Determines which augmenters to apply to masks.""" return augmenter.__class__.__name__ in MASK_AUGMENTERS # Store shapes before augmentation to compare image_shape = image.shape mask_shape = mask.shape # Make augmenters deterministic to apply similarly to images and masks det = augmentation.to_deterministic() image = det.augment_image(image) # Change mask to np.uint8 because imgaug doesn't support np.bool mask = det.augment_image( mask.astype(np.uint8), hooks=imgaug.HooksImages(activator=hook) ) # Verify that shapes didn't change assert image.shape == image_shape, "Augmentation shouldn't change image size" assert mask.shape == mask_shape, "Augmentation shouldn't change mask size" # Change mask back to bool mask = mask.astype(np.bool) # Note that some boxes might be all zeros if the corresponding mask got cropped out. # and here is to filter them out _idx = np.sum(mask, axis=(0, 1)) > 0 mask = mask[:, :, _idx] class_ids = class_ids[_idx] # Bounding boxes. Note that some boxes might be all zeros # if the corresponding mask got cropped out. # bbox: [num_instances, (y1, x1, y2, x2)] bbox = utils.extract_bboxes(mask) # Active classes # Different datasets have different classes, so track the # classes supported in the dataset of this image. active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] active_class_ids[source_class_ids] = 1 # Resize masks to smaller size to reduce memory usage if use_mini_mask: mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) # Image meta data image_meta = compose_image_meta( image_id, original_shape, image.shape, window, scale, active_class_ids ) return image, image_meta, class_ids, bbox, mask def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config): """Generate targets for training Stage 2 classifier and mask heads. This is not used in normal training. It's useful for debugging or to train the Mask RCNN heads without using the RPN head. Inputs: rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes. gt_class_ids: [instance count] Integer class IDs gt_boxes: [instance count, (y1, x1, y2, x2)] gt_masks: [height, width, instance count] Ground truth masks. Can be full size or mini-masks. Returns: rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific bbox refinements. masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped to bbox boundaries and resized to neural network output size. 
""" assert rpn_rois.shape[0] > 0 assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( gt_class_ids.dtype ) assert gt_boxes.dtype == np.int32, "Expected int but got {}".format(gt_boxes.dtype) assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format(gt_masks.dtype) # It's common to add GT Boxes to ROIs but we don't do that here because # according to XinLei Chen's paper, it doesn't help. # Trim empty padding in gt_boxes and gt_masks parts instance_ids = np.where(gt_class_ids > 0)[0] assert instance_ids.shape[0] > 0, "Image must contain instances." gt_class_ids = gt_class_ids[instance_ids] gt_boxes = gt_boxes[instance_ids] gt_masks = gt_masks[:, :, instance_ids] # Compute areas of ROIs and ground truth boxes. rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * (rpn_rois[:, 3] - rpn_rois[:, 1]) gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1]) # Compute overlaps [rpn_rois, gt_boxes] overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) for i in range(overlaps.shape[1]): gt = gt_boxes[i] overlaps[:, i] = utils.compute_iou(gt, rpn_rois, gt_box_area[i], rpn_roi_area) # Assign ROIs to GT boxes rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) rpn_roi_iou_max = overlaps[np.arange(overlaps.shape[0]), rpn_roi_iou_argmax] # GT box assigned to each ROI rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] # Positive ROIs are those with >= 0.5 IoU with a GT box. fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) # TODO: To hard example mine or not to hard example mine, that's the question # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] # Subsample ROIs. Aim for 33% foreground. # FG fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) if fg_ids.shape[0] > fg_roi_count: keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) else: keep_fg_ids = fg_ids # BG remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] if bg_ids.shape[0] > remaining: keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) else: keep_bg_ids = bg_ids # Combine indices of ROIs to keep keep = np.concatenate([keep_fg_ids, keep_bg_ids]) # Need more? remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] if remaining > 0: # Looks like we don't have enough samples to maintain the desired # balance. Reduce requirements and fill in the rest. This is # likely different from the Mask RCNN paper. # There is a small chance we have neither fg nor bg samples. if keep.shape[0] == 0: # Pick bg regions with easier IoU threshold bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] assert bg_ids.shape[0] >= remaining keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) assert keep_bg_ids.shape[0] == remaining keep = np.concatenate([keep, keep_bg_ids]) else: # Fill the rest with repeated bg rois. keep_extra_ids = np.random.choice(keep_bg_ids, remaining, replace=True) keep = np.concatenate([keep, keep_extra_ids]) assert ( keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE ), "keep doesn't match ROI batch size {}, {}".format( keep.shape[0], config.TRAIN_ROIS_PER_IMAGE ) # Reset the gt boxes assigned to BG ROIs. rpn_roi_gt_boxes[keep_bg_ids, :] = 0 rpn_roi_gt_class_ids[keep_bg_ids] = 0 # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. 
rois = rpn_rois[keep] roi_gt_boxes = rpn_roi_gt_boxes[keep] roi_gt_class_ids = rpn_roi_gt_class_ids[keep] roi_gt_assignment = rpn_roi_iou_argmax[keep] # Class-aware bbox deltas. [y, x, log(h), log(w)] bboxes = np.zeros( (config.TRAIN_ROIS_PER_IMAGE, config.NUM_CLASSES, 4), dtype=np.float32 ) pos_ids = np.where(roi_gt_class_ids > 0)[0] bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement( rois[pos_ids], roi_gt_boxes[pos_ids, :4] ) # Normalize bbox refinements bboxes /= config.BBOX_STD_DEV # Generate class-specific target masks masks = np.zeros( ( config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES, ), dtype=np.float32, ) for i in pos_ids: class_id = roi_gt_class_ids[i] assert class_id > 0, "class id must be greater than 0" gt_id = roi_gt_assignment[i] class_mask = gt_masks[:, :, gt_id] if config.USE_MINI_MASK: # Create a mask placeholder, the size of the image placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool) # GT box gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id] gt_w = gt_x2 - gt_x1 gt_h = gt_y2 - gt_y1 # Resize mini mask to size of GT box placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = np.round( utils.resize(class_mask, (gt_h, gt_w)) ).astype(bool) # Place the mini batch in the placeholder class_mask = placeholder # Pick part of the mask and resize it y1, x1, y2, x2 = rois[i].astype(np.int32) m = class_mask[y1:y2, x1:x2] mask = utils.resize(m, config.MASK_SHAPE) masks[i, :, :, class_id] = mask return rois, roi_gt_class_ids, bboxes, masks def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): """Given the anchors and GT boxes, compute overlaps and identify positive anchors and deltas to refine them to match their corresponding GT boxes. anchors: [num_anchors, (y1, x1, y2, x2)] gt_class_ids: [num_gt_boxes] Integer class IDs. gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] Returns: rpn_match: [N] (int32) matches between anchors and GT boxes. 1 = positive anchor, -1 = negative anchor, 0 = neutral rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. """ # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) # Handle COCO crowds # A crowd box in COCO is a bounding box around several instances. Exclude # them from training. A crowd box is given a negative class ID. crowd_ix = np.where(gt_class_ids < 0)[0] if crowd_ix.shape[0] > 0: # Filter out crowds from ground truth class IDs and boxes non_crowd_ix = np.where(gt_class_ids > 0)[0] crowd_boxes = gt_boxes[crowd_ix] gt_class_ids = gt_class_ids[non_crowd_ix] gt_boxes = gt_boxes[non_crowd_ix] # Compute overlaps with crowd boxes [anchors, crowds] crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) crowd_iou_max = np.amax(crowd_overlaps, axis=1) no_crowd_bool = crowd_iou_max < 0.001 else: # All anchors don't intersect a crowd no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) # Compute overlaps [num_anchors, num_gt_boxes] overlaps = utils.compute_overlaps(anchors, gt_boxes) # Match anchors to GT Boxes # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. # Neutral anchors are those that don't match the conditions above, # and they don't influence the loss function. # However, don't keep any GT box unmatched (rare, but happens). 
Instead, # match it to the closest anchor (even if its max IoU is < 0.3). # # 1. Set negative anchors first. They get overwritten below if a GT box is # matched to them. Skip boxes in crowd areas. anchor_iou_argmax = np.argmax(overlaps, axis=1) anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 # 2. Set an anchor for each GT box (regardless of IoU value). # If multiple anchors have the same IoU match all of them gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0] rpn_match[gt_iou_argmax] = 1 # 3. Set anchors with high overlap as positive. rpn_match[anchor_iou_max >= 0.7] = 1 # Subsample to balance positive and negative anchors # Don't let positives be more than half the anchors ids = np.where(rpn_match == 1)[0] extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) if extra > 0: # Reset the extra ones to neutral ids = np.random.choice(ids, extra, replace=False) rpn_match[ids] = 0 # Same for negative proposals ids = np.where(rpn_match == -1)[0] extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - np.sum(rpn_match == 1)) if extra > 0: # Rest the extra ones to neutral ids = np.random.choice(ids, extra, replace=False) rpn_match[ids] = 0 # For positive anchors, compute shift and scale needed to transform them # to match the corresponding GT boxes. ids = np.where(rpn_match == 1)[0] ix = 0 # index into rpn_bbox # TODO: use box_refinement() rather than duplicating the code here for i, a in zip(ids, anchors[ids]): # Closest gt box (it might have IoU < 0.7) gt = gt_boxes[anchor_iou_argmax[i]] # Convert coordinates to center plus width/height. # GT Box gt_h = gt[2] - gt[0] gt_w = gt[3] - gt[1] gt_center_y = gt[0] + 0.5 * gt_h gt_center_x = gt[1] + 0.5 * gt_w # Anchor a_h = a[2] - a[0] a_w = a[3] - a[1] a_center_y = a[0] + 0.5 * a_h a_center_x = a[1] + 0.5 * a_w # Compute the bbox refinement that the RPN should predict. rpn_bbox[ix] = [ (gt_center_y - a_center_y) / a_h, (gt_center_x - a_center_x) / a_w, np.log(gt_h / a_h), np.log(gt_w / a_w), ] # Normalize rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV ix += 1 return rpn_match, rpn_bbox def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): """Generates ROI proposals similar to what a region proposal network would generate. image_shape: [Height, Width, Depth] count: Number of ROIs to generate gt_class_ids: [N] Integer ground truth class IDs gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. """ # placeholder rois = np.zeros((count, 4), dtype=np.int32) # Generate random ROIs around GT boxes (90% of count) rois_per_box = int(0.9 * count / gt_boxes.shape[0]) for i in range(gt_boxes.shape[0]): gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] h = gt_y2 - gt_y1 w = gt_x2 - gt_x1 # random boundaries r_y1 = max(gt_y1 - h, 0) r_y2 = min(gt_y2 + h, image_shape[0]) r_x1 = max(gt_x1 - w, 0) r_x2 = min(gt_x2 + w, image_shape[1]) # To avoid generating boxes with zero area, we generate double what # we need and filter out the extra. If we get fewer valid boxes # than we need, we loop and try again. 
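    # Illustration (added note): if rois_per_box were 20, the loop below draws
    # 40 candidate (y1, y2) pairs and 40 (x1, x2) pairs, drops any pair whose
    # two values differ by less than 1 pixel, keeps the first 20 of each, and
    # redraws if fewer than 20 survive. Sorting each pair afterwards ensures
    # y1 <= y2 and x1 <= x2.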
while True: y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) # Filter out zero area boxes threshold = 1 y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= threshold][:rois_per_box] x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= threshold][:rois_per_box] if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: break # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape # into x1, y1, x2, y2 order x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) box_rois = np.hstack([y1, x1, y2, x2]) rois[rois_per_box * i : rois_per_box * (i + 1)] = box_rois # Generate random ROIs anywhere in the image (10% of count) remaining_count = count - (rois_per_box * gt_boxes.shape[0]) # To avoid generating boxes with zero area, we generate double what # we need and filter out the extra. If we get fewer valid boxes # than we need, we loop and try again. while True: y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) # Filter out zero area boxes threshold = 1 y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= threshold][:remaining_count] x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= threshold][:remaining_count] if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: break # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape # into x1, y1, x2, y2 order x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) global_rois = np.hstack([y1, x1, y2, x2]) rois[-remaining_count:] = global_rois return rois def data_generator( dataset, config, shuffle=True, augment=False, augmentation=None, random_rois=0, batch_size=1, detection_targets=False, no_augmentation_sources=None, ): """A generator that returns images and corresponding target class ids, bounding box deltas, and masks. dataset: The Dataset object to pick data from config: The model config object shuffle: If True, shuffles the samples before every epoch augment: (deprecated. Use augmentation instead). If true, apply random image augmentation. Currently, only horizontal flipping is offered. augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. For example, passing imgaug.augmenters.Fliplr(0.5) flips images right/left 50% of the time. random_rois: If > 0 then generate proposals to be used to train the network classifier and mask heads. Useful if training the Mask RCNN part without the RPN. batch_size: How many images to return in each call detection_targets: If True, generate detection targets (class IDs, bbox deltas, and masks). Typically for debugging or visualizations because in trainig detection targets are generated by DetectionTargetLayer. no_augmentation_sources: Optional. List of sources to exclude for augmentation. A source is string that identifies a dataset and is defined in the Dataset class. Returns a Python generator. Upon calling next() on it, the generator returns two lists, inputs and outputs. The contents of the lists differs depending on the received arguments: inputs list: - images: [batch, H, W, C] - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. 
- gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width are those of the image unless use_mini_mask is True, in which case they are defined in MINI_MASK_SHAPE. outputs list: Usually empty in regular training. But if detection_targets is True then the outputs list contains target class_ids, bbox deltas, and masks. """ b = 0 # batch item index image_index = -1 image_ids = np.copy(dataset.image_ids) error_count = 0 no_augmentation_sources = no_augmentation_sources or [] # Anchors # [anchor_count, (y1, x1, y2, x2)] backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE) anchors = utils.generate_pyramid_anchors( config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, backbone_shapes, config.BACKBONE_STRIDES, config.RPN_ANCHOR_STRIDE, ) # Keras requires a generator to run indefinitely. while True: try: # Increment index to pick next image. Shuffle if at the start of an epoch. image_index = (image_index + 1) % len(image_ids) if shuffle and image_index == 0: np.random.shuffle(image_ids) # Get GT bounding boxes and masks for image. image_id = image_ids[image_index] # If the image source is not to be augmented pass None as augmentation if dataset.image_info[image_id]["source"] in no_augmentation_sources: image, image_meta, gt_class_ids, gt_boxes, gt_masks = load_image_gt( dataset, config, image_id, augment=augment, augmentation=None, use_mini_mask=config.USE_MINI_MASK, ) else: image, image_meta, gt_class_ids, gt_boxes, gt_masks = load_image_gt( dataset, config, image_id, augment=augment, augmentation=augmentation, use_mini_mask=config.USE_MINI_MASK, ) # Skip images that have no instances. This can happen in cases # where we train on a subset of classes and the image doesn't # have any of the classes we care about. if not np.any(gt_class_ids > 0): continue # RPN Targets rpn_match, rpn_bbox = build_rpn_targets( image.shape, anchors, gt_class_ids, gt_boxes, config ) # Mask R-CNN Targets if random_rois: rpn_rois = generate_random_rois( image.shape, random_rois, gt_class_ids, gt_boxes ) if detection_targets: rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = build_detection_targets( rpn_rois, gt_class_ids, gt_boxes, gt_masks, config ) # Init batch arrays if b == 0: batch_image_meta = np.zeros( (batch_size,) + image_meta.shape, dtype=image_meta.dtype ) batch_rpn_match = np.zeros( [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype ) batch_rpn_bbox = np.zeros( [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype, ) batch_images = np.zeros((batch_size,) + image.shape, dtype=np.float32) batch_gt_class_ids = np.zeros( (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32 ) batch_gt_boxes = np.zeros( (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32 ) batch_gt_masks = np.zeros( ( batch_size, gt_masks.shape[0], gt_masks.shape[1], config.MAX_GT_INSTANCES, ), dtype=gt_masks.dtype, ) if random_rois: batch_rpn_rois = np.zeros( (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype ) if detection_targets: batch_rois = np.zeros( (batch_size,) + rois.shape, dtype=rois.dtype ) batch_mrcnn_class_ids = np.zeros( (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype, ) batch_mrcnn_bbox = np.zeros( (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype ) batch_mrcnn_mask = np.zeros( (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype ) # If more instances than fits in the array, sub-sample from them. 
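# Illustrative sketch of consuming the generator described above; `dataset` and
# `config` are assumed, already-prepared Dataset/Config objects.
gen = data_generator(dataset, config, shuffle=True, batch_size=config.BATCH_SIZE)
inputs, outputs = next(gen)   # outputs is [] unless detection_targets=True
images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs
# images:    [batch, H, W, C], already molded (mean pixel subtracted)
# rpn_match: [batch, num_anchors, 1] with values in {1, -1, 0}
# gt_boxes:  [batch, MAX_GT_INSTANCES, 4], zero padded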
if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: ids = np.random.choice( np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False ) gt_class_ids = gt_class_ids[ids] gt_boxes = gt_boxes[ids] gt_masks = gt_masks[:, :, ids] # Add to batch batch_image_meta[b] = image_meta batch_rpn_match[b] = rpn_match[:, np.newaxis] batch_rpn_bbox[b] = rpn_bbox batch_images[b] = mold_image(image.astype(np.float32), config) batch_gt_class_ids[b, : gt_class_ids.shape[0]] = gt_class_ids batch_gt_boxes[b, : gt_boxes.shape[0]] = gt_boxes batch_gt_masks[b, :, :, : gt_masks.shape[-1]] = gt_masks if random_rois: batch_rpn_rois[b] = rpn_rois if detection_targets: batch_rois[b] = rois batch_mrcnn_class_ids[b] = mrcnn_class_ids batch_mrcnn_bbox[b] = mrcnn_bbox batch_mrcnn_mask[b] = mrcnn_mask b += 1 # Batch full? if b >= batch_size: inputs = [ batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes, batch_gt_masks, ] outputs = [] if random_rois: inputs.extend([batch_rpn_rois]) if detection_targets: inputs.extend([batch_rois]) # Keras requires that output and targets have the same number of dimensions batch_mrcnn_class_ids = np.expand_dims( batch_mrcnn_class_ids, -1 ) outputs.extend( [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask] ) yield inputs, outputs # start a new batch b = 0 except (GeneratorExit, KeyboardInterrupt): raise except: # Log it and skip the image logging.exception( "Error processing image {}".format(dataset.image_info[image_id]) ) error_count += 1 if error_count > 5: raise ############################################################ # MaskRCNN Class ############################################################ class MaskRCNN: """Encapsulates the Mask RCNN model functionality. The actual Keras model is in the keras_model property. """ def __init__(self, mode, config, model_dir): """ mode: Either "training" or "inference" config: A Sub-class of the Config class model_dir: Directory to save training logs and trained weights """ assert mode in ["training", "inference"] self.mode = mode self.config = config self.model_dir = model_dir self.set_log_dir() self.keras_model = self.build(mode=mode, config=config) def build(self, mode, config): """Build Mask R-CNN architecture. input_shape: The shape of the input image. mode: Either "training" or "inference". The inputs and outputs of the model differ accordingly. """ assert mode in ["training", "inference"] # Image size must be dividable by 2 multiple times h, w = config.IMAGE_SHAPE[:2] if h / 2 ** 6 != int(h / 2 ** 6) or w / 2 ** 6 != int(w / 2 ** 6): raise Exception( "Image size must be dividable by 2 at least 6 times " "to avoid fractions when downscaling and upscaling." "For example, use 256, 320, 384, 448, 512, ... etc. " ) # Inputs input_image = KL.Input( shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image" ) input_image_meta = KL.Input( shape=[config.IMAGE_META_SIZE], name="input_image_meta" ) if mode == "training": # RPN GT input_rpn_match = KL.Input( shape=[None, 1], name="input_rpn_match", dtype=tf.int32 ) input_rpn_bbox = KL.Input( shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32 ) # Detection GT (class IDs, bounding boxes, and masks) # 1. GT Class IDs (zero padded) input_gt_class_ids = KL.Input( shape=[None], name="input_gt_class_ids", dtype=tf.int32 ) # 2. 
GT Boxes in pixels (zero padded) # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates input_gt_boxes = KL.Input( shape=[None, 4], name="input_gt_boxes", dtype=tf.float32 ) # Normalize coordinates gt_boxes = KL.Lambda( lambda x: norm_boxes_graph(x, K.shape(input_image)[1:3]) )(input_gt_boxes) # 3. GT Masks (zero padded) # [batch, height, width, MAX_GT_INSTANCES] if config.USE_MINI_MASK: input_gt_masks = KL.Input( shape=[config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], None], name="input_gt_masks", dtype=bool, ) else: input_gt_masks = KL.Input( shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], name="input_gt_masks", dtype=bool, ) elif mode == "inference": # Anchors in normalized coordinates input_anchors = KL.Input(shape=[None, 4], name="input_anchors") # Build the shared convolutional layers. # Bottom-up Layers # Returns a list of the last layers of each stage, 5 in total. # Don't create the thead (stage 5), so we pick the 4th item in the list. if callable(config.BACKBONE): _, C2, C3, C4, C5 = config.BACKBONE( input_image, stage5=True, train_bn=config.TRAIN_BN ) else: _, C2, C3, C4, C5 = resnet_graph( input_image, config.BACKBONE, stage5=True, train_bn=config.TRAIN_BN ) # Top-down Layers # TODO: add assert to varify feature map sizes match what's in config P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name="fpn_c5p5")(C5) P4 = KL.Add(name="fpn_p4add")( [ KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name="fpn_c4p4")(C4), ] ) P3 = KL.Add(name="fpn_p3add")( [ KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name="fpn_c3p3")(C3), ] ) P2 = KL.Add(name="fpn_p2add")( [ KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name="fpn_c2p2")(C2), ] ) # Attach 3x3 conv to all P layers to get the final feature maps. P2 = KL.Conv2D( config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2" )(P2) P3 = KL.Conv2D( config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3" )(P3) P4 = KL.Conv2D( config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4" )(P4) P5 = KL.Conv2D( config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5" )(P5) # P6 is used for the 5th anchor scale in RPN. Generated by # subsampling from P5 with stride of 2. P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) # Note that P6 is used in RPN, but not in the classifier heads. rpn_feature_maps = [P2, P3, P4, P5, P6] mrcnn_feature_maps = [P2, P3, P4, P5] # Anchors if mode == "training": anchors = self.get_anchors(config.IMAGE_SHAPE) # Duplicate across the batch dimension because Keras requires it # TODO: can this be optimized to avoid duplicating the anchors? anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape) # A hack to get around Keras's bad support for constants anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")( input_image ) else: anchors = input_anchors # RPN Model rpn = build_rpn_model( config.RPN_ANCHOR_STRIDE, len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE, ) # Loop through pyramid layers layer_outputs = [] # list of lists for p in rpn_feature_maps: layer_outputs.append(rpn([p])) # Concatenate layer outputs # Convert from list of lists of level outputs to list of lists # of outputs across levels. # e.g. 
[[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] outputs = list(zip(*layer_outputs)) outputs = [ KL.Concatenate(axis=1, name=n)(list(o)) for o, n in zip(outputs, output_names) ] rpn_class_logits, rpn_class, rpn_bbox = outputs # Generate proposals # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates # and zero padded. proposal_count = ( config.POST_NMS_ROIS_TRAINING if mode == "training" else config.POST_NMS_ROIS_INFERENCE ) rpn_rois = ProposalLayer( proposal_count=proposal_count, nms_threshold=config.RPN_NMS_THRESHOLD, name="ROI", config=config, )([rpn_class, rpn_bbox, anchors]) if mode == "training": # Class ID mask to mark class IDs supported by the dataset the image # came from. active_class_ids = KL.Lambda( lambda x: parse_image_meta_graph(x)["active_class_ids"] )(input_image_meta) if not config.USE_RPN_ROIS: # Ignore predicted ROIs and use ROIs provided as an input. input_rois = KL.Input( shape=[config.POST_NMS_ROIS_TRAINING, 4], name="input_roi", dtype=np.int32, ) # Normalize coordinates target_rois = KL.Lambda( lambda x: norm_boxes_graph(x, K.shape(input_image)[1:3]) )(input_rois) else: target_rois = rpn_rois # Generate detection targets # Subsamples proposals and generates target outputs for training # Note that proposal class IDs, gt_boxes, and gt_masks are zero # padded. Equally, returned rois and targets are zero padded. rois, target_class_ids, target_bbox, target_mask = DetectionTargetLayer( config, name="proposal_targets" )([target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) # Network Heads # TODO: verify that this handles zero padded ROIs mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifier_graph( rois, mrcnn_feature_maps, input_image_meta, config.POOL_SIZE, config.NUM_CLASSES, train_bn=config.TRAIN_BN, fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE, ) mrcnn_mask = build_fpn_mask_graph( rois, mrcnn_feature_maps, input_image_meta, config.MASK_POOL_SIZE, config.NUM_CLASSES, train_bn=config.TRAIN_BN, ) # TODO: clean up (use tf.identify if necessary) output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) # Losses rpn_class_loss = KL.Lambda( lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss" )([input_rpn_match, rpn_class_logits]) rpn_bbox_loss = KL.Lambda( lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss" )([input_rpn_bbox, input_rpn_match, rpn_bbox]) class_loss = KL.Lambda( lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss" )([target_class_ids, mrcnn_class_logits, active_class_ids]) bbox_loss = KL.Lambda( lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss" )([target_bbox, target_class_ids, mrcnn_bbox]) mask_loss = KL.Lambda( lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss" )([target_mask, target_class_ids, mrcnn_mask]) # Model inputs = [ input_image, input_image_meta, input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks, ] if not config.USE_RPN_ROIS: inputs.append(input_rois) outputs = [ rpn_class_logits, rpn_class, rpn_bbox, mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, output_rois, rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss, ] model = KM.Model(inputs, outputs, name="mask_rcnn") else: # Network Heads # Proposal classifier and BBox regressor heads mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifier_graph( rpn_rois, mrcnn_feature_maps, input_image_meta, config.POOL_SIZE, config.NUM_CLASSES, train_bn=config.TRAIN_BN, 
fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE, ) # Detections # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in # normalized coordinates detections = DetectionLayer(config, name="mrcnn_detection")( [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta] ) # Create masks for detections detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections) mrcnn_mask = build_fpn_mask_graph( detection_boxes, mrcnn_feature_maps, input_image_meta, config.MASK_POOL_SIZE, config.NUM_CLASSES, train_bn=config.TRAIN_BN, ) model = KM.Model( [input_image, input_image_meta, input_anchors], [ detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, rpn_rois, rpn_class, rpn_bbox, ], name="mask_rcnn", ) # Add multi-GPU support. if config.GPU_COUNT > 1: from mrcnn.parallel_model import ParallelModel model = ParallelModel(model, config.GPU_COUNT) return model def find_last(self): """Finds the last checkpoint file of the last trained model in the model directory. Returns: The path of the last checkpoint file """ # Get directory names. Each directory corresponds to a model dir_names = next(os.walk(self.model_dir))[1] key = self.config.NAME.lower() dir_names = filter(lambda f: f.startswith(key), dir_names) dir_names = sorted(dir_names) if not dir_names: import errno raise FileNotFoundError( errno.ENOENT, "Could not find model directory under {}".format(self.model_dir), ) # Pick last directory dir_name = os.path.join(self.model_dir, dir_names[-1]) # Find the last checkpoint checkpoints = next(os.walk(dir_name))[2] checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) checkpoints = sorted(checkpoints) if not checkpoints: import errno raise FileNotFoundError( errno.ENOENT, "Could not find weight files in {}".format(dir_name) ) checkpoint = os.path.join(dir_name, checkpoints[-1]) return checkpoint def load_weights(self, filepath, by_name=False, exclude=None): """Modified version of the corresponding Keras function with the addition of multi-GPU support and the ability to exclude some layers from loading. exclude: list of layer names to exclude """ import h5py # Conditional import to support versions of Keras before 2.2 # TODO: remove in about 6 months (end of 2018) try: from keras.engine import saving except ImportError: # Keras before 2.2 used the 'topology' namespace. from keras.engine import topology as saving if exclude: by_name = True if h5py is None: raise ImportError("`load_weights` requires h5py.") f = h5py.File(filepath, mode="r") if "layer_names" not in f.attrs and "model_weights" in f: f = f["model_weights"] # In multi-GPU training, we wrap the model. Get layers # of the inner model because they have the weights. keras_model = self.keras_model layers = ( keras_model.inner_model.layers if hasattr(keras_model, "inner_model") else keras_model.layers ) # Exclude some layers if exclude: layers = filter(lambda l: l.name not in exclude, layers) if by_name: saving.load_weights_from_hdf5_group_by_name(f, layers) else: saving.load_weights_from_hdf5_group(f, layers) if hasattr(f, "close"): f.close() # Update the log directory self.set_log_dir(filepath) def get_imagenet_weights(self): """Downloads ImageNet trained weights from Keras. Returns path to weights file. 
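# Illustrative sketch: typical ways the weight-loading helpers above are combined.
# `model` is an assumed MaskRCNN instance; the excluded layer names follow the
# conventional Mask R-CNN head naming and may need adjusting for a custom model.
model.load_weights(model.find_last(), by_name=True)   # resume from latest checkpoint

# Or start from ImageNet weights, skipping heads whose shapes depend on NUM_CLASSES:
model.load_weights(model.get_imagenet_weights(), by_name=True,
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                            "mrcnn_bbox", "mrcnn_mask"])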
""" from keras.utils.data_utils import get_file TF_WEIGHTS_PATH_NO_TOP = ( "https://github.com/fchollet/deep-learning-models/" "releases/download/v0.2/" "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5" ) weights_path = get_file( "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5", TF_WEIGHTS_PATH_NO_TOP, cache_subdir="models", md5_hash="a268eb855778b3df3c7506639542a6af", ) return weights_path def compile(self, learning_rate, momentum): """Gets the model ready for training. Adds losses, regularization, and metrics. Then calls the Keras compile() function. """ # Optimizer object optimizer = keras.optimizers.SGD( lr=learning_rate, momentum=momentum, clipnorm=self.config.GRADIENT_CLIP_NORM ) # Add Losses # First, clear previously set losses to avoid duplication self.keras_model._losses = [] self.keras_model._per_input_losses = {} loss_names = [ "rpn_class_loss", "rpn_bbox_loss", "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss", ] for name in loss_names: layer = self.keras_model.get_layer(name) if layer.output in self.keras_model.losses: continue loss = tf.reduce_mean( layer.output, keepdims=True ) * self.config.LOSS_WEIGHTS.get(name, 1.0) self.keras_model.add_loss(loss) # Add L2 Regularization # Skip gamma and beta weights of batch normalization layers. reg_losses = [ keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) for w in self.keras_model.trainable_weights if "gamma" not in w.name and "beta" not in w.name ] self.keras_model.add_loss(tf.add_n(reg_losses)) # Compile self.keras_model.compile( optimizer=optimizer, loss=[None] * len(self.keras_model.outputs) ) # Add metrics for losses for name in loss_names: if name in self.keras_model.metrics_names: continue layer = self.keras_model.get_layer(name) self.keras_model.metrics_names.append(name) loss = tf.reduce_mean( layer.output, keepdims=True ) * self.config.LOSS_WEIGHTS.get(name, 1.0) self.keras_model.metrics_tensors.append(loss) def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): """Sets model layers as trainable if their names match the given regular expression. """ # Print message on the first call (but not on recursive calls) if verbose > 0 and keras_model is None: log("Selecting layers to train") keras_model = keras_model or self.keras_model # In multi-GPU training, we wrap the model. Get layers # of the inner model because they have the weights. layers = ( keras_model.inner_model.layers if hasattr(keras_model, "inner_model") else keras_model.layers ) for layer in layers: # Is the layer a model? if layer.__class__.__name__ == "Model": print("In model: ", layer.name) self.set_trainable(layer_regex, keras_model=layer, indent=indent + 4) continue if not layer.weights: continue # Is it trainable? trainable = bool(re.fullmatch(layer_regex, layer.name)) # Update layer. If layer is a container, update inner layer. if layer.__class__.__name__ == "TimeDistributed": layer.layer.trainable = trainable else: layer.trainable = trainable # Print trainable layer names if trainable and verbose > 0: log( "{}{:20} ({})".format( " " * indent, layer.name, layer.__class__.__name__ ) ) def set_log_dir(self, model_path=None): """Sets the model log directory and epoch counter. model_path: If None, or a format different from what this code uses then set a new log directory and start epochs from 0. Otherwise, extract the log directory and the epoch counter from the file name. 
""" # Set date and epoch counter as if starting a new model self.epoch = 0 now = datetime.datetime.now() # If we have a model path with date and epochs use them if model_path: # Continue from we left of. Get epoch and date from the file name # A sample model path might look like: # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows) # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux) regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5" m = re.match(regex, model_path) if m: now = datetime.datetime( int(m.group(1)), int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5)), ) # Epoch number in file is 1-based, and in Keras code it's 0-based. # So, adjust for that then increment by one to start from the next epoch self.epoch = int(m.group(6)) - 1 + 1 print("Re-starting from epoch %d" % self.epoch) # Directory for training logs self.log_dir = os.path.join( self.model_dir, "{}{:%Y%m%dT%H%M}".format(self.config.NAME.lower(), now) ) # Path to save after each epoch. Include placeholders that get filled by Keras. self.checkpoint_path = os.path.join( self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(self.config.NAME.lower()) ) self.checkpoint_path = self.checkpoint_path.replace("*epoch*", "{epoch:04d}") def train( self, train_dataset, val_dataset, learning_rate, epochs, layers, augmentation=None, custom_callbacks=None, no_augmentation_sources=None, ): """Train the model. train_dataset, val_dataset: Training and validation Dataset objects. learning_rate: The learning rate to train with epochs: Number of training epochs. Note that previous training epochs are considered to be done alreay, so this actually determines the epochs to train in total rather than in this particaular call. layers: Allows selecting wich layers to train. It can be: - A regular expression to match layer names to train - One of these predefined values: heads: The RPN, classifier and mask heads of the network all: All the layers 3+: Train Resnet stage 3 and up 4+: Train Resnet stage 4 and up 5+: Train Resnet stage 5 and up augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. For example, passing imgaug.augmenters.Fliplr(0.5) flips images right/left 50% of the time. You can pass complex augmentations as well. This augmentation applies 50% of the time, and when it does it flips images right/left half the time and adds a Gaussian blur with a random sigma in range 0 to 5. augmentation = imgaug.augmenters.Sometimes(0.5, [ imgaug.augmenters.Fliplr(0.5), imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0)) ]) custom_callbacks: Optional. Add custom callbacks to be called with the keras fit_generator method. Must be list of type keras.callbacks. no_augmentation_sources: Optional. List of sources to exclude for augmentation. A source is string that identifies a dataset and is defined in the Dataset class. """ assert self.mode == "training", "Create model in training mode." 
# Pre-defined layer regular expressions layer_regex = { # all layers but the backbone "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", # From a specific Resnet stage and up "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", # All layers "all": ".*", } if layers in layer_regex.keys(): layers = layer_regex[layers] # Data generators train_generator = data_generator( train_dataset, self.config, shuffle=True, augmentation=augmentation, batch_size=self.config.BATCH_SIZE, no_augmentation_sources=no_augmentation_sources, ) val_generator = data_generator( val_dataset, self.config, shuffle=True, batch_size=self.config.BATCH_SIZE ) # Create log_dir if it does not exist if not os.path.exists(self.log_dir): os.makedirs(self.log_dir) # Callbacks callbacks = [ keras.callbacks.TensorBoard( log_dir=self.log_dir, histogram_freq=0, write_graph=True, write_images=False, ), keras.callbacks.ModelCheckpoint( self.checkpoint_path, verbose=0, save_weights_only=True ), ] # Add custom callbacks to the list if custom_callbacks: callbacks += custom_callbacks # Train log("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) log("Checkpoint Path: {}".format(self.checkpoint_path)) self.set_trainable(layers) self.compile(learning_rate, self.config.LEARNING_MOMENTUM) # Work-around for Windows: Keras fails on Windows when using # multiprocessing workers. See discussion here: # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 if os.name is "nt": workers = 0 else: workers = multiprocessing.cpu_count() self.keras_model.fit_generator( train_generator, initial_epoch=self.epoch, epochs=epochs, steps_per_epoch=self.config.STEPS_PER_EPOCH, callbacks=callbacks, validation_data=val_generator, validation_steps=self.config.VALIDATION_STEPS, max_queue_size=100, workers=workers, use_multiprocessing=True, ) self.epoch = max(self.epoch, epochs) def mold_inputs(self, images): """Takes a list of images and modifies them to the format expected as an input to the neural network. images: List of image matrices [height,width,depth]. Images can have different sizes. Returns 3 Numpy matrices: molded_images: [N, h, w, 3]. Images resized and normalized. image_metas: [N, length of meta data]. Details about each image. windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the original image (padding excluded). 
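# Illustrative sketch: a typical two-stage schedule with this train() method.
# `model`, `dataset_train`, `dataset_val`, and `config.LEARNING_RATE` are assumed
# objects/attributes; epoch counts are arbitrary.
from imgaug import augmenters as iaa

augmentation = iaa.Fliplr(0.5)          # the example given in the docstring above
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=20, layers="heads",
            augmentation=augmentation)
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=40, layers="all")    # epochs is a total, not an increment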
""" molded_images = [] image_metas = [] windows = [] for image in images: # Resize image # TODO: move resizing to mold_image() molded_image, window, scale, padding, crop = utils.resize_image( image, min_dim=self.config.IMAGE_MIN_DIM, min_scale=self.config.IMAGE_MIN_SCALE, max_dim=self.config.IMAGE_MAX_DIM, mode=self.config.IMAGE_RESIZE_MODE, ) molded_image = mold_image(molded_image, self.config) # Build image_meta image_meta = compose_image_meta( 0, image.shape, molded_image.shape, window, scale, np.zeros([self.config.NUM_CLASSES], dtype=np.int32), ) # Append molded_images.append(molded_image) windows.append(window) image_metas.append(image_meta) # Pack into arrays molded_images = np.stack(molded_images) image_metas = np.stack(image_metas) windows = np.stack(windows) return molded_images, image_metas, windows def unmold_detections( self, detections, mrcnn_mask, original_image_shape, image_shape, window ): """Reformats the detections of one image from the format of the neural network output to a format suitable for use in the rest of the application. detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates mrcnn_mask: [N, height, width, num_classes] original_image_shape: [H, W, C] Original image shape before resizing image_shape: [H, W, C] Shape of the image after resizing and padding window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real image is excluding the padding. Returns: boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels class_ids: [N] Integer class IDs for each bounding box scores: [N] Float probability scores of the class_id masks: [height, width, num_instances] Instance masks """ # How many detections do we have? # Detections array is padded with zeros. Find the first class_id == 0. zero_ix = np.where(detections[:, 4] == 0)[0] N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] # Extract boxes, class_ids, scores, and class-specific masks boxes = detections[:N, :4] class_ids = detections[:N, 4].astype(np.int32) scores = detections[:N, 5] masks = mrcnn_mask[np.arange(N), :, :, class_ids] # Translate normalized coordinates in the resized image to pixel # coordinates in the original image before resizing window = utils.norm_boxes(window, image_shape[:2]) wy1, wx1, wy2, wx2 = window shift = np.array([wy1, wx1, wy1, wx1]) wh = wy2 - wy1 # window height ww = wx2 - wx1 # window width scale = np.array([wh, ww, wh, ww]) # Convert boxes to normalized coordinates on the window boxes = np.divide(boxes - shift, scale) # Convert boxes to pixel coordinates on the original image boxes = utils.denorm_boxes(boxes, original_image_shape[:2]) # Filter out detections with zero area. Happens in early training when # network weights are still random exclude_ix = np.where( (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0 )[0] if exclude_ix.shape[0] > 0: boxes = np.delete(boxes, exclude_ix, axis=0) class_ids = np.delete(class_ids, exclude_ix, axis=0) scores = np.delete(scores, exclude_ix, axis=0) masks = np.delete(masks, exclude_ix, axis=0) N = class_ids.shape[0] # Resize masks to original image size and set boundary threshold. full_masks = [] for i in range(N): # Convert neural network mask to full size mask full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape) full_masks.append(full_mask) full_masks = ( np.stack(full_masks, axis=-1) if full_masks else np.empty(original_image_shape[:2] + (0,)) ) return boxes, class_ids, scores, full_masks def detect(self, images, verbose=0): """Runs the detection pipeline. 
images: List of images, potentially of different sizes. Returns a list of dicts, one dict per image. The dict contains: rois: [N, (y1, x1, y2, x2)] detection bounding boxes class_ids: [N] int class IDs scores: [N] float probability scores for the class IDs masks: [H, W, N] instance binary masks """ assert self.mode == "inference", "Create model in inference mode." assert ( len(images) == self.config.BATCH_SIZE ), "len(images) must be equal to BATCH_SIZE" if verbose: log("Processing {} images".format(len(images))) for image in images: log("image", image) # Mold inputs to format expected by the neural network molded_images, image_metas, windows = self.mold_inputs(images) # Validate image sizes # All images in a batch MUST be of the same size image_shape = molded_images[0].shape for g in molded_images[1:]: assert ( g.shape == image_shape ), "After resizing, all images must have the same size. Check IMAGE_RESIZE_MODE and image sizes." # Anchors anchors = self.get_anchors(image_shape) # Duplicate across the batch dimension because Keras requires it # TODO: can this be optimized to avoid duplicating the anchors? anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) if verbose: log("molded_images", molded_images) log("image_metas", image_metas) log("anchors", anchors) # Run object detection detections, _, _, mrcnn_mask, _, _, _ = self.keras_model.predict( [molded_images, image_metas, anchors], verbose=0 ) # Process detections results = [] for i, image in enumerate(images): final_rois, final_class_ids, final_scores, final_masks = self.unmold_detections( detections[i], mrcnn_mask[i], image.shape, molded_images[i].shape, windows[i], ) results.append( { "rois": final_rois, "class_ids": final_class_ids, "scores": final_scores, "masks": final_masks, } ) return results def detect_molded(self, molded_images, image_metas, verbose=0): """Runs the detection pipeline, but expect inputs that are molded already. Used mostly for debugging and inspecting the model. molded_images: List of images loaded using load_image_gt() image_metas: image meta data, also returned by load_image_gt() Returns a list of dicts, one dict per image. The dict contains: rois: [N, (y1, x1, y2, x2)] detection bounding boxes class_ids: [N] int class IDs scores: [N] float probability scores for the class IDs masks: [H, W, N] instance binary masks """ assert self.mode == "inference", "Create model in inference mode." assert ( len(molded_images) == self.config.BATCH_SIZE ), "Number of images must be equal to BATCH_SIZE" if verbose: log("Processing {} images".format(len(molded_images))) for image in molded_images: log("image", image) # Validate image sizes # All images in a batch MUST be of the same size image_shape = molded_images[0].shape for g in molded_images[1:]: assert g.shape == image_shape, "Images must have the same size" # Anchors anchors = self.get_anchors(image_shape) # Duplicate across the batch dimension because Keras requires it # TODO: can this be optimized to avoid duplicating the anchors? 
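# Illustrative sketch: running the detection pipeline above on one image with an
# inference-mode model (assumes BATCH_SIZE == 1; the file path is hypothetical).
import skimage.io

image = skimage.io.imread("example_tile.tif")
r = model.detect([image], verbose=1)[0]
# r["rois"]:      [N, (y1, x1, y2, x2)] boxes in pixel coordinates
# r["class_ids"]: [N] integer class IDs
# r["scores"]:    [N] float confidence scores
# r["masks"]:     [H, W, N] boolean instance masks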
anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) if verbose: log("molded_images", molded_images) log("image_metas", image_metas) log("anchors", anchors) # Run object detection detections, _, _, mrcnn_mask, _, _, _ = self.keras_model.predict( [molded_images, image_metas, anchors], verbose=0 ) # Process detections results = [] for i, image in enumerate(molded_images): window = [0, 0, image.shape[0], image.shape[1]] final_rois, final_class_ids, final_scores, final_masks = self.unmold_detections( detections[i], mrcnn_mask[i], image.shape, molded_images[i].shape, window, ) results.append( { "rois": final_rois, "class_ids": final_class_ids, "scores": final_scores, "masks": final_masks, } ) return results def get_anchors(self, image_shape): """Returns anchor pyramid for the given image size.""" backbone_shapes = compute_backbone_shapes(self.config, image_shape) # Cache anchors and reuse if image shape is the same if not hasattr(self, "_anchor_cache"): self._anchor_cache = {} if not tuple(image_shape) in self._anchor_cache: # Generate Anchors a = utils.generate_pyramid_anchors( self.config.RPN_ANCHOR_SCALES, self.config.RPN_ANCHOR_RATIOS, backbone_shapes, self.config.BACKBONE_STRIDES, self.config.RPN_ANCHOR_STRIDE, ) # Keep a copy of the latest anchors in pixel coordinates because # it's used in inspect_model notebooks. # TODO: Remove this after the notebook are refactored to not use it self.anchors = a # Normalize coordinates self._anchor_cache[tuple(image_shape)] = utils.norm_boxes( a, image_shape[:2] ) return self._anchor_cache[tuple(image_shape)] def ancestor(self, tensor, name, checked=None): """Finds the ancestor of a TF tensor in the computation graph. tensor: TensorFlow symbolic tensor. name: Name of ancestor tensor to find checked: For internal use. A list of tensors that were already searched to avoid loops in traversing the graph. """ checked = checked if checked is not None else [] # Put a limit on how deep we go to avoid very long loops if len(checked) > 500: return None # Convert name to a regex and allow matching a number prefix # because Keras adds them automatically if isinstance(name, str): name = re.compile(name.replace("/", r"(\_\d+)*/")) parents = tensor.op.inputs for p in parents: if p in checked: continue if bool(re.fullmatch(name, p.name)): return p checked.append(p) a = self.ancestor(p, name, checked) if a is not None: return a return None def find_trainable_layer(self, layer): """If a layer is encapsulated by another layer, this function digs through the encapsulation and returns the layer that holds the weights. """ if layer.__class__.__name__ == "TimeDistributed": return self.find_trainable_layer(layer.layer) return layer def get_trainable_layers(self): """Returns a list of layers that have weights.""" layers = [] # Loop through all layers for l in self.keras_model.layers: # If layer is a wrapper, find inner trainable layer l = self.find_trainable_layer(l) # Include layer if it has weights if l.get_weights(): layers.append(l) return layers def run_graph(self, images, outputs, image_metas=None): """Runs a sub-set of the computation graph that computes the given outputs. image_metas: If provided, the images are assumed to be already molded (i.e. resized, padded, and normalized) outputs: List of tuples (name, tensor) to compute. The tensors are symbolic TensorFlow tensors and the names are for easy tracking. Returns an ordered dict of results. Keys are the names received in the input and values are Numpy arrays. 
""" model = self.keras_model # Organize desired outputs into an ordered dict outputs = OrderedDict(outputs) for o in outputs.values(): assert o is not None # Build a Keras function to run parts of the computation graph inputs = model.inputs if model.uses_learning_phase and not isinstance(K.learning_phase(), int): inputs += [K.learning_phase()] kf = K.function(model.inputs, list(outputs.values())) # Prepare inputs if image_metas is None: molded_images, image_metas, _ = self.mold_inputs(images) else: molded_images = images image_shape = molded_images[0].shape # Anchors anchors = self.get_anchors(image_shape) # Duplicate across the batch dimension because Keras requires it # TODO: can this be optimized to avoid duplicating the anchors? anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) model_in = [molded_images, image_metas, anchors] # Run inference if model.uses_learning_phase and not isinstance(K.learning_phase(), int): model_in.append(0.0) outputs_np = kf(model_in) # Pack the generated Numpy arrays into a a dict and log the results. outputs_np = OrderedDict([(k, v) for k, v in zip(outputs.keys(), outputs_np)]) for k, v in outputs_np.items(): log(k, v) return outputs_np ############################################################ # Data Formatting ############################################################ def compose_image_meta( image_id, original_image_shape, image_shape, window, scale, active_class_ids ): """Takes attributes of an image and puts them in one 1D array. image_id: An int ID of the image. Useful for debugging. original_image_shape: [H, W, C] before resizing or padding. image_shape: [H, W, C] after resizing and padding window: (y1, x1, y2, x2) in pixels. The area of the image where the real image is (excluding the padding) scale: The scaling factor applied to the original image (float32) active_class_ids: List of class_ids available in the dataset from which the image came. Useful if training on images from multiple datasets where not all classes are present in all datasets. """ meta = np.array( [image_id] + list(original_image_shape) # size=1 + list(image_shape) # size=3 + list(window) # size=3 + [scale] # size=4 (y1, x1, y2, x2) in image cooredinates + list(active_class_ids) # size=1 # size=num_classes ) return meta def parse_image_meta(meta): """Parses an array that contains image attributes to its components. See compose_image_meta() for more details. meta: [batch, meta length] where meta length depends on NUM_CLASSES Returns a dict of the parsed values. """ image_id = meta[:, 0] original_image_shape = meta[:, 1:4] image_shape = meta[:, 4:7] window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels scale = meta[:, 11] active_class_ids = meta[:, 12:] return { "image_id": image_id.astype(np.int32), "original_image_shape": original_image_shape.astype(np.int32), "image_shape": image_shape.astype(np.int32), "window": window.astype(np.int32), "scale": scale.astype(np.float32), "active_class_ids": active_class_ids.astype(np.int32), } def parse_image_meta_graph(meta): """Parses a tensor that contains image attributes to its components. See compose_image_meta() for more details. meta: [batch, meta length] where meta length depends on NUM_CLASSES Returns a dict of the parsed tensors. 
""" image_id = meta[:, 0] original_image_shape = meta[:, 1:4] image_shape = meta[:, 4:7] window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels scale = meta[:, 11] active_class_ids = meta[:, 12:] return { "image_id": image_id, "original_image_shape": original_image_shape, "image_shape": image_shape, "window": window, "scale": scale, "active_class_ids": active_class_ids, } def mold_image(images, config): """Expects an RGB image (or array of images) and subtracts the mean pixel and converts it to float. Expects image colors in RGB order. """ return images.astype(np.float32) - config.MEAN_PIXEL def unmold_image(normalized_images, config): """Takes a image normalized with mold() and returns the original.""" return (normalized_images + config.MEAN_PIXEL).astype(np.uint8) ############################################################ # Miscellenous Graph Functions ############################################################ def trim_zeros_graph(boxes, name="trim_zeros"): """Often boxes are represented with matrices of shape [N, 4] and are padded with zeros. This removes zero boxes. boxes: [N, 4] matrix of boxes. non_zeros: [N] a 1D boolean mask identifying the rows to keep """ non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) boxes = tf.boolean_mask(boxes, non_zeros, name=name) return boxes, non_zeros def batch_pack_graph(x, counts, num_rows): """Picks different number of values from each row in x depending on the values in counts. """ outputs = [] for i in range(num_rows): outputs.append(x[i, : counts[i]]) return tf.concat(outputs, axis=0) def norm_boxes_graph(boxes, shape): """Converts boxes from pixel coordinates to normalized coordinates. boxes: [..., (y1, x1, y2, x2)] in pixel coordinates shape: [..., (height, width)] in pixels Note: In pixel coordinates (y2, x2) is outside the box. But in normalized coordinates it's inside the box. Returns: [..., (y1, x1, y2, x2)] in normalized coordinates """ h, w = tf.split(tf.cast(shape, tf.float32), 2) scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) shift = tf.constant([0.0, 0.0, 1.0, 1.0]) return tf.divide(boxes - shift, scale) def denorm_boxes_graph(boxes, shape): """Converts boxes from normalized coordinates to pixel coordinates. boxes: [..., (y1, x1, y2, x2)] in normalized coordinates shape: [..., (height, width)] in pixels Note: In pixel coordinates (y2, x2) is outside the box. But in normalized coordinates it's inside the box. Returns: [..., (y1, x1, y2, x2)] in pixel coordinates """ h, w = tf.split(tf.cast(shape, tf.float32), 2) scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) shift = tf.constant([0.0, 0.0, 1.0, 1.0]) return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32) PKaN'* cropmask/mrcnn/parallel_model.py""" Mask R-CNN Multi-GPU Support for Keras. Copyright (c) 2017 Matterport, Inc. Licensed under the MIT License (see LICENSE for details) Written by Waleed Abdulla Ideas and a small code snippets from these sources: https://github.com/fchollet/keras/issues/2436 https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py """ import tensorflow as tf import keras.backend as K import keras.layers as KL import keras.models as KM class ParallelModel(KM.Model): """Subclasses the standard Keras Model and adds multi-GPU support. It works by creating a copy of the model on each GPU. 
Then it slices the inputs and sends a slice to each copy of the model, and then merges the outputs together and applies the loss on the combined outputs. """ def __init__(self, keras_model, gpu_count): """Class constructor. keras_model: The Keras model to parallelize gpu_count: Number of GPUs. Must be > 1 """ self.inner_model = keras_model self.gpu_count = gpu_count merged_outputs = self.make_parallel() super(ParallelModel, self).__init__( inputs=self.inner_model.inputs, outputs=merged_outputs ) def __getattribute__(self, attrname): """Redirect loading and saving methods to the inner model. That's where the weights are stored.""" if "load" in attrname or "save" in attrname: return getattr(self.inner_model, attrname) return super(ParallelModel, self).__getattribute__(attrname) def summary(self, *args, **kwargs): """Override summary() to display summaries of both, the wrapper and inner models.""" super(ParallelModel, self).summary(*args, **kwargs) self.inner_model.summary(*args, **kwargs) def make_parallel(self): """Creates a new wrapper model that consists of multiple replicas of the original model placed on different GPUs. """ # Slice inputs. Slice inputs on the CPU to avoid sending a copy # of the full inputs to all GPUs. Saves on bandwidth and memory. input_slices = { name: tf.split(x, self.gpu_count) for name, x in zip(self.inner_model.input_names, self.inner_model.inputs) } output_names = self.inner_model.output_names outputs_all = [] for i in range(len(self.inner_model.outputs)): outputs_all.append([]) # Run the model call() on each GPU to place the ops there for i in range(self.gpu_count): with tf.device("/gpu:%d" % i): with tf.name_scope("tower_%d" % i): # Run a slice of inputs through this replica zipped_inputs = zip( self.inner_model.input_names, self.inner_model.inputs ) inputs = [ KL.Lambda( lambda s: input_slices[name][i], output_shape=lambda s: (None,) + s[1:], )(tensor) for name, tensor in zipped_inputs ] # Create the model replica and get the outputs outputs = self.inner_model(inputs) if not isinstance(outputs, list): outputs = [outputs] # Save the outputs for merging back together later for l, o in enumerate(outputs): outputs_all[l].append(o) # Merge outputs on CPU with tf.device("/cpu:0"): merged = [] for outputs, name in zip(outputs_all, output_names): # Concatenate or average outputs? # Outputs usually have a batch dimension and we concatenate # across it. If they don't, then the output is likely a loss # or a metric value that gets averaged across the batch. # Keras expects losses and metrics to be scalars. if K.int_shape(outputs[0]) == (): # Average m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)( outputs ) else: # Concatenate m = KL.Concatenate(axis=0, name=name)(outputs) merged.append(m) return merged if __name__ == "__main__": # Testing code below. It creates a simple model to train on MNIST and # tries to run it on 2 GPUs. It saves the graph so it can be viewed # in TensorBoard. Run it as: # # python3 parallel_model.py import os import numpy as np import keras.optimizers from keras.datasets import mnist from keras.preprocessing.image import ImageDataGenerator GPU_COUNT = 2 # Root directory of the project ROOT_DIR = os.path.abspath("../") # Directory to save logs and trained model MODEL_DIR = os.path.join(ROOT_DIR, "logs") def build_model(x_train, num_classes): # Reset default graph. Keras leaves old ops in the graph, # which are ignored for execution but clutter graph # visualization in TensorBoard. 
tf.reset_default_graph() inputs = KL.Input(shape=x_train.shape[1:], name="input_image") x = KL.Conv2D(32, (3, 3), activation="relu", padding="same", name="conv1")( inputs ) x = KL.Conv2D(64, (3, 3), activation="relu", padding="same", name="conv2")(x) x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) x = KL.Flatten(name="flat1")(x) x = KL.Dense(128, activation="relu", name="dense1")(x) x = KL.Dense(num_classes, activation="softmax", name="dense2")(x) return KM.Model(inputs, x, "digit_classifier_model") # Load MNIST Data (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = np.expand_dims(x_train, -1).astype("float32") / 255 x_test = np.expand_dims(x_test, -1).astype("float32") / 255 print("x_train shape:", x_train.shape) print("x_test shape:", x_test.shape) # Build data generator and model datagen = ImageDataGenerator() model = build_model(x_train, 10) # Add multi-GPU support. model = ParallelModel(model, GPU_COUNT) optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) model.compile( loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"], ) model.summary() # Train model.fit_generator( datagen.flow(x_train, y_train, batch_size=64), steps_per_epoch=50, epochs=10, verbose=1, validation_data=(x_test, y_test), callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, write_graph=True)], ) PKaNQcropmask/mrcnn/utils.py""" Mask R-CNN Common utility functions and classes. Copyright (c) 2017 Matterport, Inc. Licensed under the MIT License (see LICENSE for details) Written by Waleed Abdulla """ import sys import os import logging import math import random import numpy as np import tensorflow as tf import scipy import skimage.color import skimage.io import skimage.transform import urllib.request import shutil import warnings from distutils.version import LooseVersion # URL from which to download the latest COCO trained weights COCO_MODEL_URL = ( "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" ) ############################################################ # Bounding Boxes ############################################################ def extract_bboxes(mask): """Compute bounding boxes from masks. mask: [height, width, num_instances]. Mask pixels are either 1 or 0. Returns: bbox array [num_instances, (y1, x1, y2, x2)]. """ if len(mask.shape) == 2: mask = np.expand_dims(mask, 2) boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) for i in range(mask.shape[-1]): m = mask[:, :, i] # Bounding box. horizontal_indicies = np.where(np.any(m, axis=0))[0] vertical_indicies = np.where(np.any(m, axis=1))[0] if horizontal_indicies.shape[0]: x1, x2 = horizontal_indicies[[0, -1]] y1, y2 = vertical_indicies[[0, -1]] # x2 and y2 should not be part of the box. Increment by 1. x2 += 1 y2 += 1 else: # No mask for this instance. Might happen due to # resizing or cropping. Set bbox to zeros x1, x2, y1, y2 = 0, 0, 0, 0 boxes[i] = np.array([y1, x1, y2, x2]) return boxes.astype(np.int32) def compute_iou(box, boxes, box_area, boxes_area): """Calculates IoU of the given box with the array of the given boxes. box: 1D vector [y1, x1, y2, x2] boxes: [boxes_count, (y1, x1, y2, x2)] box_area: float. the area of 'box' boxes_area: array of length boxes_count. Note: the areas are passed in rather than calculated here for efficiency. Calculate once in the caller to avoid duplicate work. 
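# Illustrative sketch: extract_bboxes() above applied to a tiny single-instance mask.
import numpy as np

mask = np.zeros((5, 5, 1), dtype=bool)
mask[1:3, 2:4, 0] = True
print(extract_bboxes(mask))   # [[1 2 3 4]] -> (y1, x1, y2, x2); y2/x2 are exclusive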
""" # Calculate intersection areas y1 = np.maximum(box[0], boxes[:, 0]) y2 = np.minimum(box[2], boxes[:, 2]) x1 = np.maximum(box[1], boxes[:, 1]) x2 = np.minimum(box[3], boxes[:, 3]) intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) union = box_area + boxes_area[:] - intersection[:] iou = intersection / union return iou def compute_overlaps(boxes1, boxes2): """Computes IoU overlaps between two sets of boxes. boxes1, boxes2: [N, (y1, x1, y2, x2)]. For better performance, pass the largest set first and the smaller second. """ # Areas of anchors and GT boxes area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) # Compute overlaps to generate matrix [boxes1 count, boxes2 count] # Each cell contains the IoU value. overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) for i in range(overlaps.shape[1]): box2 = boxes2[i] overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) return overlaps def compute_overlaps_masks(masks1, masks2): """Computes IoU overlaps between two sets of masks. masks1, masks2: [Height, Width, instances] """ # If either set of masks is empty return empty result if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: return np.zeros((masks1.shape[-1], masks2.shape[-1])) # flatten masks and compute their areas masks1 = np.reshape(masks1 > 0.5, (-1, masks1.shape[-1])).astype(np.float32) masks2 = np.reshape(masks2 > 0.5, (-1, masks2.shape[-1])).astype(np.float32) area1 = np.sum(masks1, axis=0) area2 = np.sum(masks2, axis=0) # intersections and union intersections = np.dot(masks1.T, masks2) union = area1[:, None] + area2[None, :] - intersections overlaps = intersections / union return overlaps def non_max_suppression(boxes, scores, threshold): """Performs non-maximum suppression and returns indices of kept boxes. boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. scores: 1-D array of box scores. threshold: Float. IoU threshold to use for filtering. """ assert boxes.shape[0] > 0 if boxes.dtype.kind != "f": boxes = boxes.astype(np.float32) # Compute box areas y1 = boxes[:, 0] x1 = boxes[:, 1] y2 = boxes[:, 2] x2 = boxes[:, 3] area = (y2 - y1) * (x2 - x1) # Get indicies of boxes sorted by scores (highest first) ixs = scores.argsort()[::-1] pick = [] while len(ixs) > 0: # Pick top box and add its index to the list i = ixs[0] pick.append(i) # Compute IoU of the picked box with the rest iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) # Identify boxes with IoU over the threshold. This # returns indices into ixs[1:], so add 1 to get # indices into ixs. remove_ixs = np.where(iou > threshold)[0] + 1 # Remove indices of the picked and overlapped boxes. ixs = np.delete(ixs, remove_ixs) ixs = np.delete(ixs, 0) return np.array(pick, dtype=np.int32) def apply_box_deltas(boxes, deltas): """Applies the given deltas to the given boxes. boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 
deltas: [N, (dy, dx, log(dh), log(dw))] """ boxes = boxes.astype(np.float32) # Convert to y, x, h, w height = boxes[:, 2] - boxes[:, 0] width = boxes[:, 3] - boxes[:, 1] center_y = boxes[:, 0] + 0.5 * height center_x = boxes[:, 1] + 0.5 * width # Apply deltas center_y += deltas[:, 0] * height center_x += deltas[:, 1] * width height *= np.exp(deltas[:, 2]) width *= np.exp(deltas[:, 3]) # Convert back to y1, x1, y2, x2 y1 = center_y - 0.5 * height x1 = center_x - 0.5 * width y2 = y1 + height x2 = x1 + width return np.stack([y1, x1, y2, x2], axis=1) def box_refinement_graph(box, gt_box): """Compute refinement needed to transform box to gt_box. box and gt_box are [N, (y1, x1, y2, x2)] """ box = tf.cast(box, tf.float32) gt_box = tf.cast(gt_box, tf.float32) height = box[:, 2] - box[:, 0] width = box[:, 3] - box[:, 1] center_y = box[:, 0] + 0.5 * height center_x = box[:, 1] + 0.5 * width gt_height = gt_box[:, 2] - gt_box[:, 0] gt_width = gt_box[:, 3] - gt_box[:, 1] gt_center_y = gt_box[:, 0] + 0.5 * gt_height gt_center_x = gt_box[:, 1] + 0.5 * gt_width dy = (gt_center_y - center_y) / height dx = (gt_center_x - center_x) / width dh = tf.log(gt_height / height) dw = tf.log(gt_width / width) result = tf.stack([dy, dx, dh, dw], axis=1) return result def box_refinement(box, gt_box): """Compute refinement needed to transform box to gt_box. box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is assumed to be outside the box. """ box = box.astype(np.float32) gt_box = gt_box.astype(np.float32) height = box[:, 2] - box[:, 0] width = box[:, 3] - box[:, 1] center_y = box[:, 0] + 0.5 * height center_x = box[:, 1] + 0.5 * width gt_height = gt_box[:, 2] - gt_box[:, 0] gt_width = gt_box[:, 3] - gt_box[:, 1] gt_center_y = gt_box[:, 0] + 0.5 * gt_height gt_center_x = gt_box[:, 1] + 0.5 * gt_width dy = (gt_center_y - center_y) / height dx = (gt_center_x - center_x) / width dh = np.log(gt_height / height) dw = np.log(gt_width / width) return np.stack([dy, dx, dh, dw], axis=1) ############################################################ # Dataset ############################################################ class Dataset(object): """The base class for dataset classes. To use it, create a new class that adds functions specific to the dataset you want to use. For example: class CatsAndDogsDataset(Dataset): def load_cats_and_dogs(self): ... def load_mask(self, image_id): ... def image_reference(self, image_id): ... See COCODataset and ShapesDataset as examples. """ def __init__(self, class_map=None): self._image_ids = [] self.image_info = [] # Background is always the first class self.class_info = [{"source": "", "id": 0, "name": "BG"}] self.source_class_ids = {} def add_class(self, source, class_id, class_name): assert "." not in source, "Source name cannot contain a dot" # Does the class exist already? for info in self.class_info: if info["source"] == source and info["id"] == class_id: # source.class_id combination already available, skip return # Add the class self.class_info.append({"source": source, "id": class_id, "name": class_name}) def add_image(self, source, image_id, path, **kwargs): image_info = {"id": image_id, "source": source, "path": path} image_info.update(kwargs) self.image_info.append(image_info) def image_reference(self, image_id): """Return a link to the image in its source Website or details about the image that help looking it up or debugging it. Override for your dataset, but pass to this function if you encounter images not in your dataset. 
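# Illustrative sketch: apply_box_deltas() above is the inverse of box_refinement(),
# so applying the computed refinement to a box recovers the ground-truth box.
import numpy as np

box = np.array([[10.0, 10.0, 50.0, 50.0]])
gt_box = np.array([[12.0, 8.0, 60.0, 52.0]])
deltas = box_refinement(box, gt_box)     # [dy, dx, log(dh), log(dw)]
print(apply_box_deltas(box, deltas))     # ~[[12. 8. 60. 52.]]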
""" return "" def prepare(self, class_map=None): """Prepares the Dataset class for use. TODO: class map is not supported yet. When done, it should handle mapping classes from different datasets to the same class ID. """ def clean_name(name): """Returns a shorter version of object names for cleaner display.""" return ",".join(name.split(",")[:1]) # Build (or rebuild) everything else from the info dicts. self.num_classes = len(self.class_info) self.class_ids = np.arange(self.num_classes) self.class_names = [clean_name(c["name"]) for c in self.class_info] self.num_images = len(self.image_info) self._image_ids = np.arange(self.num_images) # Mapping from source class and image IDs to internal IDs self.class_from_source_map = { "{}.{}".format(info["source"], info["id"]): id for info, id in zip(self.class_info, self.class_ids) } self.image_from_source_map = { "{}.{}".format(info["source"], info["id"]): id for info, id in zip(self.image_info, self.image_ids) } # Map sources to class_ids they support self.sources = list(set([i["source"] for i in self.class_info])) self.source_class_ids = {} # Loop over datasets for source in self.sources: self.source_class_ids[source] = [] # Find classes that belong to this dataset for i, info in enumerate(self.class_info): # Include BG class in all datasets if i == 0 or source == info["source"]: self.source_class_ids[source].append(i) def map_source_class_id(self, source_class_id): """Takes a source class ID and returns the int class ID assigned to it. For example: dataset.map_source_class_id("coco.12") -> 23 """ return self.class_from_source_map[source_class_id] def get_source_class_id(self, class_id, source): """Map an internal class ID to the corresponding class ID in the source dataset.""" info = self.class_info[class_id] assert info["source"] == source return info["id"] @property def image_ids(self): return self._image_ids def source_image_link(self, image_id): """Returns the path or URL to the image. Override this to return a URL to the image if it's available online for easy debugging. """ return self.image_info[image_id]["path"] def load_image(self, image_id): """Load the specified image and return a [H,W,3] Numpy array. """ # Load image image = skimage.io.imread(self.image_info[image_id]["path"]) # If grayscale. Convert to RGB for consistency. if image.ndim != 3: image = skimage.color.gray2rgb(image) # If has an alpha channel, remove it for consistency if image.shape[-1] == 4: image = image[..., :3] return image def load_mask(self, image_id): """Load instance masks for the given image. Different datasets use different ways to store masks. Override this method to load instance masks and return them in the form of am array of binary masks of shape [height, width, instances]. Returns: masks: A bool array of shape [height, width, instance count] with a binary mask per instance. class_ids: a 1D array of class IDs of the instance masks. """ # Override this function to load a mask from your dataset. # Otherwise, it returns an empty mask. logging.warning("You are using the default load_mask(), maybe you need to define your own one.") mask = np.empty([0, 0, 0]) class_ids = np.empty([0], np.int32) return mask, class_ids def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): """Resizes an image keeping the aspect ratio unchanged. min_dim: if provided, resizes the image such that it's smaller dimension == min_dim max_dim: if provided, ensures that the image longest side doesn't exceed this value. 
min_scale: if provided, ensure that the image is scaled up by at least this percent even if min_dim doesn't require it. mode: Resizing mode. none: No resizing. Return the image unchanged. square: Resize and pad with zeros to get a square image of size [max_dim, max_dim]. pad64: Pads width and height with zeros to make them multiples of 64. If min_dim or min_scale are provided, it scales the image up before padding. max_dim is ignored in this mode. The multiple of 64 is needed to ensure smooth scaling of feature maps up and down the 6 levels of the FPN pyramid (2**6=64). crop: Picks random crops from the image. First, scales the image based on min_dim and min_scale, then picks a random crop of size min_dim x min_dim. Can be used in training only. max_dim is not used in this mode. Returns: image: the resized image window: (y1, x1, y2, x2). If max_dim is provided, padding might be inserted in the returned image. If so, this window is the coordinates of the image part of the full image (excluding the padding). The x2, y2 pixels are not included. scale: The scale factor used to resize the image padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] """ # Keep track of image dtype and return results in the same dtype image_dtype = image.dtype # Default window (y1, x1, y2, x2) and default scale == 1. h, w = image.shape[:2] window = (0, 0, h, w) scale = 1 padding = [(0, 0), (0, 0), (0, 0)] crop = None if mode == "none": return image, window, scale, padding, crop # Scale? if min_dim: # Scale up but not down scale = max(1, min_dim / min(h, w)) if min_scale and scale < min_scale: scale = min_scale # Does it exceed max dim? if max_dim and mode == "square": image_max = max(h, w) if round(image_max * scale) > max_dim: scale = max_dim / image_max # Resize image using bilinear interpolation if scale != 1: image = resize(image, (round(h * scale), round(w * scale)), preserve_range=True) # Need padding or cropping? if mode == "square": # Get new height and width h, w = image.shape[:2] top_pad = (max_dim - h) // 2 bottom_pad = max_dim - h - top_pad left_pad = (max_dim - w) // 2 right_pad = max_dim - w - left_pad padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] image = np.pad(image, padding, mode="constant", constant_values=0) window = (top_pad, left_pad, h + top_pad, w + left_pad) elif mode == "pad64": h, w = image.shape[:2] # Both sides must be divisible by 64 assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" # Height if h % 64 > 0: max_h = h - (h % 64) + 64 top_pad = (max_h - h) // 2 bottom_pad = max_h - h - top_pad else: top_pad = bottom_pad = 0 # Width if w % 64 > 0: max_w = w - (w % 64) + 64 left_pad = (max_w - w) // 2 right_pad = max_w - w - left_pad else: left_pad = right_pad = 0 padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] image = np.pad(image, padding, mode="constant", constant_values=0) window = (top_pad, left_pad, h + top_pad, w + left_pad) elif mode == "crop": # Pick a random crop h, w = image.shape[:2] y = random.randint(0, (h - min_dim)) x = random.randint(0, (w - min_dim)) crop = (y, x, min_dim, min_dim) image = image[y : y + min_dim, x : x + min_dim] window = (0, 0, min_dim, min_dim) else: raise Exception("Mode {} not supported".format(mode)) return image.astype(image_dtype), window, scale, padding, crop def resize_mask(mask, scale, padding, crop=None): """Resizes a mask using the given scale and padding. 
Typically, you get the scale and padding from resize_image() to ensure both, the image and the mask, are resized consistently. scale: mask scaling factor padding: Padding to add to the mask in the form [(top, bottom), (left, right), (0, 0)] """ # Suppress warning from scipy 0.13.0, the output shape of zoom() is # calculated with round() instead of int() with warnings.catch_warnings(): warnings.simplefilter("ignore") mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) if crop is not None: y, x, h, w = crop mask = mask[y : y + h, x : x + w] else: mask = np.pad(mask, padding, mode="constant", constant_values=0) return mask def minimize_mask(bbox, mask, mini_shape): """Resize masks to a smaller version to reduce memory load. Mini-masks can be resized back to image scale using expand_masks() See inspect_data.ipynb notebook for more details. """ mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) for i in range(mask.shape[-1]): # Pick slice and cast to bool in case load_mask() returned wrong dtype m = mask[:, :, i].astype(bool) y1, x1, y2, x2 = bbox[i][:4] m = m[y1:y2, x1:x2] if m.size == 0: raise Exception("Invalid bounding box with area of zero") # Resize with bilinear interpolation m = resize(m, mini_shape) mini_mask[:, :, i] = np.around(m).astype(np.bool) return mini_mask def expand_mask(bbox, mini_mask, image_shape): """Resizes mini masks back to image size. Reverses the change of minimize_mask(). See inspect_data.ipynb notebook for more details. """ mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) for i in range(mask.shape[-1]): m = mini_mask[:, :, i] y1, x1, y2, x2 = bbox[i][:4] h = y2 - y1 w = x2 - x1 # Resize with bilinear interpolation m = resize(m, (h, w)) mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) return mask # TODO: Build and use this function to reduce code duplication def mold_mask(mask, config): pass def unmold_mask(mask, bbox, image_shape): """Converts a mask generated by the neural network to a format similar to its original shape. mask: [height, width] of type float. A small, typically 28x28 mask. bbox: [y1, x1, y2, x2]. The box to fit the mask in. Returns a binary mask with the same size as the original image. """ threshold = 0.5 y1, x1, y2, x2 = bbox mask = resize(mask, (y2 - y1, x2 - x1)) mask = np.where(mask >= threshold, 1, 0).astype(np.bool) # Put the mask in the right location. full_mask = np.zeros(image_shape[:2], dtype=np.bool) full_mask[y1:y2, x1:x2] = mask return full_mask ############################################################ # Anchors ############################################################ def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): """ scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] shape: [height, width] spatial shape of the feature map over which to generate anchors. feature_stride: Stride of the feature map relative to the image in pixels. anchor_stride: Stride of anchors on the feature map. For example, if the value is 2 then generate anchors for every other feature map pixel. 
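    Example (illustrative argument values, not project defaults):

        anchors = generate_anchors(scales=32, ratios=[0.5, 1, 2], shape=[8, 8],
                                   feature_stride=16, anchor_stride=1)
        # anchors has shape [8 * 8 * 3, 4], with each box given as
        # (y1, x1, y2, x2) in image pixel coordinates.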
""" # Get all combinations of scales and ratios scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) scales = scales.flatten() ratios = ratios.flatten() # Enumerate heights and widths from scales and ratios heights = scales / np.sqrt(ratios) widths = scales * np.sqrt(ratios) # Enumerate shifts in feature space shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) # Enumerate combinations of shifts, widths, and heights box_widths, box_centers_x = np.meshgrid(widths, shifts_x) box_heights, box_centers_y = np.meshgrid(heights, shifts_y) # Reshape to get a list of (y, x) and a list of (h, w) box_centers = np.stack([box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) # Convert to corner coordinates (y1, x1, y2, x2) boxes = np.concatenate( [box_centers - 0.5 * box_sizes, box_centers + 0.5 * box_sizes], axis=1 ) return boxes def generate_pyramid_anchors( scales, ratios, feature_shapes, feature_strides, anchor_stride ): """Generate anchors at different levels of a feature pyramid. Each scale is associated with a level of the pyramid, but each ratio is used in all levels of the pyramid. Returns: anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted with the same order of the given scales. So, anchors of scale[0] come first, then anchors of scale[1], and so on. """ # Anchors # [anchor_count, (y1, x1, y2, x2)] anchors = [] for i in range(len(scales)): anchors.append( generate_anchors( scales[i], ratios, feature_shapes[i], feature_strides[i], anchor_stride ) ) return np.concatenate(anchors, axis=0) ############################################################ # Miscellaneous ############################################################ def trim_zeros(x): """It's common to have tensors larger than the available data and pad with zeros. This function removes rows that are all zeros. x: [rows, columns]. """ assert len(x.shape) == 2 return x[~np.all(x == 0, axis=1)] def compute_matches( gt_boxes, gt_class_ids, gt_masks, pred_boxes, pred_class_ids, pred_scores, pred_masks, iou_threshold=0.5, score_threshold=0.0, ): """Finds matches between prediction and ground truth instances. Returns: gt_match: 1-D array. For each GT box it has the index of the matched predicted box. pred_match: 1-D array. For each predicted box, it has the index of the matched ground truth box. overlaps: [pred_boxes, gt_boxes] IoU overlaps. """ # Trim zero padding # TODO: cleaner to do zero unpadding upstream gt_boxes = trim_zeros(gt_boxes) gt_masks = gt_masks[..., : gt_boxes.shape[0]] pred_boxes = trim_zeros(pred_boxes) pred_scores = pred_scores[: pred_boxes.shape[0]] # Sort predictions by score from high to low indices = np.argsort(pred_scores)[::-1] pred_boxes = pred_boxes[indices] pred_class_ids = pred_class_ids[indices] pred_scores = pred_scores[indices] pred_masks = pred_masks[..., indices] # Compute IoU overlaps [pred_masks, gt_masks] overlaps = compute_overlaps_masks(pred_masks, gt_masks) # Loop through predictions and find matching ground truth boxes match_count = 0 pred_match = -1 * np.ones([pred_boxes.shape[0]]) gt_match = -1 * np.ones([gt_boxes.shape[0]]) for i in range(len(pred_boxes)): # Find best matching ground truth box # 1. Sort matches by score sorted_ixs = np.argsort(overlaps[i])[::-1] # 2. 
Remove low scores low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] if low_score_idx.size > 0: sorted_ixs = sorted_ixs[: low_score_idx[0]] # 3. Find the match for j in sorted_ixs: # If ground truth box is already matched, go to next one if gt_match[j] > -1: continue # If we reach IoU smaller than the threshold, end the loop iou = overlaps[i, j] if iou < iou_threshold: break # Do we have a match? if pred_class_ids[i] == gt_class_ids[j]: match_count += 1 gt_match[j] = i pred_match[i] = j break return gt_match, pred_match, overlaps def compute_ap( gt_boxes, gt_class_ids, gt_masks, pred_boxes, pred_class_ids, pred_scores, pred_masks, iou_threshold=0.5, ): """Compute Average Precision at a set IoU threshold (default 0.5). Returns: mAP: Mean Average Precision precisions: List of precisions at different class score thresholds. recalls: List of recall values at different class score thresholds. overlaps: [pred_boxes, gt_boxes] IoU overlaps. """ # Get matches and overlaps gt_match, pred_match, overlaps = compute_matches( gt_boxes, gt_class_ids, gt_masks, pred_boxes, pred_class_ids, pred_scores, pred_masks, iou_threshold, ) # Compute precision and recall at each prediction box step precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) # Pad with start and end values to simplify the math precisions = np.concatenate([[0], precisions, [0]]) recalls = np.concatenate([[0], recalls, [1]]) # Ensure precision values decrease but don't increase. This way, the # precision value at each recall threshold is the maximum it can be # for all following recall thresholds, as specified by the VOC paper. for i in range(len(precisions) - 2, -1, -1): precisions[i] = np.maximum(precisions[i], precisions[i + 1]) # Compute mean AP over recall range indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 mAP = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices]) return mAP, precisions, recalls, overlaps def compute_ap_range( gt_box, gt_class_id, gt_mask, pred_box, pred_class_id, pred_score, pred_mask, iou_thresholds=None, verbose=1, ): """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" # Default is 0.5 to 0.95 with increments of 0.05 iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) # Compute AP over range of IoU thresholds AP = [] for iou_threshold in iou_thresholds: ap, precisions, recalls, overlaps = compute_ap( gt_box, gt_class_id, gt_mask, pred_box, pred_class_id, pred_score, pred_mask, iou_threshold=iou_threshold, ) if verbose: print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) AP.append(ap) AP = np.array(AP).mean() if verbose: print( "AP @{:.2f}-{:.2f}:\t {:.3f}".format( iou_thresholds[0], iou_thresholds[-1], AP ) ) return AP def compute_recall(pred_boxes, gt_boxes, iou): """Compute the recall at the given IoU threshold. It's an indication of how many GT boxes were found by the given prediction boxes. 
pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates """ # Measure overlaps overlaps = compute_overlaps(pred_boxes, gt_boxes) iou_max = np.max(overlaps, axis=1) iou_argmax = np.argmax(overlaps, axis=1) positive_ids = np.where(iou_max >= iou)[0] matched_gt_boxes = iou_argmax[positive_ids] recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] return recall, positive_ids # ## Batch Slicing # Some custom layers support a batch size of 1 only, and require a lot of work # to support batches greater than 1. This function slices an input tensor # across the batch dimension and feeds batches of size 1. Effectively, # an easy way to support batches > 1 quickly with little code modification. # In the long run, it's more efficient to modify the code to support large # batches and getting rid of this function. Consider this a temporary solution def batch_slice(inputs, graph_fn, batch_size, names=None): """Splits inputs into slices and feeds each slice to a copy of the given computation graph and then combines the results. It allows you to run a graph on a batch of inputs even if the graph is written to support one instance only. inputs: list of tensors. All must have the same first dimension length graph_fn: A function that returns a TF tensor that's part of a graph. batch_size: number of slices to divide the data into. names: If provided, assigns names to the resulting tensors. """ if not isinstance(inputs, list): inputs = [inputs] outputs = [] for i in range(batch_size): inputs_slice = [x[i] for x in inputs] output_slice = graph_fn(*inputs_slice) if not isinstance(output_slice, (tuple, list)): output_slice = [output_slice] outputs.append(output_slice) # Change outputs from a list of slices where each is # a list of outputs to a list of outputs and each has # a list of slices outputs = list(zip(*outputs)) if names is None: names = [None] * len(outputs) result = [tf.stack(o, axis=0, name=n) for o, n in zip(outputs, names)] if len(result) == 1: result = result[0] return result def download_trained_weights(coco_model_path, verbose=1): """Download COCO trained weights from Releases. coco_model_path: local path of COCO trained weights """ if verbose > 0: print("Downloading pretrained model to " + coco_model_path + " ...") with urllib.request.urlopen(COCO_MODEL_URL) as resp, open( coco_model_path, "wb" ) as out: shutil.copyfileobj(resp, out) if verbose > 0: print("... done downloading pretrained model!") def norm_boxes(boxes, shape): """Converts boxes from pixel coordinates to normalized coordinates. boxes: [N, (y1, x1, y2, x2)] in pixel coordinates shape: [..., (height, width)] in pixels Note: In pixel coordinates (y2, x2) is outside the box. But in normalized coordinates it's inside the box. Returns: [N, (y1, x1, y2, x2)] in normalized coordinates """ h, w = shape scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) return np.divide((boxes - shift), scale).astype(np.float32) def denorm_boxes(boxes, shape): """Converts boxes from normalized coordinates to pixel coordinates. boxes: [N, (y1, x1, y2, x2)] in normalized coordinates shape: [..., (height, width)] in pixels Note: In pixel coordinates (y2, x2) is outside the box. But in normalized coordinates it's inside the box. 
Returns: [N, (y1, x1, y2, x2)] in pixel coordinates """ h, w = shape scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) def resize( image, output_shape, order=1, mode="constant", cval=0, clip=True, preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None, ): """A wrapper for Scikit-Image resize(). Scikit-Image generates warnings on every call to resize() if it doesn't receive the right parameters. The right parameters depend on the version of skimage. This solves the problem by using different parameters per version. And it provides a central place to control resizing defaults. """ if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): # New in 0.14: anti_aliasing. Default it to False for backward # compatibility with skimage 0.13. return skimage.transform.resize( image, output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range, anti_aliasing=anti_aliasing, anti_aliasing_sigma=anti_aliasing_sigma, ) else: return skimage.transform.resize( image, output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range, ) PKaN8WWcropmask/mrcnn/visualize.py""" Mask R-CNN Display and Visualization Functions. Copyright (c) 2017 Matterport, Inc. Licensed under the MIT License (see LICENSE for details) Written by Waleed Abdulla """ import os import sys import random import itertools import colorsys import numpy as np from skimage.measure import find_contours from skimage import exposure import matplotlib.pyplot as plt from matplotlib import patches, lines from matplotlib.patches import Polygon import IPython.display from rasterio.plot import reshape_as_image # Root directory of the project ROOT_DIR = os.path.abspath("../") # Import Mask RCNN sys.path.append(ROOT_DIR) # To find local version of the library from cropmask.mrcnn import utils ############################################################ # Visualization ############################################################ def normalize(arr): """ Function to normalize an input array to 0-1 """ arr_max = np.max(arr) return arr / arr_max def norm_rgb(image): """Takes and RGB ordered image and returns normalized array""" return reshape_as_image(np.stack([normalize(image[:,:,0]),normalize(image[:,:,1]),normalize(image[:,:,2])])) def reorder_to_brg(image): """reorders wv2 bands ordered like RGBNRGN for off/onseason to blue, red, green for imshow """ blue = normalize(image[:, :, 2]) green = normalize(image[:, :, 1]) red = normalize(image[:, :, 0]) nir = normalize(image[:, :, 3]) return np.stack([blue, red, green], axis=-1) def apply_normalize(arr): """ Rescales and applies other exposure functions to improve image vis. http://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.rescale_intensity """ arr = arr.astype(float) for i in range(arr.shape[-1]): arr[:,:,i]=normalize(arr[:,:,i]) return arr def display_images( images, titles=None, cols=4, cmap=None, norm=None, interpolation=None ): """Display the given set of images, optionally with titles. images: list or array of image tensors in HWC format. titles: optional. A list of titles to display with each image. cols: number of images per row cmap: Optional. Color map to use. For example, "Blues". norm: Optional. A Normalize instance to map values to colors. interpolation: Optional. Image interpolation to use for display. 
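    Example (an illustrative sketch; `dataset` and `image_id` are assumed to
    come from a prepared mrcnn Dataset subclass and are not defined here):

        image = dataset.load_image(image_id)
        display_images([image], titles=[str(image_id)], cols=1)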
""" titles = titles if titles is not None else [""] * len(images) rows = len(images) // cols + 1 plt.figure(figsize=(14, 14 * rows // cols)) i = 1 for image, title in zip(images, titles): if i == 1 and image.shape[-1] == 8: # added for wv2 brg = reorder_to_brg(image) brg_adap = exposure.equalize_adapthist(brg, clip_limit=0.0055) plt.figure() plt.subplot(rows, cols, i) plt.title(title, fontsize=9) plt.axis("off") plt.imshow(brg_adap, cmap="brg", norm=norm, interpolation=interpolation) i += 1 elif ( i == 1 and image.shape[-1] == 3 ): # added for RGB satellite imagery, tested with wv2 image[image < 0] = 0 image = apply_normalize(image) plt.figure() plt.subplot(rows, cols, i) plt.title(title, fontsize=9) plt.axis("off") plt.imshow(image, cmap="brg", norm=norm, interpolation=interpolation) i += 1 else: plt.subplot(rows, cols, i) plt.title(title, fontsize=9) plt.axis("off") plt.imshow(image, cmap=cmap, norm=norm, interpolation=interpolation) i += 1 plt.show() def random_colors(N, bright=True): """ Generate random colors. To get visually distinct colors, generate them in HSV space then convert to RGB. """ brightness = 1.0 if bright else 0.7 hsv = [(i / N, 1, brightness) for i in range(N)] colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) random.shuffle(colors) return colors def apply_mask(image, mask, color, alpha=0.5): """Apply the given mask to the image. """ for c in range(3): image[:, :, c] = np.where( mask == 1, image[:, :, c] * (1 - alpha) + alpha * color[c] * 255, image[:, :, c], ) return image def display_instances( image, boxes, masks, class_ids, class_names, scores=None, title="", figsize=(16, 16), ax=None, show_mask=True, show_bbox=True, colors=None, captions=None, ): """ boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. masks: [height, width, num_instances] class_ids: [num_instances] class_names: list of class names of the dataset scores: (optional) confidence scores for each box title: (optional) Figure title show_mask, show_bbox: To show masks and bounding boxes or not figsize: (optional) the size of the image colors: (optional) An array or colors to use with each object captions: (optional) A list of strings to use as captions for each object """ # Number of instances N = boxes.shape[0] if not N: print("\n*** No instances to display *** \n") else: assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] # If no axis is passed, create one and automatically call show() auto_show = False if not ax: _, ax = plt.subplots(1, figsize=figsize) auto_show = True # Generate random colors colors = colors or random_colors(N) # Show area outside image boundaries. height, width = image.shape[:2] ax.set_ylim(height + 10, -10) ax.set_xlim(-10, width + 10) ax.axis("off") ax.set_title(title) masked_image = image.astype(np.uint32).copy() for i in range(N): color = colors[i] # Bounding box if not np.any(boxes[i]): # Skip this instance. Has no bbox. Likely lost in image cropping. 
continue y1, x1, y2, x2 = boxes[i] if show_bbox: p = patches.Rectangle( (x1, y1), x2 - x1, y2 - y1, linewidth=2, alpha=0.7, linestyle="dashed", edgecolor=color, facecolor="none", ) ax.add_patch(p) # Label if not captions: class_id = class_ids[i] score = scores[i] if scores is not None else None label = class_names[class_id] caption = "{} {:.3f}".format(label, score) if score else label else: caption = captions[i] ax.text(x1, y1 + 8, caption, color="w", size=11, backgroundcolor="none") # Mask mask = masks[:, :, i] if show_mask: masked_image = apply_mask(masked_image, mask, color) # Mask Polygon # Pad to ensure proper polygons for masks that touch image edges. padded_mask = np.zeros((mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) padded_mask[1:-1, 1:-1] = mask contours = find_contours(padded_mask, 0.5) for verts in contours: # Subtract the padding and flip (y, x) to (x, y) verts = np.fliplr(verts) - 1 p = Polygon(verts, facecolor="none", edgecolor=color) ax.add_patch(p) if image.shape[-1] == 8: # added for wv2 using RGBNRGB for two seasons brg = reorder_to_brg(image) brg_adap = exposure.equalize_adapthist(brg, clip_limit=0.0055) ax.imshow(brg_adap) # added band reordering for wv2 and adaptive stretch else: image[image < 0] = 0 image = apply_normalize(image) ax.imshow(image, cmap="brg") if auto_show: plt.show() def display_differences( image, gt_box, gt_class_id, gt_mask, pred_box, pred_class_id, pred_score, pred_mask, class_names, title="", ax=None, show_mask=True, show_box=True, iou_threshold=0.5, score_threshold=0.5, ): """Display ground truth and prediction instances on the same image.""" # Match predictions to ground truth gt_match, pred_match, overlaps = utils.compute_matches( gt_box, gt_class_id, gt_mask, pred_box, pred_class_id, pred_score, pred_mask, iou_threshold=iou_threshold, score_threshold=score_threshold, ) # Ground truth = green. Predictions = red colors = [(0, 1, 0, 0.8)] * len(gt_match) + [(1, 0, 0, 1)] * len(pred_match) # Concatenate GT and predictions class_ids = np.concatenate([gt_class_id, pred_class_id]) scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) boxes = np.concatenate([gt_box, pred_box]) masks = np.concatenate([gt_mask, pred_mask], axis=-1) # Captions per instance show score/IoU captions = ["" for m in gt_match] + [ "{:.2f} / {:.2f}".format( pred_score[i], ( overlaps[i, int(pred_match[i])] if pred_match[i] > -1 else overlaps[i].max() ), ) for i in range(len(pred_match)) ] # Set title if not provided title = ( title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" ) # Display display_instances( image, boxes, masks, class_ids, class_names, scores, ax=ax, show_bbox=show_box, show_mask=show_mask, colors=colors, captions=captions, title=title, ) def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): """ anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. proposals: [n, 4] the same anchors but refined to fit objects better. """ masked_image = image.copy() # Pick random anchors in case there are too many. ids = np.arange(rois.shape[0], dtype=np.int32) ids = np.random.choice(ids, limit, replace=False) if ids.shape[0] > limit else ids fig, ax = plt.subplots(1, figsize=(12, 12)) if rois.shape[0] > limit: plt.title("Showing {} random ROIs out of {}".format(len(ids), rois.shape[0])) else: plt.title("{} ROIs".format(len(ids))) # Show area outside image boundaries. 
ax.set_ylim(image.shape[0] + 20, -20) ax.set_xlim(-50, image.shape[1] + 20) ax.axis("off") for i, id in enumerate(ids): color = np.random.rand(3) class_id = class_ids[id] # ROI y1, x1, y2, x2 = rois[id] p = patches.Rectangle( (x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor=color if class_id else "gray", facecolor="none", linestyle="dashed", ) ax.add_patch(p) # Refined ROI if class_id: ry1, rx1, ry2, rx2 = refined_rois[id] p = patches.Rectangle( (rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, edgecolor=color, facecolor="none", ) ax.add_patch(p) # Connect the top-left corners of the anchor and proposal for easy visualization ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) # Label label = class_names[class_id] ax.text( rx1, ry1 + 8, "{}".format(label), color="w", size=11, backgroundcolor="none", ) # Mask m = utils.unmold_mask(mask[id], rois[id][:4].astype(np.int32), image.shape) masked_image = apply_mask(masked_image, m, color) if image.shape[-1] == 8: # added for wv2 brg = reorder_to_brg(image) brg_adap = exposure.equalize_adapthist(brg, clip_limit=0.0055) ax.imshow(brg_adap) else: ax.imshow(masked_image) # Print stats print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) print( "Positive Ratio: {:.2f}".format( class_ids[class_ids > 0].shape[0] / class_ids.shape[0] ) ) # TODO: Replace with matplotlib equivalent? def draw_box(image, box, color): """Draw 3-pixel width bounding boxes on the given image array. color: list of 3 int values for RGB. """ if image.shape[-1] == 8: # added for 8 channel wv2 image = reorder_to_brg(image) y1, x1, y2, x2 = box image[y1 : y1 + 2, x1:x2] = color image[y2 : y2 + 2, x1:x2] = color image[y1:y2, x1 : x1 + 2] = color image[y1:y2, x2 : x2 + 2] = color return image def display_top_masks(image, mask, class_ids, class_names, limit=4): """Display the given image and the top few class masks.""" to_display = [] titles = [] to_display.append(image) titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) # Pick top prominent classes in this image unique_class_ids = np.unique(class_ids) mask_area = [ np.sum(mask[:, :, np.where(class_ids == i)[0]]) for i in unique_class_ids ] top_ids = [ v[0] for v in sorted( zip(unique_class_ids, mask_area), key=lambda r: r[1], reverse=True ) if v[1] > 0 ] # Generate images and titles for i in range(limit): class_id = top_ids[i] if i < len(top_ids) else -1 # Pull masks of instances belonging to the same class. m = mask[:, :, np.where(class_ids == class_id)[0]] m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) to_display.append(m) titles.append(class_names[class_id] if class_id != -1 else "-") display_images(to_display, titles=titles, cols=limit + 1, cmap="brg") def plot_precision_recall(AP, precisions, recalls): """Draw the precision-recall curve. AP: Average precision at IoU >= 0.5 precisions: list of precision values recalls: list of recall values """ # Plot the Precision-Recall curve _, ax = plt.subplots(1) ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) ax.set_ylim(0, 1.1) ax.set_xlim(0, 1.1) _ = ax.plot(recalls, precisions) def plot_overlaps( gt_class_ids, pred_class_ids, pred_scores, overlaps, class_names, threshold=0.5 ): """Draw a grid showing how ground truth objects are classified. gt_class_ids: [N] int. Ground truth class IDs pred_class_id: [N] int. Predicted class IDs pred_scores: [N] float. The probability scores of predicted classes overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 
class_names: list of all class names in the dataset threshold: Float. The prediction probability required to predict a class """ gt_class_ids = gt_class_ids[gt_class_ids != 0] pred_class_ids = pred_class_ids[pred_class_ids != 0] plt.figure(figsize=(12, 10)) plt.imshow(overlaps, interpolation="nearest", cmap=plt.cm.Blues) plt.yticks( np.arange(len(pred_class_ids)), [ "{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) for i, id in enumerate(pred_class_ids) ], ) plt.xticks( np.arange(len(gt_class_ids)), [class_names[int(id)] for id in gt_class_ids], rotation=90, ) thresh = overlaps.max() / 2.0 for i, j in itertools.product(range(overlaps.shape[0]), range(overlaps.shape[1])): text = "" if overlaps[i, j] > threshold: text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" color = ( "white" if overlaps[i, j] > thresh else "black" if overlaps[i, j] > 0 else "grey" ) plt.text( j, i, "{:.3f}\n{}".format(overlaps[i, j], text), horizontalalignment="center", verticalalignment="center", fontsize=9, color=color, ) plt.tight_layout() plt.xlabel("Ground Truth") plt.ylabel("Predictions") def draw_boxes( image, boxes=None, refined_boxes=None, masks=None, captions=None, visibilities=None, title="", ax=None, ): """Draw bounding boxes and segmentation masks with different customizations. boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. refined_boxes: Like boxes, but draw with solid lines to show that they're the result of refining 'boxes'. masks: [N, height, width] captions: List of N titles to display on each box visibilities: (optional) List of values of 0, 1, or 2. Determine how prominent each bounding box should be. title: An optional title to show over the image ax: (optional) Matplotlib axis to draw on. """ # Number of boxes assert boxes is not None or refined_boxes is not None N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] # Matplotlib Axis if not ax: _, ax = plt.subplots(1, figsize=(12, 12)) # Generate random colors colors = random_colors(N) # Show area outside image boundaries. margin = image.shape[0] // 10 ax.set_ylim(image.shape[0] + margin, -margin) ax.set_xlim(-margin, image.shape[1] + margin) ax.axis("off") ax.set_title(title) masked_image = image.astype(np.uint32).copy() for i in range(N): # Box visibility visibility = visibilities[i] if visibilities is not None else 1 if visibility == 0: color = "gray" style = "dotted" alpha = 0.5 elif visibility == 1: color = colors[i] style = "dotted" alpha = 1 elif visibility == 2: color = colors[i] style = "solid" alpha = 1 # Boxes if boxes is not None: if not np.any(boxes[i]): # Skip this instance. Has no bbox. Likely lost in cropping. 
continue y1, x1, y2, x2 = boxes[i] p = patches.Rectangle( (x1, y1), x2 - x1, y2 - y1, linewidth=2, alpha=alpha, linestyle=style, edgecolor=color, facecolor="none", ) ax.add_patch(p) # Refined boxes if refined_boxes is not None and visibility > 0: ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) p = patches.Rectangle( (rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, edgecolor=color, facecolor="none", ) ax.add_patch(p) # Connect the top-left corners of the anchor and proposal if boxes is not None: ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) # Captions if captions is not None: caption = captions[i] # If there are refined boxes, display captions on them if refined_boxes is not None: y1, x1, y2, x2 = ry1, rx1, ry2, rx2 x = random.randint(x1, (x1 + x2) // 2) ax.text( x1, y1, caption, size=11, verticalalignment="top", color="w", backgroundcolor="none", bbox={"facecolor": color, "alpha": 0.5, "pad": 2, "edgecolor": "none"}, ) # Masks if masks is not None: mask = masks[:, :, i] masked_image = apply_mask(masked_image, mask, color) # Mask Polygon # Pad to ensure proper polygons for masks that touch image edges. padded_mask = np.zeros( (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8 ) padded_mask[1:-1, 1:-1] = mask contours = find_contours(padded_mask, 0.5) for verts in contours: # Subtract the padding and flip (y, x) to (x, y) verts = np.fliplr(verts) - 1 p = Polygon(verts, facecolor="none", edgecolor=color) ax.add_patch(p) if image.shape[-1] == 8: # added for wv2 brg = reorder_to_brg(image) brg_adap = exposure.equalize_adapthist(brg, clip_limit=0.0055) ax.imshow(brg_adap) else: ax.imshow(norm_rgb(masked_image)) def display_table(table): """Display values in a table format. table: an iterable of rows, and each row is an iterable of values. """ html = "" for row in table: row_html = "" for col in row: row_html += "{:40}".format(str(col)) html += "" + row_html + "" html = "" + html + "
" IPython.display.display(IPython.display.HTML(html)) def display_weight_stats(model): """Scans all the weights in the model and returns a list of tuples that contain stats about each weight. """ layers = model.get_trainable_layers() table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] for l in layers: weight_values = l.get_weights() # list of Numpy arrays weight_tensors = l.weights # list of TF tensors for i, w in enumerate(weight_values): weight_name = weight_tensors[i].name # Detect problematic layers. Exclude biases of conv layers. alert = "" if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): alert += "*** dead?" if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: alert += "*** Overflow?" # Add row table.append( [ weight_name + alert, str(w.shape), "{:+9.4f}".format(w.min()), "{:+10.4f}".format(w.max()), "{:+9.4f}".format(w.std()), ] ) display_table(table) PKaN=cropmask/tests/data-test.pydef parse_yaml(input_file): """Parse yaml file of configuration parameters.""" with open(input_file, "r") as yaml_file: params = yaml.load(yaml_file) return params params = parse_yaml("preprocess_config.yaml") ROOT = params["dirs"]["root"] DATASET = os.path.join(ROOT, params["dirs"]["dataset"]) REORDER = os.path.join(DATASET, params["dirs"]["reorder"]) TRAIN = os.path.join(DATASET, params["dirs"]["train"]) TEST = os.path.join(DATASET, params["dirs"]["test"]) GRIDDED_IMGS = os.path.join(DATASET, params["dirs"]["gridded_imgs"]) GRIDDED_LABELS = os.path.join(DATASET, params["dirs"]["gridded_labels"]) OPENED = os.path.join(DATASET, params["dirs"]["opened"]) INSTANCES = os.path.join(DATASET, params["dirs"]["instances"]) RESULTS = os.path.join( ROOT, "../", params["dirs"]["results"], params["dirs"]["dataset"] ) SOURCE_IMGS = os.path.join(ROOT, params["dirs"]["source_imgs"]) SOURCE_LABELS = os.path.join(ROOT, params["dirs"]["source_labels"]) # all files, including ones we don't care about file_ids_all = next(os.walk(SOURCE_IMGS))[2] # all multispectral on and off season tifs image_ids_all = [ image_id for image_id in file_ids_all if "MS" in image_id and ".aux" not in image_id ] # check for duplicates assert len(image_ids_all) == len(set(image_ids_all)) image_ids_gs = [image_id for image_id in image_ids_all if "GS" in image_id] image_ids_os = [image_id for image_id in image_ids_all if "OS" in image_id] # check for equality assert len(image_ids_os) == len(image_ids_gs) # only select growing season images image_ids_short = [image_id[0:9] for image_id in image_ids_gs] for imid in image_ids_short: load_merge_wv2(imid, WV2_DIR) image_list = next(os.walk(REORDERED_DIR))[2] PKaN` cropmask/tests/download_tests.pyimport os from lsru import Usgs from lsru import Espa import yaml from cropmask.download import download_utils as du import datetime import time import pytest @pytest.fixture def configs(): config_path = "/home/rave/azure_configs.yaml" with open(config_path) as f: configs = yaml.safe_load(f) return configs @pytest.fixture def setup_order(configs): """Orders a single scene from path/row for testing, or uses existing order""" usgs = Usgs(conf=configs["download"]["lsru_config"]) usgs.login() espa = Espa(conf=configs["download"]["lsru_config"]) try: order = espa.orders[-1] order.urls_completed = order.urls_completed[-1] # speeds up testing except: bbox = [-102.255, 40.76, -101.255, 41.76] scene_list = du.get_scene_list( collection="LANDSAT_TM_C1", bbox=bbox, begin=datetime.datetime(2005, 1, 1), end=datetime.datetime(2006, 1, 1), max_results=10, max_cloud_cover=10, ) 
pathrow_list_western_nb = ["032031"] scene_list = du.filter_scenes_by_path_row(scene_list, pathrow_list_western_nb) product_list = ["sr"] order = du.submit_order(scene_list, product_list) return order def test_order_complete(setup_order): while setup_order.is_complete == False: time.sleep(10) assert setup_order.status == 200 def test_url_retrieve(setup_order): url = setup_order.urls_completed[-1] r = url_retrieve(url) assert r.status == 200 def test_azure_download(setup_order, configs): from azure.storage.blob import BlockBlobService as blob # if there is an existing order, we just want to use that and not set up a test order. setup_order.urls_completed = setup_order.urls_completed[-1] # speeds up testing setup_order.download_all_complete_azure( configs["storage"]["container"], configs["storage"]["storage_name"], configs["storage"]["storage_key"], ) url = setup_order.urls_completed[-1] blob_name = url.split("/")[-1].split(".")[0] assert blob.exists( container=configs["storage"]["storage_name"], blob_name=blob_name ) PKaNj2++"cropmask/tests/preprocess_tests.pyimport cropmask.preprocess as pp from cropmask.misc import make_dirs, remove_dirs import os import pytest @pytest.fixture def wflow(): wflow = pp.PreprocessWorkflow("/home/ryan/work/CropMask_RCNN/cropmask/preprocess_config.yaml", "/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/unpacked_landsat_downloads/032031/LT050320312005082801T1-SC20190418222350/", "/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/external/nebraska_pivots_projected.geojson") return wflow def test_init(wflow): assert wflow def test_make_dir(): directory_list = ["/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/pytest_dir"] make_dirs(directory_list) try: assert os.path.exists(directory_list[0]) except AssertionError: remove_dirs(directory_list) print("The directory was not created.") remove_dirs(directory_list) def test_make_dirs(wflow): directory_list = wflow.setup_dirs() for i in directory_list: try: assert os.path.exists(i) except AssertionError: remove_dirs(directory_list) print("The directory "+i+" was not created.") remove_dirs(directory_list) def test_yaml_to_band_index(wflow): band_list = wflow.yaml_to_band_index() try: assert band_list == ['1','2','3'] except AssertionError: print("The band list "+band_list+" is not "+['1','2','3']) def test_list_products(): path = "/mnt/azureml-filestore-896933ab-f4fd-42b2-a154-0abb35dfb0b0/unpacked_landsat_downloads/032031/LT050320312005082801T1-SC20190418222350/" try: product_list = os.listdir(path) assert product_list except AssertionError: print("The product list is empty, check this path: "+ path) def test_get_product_paths(wflow): band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) assert product_list assert len(product_list) == len(band_list) def test_load_and_stack_bands(wflow): # fails because product list empty band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) assert stacked_arr.shape[-1] == len(product_list) def test_stack_and_save_bands(wflow): directory_list = wflow.setup_dirs() band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) try: wflow.stack_and_save_bands() except: remove_dirs(directory_list) print("The function didn't complete.") try: assert os.path.exists(wflow.stacked_path) remove_dirs(directory_list) except AssertionError: 
remove_dirs(directory_list) print("The stacked tif was not saved at the location "+wflow.stacked_path) def test_negative_buffer_and_small_filter(wflow): directory_list = wflow.setup_dirs() band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) wflow.stack_and_save_bands() try: assert wflow.negative_buffer_and_small_filter(-31, 100) == np.array([0, 1]) # for the single class case, where 1 are cp pixels except: remove_dirs(directory_list) print("The function didn't complete.") try: assert os.path.exists(wflow.rasterized_label_path) remove_dirs(directory_list) except AssertionError: remove_dirs(directory_list) print("The rasterized label tif was not saved at the location "+wflow.rasterized_label_path) def test_grid_images(wflow): directory_list = wflow.setup_dirs() band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) wflow.stack_and_save_bands() wflow.negative_buffer_and_small_filter(-31, 100) try: img_paths, label_paths = wflow.grid_images() assert len(img_paths) > 0 assert len(img_paths) == len(label_paths) except AssertionError: remove_dirs(directory_list) print("Less than one chip was saved") def test_move_chips_to_folder(wflow): directory_list = wflow.setup_dirs() band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) wflow.stack_and_save_bands() wflow.negative_buffer_and_small_filter(-31, 100) img_paths, label_paths = wflow.grid_images() wflow.remove_mostly_empty(img_paths, label_paths) try: assert wflow.move_chips_to_folder() assert len(os.listdir(wflow.TRAIN)) > 1 assert len(os.listdir(os.listdir(wflow.TRAIN))[0]) > 0 except AssertionError: remove_dirs(directory_list) print("Less than one chip directory was made") def test_connected_components(wflow): directory_list = wflow.setup_dirs() band_list = wflow.yaml_to_band_index() product_list = wflow.get_product_paths(band_list) stacked_arr = wflow.load_and_stack_bands(product_list) wflow.stack_and_save_bands() wflow.negative_buffer_and_small_filter(-31, 100) img_paths, label_paths = wflow.grid_images() wflow.remove_mostly_empty(img_paths, label_paths) wflow.move_chips_to_folder() try: assert wflow.connected_components() except AssertionError: print("Connected components did not complete") PKaNd@77cropmask-0.1.dist-info/LICENSE The MIT License (MIT) Copyright (c) 2018, Ryan Avery Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.