From edee827e6954970b0999fee5d6ea461c9053459a Mon Sep 17 00:00:00 2001 From: Jona Date: Tue, 22 Oct 2024 00:45:50 -0400 Subject: [PATCH 1/3] half labeling --- cerulean_cloud/models.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/cerulean_cloud/models.py b/cerulean_cloud/models.py index a758ab81..60351e2b 100644 --- a/cerulean_cloud/models.py +++ b/cerulean_cloud/models.py @@ -966,23 +966,12 @@ def overlap_percent(a, b): raster = raster.float().detach().numpy() - # Label components based on p3 to find peaks + reduced_labels = set() # Initialize an empty set for unique p1 labels p1_islands, p1_island_count = label(raster >= p1) - # logging.info(f"p1_island_count: {p1_island_count}") - p3_islands, p3_island_count = label(raster >= p3) - # logging.info(f"p3_island_count: {p3_island_count}") - - # Initialize an empty set for unique p1 labels corresponding to p3 components - reduced_labels = set() - - # Iterate over each p3 component - for i in range(1, p3_island_count + 1): - p3_island_mask = p3_islands == i - p1_label_at_p3 = p1_islands[p3_island_mask].flat[ - 0 - ] # Take the first pixel's p1 label - reduced_labels.add(p1_label_at_p3) - # logging.info(f"reduced_labels: {len(reduced_labels)}") + for label_num in range(1, p1_island_count + 1): + p1_label_mask = p1_islands == label_num + if np.any(raster[p1_label_mask] >= p3): + reduced_labels.add(label_num) zero_mask = raster == 0 # Find all pixels that are zero (i.e. 
nodata value) shapes = rasterio.features.shapes( From 78589800a947cf0817ca83f610e8077fc825ec7d Mon Sep 17 00:00:00 2001 From: Jona Date: Tue, 22 Oct 2024 00:57:39 -0400 Subject: [PATCH 2/3] Addl optimizations --- cerulean_cloud/models.py | 44 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/cerulean_cloud/models.py b/cerulean_cloud/models.py index 60351e2b..cae1433b 100644 --- a/cerulean_cloud/models.py +++ b/cerulean_cloud/models.py @@ -894,9 +894,9 @@ def instantiate(self, scene_array_probs, transform): Returns: - geojson.FeatureCollection: A collection of geojson features representing detected instances. """ - # Convert numpy.ndarray to PyTorch tensor if necessary - if isinstance(scene_array_probs, np.ndarray): - scene_array_probs = torch.tensor(scene_array_probs) + # Ensure scene_array_probs is a NumPy array + if isinstance(scene_array_probs, torch.Tensor): + scene_array_probs = scene_array_probs.detach().cpu().numpy() classes_to_consider = [1, 2, 3] scene_oil_probs = scene_array_probs[classes_to_consider].sum(0) @@ -909,18 +909,16 @@ def instantiate(self, scene_array_probs, transform): ) for feat in features: mask = geometry_mask( - [shape(feat["geometry"])], + [feat["geometry"]], out_shape=scene_oil_probs.shape, transform=transform, invert=True, ) cls_sums = [ - cls_probs[mask].detach().sum() - for cls_probs in scene_array_probs[classes_to_consider] - ] - feat["properties"]["inf_idx"] = classes_to_consider[ - cls_sums.index(max(cls_sums)) + scene_array_probs[cls_idx][mask].sum() + for cls_idx in classes_to_consider ] + feat["properties"]["inf_idx"] = classes_to_consider[np.argmax(cls_sums)] return geojson.FeatureCollection(features=features) @@ -949,7 +947,7 @@ def instances_from_probs( Sample values: p1, p2, p3 = 0.1, 0.5, 0.95 Analogous to bbox_score_thresh, poly_score_thresh, pixel_score_thresh Returns: - GeoJSON: A GeoJSON feature collection of the processed predictions. 
+ List[geojson.Feature]: A list of GeoJSON features of the processed predictions. """ def overlap_percent(a, b): @@ -964,13 +962,20 @@ def overlap_percent(a, b): else: return a.intersection(b).area / a.area - raster = raster.float().detach().numpy() + # Ensure raster is a NumPy array + if isinstance(raster, torch.Tensor): + raster = raster.detach().cpu().numpy() - reduced_labels = set() # Initialize an empty set for unique p1 labels + # Label components based on p1 p1_islands, p1_island_count = label(raster >= p1) + + # Initialize an empty set for unique p1 labels containing pixels >= p3 + reduced_labels = set() + + # For each p1_island label, check if it contains any pixels >= p3 for label_num in range(1, p1_island_count + 1): - p1_label_mask = p1_islands == label_num - if np.any(raster[p1_label_mask] >= p3): + island_mask = p1_islands == label_num + if np.any(raster[island_mask] >= p3): reduced_labels.add(label_num) zero_mask = raster == 0 # Find all pixels that are zero (i.e. nodata value) @@ -980,16 +985,21 @@ def overlap_percent(a, b): polygons = [shape(geom) for geom, value in shapes if value == 1] scene_edge = MultiPolygon(polygons).buffer(discard_edge_polygons_buffer) - # Process into feature collections based on unique p1 labels features = [] + # Process into feature collections based on unique p1 labels for p1_label in reduced_labels: mask = (p1_islands == p1_label) & (raster >= p2) # Apply p2 trimming masked_raster = raster[mask] - shapes = rasterio.features.shapes( + + if not np.any(mask): + continue # Skip if mask is empty after trimming + + shapes_generator = rasterio.features.shapes( mask.astype(np.uint8), mask=mask, transform=transform ) - polygons = [shape(geom) for geom, value in shapes if value == 1] + polygons = [shape(geom) for geom, value in shapes_generator if value == 1] polygons = [p for p in polygons if overlap_percent(p, scene_edge) <= 0.5] + # Ensure there are polygons left after trimming to process into a MultiPolygon if polygons: 
multipolygon = MultiPolygon(polygons) From 9bc80c6c54f039f162021bfa22f9398fd338db7e Mon Sep 17 00:00:00 2001 From: Jona Date: Tue, 22 Oct 2024 10:36:35 -0400 Subject: [PATCH 3/3] Overhaul for memory and speed --- cerulean_cloud/models.py | 107 ++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/cerulean_cloud/models.py b/cerulean_cloud/models.py index cae1433b..12dd66aa 100644 --- a/cerulean_cloud/models.py +++ b/cerulean_cloud/models.py @@ -938,9 +938,9 @@ def instances_from_probs( Args: raster (np.array): The input raster array to be processed. - p1 (float): The lowest probability, used to group adjacent polygons into multipolygons. lower value = fewer groups - p2 (float): The middle probability, used to trim the final polygons size. lower value = coarser polygons - p3 (float): The highest probability, used to discard polygons that don't reach sufficient confidence. higher value = more restrictive + p1 (float): The lowest probability, used to group adjacent polygons into multipolygons. Lower value = fewer groups + p2 (float): The middle probability, used to trim the final polygons size. Lower value = coarser polygons + p3 (float): The highest probability, used to discard polygons that don't reach sufficient confidence. Higher value = more restrictive transform (Affine): The transform to apply to the raster. addl_props (dict): Additional properties to add to the features. discard_edge_polygons_buffer (float): The buffer distance to discard polygons that are too close to the edge of the scene. 
@@ -966,55 +966,78 @@ def overlap_percent(a, b):
         if isinstance(raster, torch.Tensor):
             raster = raster.detach().cpu().numpy()
 
-        # Label components based on p1
+        raster = raster.astype(np.float32)
+
+        # Generate masks for each threshold
+        nodata_mask = raster == 0
         p1_islands, p1_island_count = label(raster >= p1)
+        p2_mask = raster >= p2
+        p3_mask = raster >= p3
+
+        # Extract p1 labels where p3_mask is True
+        p1_p3_labels = np.unique(p1_islands[p3_mask])
 
-        # Initialize an empty set for unique p1 labels containing pixels >= p3
-        reduced_labels = set()
+        # Create mask of all p1_islands that pass the p3 criterion
+        p1_p3_mask = np.isin(p1_islands, p1_p3_labels)
 
-        # For each p1_island label, check if it contains any pixels >= p3
-        for label_num in range(1, p1_island_count + 1):
-            island_mask = p1_islands == label_num
-            if np.any(raster[island_mask] >= p3):
-                reduced_labels.add(label_num)
+        # Create mask of all pixels that pass the p2 criterion inside p1_islands that pass the p3 criterion
+        p1_p2_p3_mask = p1_p3_mask & p2_mask
 
-        zero_mask = raster == 0  # Find all pixels that are zero (i.e. 
nodata value) - shapes = rasterio.features.shapes( - zero_mask.astype(np.uint8), mask=zero_mask, transform=transform + # Assign distinct labels to the islands that pass p1, p2, and p3 criteria + combined_raster = p1_p2_p3_mask * p1_islands + + # Create a buffer around the edge of the scene to discard polygons that are too close to the edge + scene_edge = MultiPolygon( + shape(geom) + for geom, value in shapes( + nodata_mask.astype(np.uint8), mask=nodata_mask, transform=transform + ) + if value == 1 + ).buffer(discard_edge_polygons_buffer) + + # Now, use rasterio.features.shapes to extract polygons and their labels + shapes_generator = shapes( + combined_raster, + mask=combined_raster > 0, # Only consider pixels with p1_label > 0 + transform=transform, ) - polygons = [shape(geom) for geom, value in shapes if value == 1] - scene_edge = MultiPolygon(polygons).buffer(discard_edge_polygons_buffer) - features = [] - # Process into feature collections based on unique p1 labels - for p1_label in reduced_labels: - mask = (p1_islands == p1_label) & (raster >= p2) # Apply p2 trimming - masked_raster = raster[mask] + # Dictionary to collect geometries per p1_label + label_geometries = {} + for geom, p1_label in shapes_generator: + poly = shape(geom) + if overlap_percent(poly, scene_edge) <= 0.5: + # Only record polygons that are not overlapping the scene edge + if p1_label not in label_geometries: # Initialize if doesn't exist + label_geometries[p1_label] = [] + label_geometries[p1_label].append(poly) # Add polygon to list - if not np.any(mask): - continue # Skip if mask is empty after trimming + features = [] + for p1_label, geometries in label_geometries.items(): + multipolygon = MultiPolygon(geometries) - shapes_generator = rasterio.features.shapes( - mask.astype(np.uint8), mask=mask, transform=transform + geom_mask = geometry_mask( + [multipolygon], + out_shape=raster.shape, + transform=transform, + invert=True, ) - polygons = [shape(geom) for geom, value in 
shapes_generator if value == 1] - polygons = [p for p in polygons if overlap_percent(p, scene_edge) <= 0.5] - - # Ensure there are polygons left after trimming to process into a MultiPolygon - if polygons: - multipolygon = MultiPolygon(polygons) - features.append( - geojson.Feature( - geometry=multipolygon, - properties={ - "mean_conf": float(np.mean(masked_raster)), - "median_conf": float(np.median(masked_raster)), - "max_conf": float(np.max(masked_raster)), - "machine_confidence": float(np.median(masked_raster)), - **addl_props, - }, - ) + + masked_raster = raster[geom_mask] + + features.append( + geojson.Feature( + geometry=multipolygon, + properties={ + "mean_conf": float(np.mean(masked_raster)), + "median_conf": float(np.median(masked_raster)), + "max_conf": float(np.max(masked_raster)), + "machine_confidence": float(np.median(masked_raster)), + "p1_label": int(p1_label), + **addl_props, + }, ) + ) return features