Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) Establish Progressive Resizing Workflow #93

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
3 changes: 1 addition & 2 deletions ceruleanml/coco_load_fastai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
def record_collection_to_record_ids(
    record_collection: icevision.data.record_collection.RecordCollection,
):
    """Return the record id of every record, in iteration order.

    Each record contributes exactly one id, read from
    ``record.common.record_id``, so the result has the same length as
    ``record_collection``.
    """
    return [record.common.record_id for record in record_collection]


Expand Down
28 changes: 28 additions & 0 deletions ceruleanml/coco_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@
"ambiguous": (255, 255, 255),
}

# Maps each class name to its integer id; ids follow the order of the
# names listed here, starting at 0 for "background".
class_map_coco = {
    name: class_id
    for class_id, name in enumerate(
        (
            "background",
            "infra_slick",
            "natural_seep",
            "coincident_vessel",
            "recent_vessel",
            "old_vessel",
            "ambiguous",
        )
    )
}


def sample_stat_lists(arr):
    """Return ``(mean, std)`` of ``arr`` reduced over its first two axes.

    Any remaining axes of ``arr`` are kept, so each returned array has the
    shape ``arr.shape[2:]``.
    """
    reduced_axes = (0, 1)
    mean = np.mean(arr, axis=reduced_axes)
    std = np.std(arr, axis=reduced_axes)
    return mean, std
Expand Down Expand Up @@ -308,3 +318,21 @@ def ignore_low_area_records(
):
for record in tqdm(record_collection):
ignore_record_by_area(record, area_thresh)

def remap_records_class(
    record_collection: icevision.data.record_collection.RecordCollection,
    remap_dict: dict,
):
    """
    Relabel or delete annotations in every record according to remap_dict.

    remap_dict keys/values: "infra_slick", "natural_seep", "coincident_vessel",
    "recent_vessel", "old_vessel", "ambiguous", None
    Deletes class if value is None: the annotation's entry is popped from
    every non-None list under the record's "detection" dict.
    Otherwise the label is replaced and its label id is looked up in
    class_map_coco.

    Labels that are not keys of remap_dict are left untouched. Each
    annotation is remapped at most once, so chained entries
    (a -> b together with b -> c) are not followed.

    NOTE(review): edits are applied to the dict returned by
    ``record.as_dict()``; this presumably exposes the record's own lists so
    the changes stick -- confirm against icevision.
    """
    if not remap_dict:
        # Nothing to remap; skip walking the whole collection.
        return

    for record in tqdm(record_collection):
        detection = record.as_dict()["detection"]
        labels = detection["labels"]
        # Walk the indices backwards: popping annotation i then never shifts
        # an index that still has to be visited, so no annotation is skipped.
        for i in range(len(labels) - 1, -1, -1):
            label = labels[i]
            if label not in remap_dict:
                continue
            new_label = remap_dict[label]
            if new_label is None:
                # assumes every non-None value is a per-annotation list
                # parallel to "labels" -- TODO confirm
                for value in detection.values():
                    if value is not None:
                        value.pop(i)
            else:
                labels[i] = new_label
                detection["label_ids"][i] = class_map_coco[new_label]
24 changes: 23 additions & 1 deletion ceruleanml/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,30 @@ def cm_f1(arrays_gt, arrays_pred, num_classes, save_dir, normalize=None):
else f"{save_dir}/cm_count.png"
)
plt.savefig(cm_name)

print(f"Confusion matrix saved at {cm_name}")
# compute f1 score
f1 = f1_score(flat_truth, flat_preds, average="macro")
print("f1_score", f1)

return cm, f1


def get_cm_for_learner(dls, learner, save_path, num_classes=6, normalize=None):
    """
    Compute the confusion matrix and F1 score of a learner on ``dls.valid``.

    This doesn't support eval on negative samples if they are in the dls,
    since val masks don't exist with neg samples; those masks would need to
    be constructed with np.zeros.

    Args:
        dls: dataloaders; each batch of ``dls.valid`` is read as
            ``(images, masks)`` via indices 0 and 1.
        learner: object providing ``no_bar()`` and ``predict()``.
        save_path: forwarded to ``cm_f1`` as its save directory.
        num_classes: forwarded to ``cm_f1``; the default of 6 keeps the
            previously hard-coded value.
        normalize: forwarded to ``cm_f1``; the default of None keeps the
            previous behavior.

    Returns:
        The result of ``cm_f1``: cm and f1 score.
    """
    val_arrs = []
    class_preds = []
    with learner.no_bar():
        for batch_tuple in dls.valid:
            images, masks = batch_tuple[0], batch_tuple[1]
            for img, val_mask in zip(images, masks):
                val_arrs.append(val_mask.cpu().detach().numpy())
                # Only the first element of predict()'s result is used;
                # presumably the decoded class mask -- TODO confirm.
                class_pred = learner.predict(img.cpu())[0]
                class_preds.append(class_pred.cpu().detach().numpy())

    return cm_f1(val_arrs, class_preds, num_classes, save_path, normalize=normalize)
4 changes: 4 additions & 0 deletions ceruleanml/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
assemble_record_label_lists,
get_all_record_area_lists_for_class,
ignore_low_area_records,
remap_records_class,
)


Expand Down Expand Up @@ -48,6 +49,7 @@ def load_set_record_collection(
area_thresh=10,
negative_sample_count=0,
preprocess=False,
remap_dict={}
):
parser = COCOMaskParser(
annotations_filepath=coco_json_path,
Expand All @@ -57,6 +59,8 @@ def load_set_record_collection(
0
]

remap_records_class(positive_records, remap_dict)

if preprocess:

ignore_low_area_records(positive_records, area_thresh=area_thresh)
Expand Down
181 changes: 181 additions & 0 deletions notebooks/fastai2_unet_trainer_cv2-1channel-baseline.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"See scripts/ to run this notebook experiment as a script"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from ceruleanml import data\n",
"from ceruleanml import evaluation\n",
"from ceruleanml import preprocess\n",
"from ceruleanml.inference import save_fastai_model_state_dict_and_tracing, load_tracing_model, test_tracing_model_one_batch, logits_to_classes\n",
"from fastai.data.block import DataBlock\n",
"from fastai.vision.data import ImageBlock, MaskBlock\n",
"from fastai.vision.augment import aug_transforms, Resize\n",
"from fastai.vision.learner import unet_learner\n",
"from fastai.data.transforms import IndexSplitter\n",
"from fastai.metrics import DiceMulti, Dice\n",
"from ceruleanml.coco_load_fastai import record_collection_to_record_ids, get_image_path, record_to_mask\n",
"from torchvision.models import resnet18, resnet34, resnet50\n",
"from fastai.callback.fp16 import MixedPrecision\n",
"from fastai.callback.tensorboard import TensorBoardCallback\n",
"from fastai.vision.core import PILImageBW\n",
"from datetime import datetime\n",
"from pathlib import Path\n",
"import os, random\n",
"from icevision.visualize import show_data\n",
"import torch\n",
"\n",
"from fastai.callback.tracker import EarlyStoppingCallback, SaveModelCallback\n",
"\n",
"### Parsing COCO Dataset with Icevision\n",
"\n",
"class_map = {v: k for k, v in data.class_mapping_coco_inv.items()}\n",
"class_ints = list(range(1, len(list(class_map.keys())[:-1]) + 1))\n",
"\n",
"with_context=False\n",
"mount_path = \"/root/\"\n",
"train_set = \"train-no-context-512\"\n",
"tiled_images_folder_train = \"tiled_images_no_context\"\n",
"json_name_train = \"instances_TiledCeruleanDatasetV2NoContextFiles.json\"\n",
"\n",
"coco_json_path_train = f\"{mount_path}/partitions/{train_set}/{json_name_train}\"\n",
"tiled_images_folder_train = f\"{mount_path}/partitions/{train_set}/{tiled_images_folder_train}\"\n",
"val_set = \"val-no-context-512\"\n",
"tiled_images_folder_val= \"tiled_images_no_context\"\n",
"json_name_val = \"instances_TiledCeruleanDatasetV2NoContextFiles.json\"\n",
"coco_json_path_val= f\"{mount_path}/partitions/{val_set}/{json_name_val}\"\n",
"tiled_images_folder_val = f\"{mount_path}/partitions/{val_set}/{tiled_images_folder_val}\"\n",
"\n",
"bs=8 # max\n",
"size=512\n",
"n=\"all\"\n",
"arch=34\n",
"epochs = 100\n",
"\n",
"class_map = {v: k for k, v in data.class_mapping_coco_inv.items()}\n",
"class_ints = list(range(1, len(list(class_map.keys())[:-1]) + 1))\n",
"negative_sample_count = 0\n",
"negative_sample_count_val = 0\n",
"area_thresh = 10\n",
"\n",
"record_collection_with_negative_small_filtered_train = preprocess.load_set_record_collection(\n",
" coco_json_path_train, tiled_images_folder_train, area_thresh, negative_sample_count, preprocess=False\n",
")\n",
"record_ids_train = record_collection_to_record_ids(record_collection_with_negative_small_filtered_train)\n",
"\n",
"record_collection_with_negative_small_filtered_val = preprocess.load_set_record_collection(\n",
" coco_json_path_val, tiled_images_folder_val, area_thresh, negative_sample_count_val, preprocess=False\n",
")\n",
"record_ids_val = record_collection_to_record_ids(record_collection_with_negative_small_filtered_val)\n",
"\n",
"assert len(set(record_ids_train)) + len(set(record_ids_val)) == len(record_ids_train) + len(record_ids_val)\n",
"\n",
"train_val_record_ids = record_ids_train + record_ids_val\n",
"combined_record_collection = record_collection_with_negative_small_filtered_train + record_collection_with_negative_small_filtered_val\n",
"\n",
"def get_val_indices(combined_ids, val_ids):\n",
" return list(range(len(combined_ids)))[-len(val_ids):]\n",
"\n",
"### Constructing a FastAI DataBlock that uses parsed COCO Dataset from icevision parser. aug_transforms can only be used with_context=True\n",
"\n",
"val_indices = get_val_indices(train_val_record_ids, record_ids_val)\n",
"\n",
"def get_image_by_record_id(record_id):\n",
" return get_image_path(combined_record_collection, record_id)\n",
"\n",
"def get_mask_by_record_id(record_id):\n",
" return record_to_mask(combined_record_collection, record_id)\n",
"\n",
"batch_transfms = [*aug_transforms(flip_vert=True, max_warp=0.1, size=size)]\n",
"coco_seg_dblock = DataBlock(\n",
" blocks=(ImageBlock, MaskBlock(codes=class_ints)), # ImageBlock is RGB by default, uses PIL\n",
" get_x=get_image_by_record_id,\n",
" splitter=IndexSplitter(val_indices),\n",
" get_y=get_mask_by_record_id,\n",
" batch_tfms=batch_transfms,\n",
" n_inp=1\n",
" )\n",
"\n",
"\n",
"dls = coco_seg_dblock.dataloaders(source=train_val_record_ids, batch_size=bs)\n",
"\n",
"### Fastai2 Trainer\n",
"\n",
"dateTimeObj = datetime.now()\n",
"timestampStr = dateTimeObj.strftime(\"%d_%b_%Y_%H_%M_%S\")\n",
"experiment_dir = Path(f'{mount_path}/experiments/cv2/'+timestampStr+'_fastai_unet/')\n",
"experiment_dir.mkdir(exist_ok=True)\n",
"print(experiment_dir)\n",
"\n",
"archs = {18: resnet18, 34: resnet34, 50: resnet50}\n",
"\n",
"cbs = [TensorBoardCallback(projector=False, trace_model=False), \n",
" SaveModelCallback(monitor=\"valid_loss\", with_opt=True),\n",
" EarlyStoppingCallback(monitor='valid_loss', min_delta=0.005, patience=10) ]\n",
"\n",
"learner = unet_learner(dls, archs[arch], metrics=[DiceMulti, Dice],\n",
" model_dir=experiment_dir, n_out=7,\n",
" cbs=cbs) #cbs=cbs# SaveModelCallback saves model when there is improvement\n",
"\n",
"print(\"size\", size)\n",
"print(\"batch size\", bs)\n",
"print(\"arch\", arch)\n",
"print(\"n chips\", n)\n",
"print(\"epochs (with early stopping and patience=10):\", epochs)\n",
"\n",
"learner.fine_tune(epochs, 1e-4, freeze_epochs=1) # cbs=cbs\n",
"\n",
"evaluation.get_cm_for_learner(dls, learner, mount_path)\n",
"\n",
"validation = learner.validate()\n",
"\n",
"save_template = f'test_{bs}_{arch}_{size}_{round(validation[1],3)}_{epochs}.pt'\n",
"\n",
"state_dict_pth, tracing_model_gpu_pth, tracing_model_cpu_pth = save_fastai_model_state_dict_and_tracing(learner, dls, save_template, experiment_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "fastai2_training.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python [conda env:fastai2]",
"language": "python",
"name": "conda-env-fastai2-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading