Commit 1e0358f: precommit
Benjamin Morris committed Aug 5, 2024 (1 parent: e7bd5bf)

Showing 6 changed files with 46 additions and 42 deletions. Many of the changes are mechanical pre-commit fixes; whitespace-only edits (trailing whitespace, end-of-file newlines) are shown as unchanged context in the diffs below.
18 changes: 8 additions & 10 deletions README.md

````diff
@@ -75,7 +75,7 @@ model.train()
 await model.train(run_async=True)
 ```

 Most models work by passing data paths in the data config. For training or predicting on datasets that are already in memory, you can pass the data directly to the model. Note that this use case is primarily for programmatic use (e.g. in a workflow or a jupyter notebook), not through the normal CLI. An experiment showing a possible config setup for this use case is demonstrated with the [im2im/segmentation_array](configs/experiment/im2im/segmentation_array.yaml) experiment. For training, data must be passed as a dictionary with keys "train" and "val" containing lists of dictionaries with keys corresponding to the data config.
@@ -87,29 +87,27 @@ model.print_config()

 # create CZYX dummy data
 data = {
-    "train": [
-        {"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}
-    ],
-    "val": [
-        {"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}
-    ],
+    "train": [{"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}],
+    "val": [{"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}],
 }
 model.train(data=data)
 ```

 For predicting, data must be passed as a list of numpy arrays. The resulting predictions will be processed in a dictionary with one key for each task head in the model config and corresponding values in BC(Z)YX order.

 ```python
 from cyto_dl.api import CytoDLModel
 import numpy as np
 from cyto_dl.utils import extract_array_predictions

 model = CytoDLModel()
-model.load_default_experiment("segmentation_array", output_dir="./output", overrides=['data=im2im/numpy_dataloader_predict'])
+model.load_default_experiment(
+    "segmentation_array", output_dir="./output", overrides=["data=im2im/numpy_dataloader_predict"]
+)
 model.print_config()

 # create CZYX dummy data
 data = [np.random.rand(1, 32, 64, 64), np.random.rand(1, 32, 64, 64)]

 _, _, output = model.predict(data=data)
 preds = extract_array_predictions(output)
````
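The BC(Z)YX output contract described in the README is pinned down by the assertions in `tests/test_array_models.py` (changed in this same commit): `extract_array_predictions` yields one stacked, batch-first array per task head. A minimal runnable sketch of that contract, using a zero-filled stand-in for real model output (the "seg" head name is illustrative):

```python
import numpy as np

# Stand-in for extract_array_predictions output: a dict mapping each
# task head to one stacked array.
data = [np.random.rand(1, 32, 64, 64), np.random.rand(1, 32, 64, 64)]
preds = {"seg": np.stack([np.zeros_like(d) for d in data])}

for head, pred in preds.items():
    assert pred.shape[0] == len(data)       # one prediction per input array
    assert pred.shape[1:] == data[0].shape  # per-sample C(Z)YX shape preserved
```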
8 changes: 4 additions & 4 deletions configs/data/im2im/numpy_dataloader_predict.yaml

All four changed lines here are whitespace-only pre-commit fixes (trailing whitespace, end-of-file newline), so the file's content is unchanged:

```yaml
_target_: cyto_dl.datamodules.array.make_array_dataloader
data:
num_workers: 1
batch_size: 1
source_key: ${source_col}
transforms:
  - _target_: monai.transforms.ToTensord
    keys:
      - ${source_col}
  - _target_: cyto_dl.image.transforms.clip.Clipd
    keys:
      - ${source_col}
  - _target_: monai.transforms.NormalizeIntensityd
    channel_wise: true
    keys:
      - ${source_col}
```
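This config's empty `data:` key is filled in at runtime. A minimal sketch of doing that through `create_dataloader` from `cyto_dl/utils/array.py` (also touched in this commit); the assumption that `model.cfg.data` holds this config after `load_default_experiment` follows the README example:

```python
import numpy as np

from cyto_dl.api import CytoDLModel
from cyto_dl.utils.array import create_dataloader

model = CytoDLModel()
model.load_default_experiment(
    "segmentation_array", output_dir="./output", overrides=["data=im2im/numpy_dataloader_predict"]
)

# Assumption: model.cfg.data is the instantiable config defined by this YAML file.
dataloader = create_dataloader(model.cfg.data, data=[np.random.rand(1, 32, 64, 64)])
```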
18 changes: 9 additions & 9 deletions configs/data/im2im/numpy_dataloader_train.yaml

Besides trailing-whitespace fixes (shown as context), the list items under the ToTensord `keys:` are re-indented in both the train and val dataloaders:

```diff
@@ -1,5 +1,5 @@
 _aux:
   patch_shape:
   _scales_dict:
     - - ${target_col}
       - [1]
@@ -8,15 +8,15 @@ _aux:

 train_dataloaders:
   _target_: cyto_dl.datamodules.array.make_array_dataloader
   data:
   num_workers: 0
   batch_size: 1
   source_key: ${source_col}
   transforms:
     - _target_: monai.transforms.ToTensord
       keys:
-      - ${source_col}
-      - ${target_col}
+        - ${source_col}
+        - ${target_col}
     - _target_: cyto_dl.image.transforms.clip.Clipd
       keys: ${source_col}
     - _target_: monai.transforms.NormalizeIntensityd
@@ -28,7 +28,7 @@ train_dataloaders:
       above: False
       cval: 1
     - _target_: cyto_dl.image.transforms.RandomMultiScaleCropd
       keys:
         - ${source_col}
         - ${target_col}
       patch_shape: ${data._aux.patch_shape}
@@ -51,15 +51,15 @@ train_dataloaders:

 val_dataloaders:
   _target_: cyto_dl.datamodules.array.make_array_dataloader
   data:
   num_workers: 0
   batch_size: 1
   source_key: ${source_col}
   transforms:
     - _target_: monai.transforms.ToTensord
       keys:
-      - ${source_col}
-      - ${target_col}
+        - ${source_col}
+        - ${target_col}
     - _target_: cyto_dl.image.transforms.clip.Clipd
       keys: ${source_col}
     - _target_: monai.transforms.NormalizeIntensityd
@@ -71,7 +71,7 @@ val_dataloaders:
       above: False
       cval: 1
     - _target_: cyto_dl.image.transforms.RandomMultiScaleCropd
       keys:
         - ${source_col}
         - ${target_col}
       patch_shape: ${data._aux.patch_shape}
```
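Note that `patch_shape` is left empty here and is presumably supplied by the experiment config. A sketch of setting it programmatically via `override_config`, the API the tests below use with dotted keys (the exact key path and the ZYX value are illustrative assumptions):

```python
from cyto_dl.api import CytoDLModel

model = CytoDLModel()
model.load_default_experiment(experiment_type="segmentation_array", output_dir="./output")
# Hypothetical override: dotted keys mirror the pattern in tests/test_array_models.py.
model.override_config({"data._aux.patch_shape": [16, 32, 32]})
```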
2 changes: 1 addition & 1 deletion cyto_dl/datamodules/array.py

```diff
@@ -12,7 +12,7 @@ def make_array_dataloader(
     source_key: str = "input",
     **dataloader_kwargs,
 ):
-    """Create a dataloader from a an array dataset
+    """Create a dataloader from a an array dataset.

     Parameters
     ----------
```
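For context, a sketch of calling `make_array_dataloader` directly. Only `data`, `source_key`, and `**dataloader_kwargs` are visible in this hunk, so the `transforms` parameter (mirroring the YAML configs above) is an assumption:

```python
import numpy as np
from monai.transforms import ToTensord

from cyto_dl.datamodules.array import make_array_dataloader

loader = make_array_dataloader(
    data=[np.random.rand(1, 32, 64, 64)],  # list of C(Z)YX arrays
    source_key="raw",                      # key each array is stored under
    transforms=[ToTensord(keys=["raw"])],  # assumed parameter, as in the configs
    batch_size=1,                          # forwarded via **dataloader_kwargs
    num_workers=0,
)
```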
7 changes: 4 additions & 3 deletions cyto_dl/utils/array.py

```diff
@@ -1,6 +1,7 @@
-from omegaconf import OmegaConf
-import hydra
+import hydra
+import numpy as np
+from omegaconf import OmegaConf


 def create_dataloader(data_cfg, data=None):
     """Create a dataloader from a data config and optional data."""
@@ -33,7 +34,7 @@ def extract_array_predictions(output, task_heads=None):
     for head in task_heads:
         if head not in predictions:
             predictions[head] = []
-        predictions[head] += batch_pred[head]['pred']
+        predictions[head] += batch_pred[head]["pred"]
     # stack head predictions into numpy array
     for head, pred in predictions.items():
         predictions[head] = np.stack(pred)
```
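To make the accumulation step concrete, a self-contained sketch of the pattern in `extract_array_predictions`; the nested `{head: {"pred": [...]}}` batch layout is inferred from the changed line above:

```python
import numpy as np

# Hypothetical output of two predict batches for a single "seg" task head.
batch_preds = [
    {"seg": {"pred": [np.zeros((1, 32, 64, 64))]}},
    {"seg": {"pred": [np.zeros((1, 32, 64, 64))]}},
]

predictions = {}
for batch_pred in batch_preds:
    for head in batch_pred:
        if head not in predictions:
            predictions[head] = []
        predictions[head] += batch_pred[head]["pred"]  # extend the per-head list
for head, pred in predictions.items():
    predictions[head] = np.stack(pred)  # shape (2, 1, 32, 64, 64): batch-first
```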
35 changes: 20 additions & 15 deletions tests/test_array_models.py

```diff
@@ -1,9 +1,12 @@
-import pytest
+from pathlib import Path
+
 import numpy as np
+import pytest
+
 from cyto_dl.api import CytoDLModel
-from pathlib import Path
 from cyto_dl.utils import extract_array_predictions


 @pytest.mark.skip
 def test_array_train(tmp_path):
     model = CytoDLModel()
@@ -15,22 +18,19 @@ def test_array_train(tmp_path):
         "trainer.devices": 1,
     }

-    model.load_default_experiment(experiment_type='segmentation_array', output_dir=tmp_path)
+    model.load_default_experiment(experiment_type="segmentation_array", output_dir=tmp_path)
     model.override_config(overrides)

     data = {
-        "train": [
-            {"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}
-        ],
-        "val": [
-            {"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}
-        ],
+        "train": [{"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}],
+        "val": [{"raw": np.random.randn(1, 40, 256, 256), "seg": np.ones((1, 40, 256, 256))}],
     }
     model.train(data=data)

     ckpt_dir = Path(model.cfg.callbacks.model_checkpoint.dirpath)
-    assert 'last.ckpt' in [fn.name for fn in ckpt_dir.iterdir()]
-    return ckpt_dir/'last.ckpt'
+    assert "last.ckpt" in [fn.name for fn in ckpt_dir.iterdir()]
+    return ckpt_dir / "last.ckpt"


 @pytest.mark.slow
 def test_array_train_predict(tmp_path):
@@ -42,17 +42,22 @@ def test_array_train_predict(tmp_path):
         "logger": None,
         "trainer.accelerator": "cpu",
         "trainer.devices": 1,
-        "ckpt_path": ckpt_path
+        "ckpt_path": ckpt_path,
     }

-    model.load_default_experiment(experiment_type='segmentation_array', output_dir=tmp_path, train=False, overrides=['data=im2im/numpy_dataloader_predict'])
+    model.load_default_experiment(
+        experiment_type="segmentation_array",
+        output_dir=tmp_path,
+        train=False,
+        overrides=["data=im2im/numpy_dataloader_predict"],
+    )
     model.override_config(overrides)
     model.print_config()

     data = [np.random.rand(1, 32, 64, 64), np.random.rand(1, 32, 64, 64)]
     _, _, output = model.predict(data=data)
     preds = extract_array_predictions(output)

     for head in model.cfg.model.task_heads.keys():
         assert preds[head].shape[0] == len(data)
         assert preds[head].shape[1:] == data[0].shape
```
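As a usage note: `test_array_train` is skipped by default (`@pytest.mark.skip`), and the end-to-end test is opt-in via its `slow` marker, so it would be run with something like `pytest tests/test_array_models.py -m slow` (assuming the `slow` marker is registered in the project's pytest configuration).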
