Skip to content

Commit

Permalink
Feat integrate somas (#310)
Browse files Browse the repository at this point in the history
* feat: load somas and detect merges

* refactor: removed prints

* feat: load soma_ids from swc

* refactor: node sampling

* refactor: update dependencies

* refactor: dependencies

---------

Co-authored-by: anna-grim <[email protected]>
  • Loading branch information
anna-grim and anna-grim authored Jan 31, 2025
1 parent 296f0fd commit ccca477
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 96 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ readme = "README.md"
dynamic = ["version"]

dependencies = [
'aiobotocore==2.13.3',
'boto3',
'botocore==1.34.162',
'fastremap',
'google-cloud-storage',
'more_itertools',
'networkx',
'plotly',
's3fs',
's3fs==2024.12.0',
'scikit-image',
'scikit-learn',
'scipy',
Expand Down
4 changes: 3 additions & 1 deletion src/deep_neurographs/fragments_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ def __init__(
None
"""
# Call parent class
super(FragmentsGraph, self).__init__()

# Loaders
self.graph_loader = gutil.GraphLoader(
anisotropy=anisotropy,
Expand Down Expand Up @@ -190,7 +192,7 @@ def load_somas(self, somas_path, segmentation_path):
for xyz_str in util.read_txt(somas_path):
# Get segment id
xyz = ast.literal_eval(xyz_str)
voxel = img_util.to_voxels(xyz, (0.748, 0.748, 1.0))
voxel = img_util.to_voxels(xyz, self.anisotropy)
swc_id = str(img_reader.img[voxel].read().result())

# Check for collision
Expand Down
86 changes: 67 additions & 19 deletions src/deep_neurographs/utils/img_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,34 @@ def to_voxels(xyz, anisotropy, multiscale=0):
return tuple(voxel[::-1])


# -- utils --
# --- miscellaneous ---
def find_img_path(bucket_name, root_dir, dataset_name):
    """
    Finds the path to an image in a GCS bucket for the dataset given by
    "dataset_name".

    Parameters
    ----------
    bucket_name : str
        Name of the GCS bucket where the images are stored.
    root_dir : str
        Path to the directory in the GCS bucket where the image is expected
        to be located.
    dataset_name : str
        Name of the dataset to be searched for within the subdirectories.

    Returns
    -------
    str
        Path of the found dataset subdirectory within the specified GCS
        bucket.

    Raises
    ------
    FileNotFoundError
        If no subdirectory under "root_dir" contains "dataset_name".
    """
    for subdir in util.list_gcs_subdirectories(bucket_name, root_dir):
        if dataset_name in subdir:
            return subdir + "whole-brain/fused.zarr/"
    # Bug fix: the original `raise f"..."` raised a plain string, which is a
    # TypeError at runtime (exceptions must derive from BaseException).
    raise FileNotFoundError(
        f"Dataset not found in {bucket_name} - {root_dir}"
    )


def get_minimal_bbox(voxels, buffer=0):
"""
Gets the min and max coordinates of a bounding box that contains "voxels".
Expand All @@ -471,28 +498,49 @@ def get_minimal_bbox(voxels, buffer=0):
return bbox


def find_img_path(bucket_name, root_dir, dataset_name):
def is_contained(bbox, voxel):
"""
Finds the path to an image in a GCS bucket for the dataset given by
"dataset_name".
Checks whether a given voxel is contained within the image bounding box
specified by "bbox".
Parameters:
Parameters
----------
bucket_name : str
Name of the GCS bucket where the images are stored.
root_dir : str
Path to the directory in the GCS bucket where the image is expected to
be located.
dataset_name : str
Name of the dataset to be searched for within the subdirectories.
bbox : dict
Dictionary with the keys "min" and "max" which specify a bounding box
in an image.
voxel : Tuple[int]
Voxel coordinate to be checked.
Returns:
Returns
-------
str
Path of the found dataset subdirectory within the specified GCS bucket.
bool
Indication of whether "voxel" is contained within the given image
bounding box.
"""
for subdir in util.list_gcs_subdirectories(bucket_name, root_dir):
if dataset_name in subdir:
return subdir + "whole-brain/fused.zarr/"
raise f"Dataset not found in {bucket_name} - {root_dir}"
above = any([v >= bbox_max for v, bbox_max in zip(voxel, bbox["max"])])
below = any([v < bbox_min for v, bbox_min in zip(voxel, bbox["min"])])
return False if above or below else True


def is_list_contained(bbox, voxels):
    """
    Checks whether a list of voxels is contained within a given image bounding
    box.

    Parameters
    ----------
    bbox : dict
        Dictionary with the keys "min" and "max" which specify a bounding box
        in an image.
    voxels : List[Tuple[int]]
        List of voxel coordinates to be checked.

    Returns
    -------
    bool
        Indication of whether every element in "voxels" is contained in
        "bbox".
    """
    # Generator expression (instead of a materialized list) lets all()
    # short-circuit on the first voxel that falls outside the bounding box.
    return all(is_contained(bbox, voxel) for voxel in voxels)
88 changes: 13 additions & 75 deletions src/deep_neurographs/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,20 +258,20 @@ def list_gcs_subdirectories(bucket_name, prefix):
return subdirs


# -- io utils --
# --- io utils ---
def read_json(path):
"""
Reads json file stored at "path".
Reads JSON file located at the given path.
Parameters
----------
path : str
Path where json file is stored.
Path to JSON file to be read.
Returns
-------
dict
Contents of json file.
Contents of JSON file.
"""
with open(path, "r") as f:
Expand All @@ -280,12 +280,12 @@ def read_json(path):

def read_txt(path):
"""
Reads txt file stored at "path".
Reads txt file located at the given path.
Parameters
----------
path : str
Path where txt file is stored.
Path to txt file to be read.
Returns
-------
Expand All @@ -297,38 +297,21 @@ def read_txt(path):
return f.read().splitlines()


def read_metadata(path):
"""
Parses metadata file to extract the "chunk_origin" and "chunk_shape".
Parameters
----------
path : str
Path to metadata file to be read.
Returns
-------
list, list
Chunk origin and chunk shape specified by metadata.
"""
metadata = read_json(path)
return metadata["chunk_origin"], metadata["chunk_shape"]


def read_zip(zip_file, path):
"""
Reads the content of an swc file from a zip file.
Reads txt file located in a ZIP archive.
Parameters
----------
zip_file : ZipFile
Zip containing text file to be read.
ZIP archive containing txt file to be read.
path : str
Path to txt file within ZIP archive to be read.
Returns
-------
str
Contents of a txt file.
Contents of txt file.
"""
with zip_file.open(path) as f:
Expand All @@ -342,9 +325,9 @@ def write_json(path, contents):
Parameters
----------
path : str
Path that .txt file is written to.
Path that txt file is written to.
contents : dict
Contents to be written to json file.
Contents to be written to JSON file.
Returns
-------
Expand Down Expand Up @@ -422,51 +405,6 @@ def get_avg_std(data, weights=None):
return avg, math.sqrt(var)


def is_contained(bbox, voxel):
"""
Checks whether "voxel" is contained within "bbox".
Parameters
----------
bbox : dict
Dictionary with the keys "min" and "max" which specify a bounding box
in an image.
voxel : ArrayLike
Voxel coordinate to be checked.
Returns
-------
bool
Indication of whether "voxel" is contained in "bbox".
"""
above = any(voxel >= bbox["max"])
below = any(voxel < bbox["min"])
return False if above or below else True


def is_list_contained(bbox, voxels):
"""
Checks whether every element in "xyz_list" is contained in "bbox".
Parameters
----------
bbox : dict
Dictionary with the keys "min" and "max" which specify a bounding box
in an image.
voxels
List of xyz coordinates to be checked.
Returns
-------
bool
Indication of whether every element in "voxels" is contained in
"bbox".
"""
return all([is_contained(bbox, voxel) for voxel in voxels])


def sample_once(my_container):
"""
Samples a single element from "my_container".
Expand Down

0 comments on commit ccca477

Please sign in to comment.