Skip to content

Commit

Permalink
Merge pull request #812 from fractal-analytics-platform/cellvoyger_co…
Browse files Browse the repository at this point in the history
…nverter_improvements

Cellvoyager converter improvements
  • Loading branch information
jluethi authored Aug 13, 2024
2 parents 82f002a + 265194f commit 4f38279
Show file tree
Hide file tree
Showing 15 changed files with 677 additions and 201 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
**Note**: Numbers like (\#123) point to closed Pull Requests on the fractal-tasks-core repository.

# Unreleased
* Tasks:
* `image_glob_patterns` are renamed to `include_glob_patterns` in Convert Cellvoyager to OME-Zarr (regular & multiplexing) (\#812).
* Convert Cellvoyager to OME-Zarr (regular & multiplexing) gain an `exclude_glob_patterns` argument, which excludes images whose filenames match any of the given patterns from being processed (\#812).
* Fix issue with arbitrary acquisition names in Convert Cellvoyager Multiplexing to OME-Zarr (\#812).
* In Convert Cellvoyager to OME-Zarr (regular & multiplexing), handle channels in the mrf metadata file that aren't present in the mlf metadata better (\#812).
* In Convert Cellvoyager to OME-Zarr, improve plate metadata for image list when multiple plates with the same plate name are processed (\#812).
* Catch errors for missing mlf & mrf files better in Convert Cellvoyager to OME-Zarr (regular & multiplexing) (\#812).
* Drop defusedxml dependency for cellvoyager metadata conversion (\#812).

# 1.2.1
* Core-library
* Add `create_roi_table_from_df_list` library function in `fractal_tasks_core.v1.roi`: It combines a list of ROI table dataframes into an AnnData ROI table and handles repeating labels (\#811).
Expand Down
50 changes: 40 additions & 10 deletions fractal_tasks_core/__FRACTAL_MANIFEST__.json
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,21 @@
"type": "array",
"description": "A list of `OmeroChannel` s, where each channel must include the `wavelength_id` attribute and where the `wavelength_id` values must be unique across the list."
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Include Glob Patterns",
"type": "array",
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59."
"description": "If specified, only parse images with filenames that match all of these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html. Example: `include_glob_patterns=[\"*_B03_*\"]` => only process well B03; `include_glob_patterns=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59. Can interact with exclude_glob_patterns: the final list of images to process is all included images minus all excluded images."
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Exclude Glob Patterns",
"type": "array",
"description": "If specified, exclude any image where the filename matches any of the exclusion patterns. Patterns are specified the same as for include_glob_patterns."
},
"num_levels": {
"default": 5,
Expand Down Expand Up @@ -203,11 +211,18 @@
"title": "Image Extension",
"type": "string"
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Include Glob Patterns",
"type": "array"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Exclude Glob Patterns",
"type": "array"
},
"acquisition": {
Expand Down Expand Up @@ -398,13 +413,21 @@
"type": "object",
"description": "dictionary of acquisitions. Each key is the acquisition identifier (normally 0, 1, 2, 3 etc.). Each item defines the acquisition by providing the image_dir and the allowed_channels."
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Include Glob Patterns",
"type": "array",
"description": "If specified, only parse images with filenames that match with all these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html, Example: `image_glob_pattern=[\"*_B03_*\"]` => only process well B03 `image_glob_pattern=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59."
"description": "If specified, only parse images with filenames that match all of these patterns. Patterns must be defined as in https://docs.python.org/3/library/fnmatch.html. Example: `include_glob_patterns=[\"*_B03_*\"]` => only process well B03; `include_glob_patterns=[\"*_C09_*\", \"*F016*\", \"*Z[0-5][0-9]C*\"]` => only process well C09, field of view 16 and Z planes 0-59. Can interact with exclude_glob_patterns: the final list of images to process is all included images minus all excluded images."
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Exclude Glob Patterns",
"type": "array",
"description": "If specified, exclude any image where the filename matches any of the exclusion patterns. Patterns are specified the same as for include_glob_patterns."
},
"num_levels": {
"default": 5,
Expand Down Expand Up @@ -468,11 +491,18 @@
"title": "Image Extension",
"type": "string"
},
"image_glob_patterns": {
"include_glob_patterns": {
"items": {
"type": "string"
},
"title": "Include Glob Patterns",
"type": "array"
},
"exclude_glob_patterns": {
"items": {
"type": "string"
},
"title": "Image Glob Patterns",
"title": "Exclude Glob Patterns",
"type": "array"
},
"acquisition": {
Expand Down
39 changes: 29 additions & 10 deletions fractal_tasks_core/cellvoyager/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,42 +22,61 @@
def glob_with_multiple_patterns(
    *,
    folder: str,
    include_patterns: Sequence[str] = None,
    exclude_patterns: Sequence[str] = None,
) -> set[str]:
    """
    List all the items (files and folders) in a given folder that
    simultaneously match a series of glob include_patterns and do not match
    any of the exclude_patterns.

    Args:
        folder: Base folder where items will be searched.
        include_patterns: If specified, the list of patterns (defined as in
            https://docs.python.org/3/library/fnmatch.html) that item
            names will match with. An item is kept only if it matches
            *all* of them. If empty or `None`, every item matched by `*`
            in `folder` is included.
        exclude_patterns: If specified, the list of patterns (same syntax
            as `include_patterns`) used for exclusion. An item is dropped
            if it matches *any* of them. If empty or `None`, nothing is
            excluded.

    Returns:
        The set of matching paths, as returned by `glob` (i.e. each one is
        prefixed with the sanitized `folder`).
    """

    # Sanitize base-folder path (drop a single trailing slash, if any)
    if folder.endswith("/"):
        actual_folder = folder[:-1]
    else:
        actual_folder = folder[:]

    # If no pattern is specified, look for *all* items in the base folder
    # and exclude nothing
    if not include_patterns:
        include_patterns = ["*"]
    if not exclude_patterns:
        exclude_patterns = []

    logging.info(f"[glob_with_multiple_patterns] {include_patterns=}")
    logging.info(f"[glob_with_multiple_patterns] {exclude_patterns=}")

    # Combine multiple glob searches (via set intersection): an item must
    # match every include pattern. `include_patterns` is never empty here,
    # so `items` is always a set after the loop.
    items = None
    for pattern in include_patterns:
        new_matches = set(glob(f"{actual_folder}/{pattern}"))
        items = new_matches if items is None else items & new_matches

    # Combine all exclude patterns (via set union): matching any single
    # exclude pattern is enough to drop an item.
    exclude_items = set()
    for pattern in exclude_patterns:
        exclude_items.update(glob(f"{actual_folder}/{pattern}"))

    # Remove exclude_items from included list
    consensus_items = items - exclude_items

    logging.info(
        f"[glob_with_multiple_patterns] Found {len(consensus_items)} items"
    )

    return consensus_items


def _get_plate_name(plate_prefix: str) -> str:
Expand Down
Loading

0 comments on commit 4f38279

Please sign in to comment.