diff --git a/docs/src/webknossos-py/examples/dataset_usage.md b/docs/src/webknossos-py/examples/dataset_usage.md index deda8d369..2865da90c 100644 --- a/docs/src/webknossos-py/examples/dataset_usage.md +++ b/docs/src/webknossos-py/examples/dataset_usage.md @@ -14,3 +14,13 @@ which themselves can comprise multiple [magnifications represented via `MagView` webknossos/examples/dataset_usage.py --8<-- ``` + +## Parallel Access of WEBKNOSSOS Datasets + +Please consider these restrictions when accessing a WEBKNOSSOS dataset in a multiprocessing context: + + - When writing shards in parallel, `json_update_allowed` should be set to `False` to disable the automatic update of the bounding box metadata. Otherwise, race conditions may happen. The user is responsible for updating the bounding box manually. + - When writing to chunks in shards, one chunk may only be written to by one actor at any time. + - When writing to compressed shards, one shard may only be written to by one actor at any time. + - For Zarr datasets, parallel write access to shards is not allowed at all. + - Reading in parallel without concurrent writes is fine. 
diff --git a/webknossos/Changelog.md b/webknossos/Changelog.md index 3bc6aa49e..204845e76 100644 --- a/webknossos/Changelog.md +++ b/webknossos/Changelog.md @@ -19,6 +19,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section ### Changed ### Fixed +- Fixed a bug where parallel access to the properties json leads to a JsonDecodeError in the webknossos CLI [#919](https://github.com/scalableminds/webknossos-libs/issues/919) ## [0.13.4](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.13.4) - 2023-08-14 diff --git a/webknossos/webknossos/cli/convert.py b/webknossos/webknossos/cli/convert.py index 6cce2960a..33681cb3c 100644 --- a/webknossos/webknossos/cli/convert.py +++ b/webknossos/webknossos/cli/convert.py @@ -103,7 +103,7 @@ def main( source, target, voxel_size, - name=name, + name, data_format=data_format, executor=executor, compress=compress, diff --git a/webknossos/webknossos/cli/main.py b/webknossos/webknossos/cli/main.py index 8fabacda5..567116002 100644 --- a/webknossos/webknossos/cli/main.py +++ b/webknossos/webknossos/cli/main.py @@ -17,7 +17,7 @@ upsample, ) -app = typer.Typer(no_args_is_help=True) +app = typer.Typer(no_args_is_help=True, pretty_exceptions_short=False) app.command("check-equality")(check_equality.main) app.command("compress")(compress.main) diff --git a/webknossos/webknossos/dataset/_utils/buffered_slice_writer.py b/webknossos/webknossos/dataset/_utils/buffered_slice_writer.py index e32447be1..afc544990 100644 --- a/webknossos/webknossos/dataset/_utils/buffered_slice_writer.py +++ b/webknossos/webknossos/dataset/_utils/buffered_slice_writer.py @@ -34,6 +34,9 @@ def __init__( self, view: "View", offset: Optional[Vec3IntLike] = None, + # json_update_allowed enables the update of the bounding box and rewriting of the properties json. + # It should be False when parallel access is intended. 
+ json_update_allowed: bool = True, # buffer_size specifies, how many slices should be aggregated until they are flushed. buffer_size: int = 32, dimension: int = 2, # z @@ -48,6 +51,7 @@ self.buffer_size = buffer_size self.dtype = self.view.get_dtype() self.use_logging = use_logging + self.json_update_allowed = json_update_allowed if offset is None and relative_offset is None and absolute_offset is None: relative_offset = Vec3Int.zeros() if offset is not None: @@ -129,6 +133,7 @@ def _write_buffer(self) -> None: offset=buffer_start.add_or_none(self.offset), relative_offset=buffer_start_mag1.add_or_none(self.relative_offset), absolute_offset=buffer_start_mag1.add_or_none(self.absolute_offset), + json_update_allowed=self.json_update_allowed, ) except Exception as exc: diff --git a/webknossos/webknossos/dataset/_utils/pims_images.py b/webknossos/webknossos/dataset/_utils/pims_images.py index c0aadf1ad..41bda3455 100644 --- a/webknossos/webknossos/dataset/_utils/pims_images.py +++ b/webknossos/webknossos/dataset/_utils/pims_images.py @@ -481,6 +481,8 @@ def copy_to_view( ) -> Tuple[Tuple[int, int], Optional[int]]: """Copies the images according to the passed arguments to the given mag_view. args is expected to be the start and end of the z-range, meant for usage with an executor. + copy_to_view returns an iterable of image shapes and largest segment ids. When using this + method a manual update of the bounding box and the largest segment id might be necessary. """ z_start, z_end = args shapes = [] @@ -496,6 +498,9 @@ def copy_to_view( with mag_view.get_buffered_slice_writer( relative_offset=(0, 0, z_start * mag_view.mag.z), buffer_size=mag_view.info.chunk_shape.z, + # copy_to_view is typically used in a multiprocessing context. Therefore the + # buffered slice writer should not update the json file to avoid race conditions. 
+ json_update_allowed=False, ) as writer: for image_slice in images[z_start:z_end]: image_slice = np.array(image_slice) diff --git a/webknossos/webknossos/dataset/dataset.py b/webknossos/webknossos/dataset/dataset.py index e45f53bd8..bd0dfbc55 100644 --- a/webknossos/webknossos/dataset/dataset.py +++ b/webknossos/webknossos/dataset/dataset.py @@ -1249,7 +1249,7 @@ def add_layer_from_images( if pims_images.expected_shape != actual_size: warnings.warn( "[WARNING] Some images are larger than expected, smaller slices are padded with zeros now. " - + f"New size is {actual_size}, expected {pims_images.expected_shape}.", + + f"New size is {actual_size}, expected {pims_images.expected_shape}." ) if first_layer is None: first_layer = layer diff --git a/webknossos/webknossos/dataset/mag_view.py b/webknossos/webknossos/dataset/mag_view.py index c68fab69c..8cf65df03 100644 --- a/webknossos/webknossos/dataset/mag_view.py +++ b/webknossos/webknossos/dataset/mag_view.py @@ -145,6 +145,7 @@ def write( self, data: np.ndarray, offset: Optional[Vec3IntLike] = None, # deprecated, relative, in current mag + json_update_allowed: bool = True, *, relative_offset: Optional[Vec3IntLike] = None, # in mag1 absolute_offset: Optional[Vec3IntLike] = None, # in mag1 @@ -177,10 +178,14 @@ def write( # Only update the layer's bbox if we are actually larger # than the mag-aligned, rounded up bbox (self.bounding_box): - if not self.bounding_box.contains_bbox(mag1_bbox): + if json_update_allowed and not self.bounding_box.contains_bbox(mag1_bbox): self.layer.bounding_box = self.layer.bounding_box.extended_by(mag1_bbox) - super().write(data, absolute_offset=mag1_bbox.topleft) + super().write( + data, + absolute_offset=mag1_bbox.topleft, + json_update_allowed=json_update_allowed, + ) def read( self, diff --git a/webknossos/webknossos/dataset/view.py b/webknossos/webknossos/dataset/view.py index d7016fd07..9eefca36c 100644 --- a/webknossos/webknossos/dataset/view.py +++ 
b/webknossos/webknossos/dataset/view.py @@ -193,6 +193,7 @@ def write( self, data: np.ndarray, offset: Optional[Vec3IntLike] = None, # deprecated, relative, in current mag + json_update_allowed: bool = True, *, relative_offset: Optional[Vec3IntLike] = None, # in mag1 absolute_offset: Optional[Vec3IntLike] = None, # in mag1 @@ -264,9 +265,10 @@ def write( abs_mag1_offset=absolute_offset, current_mag_size=Vec3Int(data.shape[-3:]), ) - assert self.bounding_box.contains_bbox( - mag1_bbox - ), f"The bounding box to write {mag1_bbox} is larger than the view's bounding box {self.bounding_box}" + if json_update_allowed: + assert self.bounding_box.contains_bbox( + mag1_bbox + ), f"The bounding box to write {mag1_bbox} is larger than the view's bounding box {self.bounding_box}" if len(data.shape) == 4 and data.shape[0] == 1: data = data[0] # remove channel dimension for single-channel data @@ -654,6 +656,9 @@ def get_buffered_slice_writer( offset: Optional[Vec3IntLike] = None, buffer_size: int = 32, dimension: int = 2, # z + # json_update_allowed enables the update of the bounding box and rewriting of the properties json. + # It should be False when parallel access is intended. + json_update_allowed: bool = True, *, relative_offset: Optional[Vec3IntLike] = None, # in mag1 absolute_offset: Optional[Vec3IntLike] = None, # in mag1 @@ -695,6 +700,7 @@ def get_buffered_slice_writer( return BufferedSliceWriter( view=self, offset=offset, + json_update_allowed=json_update_allowed, buffer_size=buffer_size, dimension=dimension, relative_offset=relative_offset, diff --git a/webknossos/webknossos/utils.py b/webknossos/webknossos/utils.py index a9aab067d..f21feeffe 100644 --- a/webknossos/webknossos/utils.py +++ b/webknossos/webknossos/utils.py @@ -309,19 +309,19 @@ def __len__(self) -> int: class NDArrayLike(Protocol): def __getitem__(self, selection: Tuple[slice, ...]) -> np.ndarray: - pass + ... 
def __setitem__(self, selection: Tuple[slice, ...], value: np.ndarray) -> None: - pass + ... @property def shape(self) -> Tuple[int, ...]: - pass + ... @property def ndim(self) -> int: - pass + ... @property def dtype(self) -> np.dtype: - pass + ...