Skip to content

Commit

Permalink
Merge pull request #67 from bioio-devs/feature/support-public-s3-file…
Browse files Browse the repository at this point in the history
…s-automatically

Support public s3 files automatically
  • Loading branch information
SeanLeRoy authored Aug 15, 2024
2 parents 3d16713 + 3f0e57d commit 3c6097a
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 49 deletions.
85 changes: 60 additions & 25 deletions bioio/bio_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,35 @@ def determine_plugin(
),
)

@staticmethod
def _get_reader(
image: biob.types.ImageLike,
reader: biob.reader.Reader,
use_plugin_cache: bool,
fs_kwargs: Dict[str, Any],
**kwargs: Any,
) -> Tuple[biob.reader.Reader, Optional[PluginEntry]]:
"""
Initializes and returns the reader (and plugin if relevant) for the provided
image based on provided args and/or the available bioio supported plugins
"""
if reader is not None:
# Check specific reader image types in a situation where a specified reader
# only supports some of the ImageLike types.
if not check_type(image, reader):
raise biob.exceptions.UnsupportedFileFormatError(
reader.__name__, str(type(image))
)

return reader(image, fs_kwargs=fs_kwargs, **kwargs), None

# Determine reader class based on available plugins
plugin = BioImage.determine_plugin(
image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs
)
ReaderClass = plugin.metadata.get_reader()
return ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs), plugin

def __init__(
self,
image: biob.types.ImageLike,
Expand All @@ -264,25 +293,28 @@ def __init__(
fs_kwargs: Dict[str, Any] = {},
**kwargs: Any,
):
if reader is None:
# Determine reader class and create dask delayed array
self._plugin = BioImage.determine_plugin(
image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs
try:
self._reader, self._plugin = self._get_reader(
image,
reader,
use_plugin_cache,
fs_kwargs,
**kwargs,
)
except biob.exceptions.UnsupportedFileFormatError:
# When reading from S3 if we failed trying to read
# try reading as an anonymous user otherwise re-raise
# the error
if not str(image).startswith("s3://"):
raise

self._reader, self._plugin = self._get_reader(
image,
reader,
use_plugin_cache,
fs_kwargs | {"anon": True},
**kwargs,
)
ReaderClass = self._plugin.metadata.get_reader()
else:
# check specific reader image types in a situation where a specified reader
# only supports some of the ImageLike types.
if not check_type(image, reader):
raise biob.exceptions.UnsupportedFileFormatError(
reader.__name__, str(type(image))
)

# connect submitted reader
ReaderClass = reader

# Init and store reader
self._reader = ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs)

# Store delayed modifiers
self._reconstruct_mosaic = reconstruct_mosaic
Expand Down Expand Up @@ -1097,13 +1129,16 @@ def save(
)

def __str__(self) -> str:
return (
f"<BioImage ["
f"plugin: {self._plugin.entrypoint.name} installed "
f"at {datetime.datetime.fromtimestamp(self._plugin.timestamp)}, "
f"Image-is-in-Memory: {self._xarray_data is not None}"
f"]>"
)
if self._plugin is not None:
return (
f"<BioImage ["
f"plugin: {self._plugin.entrypoint.name} installed "
f"at {datetime.datetime.fromtimestamp(self._plugin.timestamp)}, "
f"Image-is-in-Memory: {self._xarray_data is not None}"
f"]>"
)

return f"<BioImage [Image-is-in-Memory: {self._xarray_data is not None}]>"

def __repr__(self) -> str:
return str(self)
Expand Down
12 changes: 10 additions & 2 deletions bioio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@


@pytest.fixture
def sample_text_file(tmp_path: pathlib.Path) -> pathlib.Path:
def sample_text_file(
tmp_path: pathlib.Path,
) -> typing.Generator[pathlib.Path, None, None]:
example_file = tmp_path / "temp-example.txt"
example_file.write_text("just some example text here")
return example_file
yield example_file


def np_random_from_shape(
Expand Down Expand Up @@ -90,3 +92,9 @@ def __exit__(
subprocess.check_call(
[sys.executable, "-m", "pip", "uninstall", "-y", self.package_name]
)


@pytest.fixture
def dummy_plugin() -> typing.Generator[str, None, None]:
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
yield DUMMY_PLUGIN_NAME
26 changes: 26 additions & 0 deletions bioio/tests/test_bio_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,29 @@ def test_bioimage_submission_data_reader_type_alignment(
) -> None:
with pytest.raises(expected_exception):
BioImage(image, reader=reader_class)


def test_bioimage_attempts_s3_read_with_anon_attr(
sample_text_file: pathlib.Path, dummy_plugin: str
) -> None:
# Arrange
from dummy_plugin import Reader as DummyReader

err_msg = "anon is not True"

class FakeReader(DummyReader):
def __init__(self, _: ImageLike, fs_kwargs: dict = {}):
if fs_kwargs.get("anon", False) is not True:
raise biob.exceptions.UnsupportedFileFormatError(
"test", "test", err_msg
)

# Act / Assert
BioImage("s3://this/could/go/anywhere.ome.zarr", reader=FakeReader)

# (sanity-check) Make sure it would have failed if not an S3 URI
# that gets given the anon attribute
with pytest.raises(biob.exceptions.UnsupportedFileFormatError) as err:
BioImage(sample_text_file, reader=FakeReader)

assert err_msg in str(err.value)
43 changes: 21 additions & 22 deletions bioio/tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,32 @@
import bioio

from ..plugins import dump_plugins
from .conftest import DUMMY_PLUGIN_NAME, DUMMY_PLUGIN_PATH, InstallPackage


def test_dump_plugins() -> None:
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
# Capture the output of dump_plugins
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
dump_plugins()
output = sys.stdout.getvalue()
finally:
sys.stdout = old_stdout
def test_dump_plugins(dummy_plugin: str) -> None:
# Capture the output of dump_plugins
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
dump_plugins()
output = sys.stdout.getvalue()
finally:
sys.stdout = old_stdout

# Check if package name is in the output
assert DUMMY_PLUGIN_NAME in output
# Check if package name is in the output
assert dummy_plugin in output


def test_plugin_feasibility_report() -> None:
def test_plugin_feasibility_report(dummy_plugin: str) -> None:
# Arrange
test_image = np.random.rand(10, 10)
expected_error_msg = "missing 1 required positional argument: 'path'"
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
# Act
actual_output = bioio.plugin_feasibility_report(test_image)
# Assert
assert actual_output["ArrayLike"].supported is True
assert actual_output["ArrayLike"].error is None
assert actual_output["dummy-plugin"].supported is False
assert expected_error_msg in (actual_output["dummy-plugin"].error or "")

# Act
actual_output = bioio.plugin_feasibility_report(test_image)

# Assert
assert actual_output["ArrayLike"].supported is True
assert actual_output["ArrayLike"].error is None
assert actual_output[dummy_plugin].supported is False
assert expected_error_msg in (actual_output[dummy_plugin].error or "")

0 comments on commit 3c6097a

Please sign in to comment.