Skip to content

Commit

Permalink
Merge pull request #444 from bioimage-io/partner_module
Browse files Browse the repository at this point in the history
create bioimageio.spec.partner
  • Loading branch information
FynnBe authored May 5, 2022
2 parents d8fbc94 + 2925df0 commit 73e7718
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 16 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,12 @@ As a dependency it is included in [bioimageio.core](https://github.com/bioimage-
| BIOIMAGEIO_CACHE_WARNINGS_LIMIT | "3" | Maximum number of warnings generated for simple cache hits. |
## Changelog
#### bioimageio.spec 0.4.5.post13
- new bioimageio.spec.partner module adding validate-partner-collection command if optional 'lxml' dependency is available
#### bioimageio.spec 0.4.5.post12
- new env var `BIOIMAGEIO_CACHE_WARNINGS_LIMIT` (default: 3) to avoid spam from cache hit warnings
- more robust conversion of ImportableSourceFile for absolute paths to relative paths (don't fail on non-path source file)
- new env var `BIOIMAGEIO_CACHE_WARNINGS_LIMIT` (default: 3) to avoid spam from cache hit warnings
- more robust conversion of ImportableSourceFile for absolute paths to relative paths (don't fail on non-path source file)
#### bioimageio.spec 0.4.5.post11
- resolve symlinks when transforming absolute to relative paths during serialization; see [#438](https://github.com/bioimage-io/spec-bioimage-io/pull/438)
Expand Down
2 changes: 1 addition & 1 deletion bioimageio/spec/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.4.5post12"
"version": "0.4.5post13"
}
58 changes: 55 additions & 3 deletions bioimageio/spec/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,24 @@

import typer

from bioimageio.spec import __version__, commands, model, rdf
from bioimageio.spec import __version__, collection, commands, model, rdf

try:
from bioimageio.spec.partner.utils import enrich_partial_rdf_with_imjoy_plugin
except ImportError:
enrich_partial_rdf_with_imjoy_plugin = None
partner_help = ""
else:
partner_help = f"\n+\nbioimageio.spec.partner {__version__}\nimplementing:\n\tpartner collection RDF {collection.format_version}"

help_version = (
f"bioimageio.spec {__version__}"
"\nimplementing:"
f"\n\tcollection RDF {rdf.format_version}"
f"\n\tcollection RDF {collection.format_version}"
f"\n\tgeneral RDF {rdf.format_version}"
f"\n\tmodel RDF {model.format_version}"
f"\n\tmodel RDF {model.format_version}" + partner_help
)

# prevent rewrapping with \b\n: https://click.palletsprojects.com/en/7.x/documentation/#preventing-rewrapping
app = typer.Typer(
help="\b\n" + help_version,
Expand Down Expand Up @@ -56,6 +65,49 @@ def validate(
validate.__doc__ = commands.validate.__doc__


if enrich_partial_rdf_with_imjoy_plugin is not None:
    # The command is only registered when the optional partner utilities could be imported.

    @app.command()
    def validate_partner_collection(
        rdf_source: str = typer.Argument(..., help="RDF source as relative file path or URI"),
        update_format: bool = typer.Option(
            False,
            help="Update format version to the latest (might fail even if source adheres to an old format version). "
            "To inform the format update the source may specify fields of future versions in "
            "config:future:<future version>.",  # todo: add future documentation
        ),
        update_format_inner: bool = typer.Option(
            None, help="For collection RDFs only. Defaults to value of 'update-format'."
        ),
        verbose: bool = typer.Option(False, help="show traceback of unexpected (no ValidationError) exceptions"),
    ):
        # No docstring here: __doc__ is assigned right below the definition.
        summary = commands.validate(
            rdf_source,
            update_format,
            update_format_inner,
            enrich_partial_rdf=enrich_partial_rdf_with_imjoy_plugin,
        )

        failed = summary["error"] is not None
        if failed:
            print(f"Error in {summary['name']}:")
            pprint(summary["error"])
            if verbose:
                print("traceback:")
                pprint(summary["traceback"])
        else:
            print(f"No validation errors for {summary['name']}")

        # Warnings are reported regardless of whether validation succeeded.
        if summary["warnings"]:
            print(f"Validation Warnings for {summary['name']}:")
            pprint(summary["warnings"])

        # Exit status: 1 on validation error, 0 otherwise.
        sys.exit(1 if failed else 0)

    # Command help: a short preamble followed by the help text of the regular validate command.
    partner_doc_intro = (
        "A special version of the bioimageio validate command that enriches the RDFs defined in collections by parsing any "
        "associated imjoy plugins. This is implemented using the 'enrich_partial_rdf' of the regular validate command:\n"
    )
    validate_partner_collection.__doc__ = partner_doc_intro + commands.validate.__doc__


@app.command()
def update_format(
rdf_source: str = typer.Argument(..., help="RDF source as relative file path or URI"),
Expand Down
39 changes: 31 additions & 8 deletions bioimageio/spec/collection/v0_2/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import pathlib
import warnings
from typing import List, Optional, Tuple, Union
from typing import Callable, List, Optional, Tuple, Union

from marshmallow import missing
from marshmallow.utils import _Missing
Expand All @@ -15,25 +15,46 @@ def filter_resource_description(raw_rd: raw_nodes.RDF) -> raw_nodes.RDF:


def resolve_collection_entries(
collection: raw_nodes.Collection, collection_id: Optional[str] = None, update_to_format: Optional[str] = None
collection: raw_nodes.Collection,
collection_id: Optional[str] = None,
update_to_format: Optional[str] = None,
enrich_partial_rdf: Callable[[dict], dict] = lambda p_rdf: p_rdf,
) -> List[Tuple[Optional[RawResourceDescription], Optional[str]]]:
"""
Args:
collection: collection node to resolve entries of
collection_id: (optional)ly overwrite collection.id
update_to_format: (optional) format version the resolved entries should be updated to
enrich_partial_rdf: (optional) callable to enrich the partial base rdf (inherited from collection) and the
partial entry rdf (only the fields specified in an entry of the collection.collection list of entries)
Returns:
A list of resolved entries consisting each of a resolved 'raw node' and error=None or 'raw node'=None
and an error message.
"""
from bioimageio.spec import serialize_raw_resource_description_to_dict, load_raw_resource_description

if collection.id is missing:
warnings.warn("Collection has no id; links may not be resolved.")

ret = []
seen_ids = set()

# rdf entries are based on collection RDF...
rdf_data_base = serialize_raw_resource_description_to_dict(collection)
assert missing not in rdf_data_base.values()
rdf_data_base.pop("collection") # ... without the collection field to avoid recursion

rdf_data_base = enrich_partial_rdf(rdf_data_base) # enrich the rdf base

root_id = rdf_data_base.pop("id", None) if collection_id is None else collection_id
for idx, entry in enumerate(collection.collection): # type: ignore
rdf_data = dict(rdf_data_base)

entry_error: Optional[str] = None
id_info = f"(id={entry.rdf_update['id']}) " if "id" in entry.rdf_update else ""

# rdf entries are based on collection RDF...
rdf_data = serialize_raw_resource_description_to_dict(collection)
assert missing not in rdf_data.values()
rdf_data.pop("collection") # ... without the collection field to avoid recursion

root_id = rdf_data.pop("id", None) if collection_id is None else collection_id
# update rdf entry with entry's rdf_source
sub_id: Union[str, _Missing] = missing
if entry.rdf_source is not missing:
Expand All @@ -50,10 +71,12 @@ def resolve_collection_entries(
source_entry_data = serialize_raw_resource_description_to_dict(source_entry_rd)
sub_id = source_entry_data.pop("id", missing)
assert missing not in source_entry_data.values()
source_entry_data = enrich_partial_rdf(source_entry_data) # enrich entry data
rdf_data.update(source_entry_data)

# update rdf entry with fields specified directly in the entry
rdf_update = schema.CollectionEntry().dump(entry)
rdf_update = enrich_partial_rdf(rdf_update) # enrich rdf update from entry
assert missing not in rdf_update.values()
sub_id = rdf_update.pop("id", sub_id)
if sub_id is missing:
Expand Down
7 changes: 5 additions & 2 deletions bioimageio/spec/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import traceback
import warnings
from pathlib import Path
from typing import Any, Dict, IO, List, Optional, Union
from typing import Any, Callable, Dict, IO, List, Optional, Union

from marshmallow import ValidationError

Expand Down Expand Up @@ -50,6 +50,7 @@ def validate(
update_format: bool = False,
update_format_inner: bool = None,
verbose: bool = "deprecated", # type: ignore
enrich_partial_rdf: Callable[[dict], dict] = lambda p_rdf: p_rdf,
) -> ValidationSummary:
"""Validate a BioImage.IO Resource Description File (RDF).
Expand All @@ -58,6 +59,8 @@ def validate(
update_format: whether or not to apply auto-conversion to the latest format version before validation
update_format_inner: (applicable to `collections` resources only) `update_format` for nested resources
verbose: deprecated
enrich_partial_rdf: (optional) callable to customize RDF data on the fly.
Don't use this if you don't know exactly what to do with it.
Returns:
A summary dict with keys:
Expand Down Expand Up @@ -116,7 +119,7 @@ def validate(

if raw_rd is not None and raw_rd.type == "collection":
assert hasattr(raw_rd, "collection")
for idx, (entry_rdf, entry_error) in enumerate(resolve_collection_entries(raw_rd)): # type: ignore
for idx, (entry_rdf, entry_error) in enumerate(resolve_collection_entries(raw_rd, enrich_partial_rdf=enrich_partial_rdf)): # type: ignore
if entry_error:
entry_summary: Union[Dict[str, str], ValidationSummary] = {"error": entry_error}
else:
Expand Down
3 changes: 3 additions & 0 deletions bioimageio/spec/partner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""bioimageio.spec extensions for BioImage.IO community partners"""
# todo: consider moving to its own module, e.g. bioimageio.partner
# todo: if not moving to its own module, add the dependency to a 'partner' extra in setup.py
169 changes: 169 additions & 0 deletions bioimageio/spec/partner/imjoy_plugin_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# type: ignore
"""ImJoy plugin parser module."""
import copy
import json
import uuid

import requests
from lxml import etree

from bioimageio.spec.shared import yaml

# Element names that `parse_imjoy_plugin` looks up (via xpath) in an ImJoy plugin file.
tag_types = ["config", "script", "link", "window", "style", "docs", "attachment"]

# Config fields that `parse_imjoy_plugin` resolves per tag: when one of these fields holds a
# dict, it is replaced by the entry stored under the plugin's active tag.
CONFIGURABLE_FIELDS = [
    "env",
    "requirements",
    "dependencies",
    "icon",
    "ui",
    "type",
    "flags",
    "labels",
    "cover",
    "base_frame",
    "base_worker",
    "passive",
]


class dotdict(dict):  # pylint: disable=invalid-name
    """A ``dict`` whose items can also be read, set and deleted as attributes.

    Reading an attribute that is not a key returns ``None`` (``dict.get`` semantics)
    instead of raising ``AttributeError``.
    """

    # Attribute operations are delegated directly to the corresponding dict methods.
    __delattr__ = dict.__delitem__
    __setattr__ = dict.__setitem__
    __getattr__ = dict.get

    def __deepcopy__(self, memo=None):
        """Return a new ``dotdict`` built from a deep copy of the stored items."""
        plain_items = dict(self)
        duplicated = copy.deepcopy(plain_items, memo=memo)
        return dotdict(duplicated)


def parse_imjoy_plugin(source, overwrite_config=None):
    """Parse ImJoy plugin file and return a dict with all the fields.

    Args:
        source: text content of the ImJoy plugin file; it is wrapped in ``<html>`` tags and
            parsed with ``lxml.etree.HTML``.
        overwrite_config: (optional) mapping whose ``tag``, ``hot_reloading``, ``_id``, ``uri``,
            ``origin`` and ``namespace`` entries are applied to the parsed config.

    Returns:
        The plugin config (loaded from the first ``<config>`` block) extended with the script,
        link, window, style, docs and attachment blocks and with derived fields such as
        ``id``, ``_id``, ``code``, ``lang``, ``runnable`` and ``requirements``.

    Raises:
        ValueError: if the source has no ``<config>`` block, the config language is neither
            'yaml' nor 'json', the config is not a mapping, the config has no string ``name``,
            or a configurable field is given per tag while no tag is available.
    """
    root = etree.HTML("<html>" + source + "</html>")
    plugin_comp = dotdict()
    for tag_type in tag_types:
        plugin_comp[tag_type] = [
            dotdict(attrs=dotdict(elm.attrib), content=elm.text) for elm in root.xpath(f".//{tag_type}")
        ]

    if not plugin_comp.config:
        raise ValueError("No <config> block found in ImJoy plugin source.")

    config_block = plugin_comp.config[0]
    config_lang = config_block.attrs.lang
    if config_lang == "yaml":
        # NOTE(review): `yaml` comes from bioimageio.spec.shared; presumably a safe loader --
        # confirm, as plugin sources may be fetched from arbitrary URLs.
        config = yaml.load(config_block.content)
    elif config_lang == "json":
        config = json.loads(config_block.content)
    else:
        # f-string instead of concatenation: `lang` is None when the attribute is missing
        raise ValueError(f"Unsupported config language: {config_lang}")

    if not isinstance(config, dict):
        raise ValueError("The <config> block of an ImJoy plugin must contain a mapping.")

    name = config.get("name")
    if not isinstance(name, str):
        raise ValueError("The ImJoy plugin config must define a string 'name'.")

    overwrite_config = overwrite_config or {}
    config["tag"] = overwrite_config.get("tag") or (config.get("tags") and config.get("tags")[0])
    config["hot_reloading"] = overwrite_config.get("hot_reloading")
    config["scripts"] = []
    # try to match the script with current tag
    for elm in plugin_comp.script:
        if elm.attrs.tag == config["tag"]:
            config["script"] = elm.content
        # exclude script with mismatched tag
        if not elm.attrs.tag or elm.attrs.tag == config["tag"]:
            config["scripts"].append(elm)
    if not config.get("script") and len(plugin_comp.script) > 0:
        # no script matched the tag: fall back to the first script block
        config["script"] = plugin_comp.script[0].content
        config["lang"] = plugin_comp.script[0].attrs.lang
    config["links"] = plugin_comp.link or None
    config["windows"] = plugin_comp.window or None
    config["styles"] = plugin_comp.style or None
    config["docs"] = plugin_comp.docs[0] if plugin_comp.docs else config.get("docs")
    config["attachments"] = plugin_comp.attachment or None

    config["_id"] = overwrite_config.get("_id") or name.replace(" ", "_")
    config["uri"] = overwrite_config.get("uri")
    config["origin"] = overwrite_config.get("origin")
    config["namespace"] = overwrite_config.get("namespace")
    config["code"] = source
    # a random suffix makes the id unique for every parse
    config["id"] = name.strip().replace(" ", "_") + "_" + str(uuid.uuid4())
    config["runnable"] = config.get("runnable", True)
    config["requirements"] = config.get("requirements") or []

    # configurable fields given as a dict are keyed by tag; keep only the active tag's value
    tag = config.get("tag")
    for field in CONFIGURABLE_FIELDS:
        obj = config.get(field)
        if obj and isinstance(obj, dict):
            if not tag:
                raise ValueError("You must use 'tags' with configurable fields.")
            config[field] = obj.get(tag)
            if not config[field]:
                print(f"WARNING: {field} do not contain a tag named: {tag}")
    config["lang"] = config.get("lang") or "javascript"
    return config


def _as_list(value) -> list:
    """Return [] for a falsy value, a copy of a list, or a one-element list for anything else."""
    if not value:
        return []
    if isinstance(value, list):
        return list(value)
    return [value]


def convert_config_to_rdf(plugin_config, source_url=None) -> dict:
    """Convert imjoy plugin config to RDF format.

    Args:
        plugin_config: mapping with the ImJoy plugin config fields.
        source_url: (optional) stored as the RDF 'source' if truthy.

    Returns:
        A dict of type 'application' holding the fields copied from the plugin config plus
        'tags' (labels + flags, always including 'bioengine'), 'covers' and 'badges' (always
        lists), and, where available, 'documentation' and 'authors'.
    """
    rdf = {
        "type": "application",
    }
    if source_url:
        rdf["source"] = source_url

    # fields copied verbatim when present in the plugin config
    fields = [
        "icon",
        "name",
        "version",
        "api_version",
        "description",
        "license",
        "requirements",
        "dependencies",
        "env",
        "passive",
        "services",
    ]
    for field in fields:
        if field in plugin_config:
            rdf[field] = plugin_config[field]

    # 'labels'/'flags' may be present but None (or a single value); normalise before joining
    tags = _as_list(plugin_config.get("labels")) + _as_list(plugin_config.get("flags"))
    if "bioengine" not in tags:
        tags.append("bioengine")
    rdf["tags"] = tags

    docs = plugin_config.get("docs")
    if isinstance(docs, dict):
        rdf["documentation"] = docs.get("content")
    elif isinstance(docs, str):
        rdf["documentation"] = docs

    # make sure we have lists
    rdf["covers"] = _as_list(plugin_config.get("cover"))
    rdf["badges"] = _as_list(plugin_config.get("badge"))

    authors = plugin_config.get("author")
    if authors:
        if isinstance(authors, str):
            authors = {"name": authors}
        if not isinstance(authors, list):
            authors = [authors]
        rdf["authors"] = authors

    return rdf


def get_plugin_as_rdf(source_url, timeout=30) -> dict:
    """Get imjoy plugin config in RDF format.

    Args:
        source_url: URL the ImJoy plugin source is downloaded from; also stored as RDF 'source'.
        timeout: seconds to wait for the server before giving up (passed to ``requests.get``).

    Returns:
        The RDF dict produced by ``convert_config_to_rdf`` for the downloaded plugin.

    Raises:
        requests.RequestException: if the download fails, times out, or the server answers with
            an HTTP error status.
    """
    response = requests.get(source_url, timeout=timeout)
    # fail early on 4xx/5xx instead of parsing an error page as plugin source
    response.raise_for_status()
    plugin_config = parse_imjoy_plugin(response.text)
    return convert_config_to_rdf(plugin_config, source_url)
Loading

0 comments on commit 73e7718

Please sign in to comment.