Merge pull request #444 from bioimage-io/partner_module

create bioimageio.spec.partner
bioimage-io · May 5, 2022 · 73e7718 · 73e7718
2 parents d8fbc94 + 2925df0
commit 73e7718
Show file tree

Hide file tree

Showing 8 changed files with 304 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -140,9 +140,12 @@ As a dependency it is included in [bioimageio.core](https://github.com/bioimage-
 | BIOIMAGEIO_CACHE_WARNINGS_LIMIT | "3" | Maximum number of warnings generated for simple cache hits. |
 
 ## Changelog
+#### bioimageio.spec 0.5.4.post13
+- new bioimageio.spec.partner module adding validate-partner-collection command if optional 'lxml' dependency is available
+
 #### bioimageio.spec 0.5.4.post12
- - new env var `BIOIMAGEIO_CACHE_WARNINGS_LIMIT` (default: 3) to avoid spam from cache hit warnings
- - more robust conversion of ImportableSourceFile for absolute paths to relative paths (don't fail on non-path source file)
+- new env var `BIOIMAGEIO_CACHE_WARNINGS_LIMIT` (default: 3) to avoid spam from cache hit warnings
+- more robust conversion of ImportableSourceFile for absolute paths to relative paths (don't fail on non-path source file)
  
 #### bioimageio.spec 0.5.4.post11
 - resolve symlinks when transforming absolute to relative paths during serialization; see [#438](https://github.com/bioimage-io/spec-bioimage-io/pull/438)

diff --git a/bioimageio/spec/VERSION b/bioimageio/spec/VERSION
@@ -1,3 +1,3 @@
 {
-    "version": "0.4.5post12"
+    "version": "0.4.5post13"
 }
diff --git a/bioimageio/spec/__main__.py b/bioimageio/spec/__main__.py
@@ -4,15 +4,24 @@
 
 import typer
 
-from bioimageio.spec import __version__, commands, model, rdf
+from bioimageio.spec import __version__, collection, commands, model, rdf
+
+# The partner extension is optional (the changelog ties it to the optional 'lxml'
+# dependency): when it cannot be imported, the enrich hook is set to None and no
+# partner section is appended to the version help text.
+partner_help = ""
+try:
+    from bioimageio.spec.partner.utils import enrich_partial_rdf_with_imjoy_plugin
+except ImportError:
+    enrich_partial_rdf_with_imjoy_plugin = None
+else:
+    partner_help = (
+        f"\n+\nbioimageio.spec.partner {__version__}"
+        f"\nimplementing:\n\tpartner collection RDF {collection.format_version}"
+    )
 
 help_version = (
     f"bioimageio.spec {__version__}"
     "\nimplementing:"
-    f"\n\tcollection RDF {rdf.format_version}"
+    f"\n\tcollection RDF {collection.format_version}"
     f"\n\tgeneral RDF {rdf.format_version}"
-    f"\n\tmodel RDF {model.format_version}"
+    f"\n\tmodel RDF {model.format_version}" + partner_help
 )
+
 # prevent rewrapping with \b\n: https://click.palletsprojects.com/en/7.x/documentation/#preventing-rewrapping
 app = typer.Typer(
     help="\b\n" + help_version,
@@ -56,6 +65,49 @@ def validate(
 validate.__doc__ = commands.validate.__doc__
 
 
+# Register the partner command only if the optional partner module was importable.
+if enrich_partial_rdf_with_imjoy_plugin is not None:
+
+    @app.command()
+    def validate_partner_collection(
+        rdf_source: str = typer.Argument(..., help="RDF source as relative file path or URI"),
+        update_format: bool = typer.Option(
+            False,
+            help="Update format version to the latest (might fail even if source adheres to an old format version). "
+            "To inform the format update the source may specify fields of future versions in "
+            "config:future:<future version>.",  # todo: add future documentation
+        ),
+        update_format_inner: bool = typer.Option(
+            None, help="For collection RDFs only. Defaults to value of 'update-format'."
+        ),
+        verbose: bool = typer.Option(False, help="show traceback of unexpected (no ValidationError) exceptions"),
+    ):
+        # No docstring here on purpose: __doc__ is assigned right after the definition.
+        summary = commands.validate(
+            rdf_source,
+            update_format,
+            update_format_inner,
+            enrich_partial_rdf=enrich_partial_rdf_with_imjoy_plugin,
+        )
+        error = summary["error"]
+        name = summary["name"]
+        failed = error is not None
+
+        if failed:
+            print(f"Error in {name}:")
+            pprint(error)
+            if verbose:
+                print("traceback:")
+                pprint(summary["traceback"])
+        else:
+            print(f"No validation errors for {name}")
+
+        # warnings are reported regardless of whether validation failed
+        found_warnings = summary["warnings"]
+        if found_warnings:
+            print(f"Validation Warnings for {name}:")
+            pprint(found_warnings)
+
+        # exit status 1 signals a validation error, 0 signals success
+        sys.exit(1 if failed else 0)
+
+    validate_partner_collection.__doc__ = (
+        "A special version of the bioimageio validate command that enriches the RDFs defined in collections by parsing any "
+        "associated imjoy plugins. This is implemented using the 'enrich_partial_rdf' of the regular validate command:\n"
+        + commands.validate.__doc__
+    )
+
+
 @app.command()
 def update_format(
     rdf_source: str = typer.Argument(..., help="RDF source as relative file path or URI"),

diff --git a/bioimageio/spec/collection/v0_2/utils.py b/bioimageio/spec/collection/v0_2/utils.py
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import warnings
-from typing import List, Optional, Tuple, Union
+from typing import Callable, List, Optional, Tuple, Union
 
 from marshmallow import missing
 from marshmallow.utils import _Missing
@@ -15,25 +15,46 @@ def filter_resource_description(raw_rd: raw_nodes.RDF) -> raw_nodes.RDF:
 
 
 def resolve_collection_entries(
-    collection: raw_nodes.Collection, collection_id: Optional[str] = None, update_to_format: Optional[str] = None
+    collection: raw_nodes.Collection,
+    collection_id: Optional[str] = None,
+    update_to_format: Optional[str] = None,
+    enrich_partial_rdf: Callable[[dict], dict] = lambda p_rdf: p_rdf,
 ) -> List[Tuple[Optional[RawResourceDescription], Optional[str]]]:
+    """
+
+    Args:
+        collection: collection node to resolve entries of
+        collection_id: (optional) id to use instead of collection.id
+        update_to_format: (optional) format version the resolved entries should be updated to
+        enrich_partial_rdf: (optional) callable to enrich the partial base rdf (inherited from collection) and the
+            partial entry rdf (only the fields specified in an entry of the collection.collection list of entries)
+
+    Returns:
+        A list of (raw node, error) tuples: each is either a resolved 'raw node' with error=None,
+        or 'raw node'=None with an error message.
+    """
     from bioimageio.spec import serialize_raw_resource_description_to_dict, load_raw_resource_description
 
     if collection.id is missing:
         warnings.warn("Collection has no id; links may not be resolved.")
 
     ret = []
     seen_ids = set()
+
+    # rdf entries are based on collection RDF...
+    rdf_data_base = serialize_raw_resource_description_to_dict(collection)
+    assert missing not in rdf_data_base.values()
+    rdf_data_base.pop("collection")  # ... without the collection field to avoid recursion
+
+    rdf_data_base = enrich_partial_rdf(rdf_data_base)  # enrich the rdf base
+
+    root_id = rdf_data_base.pop("id", None) if collection_id is None else collection_id
     for idx, entry in enumerate(collection.collection):  # type: ignore
+        rdf_data = dict(rdf_data_base)
+
         entry_error: Optional[str] = None
         id_info = f"(id={entry.rdf_update['id']}) " if "id" in entry.rdf_update else ""
 
-        # rdf entries are based on collection RDF...
-        rdf_data = serialize_raw_resource_description_to_dict(collection)
-        assert missing not in rdf_data.values()
-        rdf_data.pop("collection")  # ... without the collection field to avoid recursion
-
-        root_id = rdf_data.pop("id", None) if collection_id is None else collection_id
         # update rdf entry with entry's rdf_source
         sub_id: Union[str, _Missing] = missing
         if entry.rdf_source is not missing:
@@ -50,10 +71,12 @@ def resolve_collection_entries(
                 source_entry_data = serialize_raw_resource_description_to_dict(source_entry_rd)
                 sub_id = source_entry_data.pop("id", missing)
                 assert missing not in source_entry_data.values()
+                source_entry_data = enrich_partial_rdf(source_entry_data)  # enrich entry data
                 rdf_data.update(source_entry_data)
 
         # update rdf entry with fields specified directly in the entry
         rdf_update = schema.CollectionEntry().dump(entry)
+        rdf_update = enrich_partial_rdf(rdf_update)  # enrich rdf update from entry
         assert missing not in rdf_update.values()
         sub_id = rdf_update.pop("id", sub_id)
         if sub_id is missing:

diff --git a/bioimageio/spec/commands.py b/bioimageio/spec/commands.py
@@ -2,7 +2,7 @@
 import traceback
 import warnings
 from pathlib import Path
-from typing import Any, Dict, IO, List, Optional, Union
+from typing import Any, Callable, Dict, IO, List, Optional, Union
 
 from marshmallow import ValidationError
 
@@ -50,6 +50,7 @@ def validate(
     update_format: bool = False,
     update_format_inner: bool = None,
     verbose: bool = "deprecated",  # type: ignore
+    enrich_partial_rdf: Callable[[dict], dict] = lambda p_rdf: p_rdf,
 ) -> ValidationSummary:
     """Validate a BioImage.IO Resource Description File (RDF).
 
@@ -58,6 +59,8 @@ def validate(
         update_format: weather or not to apply auto-conversion to the latest format version before validation
         update_format_inner: (applicable to `collections` resources only) `update_format` for nested resources
         verbose: deprecated
+        enrich_partial_rdf: (optional) callable to customize RDF data on the fly.
+                            Don't use this if you don't know exactly what to do with it.
 
     Returns:
         A summary dict with keys:
@@ -116,7 +119,7 @@ def validate(
 
             if raw_rd is not None and raw_rd.type == "collection":
                 assert hasattr(raw_rd, "collection")
-                for idx, (entry_rdf, entry_error) in enumerate(resolve_collection_entries(raw_rd)):  # type: ignore
+                for idx, (entry_rdf, entry_error) in enumerate(resolve_collection_entries(raw_rd, enrich_partial_rdf=enrich_partial_rdf)):  # type: ignore
                     if entry_error:
                         entry_summary: Union[Dict[str, str], ValidationSummary] = {"error": entry_error}
                     else:

diff --git a/bioimageio/spec/partner/__init__.py b/bioimageio/spec/partner/__init__.py
@@ -0,0 +1,3 @@
+"""bioimageio.spec extensions for BioImage.IO community partners"""
+# todo: consider moving to its own module, e.g. bioimageio.partner
+# todo: if not moving to its own module, add the dependency to a 'partner' extra in setup.py
diff --git a/bioimageio/spec/partner/imjoy_plugin_parser.py b/bioimageio/spec/partner/imjoy_plugin_parser.py
@@ -0,0 +1,169 @@
+# type: ignore
+"""ImJoy plugin parser module."""
+import copy
+import json
+import uuid
+
+import requests
+from lxml import etree
+
+from bioimageio.spec.shared import yaml
+
+# Element names that parse_imjoy_plugin looks up (via xpath) in a plugin source;
+# every match is collected with its attributes and text content.
+tag_types = ["config", "script", "link", "window", "style", "docs", "attachment"]
+
+# Config fields whose value may be a dict keyed by tag name. parse_imjoy_plugin
+# replaces such a dict with the entry stored under the active tag.
+CONFIGURABLE_FIELDS = [
+    "env",
+    "requirements",
+    "dependencies",
+    "icon",
+    "ui",
+    "type",
+    "flags",
+    "labels",
+    "cover",
+    "base_frame",
+    "base_worker",
+    "passive",
+]
+
+
+class dotdict(dict):  # pylint: disable=invalid-name
+    """A dict whose items can also be read, set and deleted as attributes.
+
+    Reading an attribute that is not a key returns None (dict.get semantics);
+    deleting one that is not a key raises KeyError (dict.__delitem__ semantics).
+    """
+
+    def __getattr__(self, name):
+        # only reached when normal attribute lookup fails
+        return dict.get(self, name)
+
+    def __setattr__(self, name, value):
+        dict.__setitem__(self, name, value)
+
+    def __delattr__(self, name):
+        dict.__delitem__(self, name)
+
+    def __deepcopy__(self, memo=None):
+        """Return a new dotdict holding deep copies of all items."""
+        plain = dict(self)
+        return dotdict(copy.deepcopy(plain, memo=memo))
+
+
+def parse_imjoy_plugin(source, overwrite_config=None):
+    """Parse ImJoy plugin file and return a dict with all the fields.
+
+    Args:
+        source: content of the plugin file as a string
+        overwrite_config: (optional) dict whose 'tag', 'hot_reloading', '_id', 'uri', 'origin' and 'namespace'
+            entries are copied into the returned config ('tag' and '_id' only if truthy)
+
+    Returns:
+        The dict parsed from the first <config> element, extended with the scripts, links, windows, styles,
+        docs and attachments found in the source, the raw source under 'code' and a freshly generated 'id'.
+
+    Raises:
+        ValueError: if there is no <config> element, its 'lang' is neither 'yaml' nor 'json', its content is not
+            a mapping with a 'name' string, or a tag-keyed configurable field is used without a tag.
+    """
+    root = etree.HTML("<html>" + source + "</html>")
+    plugin_comp = dotdict()
+    for tag_type in tag_types:
+        plugin_comp[tag_type] = [
+            dotdict(attrs=dotdict(elm.attrib), content=elm.text) for elm in root.xpath(f".//{tag_type}")
+        ]
+
+    if not plugin_comp.config:
+        raise ValueError("ImJoy plugin source does not contain a <config> element.")
+
+    config_elm = plugin_comp.config[0]
+    config_lang = config_elm.attrs.lang
+    if config_lang == "yaml":
+        # NOTE(review): plugin sources may be untrusted; this assumes the shared yaml object is a safe loader - confirm
+        config = yaml.load(config_elm.content)
+    elif config_lang == "json":
+        config = json.loads(config_elm.content)
+    else:
+        # f-string instead of concatenation: a missing 'lang' attribute is None
+        raise ValueError(f"Unsupported config language: {config_lang}")
+
+    if not isinstance(config, dict):
+        raise ValueError("ImJoy plugin <config> element must contain a mapping.")
+
+    name = config.get("name")
+    if not isinstance(name, str):
+        raise ValueError("ImJoy plugin config must specify a 'name' string.")
+
+    overwrite_config = overwrite_config or {}
+    tags = config.get("tags")
+    # an explicitly requested tag wins, otherwise the first declared tag (if any) is the active one
+    config["tag"] = overwrite_config.get("tag") or (tags and tags[0])
+    tag = config["tag"]
+    config["hot_reloading"] = overwrite_config.get("hot_reloading")
+    config["scripts"] = []
+    # try to match the script with current tag
+    for elm in plugin_comp.script:
+        if elm.attrs.tag == tag:
+            config["script"] = elm.content
+        # exclude script with mismatched tag
+        if not elm.attrs.tag or elm.attrs.tag == tag:
+            config["scripts"].append(elm)
+    # no script matched the tag: fall back to the first script and its language
+    if not config.get("script") and len(plugin_comp.script) > 0:
+        config["script"] = plugin_comp.script[0].content
+        config["lang"] = plugin_comp.script[0].attrs.lang
+    config["links"] = plugin_comp.link or None
+    config["windows"] = plugin_comp.window or None
+    config["styles"] = plugin_comp.style or None
+    config["docs"] = plugin_comp.docs[0] if plugin_comp.docs else config.get("docs")
+    config["attachments"] = plugin_comp.attachment or None
+
+    config["_id"] = overwrite_config.get("_id") or name.replace(" ", "_")
+    config["uri"] = overwrite_config.get("uri")
+    config["origin"] = overwrite_config.get("origin")
+    config["namespace"] = overwrite_config.get("namespace")
+    config["code"] = source
+    # the uuid suffix makes 'id' differ on every call, even for the same source
+    config["id"] = name.strip().replace(" ", "_") + "_" + str(uuid.uuid4())
+    config["runnable"] = config.get("runnable", True)
+    config["requirements"] = config.get("requirements") or []
+
+    # resolve tag-keyed fields to the value stored under the active tag
+    for field in CONFIGURABLE_FIELDS:
+        obj = config.get(field)
+        if obj and isinstance(obj, dict):
+            if not tag:
+                raise ValueError("You must use 'tags' with configurable fields.")
+            config[field] = obj.get(tag)
+            if not config[field]:
+                print(f"WARNING: {field} do not contain a tag named: {tag}")
+    config["lang"] = config.get("lang") or "javascript"
+    return config
+
+
+def _as_list(value) -> list:
+    """Return [] for a falsy value, the value itself if it is a list, else a one-element list."""
+    if not value:
+        return []
+    return value if isinstance(value, list) else [value]
+
+
+def convert_config_to_rdf(plugin_config, source_url=None) -> dict:
+    """Convert imjoy plugin config to RDF format.
+
+    Args:
+        plugin_config: plugin config dict, e.g. as returned by parse_imjoy_plugin
+        source_url: (optional) stored as the RDF 'source' if truthy
+
+    Returns:
+        A dict of type 'application' with the supported config fields copied over, 'tags' built from the
+        config's labels and flags (always including 'bioengine'), and 'covers', 'badges' and 'authors'
+        normalized to lists.
+    """
+    rdf = {
+        "type": "application",
+    }
+    if source_url:
+        rdf["source"] = source_url
+    fields = [
+        "icon",
+        "name",
+        "version",
+        "api_version",
+        "description",
+        "license",
+        "requirements",
+        "dependencies",
+        "env",
+        "passive",
+        "services",
+    ]
+    for field in fields:
+        if field in plugin_config:
+            rdf[field] = plugin_config[field]
+
+    # 'labels'/'flags' may be present but None (e.g. a tag-keyed dict without the active tag);
+    # `or []` avoids a TypeError and the concatenation builds a new list, leaving the inputs untouched
+    tags = (plugin_config.get("labels") or []) + (plugin_config.get("flags") or [])
+    if "bioengine" not in tags:
+        tags.append("bioengine")
+    rdf["tags"] = tags
+
+    docs = plugin_config.get("docs")
+    if isinstance(docs, dict):
+        rdf["documentation"] = docs.get("content")
+    elif isinstance(docs, str):
+        rdf["documentation"] = docs
+
+    # make sure we have lists
+    rdf["covers"] = _as_list(plugin_config.get("cover"))
+    rdf["badges"] = _as_list(plugin_config.get("badge"))
+
+    authors = plugin_config.get("author")
+    if authors:
+        if isinstance(authors, str):
+            authors = {"name": authors}
+        if not isinstance(authors, list):
+            authors = [authors]
+        rdf["authors"] = authors
+
+    return rdf
+
+
+def get_plugin_as_rdf(source_url) -> dict:
+    """Get imjoy plugin config in RDF format.
+
+    Downloads the plugin source from source_url, parses it with parse_imjoy_plugin and converts the
+    resulting config with convert_config_to_rdf.
+
+    Raises:
+        requests.RequestException: if the download fails, times out or returns an HTTP error status.
+    """
+    # without a timeout requests may wait indefinitely for an unresponsive server
+    req = requests.get(source_url, timeout=30)
+    # fail early instead of trying to parse an error page as a plugin
+    req.raise_for_status()
+    plugin_config = parse_imjoy_plugin(req.text)
+    return convert_config_to_rdf(plugin_config, source_url)