diff --git a/docker/ejsonschema/Dockerfile b/docker/ejsonschema/Dockerfile
index c7ce8ca..d82179b 100644
--- a/docker/ejsonschema/Dockerfile
+++ b/docker/ejsonschema/Dockerfile
@@ -11,7 +11,7 @@ RUN update-alternatives --install /usr/lib/uwsgi/plugins/python3_plugin.so \
 
 RUN python -m pip install "setuptools<66.0.0"
 RUN python -m pip install json-spec jsonschema==2.4.0 requests \
-                          pytest==4.6.5 filelock crossrefapi pyyaml
+                          pytest==4.6.5 filelock crossrefapi pyyaml jsonpath_ng
 RUN python -m pip install --no-dependencies jsonmerge==1.3.0
 
 WORKDIR /root
diff --git a/python/nistoar/base/config.py b/python/nistoar/base/config.py
index 3221e98..6a6f7be 100644
--- a/python/nistoar/base/config.py
+++ b/python/nistoar/base/config.py
@@ -7,6 +7,8 @@ from collections.abc import Mapping
 from urllib.parse import urlparse
 
 
+import jsonpath_ng as jp
+
 from . import OARException
 
 oar_home = None
@@ -476,3 +478,28 @@ def lookup_config_server(serverport):
     """
     raise NotImplementedError()
 
+# sentinel values: pass RAISE as the default to hget_jp() to get a KeyError for a missing path
+NO_VALUE=NotImplemented
+RAISE=NO_VALUE
+def hget_jp(obj: Mapping, path: str, default=None):
+    """
+    return the first value from within a hierarchical dictionary (e.g. JSON or config structure)
+    that corresponds to a given location path.  The location path is a JSONPath-compliant string
+    (https://goessner.net/articles/JsonPath/).  This function is intended for use with paths that
+    uniquely locate data--i.e. resolve to only one value.
+    :param dict obj:  the dictionary to search for a matching value.
+    :param str path:  a string indicating the location of the value to return.  This should be
+                      a JSONPath-compliant string (where the initial "$." is optional)
+    :param default:   the value to return if the path does not resolve to an existing location
+                      (default: None); if set to RAISE, a KeyError is raised instead.
+    :raises KeyError:  if default is RAISE and the path does not resolve to an existing location.
+    """
+    try:
+        return jp.parse(path).find(obj)[0].value
+    except IndexError:
+        if default is RAISE:
+            raise KeyError(path)
+        return default
+
+hget = hget_jp
+
diff --git a/python/nistoar/nerdm/utils.py b/python/nistoar/nerdm/utils.py
index c387615..deeae27 100644
--- a/python/nistoar/nerdm/utils.py
+++ b/python/nistoar/nerdm/utils.py
@@ -2,6 +2,11 @@
 Utility functions and classes for interrogating and manipulating NERDm metadata objects
 """
 import re
+import jsonpath_ng as jp
+from collections.abc import Mapping, Sequence
+from typing import Union, List
+
+from nistoar.base.config import hget
 
 META_PREFIXES = "_$"
 
@@ -181,3 +186,94 @@ def cmp_versions(ver1, ver2):
         return 0
     return +1
 
+_doc_properties = "title description asOntology notes comments valueDocumentation equivalentTo".split()
+def declutter_schema(schema: Mapping, post2020: bool=False):
+    """
+    remove documentation nodes from a JSON Schema object in situ.  The schemas found under
+    "properties", the definitions node, and "allOf"/"anyOf"/"oneOf" are decluttered recursively.
+
+    :param dict schema:    the JSON Schema (or sub-schema) to remove documentation from
+    :param bool post2020:  if True, look for type definitions under "$defs" rather than
+                           "definitions"; this applies to nested sub-schemas as well.
+    """
+    for prop in _doc_properties:
+        if prop in schema:
+            del schema[prop]
+
+    if "properties" in schema:
+        for prop in schema['properties']:
+            declutter_schema(schema['properties'][prop], post2020)
+
+    deftag = "definitions" if not post2020 else "$defs"
+    if deftag in schema:
+        for defname in schema[deftag]:
+            declutter_schema(schema[deftag][defname], post2020)
+
+    for seq in "allOf anyOf oneOf".split():
+        if seq in schema:
+            for itm in schema[seq]:
+                declutter_schema(itm, post2020)
+
+
+def unrequire_props_in(schema: Mapping, locations: Union[str, List[str]], post2020: bool=False):
+    """
+    remove ``"required"`` fields at the specified locations from within the given JSON Schema.
+
+    The provided locations should point to schema definitions within the given schema dictionary.
+    This function will remove the ``"required"`` property within the located schema (if it exists)
+    as well as any found within ``"allOf"``, ``"anyOf"``, or ``"oneOf"`` properties.
+
+    :param dict schema:  a dictionary representing a JSON Schema
+    :param str|list locations:  JSONPath-style (dot-delimited) paths to an internal schema that
+                            contains a required property.  An example might be
+                            "definitions.Resource".  An empty string or "$" indicates the
+                            top-level schema.  Locations that do not exist are ignored.
+    :param bool post2020:  passed along when recursing into "allOf", "anyOf", and "oneOf" items
+    """
+    if isinstance(locations, str):
+        locations = [ locations ]
+
+    for loc in locations:
+        # an empty string is documented to mean the top level; map it to the JSONPath root
+        subsch = hget(schema, loc or "$")
+        if subsch and isinstance(subsch, Mapping):
+            if "required" in subsch:
+                del subsch["required"]
+            for seq in "allOf anyOf oneOf".split():
+                if seq in subsch and isinstance(subsch[seq], Sequence):
+                    for itm in subsch[seq]:
+                        unrequire_props_in(itm, "$", post2020)
+
+
+def loosen_schema(schema: Mapping, directives: Mapping, opts=None):
+    """
+    apply the given loosening directives to the given JSON Schema.  The directives is a
+    dictionary that describes what to do; the following properties (the directives) are supported:
+
+    ``derequire``
+        a list of type definitions within the schema from which the required property
+        should be removed (via :py:func:`~nistoar.nerdm.utils.unrequire_props_in`).  Each
+        type name listed will be assumed to be an item under the definitions node in the
+        schema this directive is applied to.
+    ``dedocument``
+        a boolean indicating whether the documentation annotations should be removed from
+        the schema (via :py:func:`~nistoar.nerdm.utils.declutter_schema`).  If not set,
+        the default is True.
+    ``post2020``
+        a boolean indicating that the schema keeps its type definitions under "$defs"
+        rather than "definitions".  If not set, "definitions" is assumed.
+
+    :param dict schema:      the schema document as a JSON Schema schema dictionary
+    :param dict directives:  the dictionary of directives to apply
+    :param opts:  an options object (containing scripts command-line options); it is
+                  currently not consulted by this function.
+    """
+    p2020 = bool(directives.get("post2020"))
+    deftag = "$defs" if p2020 else "definitions"
+
+    if directives.get("dedocument", True):
+        declutter_schema(schema, p2020)
+
+    dereqtps = [ deftag+'.'+t for t in directives.get("derequire", []) ]
+    unrequire_props_in(schema, dereqtps, p2020)
+
diff --git a/python/tests/nistoar/base/test_config.py b/python/tests/nistoar/base/test_config.py
index 4fd4d39..86d12eb 100644
--- a/python/tests/nistoar/base/test_config.py
+++ b/python/tests/nistoar/base/test_config.py
@@ -1,10 +1,14 @@
 import os, sys, pdb, shutil, logging, json, re, importlib
 import unittest as test
+from pathlib import Path
 
 from nistoar.testing import *
 import nistoar.base.config as config
 
-datadir = os.path.join(os.path.dirname(__file__), "data")
+testdir = Path(__file__).resolve().parents[0]
+datadir = str(testdir / "data")
+basedir = testdir.parents[3]
+schemadir = basedir / 'model'
 tmpd = None
 
 def setUpModule():
@@ -71,6 +75,29 @@ def test_merge_config(self):
         self.assertEqual(out['zub'], 'dub')
         self.assertEqual(out['tell'], {"a": 1})
 
+    def test_hget_jp(self):
+        with open(schemadir/'nerdm-schema.json') as fd:
+            schema = json.load(fd)
+
+        self.assertEqual(config.hget_jp(schema, "definitions.Resource.properties.title.title"), "Title")
+        self.assertEqual(config.hget_jp(schema, "definitions.ResourceReference.allOf[1].required"), ["title"])
+
+        self.assertIsNone(config.hget_jp(schema, "definitions.goober.title"))
+        self.assertEqual(config.hget_jp(schema, "definitions.goober.title", "Dr."), "Dr.")
+        with self.assertRaises(KeyError):
+            config.hget_jp(schema, "definitions.goober.title", config.RAISE)
+
+        with self.assertRaises(KeyError):
+            config.hget_jp(schema, "definitions.ResourceReference.allOf[23].required", config.RAISE)
+
+        # make sure results are not copies of the original
+        ressch = config.hget_jp(schema, "definitions.Resource")
+        self.assertIn("required", ressch)
+        del ressch['required']
+        with self.assertRaises(KeyError):
+            config.hget_jp(schema, "definitions.Resource.required", config.RAISE)
+
+
 class TestLogConfig(test.TestCase):
 
     def resetLogfile(self):
diff --git a/python/tests/nistoar/nerdm/test_utils.py b/python/tests/nistoar/nerdm/test_utils.py
index 11fdab2..037aa52 100644
--- a/python/tests/nistoar/nerdm/test_utils.py
+++ b/python/tests/nistoar/nerdm/test_utils.py
@@ -1,9 +1,15 @@
 import os, sys, pdb, shutil, logging, json
 import unittest as test
+from pathlib import Path
+from collections import OrderedDict
 
 from nistoar.nerdm import utils
 from nistoar.nerdm import constants as const
 
+testdir = Path(__file__).resolve().parents[0]
+basedir = testdir.parents[3]
+schemadir = basedir / 'model'
+
 class TestUtils(test.TestCase):
 
     def test_meta_prop_ch(self):
@@ -100,6 +106,131 @@ def test_schema_version_cmp(self):
         self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "0.5"), 1)
         self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "2.5"), -1)
         self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "1.3"), 0)
+
+    def test_declutter_schema(self):
+        with open(schemadir/'nerdm-schema.json') as fd:
+            schema = json.load(fd)
+
+        self.assertTrue(utils.hget(schema, "title"))
+        self.assertTrue(utils.hget(schema, "description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))
+
+        utils.declutter_schema(schema)
+
+        self.assertFalse(utils.hget(schema, "title"))
+        self.assertFalse(utils.hget(schema, "description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.title"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.notes"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))
+
+    def test_declutter_schema_post2020(self):
+        with open(schemadir/'nerdm-schema.json') as fd:
+            schema = json.load(fd)
+
+        self.assertTrue(utils.hget(schema, "title"))
+        self.assertTrue(utils.hget(schema, "description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))
+
+        utils.declutter_schema(schema, True)
+
+        # the file is not post-2020 compliant, so only the top level documentation will be found
+        self.assertFalse(utils.hget(schema, "title"))
+        self.assertFalse(utils.hget(schema, "description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))
+
+    def test_unrequire_props_in(self):
+        with open(schemadir/'nerdm-schema.json') as fd:
+            schema = json.load(fd)
+
+        self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
+        self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+
+        utils.unrequire_props_in(schema, "definitions.Resource")
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
+        self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+
+        utils.unrequire_props_in(schema, ["definitions.ResourceReference"])
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(not utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
+        self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+
+        utils.unrequire_props_in(schema, ["definitions.Resource",
+                                          "definitions.Topic",
+                                          "goober",
+                                          "definitions.Organization"])
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(not utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
+        self.assertTrue(not utils.hget(schema, "definitions.Topic.required"))
+        self.assertTrue(not utils.hget(schema, "definitions.Organization.required"))
+
+    def test_loosen_schema(self):
+        with open(schemadir/"nerdm-schema.json") as fd:
+            schema = json.load(fd, object_pairs_hook=OrderedDict)
+
+        self.assertTrue(utils.hget(schema, "title"))
+        self.assertTrue(utils.hget(schema, "description"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.description"))
+
+        utils.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": True})
+
+        self.assertTrue(not utils.hget(schema, "title"))
+        self.assertTrue(not utils.hget(schema, "description"))
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+        self.assertTrue(not utils.hget(schema, "definitions.Organization.description"))
+
+    def test_loosen_schema_no_dedoc(self):
+        with open(schemadir/"nerdm-schema.json") as fd:
+            schema = json.load(fd, object_pairs_hook=OrderedDict)
+
+        self.assertTrue(utils.hget(schema, "title"))
+        self.assertTrue(utils.hget(schema, "description"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.description"))
+
+        utils.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": False})
+
+        self.assertTrue(utils.hget(schema, "title"))
+        self.assertTrue(utils.hget(schema, "description"))
+        self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
+        self.assertTrue(utils.hget(schema, "definitions.Organization.description"))
+
+
 
 class TestVersion(test.TestCase):
 
diff --git a/scripts/makedist.nerdmdocs b/scripts/makedist.nerdmdocs
index 3c34540..49140e0 100755
--- a/scripts/makedist.nerdmdocs
+++ b/scripts/makedist.nerdmdocs
@@ -78,8 +78,8 @@ echo '+' PACKAGE_NAME=$PACKAGE_NAME
 echo '+' version=$version
 
 # build the components
+# set -x
 installdir=$BUILD_DIR/docs
-set -x
 mkdir -p $installdir
 
 # export schema files
diff --git a/scripts/record_deps.py b/scripts/record_deps.py
index 6bccde7..46b4e14 100755
--- a/scripts/record_deps.py
+++ b/scripts/record_deps.py
@@ -12,7 +12,7 @@
 # The default package name (oar-sdp) can be over-ridden by the environment
 # variable PACKAGE_NAME
 #
-import os, sys, json, re
+import os, sys, json, re, traceback as tb
 from collections import OrderedDict
 
 prog = os.path.basename(sys.argv[0])
@@ -80,17 +80,23 @@ def ejschemadep():
 
 def jmergedep():
     import jsonmerge
-    eggre = re.compile(r'^jsonmerge-(.*)\.egg-info$')
+    eggre = re.compile(r'^jsonmerge-([^-]+?)(?:-.*)?\.(?:egg|dist-info)')
     modfile = jsonmerge.__file__
     libdir = os.path.dirname(os.path.dirname(modfile))
     vers="(unknown)"
-    try:
-        egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
-        if len(egginfo) > 0:
-            m = eggre.match(egginfo[0])
-            vers = m.group(1)
-    except Exception as ex:
-        tb.print_exc()
+    m = eggre.match(os.path.basename(libdir))
+    if m:
+        # loaded from an egg (jsonmerge-VERSION-pyX.Y.egg); group 1 is the version only
+        vers = m.group(1)
+    else:
+        # it's the dist-packages dir; look for the egg-info or dist-info entry
+        try:
+            egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
+            if len(egginfo) > 0:
+                m = eggre.match(egginfo[0])
+                vers = m.group(1)
+        except OSError:
+            tb.print_exc()
     return OrderedDict([
         ("name", "jsonmerge"),
         ("version", vers)