Skip to content

Commit

Permalink
Merge pull request #61 from usnistgov/feature/schema-tweaker
Browse files Browse the repository at this point in the history
nerdm.utils: add functions stripping features from JSON Schemas
  • Loading branch information
RayPlante authored Nov 30, 2023
2 parents 52de99a + 8ef86a5 commit d96af33
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 12 deletions.
2 changes: 1 addition & 1 deletion docker/ejsonschema/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN update-alternatives --install /usr/lib/uwsgi/plugins/python3_plugin.so \

RUN python -m pip install "setuptools<66.0.0"
RUN python -m pip install json-spec jsonschema==2.4.0 requests \
pytest==4.6.5 filelock crossrefapi pyyaml
pytest==4.6.5 filelock crossrefapi pyyaml jsonpath_ng
RUN python -m pip install --no-dependencies jsonmerge==1.3.0

WORKDIR /root
Expand Down
25 changes: 25 additions & 0 deletions python/nistoar/base/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from collections.abc import Mapping
from urllib.parse import urlparse

import jsonpath_ng as jp

from . import OARException

oar_home = None
Expand Down Expand Up @@ -476,3 +478,26 @@ def lookup_config_server(serverport):
"""
raise NotImplementedError()

NO_VALUE = NotImplemented
RAISE = NO_VALUE


def hget_jp(obj: Mapping, path: str, default=None):
    """
    Return the first value found at a JSONPath location within a hierarchical dictionary.

    The dictionary is typically a JSON document or a configuration structure, and ``path``
    is a JSONPath-compliant string (https://goessner.net/articles/JsonPath/).  This function
    is intended for use with paths that uniquely locate data--i.e. resolve to only one
    value; if the path matches several values, only the first match is returned.

    :param dict obj:  the dictionary to search for a matching value.
    :param str path:  a string indicating the location of the value to return.  This should
                      be a JSONPath-compliant string (where the initial "$." is optional).
    :param default:   the value to return when the path does not resolve to an existing
                      location (``None`` if not provided).  Pass :py:data:`RAISE` to have a
                      ``KeyError`` raised instead of returning a value.
    :return:  the value of the first match, or ``default`` if nothing matched.
    :raises KeyError:  if ``default`` is ``RAISE`` and the path does not resolve to an
                      existing location.
    """
    matches = jp.parse(path).find(obj)
    if matches:
        return matches[0].value

    # nothing matched: either complain or fall back, depending on what the caller asked for
    if default is RAISE:
        raise KeyError(path)
    return default


hget = hget_jp

84 changes: 84 additions & 0 deletions python/nistoar/nerdm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Utility functions and classes for interrogating and manipulating NERDm metadata objects
"""
import re
import jsonpath_ng as jp
from collections.abc import Mapping, Sequence
from typing import Union, List

from nistoar.base.config import hget

META_PREFIXES = "_$"

Expand Down Expand Up @@ -181,3 +186,82 @@ def cmp_versions(ver1, ver2):
return 0
return +1

_doc_properties = "title description asOntology notes comments valueDocumentation equivalentTo".split()


def declutter_schema(schema: Mapping, post2020: bool=False):
    """
    Remove documentation nodes from a JSON Schema object in situ.

    The documentation properties (``title``, ``description``, ``asOntology``, ``notes``,
    ``comments``, ``valueDocumentation`` and ``equivalentTo``) are deleted from the given
    schema and, recursively, from every sub-schema found under its ``properties`` node, its
    definitions node, and its ``allOf``, ``anyOf`` and ``oneOf`` lists.  Sub-schemas held in
    other locations (e.g. ``items``) are not visited.

    :param dict schema:    the schema (or sub-schema) to strip; it is modified in place.
                           A value that is not a dictionary (e.g. a boolean schema) is
                           left untouched.
    :param bool post2020:  if True, look for type definitions under ``$defs`` rather than
                           ``definitions``.  The choice applies at every level of the
                           recursion.
    """
    if not isinstance(schema, Mapping):
        # boolean schemas (true/false) and malformed nodes carry no documentation to remove
        return

    for prop in _doc_properties:
        if prop in schema:
            del schema[prop]

    # containers whose *values* are sub-schemas keyed by name
    deftag = "$defs" if post2020 else "definitions"
    for container in ("properties", deftag):
        members = schema.get(container)
        if isinstance(members, Mapping):
            for subschema in members.values():
                # pass post2020 down so nested definitions are found under the same tag
                declutter_schema(subschema, post2020)

    # containers whose *items* are sub-schemas
    for seq in ("allOf", "anyOf", "oneOf"):
        members = schema.get(seq)
        if isinstance(members, (list, tuple)):
            for subschema in members:
                declutter_schema(subschema, post2020)


def unrequire_props_in(schema: Mapping, locations: Union[str, List[str]], post2020: bool=False):
    """
    Remove ``"required"`` properties from the schemas at the specified locations within the
    given JSON Schema.

    Each location should point to a schema definition within the given schema dictionary.
    The ``"required"`` property of the located schema is deleted (if it exists), as is any
    found within the members of its ``"allOf"``, ``"anyOf"``, or ``"oneOf"`` properties
    (searched recursively).  A location that does not resolve to a non-empty dictionary is
    silently ignored.  The schema is modified in place.

    :param dict schema:  a dictionary representing a JSON Schema
    :param str|list locations:  one or a list of JSONPath-style, dot-delimited paths, each
                         locating an internal schema that may contain a ``"required"``
                         property.  An example might be "definitions.Resource".  The path
                         "$" selects the top-level schema.
    :param bool post2020:  not consulted by this function when selecting what to remove.
    """
    targets = [locations] if isinstance(locations, str) else locations
    for where in targets:
        _drop_required(hget(schema, where))


def _drop_required(node):
    """
    delete ``"required"`` from the given schema node and from the members of its
    ``"allOf"``, ``"anyOf"``, and ``"oneOf"`` sequences, recursively.  Empty or
    non-dictionary nodes are ignored.
    """
    if not node or not isinstance(node, Mapping):
        return

    if "required" in node:
        del node["required"]

    for combiner in ("allOf", "anyOf", "oneOf"):
        if combiner in node and isinstance(node[combiner], Sequence):
            for member in node[combiner]:
                _drop_required(member)


def loosen_schema(schema: Mapping, directives: Mapping, opts=None):
    """
    Apply the given loosening directives to the given JSON Schema, modifying it in place.

    ``directives`` is a dictionary describing what to do; the following properties (the
    directives) are supported:

    ``derequire``
         a list of type definitions within the schema from which the required property
         should be removed (via :py:func:`~nistoar.nerdm.utils.unrequire_props_in`).  Each
         type name listed will be assumed to be an item under the definitions node of the
         schema this directive is applied to (``definitions``, or ``$defs`` when the
         ``post2020`` directive is set).
    ``dedocument``
         a boolean indicating whether the documentation annotations should be removed from
         the schema (via :py:func:`~nistoar.nerdm.utils.declutter_schema`).  If not set,
         the default is True.
    ``post2020``
         if true, type definitions are expected under ``$defs`` rather than ``definitions``.

    :param dict schema:      the schema document as a JSON Schema schema dictionary
    :param dict directives:  the dictionary of directives to apply
    :param opts:  an options object (containing a script's command-line options).
                  NOTE(review): this is not currently consulted; earlier documentation
                  described ``opts.dedoc`` as supplying the ``dedocument`` default --
                  confirm with callers whether that should be implemented.
    """
    p2020 = bool(directives.get("post2020"))

    if directives.get("dedocument", True):
        # forward the post2020 choice so documentation under "$defs" is stripped as well
        declutter_schema(schema, p2020)

    deftag = "$defs" if p2020 else "definitions"
    dereqtps = [deftag + '.' + t for t in directives.get("derequire", [])]
    unrequire_props_in(schema, dereqtps, p2020)

29 changes: 28 additions & 1 deletion python/tests/nistoar/base/test_config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os, sys, pdb, shutil, logging, json, re, importlib
import unittest as test
from pathlib import Path
from nistoar.testing import *

import nistoar.base.config as config

datadir = os.path.join(os.path.dirname(__file__), "data")
testdir = Path(__file__).resolve().parents[0]
datadir = str(testdir / "data")
basedir = testdir.parents[3]
schemadir = basedir / 'model'
tmpd = None

def setUpModule():
Expand Down Expand Up @@ -71,6 +75,29 @@ def test_merge_config(self):
self.assertEqual(out['zub'], 'dub')
self.assertEqual(out['tell'], {"a": 1})

def test_hget_jp(self):
# exercise config.hget_jp() against the NERDm schema file found in the model directory
with open(schemadir/'nerdm-schema.json') as fd:
schema = json.load(fd)

# paths that resolve return the located value, including through an array index
self.assertEqual(config.hget_jp(schema, "definitions.Resource.properties.title.title"), "Title")
self.assertEqual(config.hget_jp(schema, "definitions.ResourceReference.allOf[1].required"), ["title"])

# an unresolvable path gives None by default, the caller's default when one is given,
# and a KeyError when the default is the RAISE sentinel
self.assertIsNone(config.hget_jp(schema, "definitions.goober.title"))
self.assertEqual(config.hget_jp(schema, "definitions.goober.title", "Dr."), "Dr.")
with self.assertRaises(KeyError):
config.hget_jp(schema, "definitions.goober.title", config.RAISE)

# an out-of-range array index is also treated as an unresolvable path
with self.assertRaises(KeyError):
config.hget_jp(schema, "definitions.ResourceReference.allOf[23].required", config.RAISE)

# make sure results are not copies of the original
ressch = config.hget_jp(schema, "definitions.Resource")
self.assertIn("required", ressch)
# deleting from the returned dictionary must be visible through the original schema
del ressch['required']
with self.assertRaises(KeyError):
config.hget_jp(schema, "definitions.Resource.required", config.RAISE)


class TestLogConfig(test.TestCase):

def resetLogfile(self):
Expand Down
131 changes: 131 additions & 0 deletions python/tests/nistoar/nerdm/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import os, sys, pdb, shutil, logging, json
import unittest as test
from pathlib import Path
from collections import OrderedDict

from nistoar.nerdm import utils
from nistoar.nerdm import constants as const

testdir = Path(__file__).resolve().parents[0]
basedir = testdir.parents[3]
schemadir = basedir / 'model'

class TestUtils(test.TestCase):

def test_meta_prop_ch(self):
Expand Down Expand Up @@ -100,6 +106,131 @@ def test_schema_version_cmp(self):
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "0.5"), 1)
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "2.5"), -1)
self.assertEqual(utils.cmp_versions(utils.get_nerdm_schema_version(data), "1.3"), 0)

def test_declutter_schema(self):
# check that declutter_schema() strips documentation at every level of the NERDm schema
with open(schemadir/'nerdm-schema.json') as fd:
schema = json.load(fd)

# baseline: record which documentation nodes are present (and which are absent)
# before the schema is modified
self.assertTrue(utils.hget(schema, "title"))
self.assertTrue(utils.hget(schema, "description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))

# the schema is edited in place (default: definitions live under "definitions")
utils.declutter_schema(schema)

# none of the documentation nodes should remain, at the top level or within definitions
self.assertFalse(utils.hget(schema, "title"))
self.assertFalse(utils.hget(schema, "description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
self.assertFalse(utils.hget(schema, "definitions.Resource.description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.title"))
self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.notes"))
self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))

def test_declutter_schema_post2020(self):
# check declutter_schema() with post2020=True, i.e. when definitions are sought under
# "$defs" instead of "definitions"
with open(schemadir/'nerdm-schema.json') as fd:
schema = json.load(fd)

# baseline: same starting state as asserted in test_declutter_schema
self.assertTrue(utils.hget(schema, "title"))
self.assertTrue(utils.hget(schema, "description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))

# second positional argument is the post2020 flag
utils.declutter_schema(schema, True)

# the file is not post-2020 compliant, so only the top level documentation will be found
self.assertFalse(utils.hget(schema, "title"))
self.assertFalse(utils.hget(schema, "description"))
# everything under "definitions" must be left exactly as it was in the baseline
self.assertFalse(utils.hget(schema, "definitions.Resource.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertFalse(utils.hget(schema, "definitions.Resource.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.title"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.notes"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.description"))
self.assertTrue(utils.hget(schema, "definitions.Resource.properties.title.asOntology"))

def test_unrequire_props_in(self):
# check that unrequire_props_in() removes "required" only from the named definitions
with open(schemadir/'nerdm-schema.json') as fd:
schema = json.load(fd)

# baseline: all four definitions start out with a "required" list
self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))

# a single location given as a string: only Resource is affected
utils.unrequire_props_in(schema, "definitions.Resource")
self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))

# a location given as a list; here the "required" lives inside an allOf member
utils.unrequire_props_in(schema, ["definitions.ResourceReference"])
self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(not utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
self.assertTrue(utils.hget(schema, "definitions.Topic.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))

# several locations at once, including one already processed (Resource) and one that
# does not exist ("goober"); neither should cause a failure
utils.unrequire_props_in(schema, ["definitions.Resource",
"definitions.Topic",
"goober",
"definitions.Organization"])
self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(not utils.hget(schema, "definitions.ResourceReference.allOf[1].required"))
self.assertTrue(not utils.hget(schema, "definitions.Topic.required"))
self.assertTrue(not utils.hget(schema, "definitions.Organization.required"))

def test_loosen_schema(self):
# check loosen_schema() when both the derequire and dedocument directives are applied
with open(schemadir/"nerdm-schema.json") as fd:
# load JSON objects as OrderedDicts rather than plain dicts
schema = json.load(fd, object_pairs_hook=OrderedDict)

# baseline: documentation and "required" lists are present before loosening
self.assertTrue(utils.hget(schema, "title"))
self.assertTrue(utils.hget(schema, "description"))
self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.description"))

# derequire names are bare type names; they are looked up under "definitions"
utils.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": True})

# documentation is gone everywhere, but "required" is removed only from Resource
self.assertTrue(not utils.hget(schema, "title"))
self.assertTrue(not utils.hget(schema, "description"))
self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(not utils.hget(schema, "definitions.Resource.description"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
self.assertTrue(not utils.hget(schema, "definitions.Organization.description"))

def test_loosen_schema_no_dedoc(self):
# check loosen_schema() when the dedocument directive is explicitly turned off
with open(schemadir/"nerdm-schema.json") as fd:
# load JSON objects as OrderedDicts rather than plain dicts
schema = json.load(fd, object_pairs_hook=OrderedDict)

# baseline: documentation and "required" lists are present before loosening
self.assertTrue(utils.hget(schema, "title"))
self.assertTrue(utils.hget(schema, "description"))
self.assertTrue(utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.description"))

utils.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": False})

# all documentation must survive; only Resource's "required" list is removed
self.assertTrue(utils.hget(schema, "title"))
self.assertTrue(utils.hget(schema, "description"))
self.assertTrue(not utils.hget(schema, "definitions.Resource.required"))
self.assertTrue(utils.hget(schema, "definitions.Resource.description"))
self.assertTrue(utils.hget(schema, "definitions.Organization.required"))
self.assertTrue(utils.hget(schema, "definitions.Organization.description"))



class TestVersion(test.TestCase):

Expand Down
2 changes: 1 addition & 1 deletion scripts/makedist.nerdmdocs
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ echo '+' PACKAGE_NAME=$PACKAGE_NAME
echo '+' version=$version

# build the components
# set -x
installdir=$BUILD_DIR/docs
set -x
mkdir -p $installdir

# export schema files
Expand Down
24 changes: 15 additions & 9 deletions scripts/record_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# The default package name (oar-sdp) can be over-ridden by the environment
# variable PACKAGE_NAME
#
import os, sys, json, re
import os, sys, json, re, traceback as tb
from collections import OrderedDict

prog = os.path.basename(sys.argv[0])
Expand Down Expand Up @@ -80,17 +80,23 @@ def ejschemadep():

def jmergedep():
import jsonmerge
eggre = re.compile(r'^jsonmerge-(.*)\.egg-info$')
eggre = re.compile(r'^jsonmerge-(.*)\.egg')
modfile = jsonmerge.__file__
libdir = os.path.dirname(os.path.dirname(modfile))
vers="(unknown)"
try:
egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
if len(egginfo) > 0:
m = eggre.match(egginfo[0])
vers = m.group(1)
except Exception as ex:
tb.print_exc()
m = eggre.match(os.path.basename(libdir))
if m:
# zipped egg
vers = m.group(1)
else:
# it's the dist-packages dir; look for the egg-info file
try:
egginfo = [d for d in os.listdir(libdir) if eggre.match(d)]
if len(egginfo) > 0:
m = eggre.match(egginfo[0])
vers = m.group(1)
except Exception as ex:
tb.print_exc()
return OrderedDict([
("name", "jsonmerge"),
("version", vers)
Expand Down

0 comments on commit d96af33

Please sign in to comment.