From 1c59404abb1894ad9f3b4071a78daf4004482980 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Fri, 31 May 2024 15:53:21 -0400 Subject: [PATCH] Test for IRIs that share UUIDs This is part of implementing CASE-Examples-QC PR 54. No effects were observed on Make-managed files. References: * https://github.com/ajnelson-nist/CASE-Examples-QC/pull/54 Signed-off-by: Alex Nelson --- examples/postvisit.mk | 12 ++++ .../test_kb_casework_github_io_examples.py | 64 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 examples/test_kb_casework_github_io_examples.py diff --git a/examples/postvisit.mk b/examples/postvisit.mk index f0f5c6cc..0d4a9b08 100644 --- a/examples/postvisit.mk +++ b/examples/postvisit.mk @@ -30,6 +30,9 @@ all_jsonld := $(foreach illustration_dir,$(illustration_dirs),$(illustration_dir all: \ kb.ttl +.PHONY: \ + check-pytest + all-drafting.ttl: \ $(all_drafting_ttl) source $(top_srcdir)/venv/bin/activate \ @@ -40,7 +43,16 @@ all-drafting.ttl: \ mv _$@ $@ check: \ + check-pytest + +check-pytest: \ kb.ttl + source $(top_srcdir)/venv/bin/activate \ + && pytest \ + --ignore urgent_evidence \ + --log-level=DEBUG \ + --verbose \ + --verbose clean: @rm -f \ diff --git a/examples/test_kb_casework_github_io_examples.py b/examples/test_kb_casework_github_io_examples.py new file mode 100644 index 00000000..c8d02b05 --- /dev/null +++ b/examples/test_kb_casework_github_io_examples.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +# Portions of this file contributed by NIST are governed by the +# following statement: +# +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to Title 17 Section 105 of the +# United States Code, this software is not subject to copyright +# protection within the United States. 
# NIST assumes no responsibility
# whatsoever for its use by other parties, and makes no guarantees,
# expressed or implied, about its quality, reliability, or any other
# characteristic.
#
# We would appreciate acknowledgement if the software is used.

import logging
from collections import defaultdict
from pathlib import Path
from pprint import pformat
from typing import DefaultDict, Dict, Set
from uuid import UUID

from rdflib import Graph, URIRef

srcdir = Path(__file__).parent


def test_uuid_unique_usage_casework_github_io() -> None:
    """
    Check that a UUID at the tail of a node IRI belongs to only one IRI.

    The graph is loaded from kb.ttl, next to this file.  Each URIRef found
    in the subject or object position of a triple is inspected; predicates
    are not.  An IRI is recorded when its final 36 characters are accepted
    by the UUID constructor.  If any UUID ends up associated with more than
    one distinct IRI, the UUID-to-IRIs mapping is logged at DEBUG level and
    the assertion failure is re-raised.

    This test is likely to be copied and adjusted between several example
    repositories.
    """
    kb_graph = Graph()
    kb_graph.parse(srcdir / "kb.ttl")

    iris_by_uuid: DefaultDict[UUID, Set[URIRef]] = defaultdict(set)
    for n_subject, _n_predicate, n_object in kb_graph.triples((None, None, None)):
        for n_node in (n_subject, n_object):
            if not isinstance(n_node, URIRef):
                continue
            iri = str(n_node)
            if len(iri) < 40:
                # Too short to contain scheme, colon, and UUID.
                continue
            try:
                tail_uuid = UUID(iri[-36:])
            except ValueError:
                # The trailing characters do not form a UUID; skip this IRI.
                continue
            iris_by_uuid[tail_uuid].add(n_node)

    # Keep only the UUIDs that are shared by two or more distinct IRIs.
    collisions: Dict[str, Set[str]] = {
        str(tail_uuid): {str(n_node) for n_node in n_nodes}
        for tail_uuid, n_nodes in iris_by_uuid.items()
        if len(n_nodes) > 1
    }

    try:
        assert len(collisions) == 0
    except AssertionError:
        # Surface the offending UUIDs and IRIs before failing the test.
        logging.debug(pformat(collisions))
        raise