Skip to content

Commit

Permalink
Prepare adding shacl import
Browse files Browse the repository at this point in the history
  • Loading branch information
dalito committed Aug 19, 2024
1 parent 2ae9ba2 commit 92b60b0
Show file tree
Hide file tree
Showing 7 changed files with 310 additions and 1 deletion.
3 changes: 2 additions & 1 deletion docs/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ Importing from alternative modeling frameworks
See :ref:`importers`

* OWL (but this only works for schema-style OWL)
* SHACL (in progress)
* JSON-Schema
* SQL DDL

In future other frameworks will be supported
In future other frameworks will be supported.

Annotating schemas
------------------
Expand Down
10 changes: 10 additions & 0 deletions docs/packages/importers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ Use robot to convert ahead of time:
robot convert -i schemaorg.ttl -o schemaorg.ofn
schemauto import-owl schemaorg.ofn
Importing from SHACL
--------------------

You can import from a SHACL shapes file.

.. code-block::
schemauto import-shacl tests/resources/test_shacl_simple.ttl
Importing from SQL
------------------

Expand Down
28 changes: 28 additions & 0 deletions schema_automator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,34 @@ def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
schema = sie.convert(rdfsfile, **args)
write_schema(schema, output)

@main.command()
@click.argument('shaclfile')
@output_option
@schema_name_option
@click.option('--input-type',
              default='turtle',
              help="Input format, eg. turtle")
@click.option('--identifier', '-I', help="Slot to use as identifier")
@click.option('--model-uri', help="Model URI prefix")
@click.option('--metamodel-mappings',
              help="Path to metamodel mappings YAML dictionary")
def import_shacl(shaclfile, output, metamodel_mappings, **args):
    """
    Import a SHACL profile to LinkML

    Example:

        schemauto import-shacl mymodel.shacl.ttl -o mymodel.yaml
    """
    # NOTE: the short flag -I used to be declared on both --input-type and
    # --identifier; it is now attached to --identifier only so that it is
    # unambiguous. --output/-o is provided by @output_option, so the second,
    # explicit declaration of the same option was dropped.
    mappings_obj = None
    if metamodel_mappings:
        with open(metamodel_mappings, encoding="utf-8") as f:
            mappings_obj = yaml.safe_load(f)
    # click passes --input-type as `input_type`, but the engine's convert()
    # calls this parameter `format`; without the rename the value would be
    # swallowed by convert()'s **kwargs and silently ignored.
    input_type = args.pop("input_type", None)
    if input_type is not None:
        args["format"] = input_type
    sie = ShaclImportEngine(initial_metamodel_mappings=mappings_obj)
    schema = sie.convert(shaclfile, **args)
    write_schema(schema, output)

@main.command()
@click.argument('rdffile')
@output_option
Expand Down
1 change: 1 addition & 0 deletions schema_automator/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from schema_automator.importers.dosdp_import_engine import DOSDPImportEngine
from schema_automator.importers.frictionless_import_engine import FrictionlessImportEngine
from schema_automator.importers.cadsr_import_engine import CADSRImportEngine
from schema_automator.importers.shacl_import_engine import ShaclImportEngine
227 changes: 227 additions & 0 deletions schema_automator/importers/shacl_import_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import logging
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from linkml.utils.schema_builder import SchemaBuilder
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import (
    SchemaDefinition,
    SlotDefinition,
    ClassDefinition,
)
from linkml_runtime.utils.formatutils import underscore
from linkml_runtime.utils.introspection import package_schemaview
from rdflib import OWL, RDF, RDFS, SDO, SKOS, Graph, Literal, Namespace, URIRef

from schema_automator.importers.import_engine import ImportEngine


# schema.org namespace spelled with the http:// scheme. It is listed next to
# SDO below so that both spellings of the same term are recognised
# (presumably SDO is the https:// form -- confirm against rdflib).
HTTP_SDO = Namespace("http://schema.org/")

# Default lookup table from a LinkML-side key to the RDF IRIs treated as
# equivalent to it. Keys are either metaslot-style names ("is_a", ...) or the
# Python class names of ClassDefinition / SlotDefinition, whose entries list the
# RDF metaclasses that mark a subject as a class or a property.
# ShaclImportEngine.__post_init__ copies these into its forward and reverse
# tables; list order matters because the first reverse hit wins.
DEFAULT_METAMODEL_MAPPINGS = {
"is_a": [RDFS.subClassOf, SKOS.broader],
"domain_of": [HTTP_SDO.domainIncludes, SDO.domainIncludes],
"rangeIncludes": [HTTP_SDO.rangeIncludes, SDO.rangeIncludes],
"exact_mappings": [OWL.sameAs, HTTP_SDO.sameAs],
# RDF types that identify a class
ClassDefinition.__name__: [RDFS.Class, OWL.Class, SKOS.Concept],
# RDF types that identify a property (slot)
SlotDefinition.__name__: [
RDF.Property,
OWL.ObjectProperty,
OWL.DatatypeProperty,
OWL.AnnotationProperty,
],
}


@dataclass
class ShaclImportEngine(ImportEngine):
    """
    An ImportEngine that takes SHACL and converts it to a LinkML schema.

    NOTE(review): the conversion currently recognises classes and properties
    through the RDF metaclasses listed in DEFAULT_METAMODEL_MAPPINGS
    (RDFS/OWL/SKOS style declarations); nothing in this class handles
    SHACL-specific constructs yet.
    """

    # local name -> original IRI, rebuilt on every convert() call
    mappings: dict = None
    # optional caller-supplied additions to the default mappings
    initial_metamodel_mappings: Dict[str, List[URIRef]] = None
    # forward table: metamodel key -> IRIs
    metamodel_mappings: Dict[str, List[URIRef]] = None
    # reverse table: IRI -> metamodel keys (first entry wins on lookup)
    reverse_metamodel_mappings: Dict[URIRef, List[str]] = None
    include_unmapped_annotations = False
    metamodel = None
    metamodel_schemaview: SchemaView = None
    # names of the slots induced on the metamodel's class_definition
    classdef_slots: List[str] = None

    def __post_init__(self):
        """
        Build the forward and reverse metamodel lookup tables.

        Sources, in order: DEFAULT_METAMODEL_MAPPINGS, then
        ``initial_metamodel_mappings`` (if given), then the mappings declared
        in the LinkML metamodel itself.
        """
        sv = package_schemaview("linkml_runtime.linkml_model.meta")
        self.metamodel_schemaview = sv
        self.metamodel = sv
        self.metamodel_mappings = defaultdict(list)
        self.reverse_metamodel_mappings = defaultdict(list)
        for k, vs in DEFAULT_METAMODEL_MAPPINGS.items():
            self.metamodel_mappings[k].extend(vs)
            for v in vs:
                self.reverse_metamodel_mappings[v].append(k)
        if self.initial_metamodel_mappings:
            for k, vs in self.initial_metamodel_mappings.items():
                if not isinstance(vs, list):
                    vs = [vs]
                # Coerce once so both tables hold URIRefs; values loaded from
                # YAML arrive as plain strings.
                uris = [URIRef(v) for v in vs]
                self.metamodel_mappings[k].extend(uris)
                for v in uris:
                    self.reverse_metamodel_mappings[v].append(k)
                    logging.info(f"Adding mapping {k} -> {v}")
        for e in sv.all_elements().values():
            mappings = []
            for ms in sv.get_mappings(e.name, expand=True).values():
                for m in ms:
                    uri = URIRef(m)
                    mappings.append(uri)
                    self.reverse_metamodel_mappings[uri].append(e.name)
            # Extend rather than assign: assigning discarded the default and
            # user-supplied entries for keys that are also metamodel element
            # names (e.g. "is_a", "domain_of").
            self.metamodel_mappings[e.name].extend(mappings)
        self.classdef_slots = [
            s.name for s in sv.class_induced_slots(ClassDefinition.class_name)
        ]
        # The list used to be stored only under this misspelt attribute while
        # the declared field stayed None; keep the old name as an alias.
        self.defclass_slots = self.classdef_slots

    def convert(
        self,
        file: str,
        name: str = None,
        format="turtle",
        default_prefix: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs,
    ) -> SchemaDefinition:
        """
        Parse an RDF file and build a LinkML schema from it.

        :param file: path (or other source accepted by rdflib) of the graph
        :param name: schema name; falls back to default_prefix, then "example"
        :param format: rdflib parser format, e.g. "turtle"
        :param default_prefix: default prefix of the schema; falls back to name
        :param model_uri: namespace bound to default_prefix when the parsed
            graph does not already declare that prefix
        :param identifier: if given, a slot of this name is created as the
            identifier and added to every class without is_a/mixins
        :param kwargs: accepted for call compatibility and ignored
        :return: the populated schema
        """
        self.mappings = {}
        g = Graph()
        g.parse(file, format=format)
        if name is not None and default_prefix is None:
            default_prefix = name
        if name is None:
            name = default_prefix
        if name is None:
            name = "example"
        sb = SchemaBuilder(name=name)
        sb.add_defaults()
        schema = sb.schema
        for k, v in g.namespaces():
            # Compare as str: a URIRef never equals a plain str, so the
            # original test skipped every "schema" prefix, including the
            # http://schema.org/ one it meant to keep.
            if k == "schema" and str(v) != "http://schema.org/":
                continue
            sb.add_prefix(k, v, replace_if_present=True)
        if default_prefix is not None:
            schema.default_prefix = default_prefix
            if default_prefix not in schema.prefixes:
                # NOTE(review): model_uri may still be None here -- confirm
                # how SchemaBuilder.add_prefix treats that.
                sb.add_prefix(default_prefix, model_uri, replace_if_present=True)
            schema.id = schema.prefixes[default_prefix].prefix_reference

        # --- slots -----------------------------------------------------
        cls_slots = defaultdict(list)
        props = []
        for rdfs_property_metaclass in self._rdfs_metamodel_iri(
            SlotDefinition.__name__
        ):
            props.extend(g.subjects(RDF.type, rdfs_property_metaclass))
        # implicit properties: subjects that merely use a domain/range
        # predicate. These predicates live in the forward table (name -> IRIs);
        # the reverse table is keyed by IRI and always gave an empty list here.
        for metap in (
            self.metamodel_mappings["domain_of"]
            + self.metamodel_mappings["rangeIncludes"]
        ):
            for p, _, _o in g.triples((None, metap, None)):
                props.append(p)
        for p in set(props):
            sn = self.iri_to_name(p)
            init_dict = self._dict_for_subject(g, p)
            # NOTE(review): _dict_for_subject only keeps metaslots listed in
            # classdef_slots, so the two keys below show up only if those
            # names are among them -- confirm this filter is intended for slots.
            if "domain_of" in init_dict:
                for x in init_dict["domain_of"]:
                    cls_slots[x].append(sn)
                del init_dict["domain_of"]
            if "rangeIncludes" in init_dict:
                init_dict["any_of"] = [{"range": x} for x in init_dict["rangeIncludes"]]
                del init_dict["rangeIncludes"]
            slot = SlotDefinition(sn, **init_dict)
            slot.slot_uri = str(p.n3(g.namespace_manager))
            sb.add_slot(slot)

        # --- classes ---------------------------------------------------
        rdfs_classes = []
        for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__):
            rdfs_classes.extend(g.subjects(RDF.type, rdfs_class_metaclass))
        # implicit classes: both ends of every subClassOf triple
        for metap in [RDFS.subClassOf]:
            for s, _, o in g.triples((None, metap, None)):
                rdfs_classes.append(s)
                rdfs_classes.append(o)
        for s in set(rdfs_classes):
            cn = self.iri_to_name(s)
            init_dict = self._dict_for_subject(g, s)
            c = ClassDefinition(cn, **init_dict)
            c.slots = cls_slots.get(cn, [])
            c.class_uri = str(s.n3(g.namespace_manager))
            sb.add_class(c)

        # --- identifier slot on root classes ---------------------------
        if identifier is not None:
            id_slot = SlotDefinition(identifier, identifier=True, range="uriorcurie")
            schema.slots[identifier] = id_slot
            for c in schema.classes.values():
                if not c.is_a and not c.mixins:
                    if identifier not in c.slots:
                        c.slots.append(identifier)
        return schema

    def _dict_for_subject(self, g: Graph, s: URIRef) -> Dict[str, Any]:
        """
        Looks up triples for a subject and converts to dict using linkml keys.

        :param g: graph to read from
        :param s: subject whose predicate/object pairs are converted
        :return: keyword arguments for a ClassDefinition/SlotDefinition;
            multivalued (or unknown) metaslots are collected into lists
        """
        init_dict = {}
        for pp, obj in g.predicate_objects(s):
            if pp == RDF.type:
                continue
            metaslot_name = self._element_from_iri(pp)
            logging.debug(f"Mapping {pp} -> {metaslot_name}")
            # Check for "no mapping" first: previously the membership test
            # below ran first and swallowed None, so this warning could never
            # be emitted.
            if metaslot_name is None:
                logging.warning(f"Not mapping {pp}")
                continue
            if metaslot_name not in self.classdef_slots:
                continue
            if metaslot_name == "name":
                # the element name is derived from the IRI, so an RDF-side
                # "name" is stored as the title instead
                metaslot_name = "title"
            metaslot = self.metamodel.get_slot(metaslot_name)
            v = self._object_to_value(obj, metaslot=metaslot)
            metaslot_name_safe = underscore(metaslot_name)
            if not metaslot or metaslot.multivalued:
                init_dict.setdefault(metaslot_name_safe, []).append(v)
            else:
                init_dict[metaslot_name_safe] = v
        return init_dict

    def _rdfs_metamodel_iri(self, name: str) -> List[URIRef]:
        """Return the IRIs mapped to the metamodel key ``name`` (maybe empty)."""
        return self.metamodel_mappings.get(name, [])

    def _element_from_iri(self, iri: URIRef) -> Optional[str]:
        """
        Return the first metamodel key mapped to ``iri``, or None if unmapped.
        """
        r = self.reverse_metamodel_mappings.get(iri, [])
        if not r:
            return None
        if len(r) > 1:
            logging.debug(f"Multiple mappings for {iri}: {r}")
        return r[0]

    def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any:
        """
        Convert an RDF object term into a plain value for a LinkML element.

        IRIs become their full string when the metaslot's range is a URI type
        and a short local name otherwise; literals are unwrapped; anything
        else is returned unchanged.
        """
        if isinstance(obj, URIRef):
            # metaslot is None when the metamodel has no such slot; treat that
            # like a non-URI range instead of raising AttributeError.
            if metaslot is not None and metaslot.range in ("uriorcurie", "uri"):
                return str(obj)
            return self.iri_to_name(obj)
        if isinstance(obj, Literal):
            return obj.value
        return obj

    def iri_to_name(self, v: URIRef) -> str:
        """
        Return the local name for ``v`` and remember which IRI it came from.
        """
        n = self._as_name(v)
        # Compare against str(v): a str never compares equal to a URIRef, so
        # the original test was always true.
        if n != str(v):
            self.mappings[n] = v
        return n

    def _as_name(self, v: URIRef):
        """
        Return the part of ``v`` after the last "#", else "/", else ":".

        Separators are tried in that order; the input is returned as a string
        unchanged when none of them occurs.
        """
        v = str(v)
        for sep in ["#", "/", ":"]:
            if sep in v:
                return v.split(sep)[-1]
        return v
1 change: 1 addition & 0 deletions tests/resources/test_shacl_simple.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# tbw
41 changes: 41 additions & 0 deletions tests/test_importers/test_shacl_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
import pytest

from linkml_runtime import SchemaView

from schema_automator.importers.shacl_import_engine import ShaclImportEngine
from linkml.generators.yamlgen import YAMLGenerator

from schema_automator.utils.schemautils import write_schema
from tests import INPUT_DIR, OUTPUT_DIR

# TODO - Write tests (this is a copy of test_rdfs_importer)

# Fixture paths carried over from the RDFS importer test (see the TODO above).
# NOTE(review): REPRO names the reproschema Turtle file rather than a SHACL
# shapes file -- replace once a SHACL fixture exists.
REPRO = os.path.join(INPUT_DIR, 'reproschema.ttl')
OUTSCHEMA = os.path.join(OUTPUT_DIR, 'reproschema-from-ttl.yaml')



def test_from_shacl():
    """Test Shacl conversion.

    Only engine construction is exercised for now; the conversion assertions
    below are a template copied from the RDFS importer test and do not run.
    """
    oie = ShaclImportEngine()

    # Report the placeholder as *skipped* rather than returning early, which
    # made the test pass vacuously and hid that nothing below is checked.
    pytest.skip("SHACL conversion test not written yet")
    schema = oie.convert(REPRO, default_prefix='reproschema', identifier='id')
    write_schema(schema, OUTSCHEMA)
    # roundtrip
    s = YAMLGenerator(OUTSCHEMA).serialize()
    print(s[0:100])
    sv = SchemaView(OUTSCHEMA)
    activity = sv.get_class("Activity")
    assert activity
    assert activity.name == "Activity"
    assert activity.is_a == "CreativeWork"
    slots = sv.class_induced_slots(activity.name)
    assert len(slots) == 1
    slot = slots[0]
    assert slot.name == "id"




0 comments on commit 92b60b0

Please sign in to comment.