From f667b2b5737a600be34a2085002cb17551016db6 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Mon, 10 Oct 2022 17:02:57 +0200 Subject: [PATCH 1/5] rdflib_loader: check for None --- linkml_runtime/loaders/rdflib_loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linkml_runtime/loaders/rdflib_loader.py b/linkml_runtime/loaders/rdflib_loader.py index 14d24ecc..ff5b08d1 100644 --- a/linkml_runtime/loaders/rdflib_loader.py +++ b/linkml_runtime/loaders/rdflib_loader.py @@ -198,6 +198,8 @@ def repl(v): def _get_id_dict(self, node: VALID_SUBJECT, schemaview: SchemaView, cn: ClassDefinitionName) -> ANYDICT: id_slot = schemaview.get_identifier_slot(cn) if not isinstance(node, BNode): + if id_slot is None: + raise Exception(f'no slot found for {cn}: bnode={node}') id_val = schemaview.namespaces().curie_for(node) if id_val == None: id_val = str(node) From c7990e83b9518303d114069769cd5c9a47496f99 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 29 Sep 2022 13:26:24 +0200 Subject: [PATCH 2/5] csv loader: table testing f/w --- .../input/table-inlined.tsv | 2 + .../test_loaders_dumpers/input/table-json.tsv | 2 + tests/test_loaders_dumpers/models/table.py | 146 ++++++++++++++++++ tests/test_loaders_dumpers/models/table.yaml | 48 ++++++ .../test_csv_loader_dumper.py | 18 ++- tests/test_utils/test_csv_utils.py | 25 +++ 6 files changed, 234 insertions(+), 7 deletions(-) create mode 100644 tests/test_loaders_dumpers/input/table-inlined.tsv create mode 100644 tests/test_loaders_dumpers/input/table-json.tsv create mode 100644 tests/test_loaders_dumpers/models/table.py create mode 100644 tests/test_loaders_dumpers/models/table.yaml create mode 100644 tests/test_utils/test_csv_utils.py diff --git a/tests/test_loaders_dumpers/input/table-inlined.tsv b/tests/test_loaders_dumpers/input/table-inlined.tsv new file mode 100644 index 00000000..a8a30b13 --- /dev/null +++ b/tests/test_loaders_dumpers/input/table-inlined.tsv @@ -0,0 +1,2 @@ +columnA columnB columnB columnC +table:row1 
first value second value something else diff --git a/tests/test_loaders_dumpers/input/table-json.tsv b/tests/test_loaders_dumpers/input/table-json.tsv new file mode 100644 index 00000000..5cd72ea6 --- /dev/null +++ b/tests/test_loaders_dumpers/input/table-json.tsv @@ -0,0 +1,2 @@ +columnA objectB columnC +table:row1 {\"name\": \"foo\", \"value\": \"bar\"} something else diff --git a/tests/test_loaders_dumpers/models/table.py b/tests/test_loaders_dumpers/models/table.py new file mode 100644 index 00000000..6c00dce1 --- /dev/null +++ b/tests/test_loaders_dumpers/models/table.py @@ -0,0 +1,146 @@ +# Auto generated from table.yaml by pythongen.py version: 0.9.0 +# Generation date: 2022-09-29T09:11:32 +# Schema: table +# +# id: https://w3id.org/linkml/examples/table +# description: Represent a table in linkml +# license: https://creativecommons.org/publicdomain/zero/1.0/ + +import dataclasses +import sys +import re +from jsonasobj2 import JsonObj, as_dict +from typing import Optional, List, Union, Dict, ClassVar, Any +from dataclasses import dataclass +from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions + +from linkml_runtime.utils.slot import Slot +from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode +from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int +from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs +from linkml_runtime.utils.formatutils import camelcase, underscore, sfx +from linkml_runtime.utils.enumerations import EnumDefinitionImpl +from rdflib import Namespace, URIRef +from linkml_runtime.utils.curienamespace import CurieNamespace +from linkml_runtime.linkml_model.types import String, Uriorcurie +from linkml_runtime.utils.metamodelcore import URIorCURIE + +metamodel_version = "1.7.0" +version = None + +# Overwrite dataclasses _init_fn to add **kwargs in __init__ +dataclasses._init_fn = 
dataclasses_init_fn_with_kwargs + +# Namespaces +LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/') +TABLE = CurieNamespace('table', 'https://w3id.org/linkml/examples/table/') +DEFAULT_ = TABLE + + +# Types + +# Class references +class RowColumnA(URIorCURIE): + pass + + +@dataclass +class Object(YAMLRoot): + """ + An object (bnode) which needs embedding in a single row + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = TABLE.Object + class_class_curie: ClassVar[str] = "table:Object" + class_name: ClassVar[str] = "Object" + class_model_uri: ClassVar[URIRef] = TABLE.Object + + name: Optional[str] = None + value: Optional[str] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.name is not None and not isinstance(self.name, str): + self.name = str(self.name) + + if self.value is not None and not isinstance(self.value, str): + self.value = str(self.value) + + super().__post_init__(**kwargs) + + +@dataclass +class Row(YAMLRoot): + """ + A single data point made up of columns. + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = TABLE.Row + class_class_curie: ClassVar[str] = "table:Row" + class_name: ClassVar[str] = "Row" + class_model_uri: ClassVar[URIRef] = TABLE.Row + + columnA: Union[str, RowColumnA] = None + objectB: Optional[Union[dict, Object]] = None + columnC: Optional[str] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.columnA): + self.MissingRequiredField("columnA") + if not isinstance(self.columnA, RowColumnA): + self.columnA = RowColumnA(self.columnA) + + if self.objectB is not None and not isinstance(self.objectB, Object): + self.objectB = Object(**as_dict(self.objectB)) + + if self.columnC is not None and not isinstance(self.columnC, str): + self.columnC = str(self.columnC) + + super().__post_init__(**kwargs) + + +@dataclass +class Table(YAMLRoot): + """ + Container of rows. 
+ """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = TABLE.Table + class_class_curie: ClassVar[str] = "table:Table" + class_name: ClassVar[str] = "Table" + class_model_uri: ClassVar[URIRef] = TABLE.Table + + rows: Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]] = empty_dict() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + self._normalize_inlined_as_list(slot_name="rows", slot_type=Row, key_name="columnA", keyed=True) + + super().__post_init__(**kwargs) + + +# Enumerations + + +# Slots +class slots: + pass + +slots.object__name = Slot(uri=TABLE.name, name="object__name", curie=TABLE.curie('name'), + model_uri=TABLE.object__name, domain=None, range=Optional[str]) + +slots.object__value = Slot(uri=TABLE.value, name="object__value", curie=TABLE.curie('value'), + model_uri=TABLE.object__value, domain=None, range=Optional[str]) + +slots.row__columnA = Slot(uri=TABLE.columnA, name="row__columnA", curie=TABLE.curie('columnA'), + model_uri=TABLE.row__columnA, domain=None, range=URIRef) + +slots.row__objectB = Slot(uri=TABLE.objectB, name="row__objectB", curie=TABLE.curie('objectB'), + model_uri=TABLE.row__objectB, domain=None, range=Optional[Union[dict, Object]]) + +slots.row__columnC = Slot(uri=TABLE.columnC, name="row__columnC", curie=TABLE.curie('columnC'), + model_uri=TABLE.row__columnC, domain=None, range=Optional[str]) + +slots.table__rows = Slot(uri=TABLE.rows, name="table__rows", curie=TABLE.curie('rows'), + model_uri=TABLE.table__rows, domain=None, range=Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]]) diff --git a/tests/test_loaders_dumpers/models/table.yaml b/tests/test_loaders_dumpers/models/table.yaml new file mode 100644 index 00000000..1fa24379 --- /dev/null +++ b/tests/test_loaders_dumpers/models/table.yaml @@ -0,0 +1,48 @@ +id: https://w3id.org/linkml/examples/table +name: table +description: |- + Represent a table 
in linkml +license: https://creativecommons.org/publicdomain/zero/1.0/ +imports: + - linkml:types +prefixes: + table: https://w3id.org/linkml/examples/table/ + linkml: https://w3id.org/linkml/ +default_prefix: table +default_range: string + +classes: + + Object: + description: |- + An object (bnode) which needs embedding in a single row + attributes: + name: + range: string + value: + range: string + + Row: + description: |- + A single data point made up of columns. + attributes: + columnA: + range: uriorcurie + identifier: true + objectB: + range: Object + inlined: true + columnC: + range: string + multivalued: false + + Table: + description: |- + Container of rows. + tree_root: true + attributes: + rows: + range: Row + inlined: true + inlined_as_list: true + multivalued: true diff --git a/tests/test_loaders_dumpers/test_csv_loader_dumper.py b/tests/test_loaders_dumpers/test_csv_loader_dumper.py index 720264fe..ed0ea08b 100644 --- a/tests/test_loaders_dumpers/test_csv_loader_dumper.py +++ b/tests/test_loaders_dumpers/test_csv_loader_dumper.py @@ -13,6 +13,7 @@ from linkml_runtime.loaders import csv_loader from linkml_runtime.utils.yamlutils import as_json_object from tests.test_loaders_dumpers.models.books_normalized import Shop, Book, GenreEnum, BookSeries +from tests.test_loaders_dumpers.models.table import Table, Row ROOT = os.path.abspath(os.path.dirname(__file__)) @@ -26,6 +27,11 @@ OUTPUT = os.path.join(OUTPUT_DIR, 'books_flattened.tsv') OUTPUT2 = os.path.join(OUTPUT_DIR, 'books_flattened_02.tsv') +TABLE_SCHEMA = os.path.join(MODEL_DIR, 'table.yaml') +TABLE_DATA_JSON = os.path.join(INPUT_DIR, 'table-json.tsv') +TABLE_DATA_INLINED = os.path.join(INPUT_DIR, 'table-inlined.tsv') + + def _json(obj) -> str: return json.dumps(obj, indent=' ', sort_keys=True) @@ -84,13 +90,11 @@ def test_csvgen_unroundtrippable(self): logging.debug(json_dumper.dumps(roundtrip)) assert roundtrip == data - - - - - - - + def test_table_model(self): + schemaview = SchemaView(SCHEMA) + 
table_json= csv_loader.load(TABLE_DATA_JSON, target_class=Table, index_slot='rows', schemaview=schemaview) + for row in table_json.rows: + assert len(row["columnB"]) == 2 if __name__ == '__main__': unittest.main() diff --git a/tests/test_utils/test_csv_utils.py b/tests/test_utils/test_csv_utils.py new file mode 100644 index 00000000..0b292690 --- /dev/null +++ b/tests/test_utils/test_csv_utils.py @@ -0,0 +1,25 @@ +import pytest +import unittest + +from linkml_runtime.utils.csvutils import _get_key_config, get_configmap +from linkml_runtime.utils.schemaview import SchemaView +from tests.support.test_environment import TestEnvironmentTestCase +from tests.test_utils.environment import env + + +class CsvUtilTestCase(TestEnvironmentTestCase): + env = env + + def test_null_configmap(self): + get_configmap(None, "unknown") + # TODO: with pytest, use caplog to verify the output + # assert 'Index slot or schema not specified' in caplog.text + + def test_get_configmap(self): + fname = env.input_path('kitchen_sink.yaml') + schema = SchemaView(fname) + get_configmap(schema, "unknown") + + +if __name__ == '__main__': + unittest.main() From 23705fe6ee0c3a2d92d80437176e629d40f7791d Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Mon, 10 Oct 2022 16:59:58 +0200 Subject: [PATCH 3/5] rdflibloader: fix various None issues --- linkml_runtime/loaders/rdflib_loader.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/linkml_runtime/loaders/rdflib_loader.py b/linkml_runtime/loaders/rdflib_loader.py index ff5b08d1..ca6a5d88 100644 --- a/linkml_runtime/loaders/rdflib_loader.py +++ b/linkml_runtime/loaders/rdflib_loader.py @@ -130,6 +130,8 @@ def from_rdf_graph(self, graph: Graph, schemaview: SchemaView, target_class: Typ # back to a text representation v = namespaces.curie_for(o) e = schemaview.get_enum(slot.range) + if e is None: + raise ValueError(f'no enum found for {slot.range}: {o} (ns={v})') for pv in e.permissible_values.values(): if v == pv.meaning
or str(o) == pv.meaning: v = pv.text @@ -169,9 +171,16 @@ def from_rdf_graph(self, graph: Graph, schemaview: SchemaView, target_class: Typ # Step 2: replace inline pointers with object dicts def repl(v): if isinstance(v, Pointer): - v2 = obj_map[v.obj] + v2 = obj_map.get(v.obj) if v2 is None: - raise Exception(f'No mapping for pointer {v}') + msg = f'No mapping for pointer {v}. Triples:' + for s, p, o in graph.triples((None, None, v.obj)): + for s2, p2, o2 in graph.triples((None, None, s)): + msg += f"\n{s2} {p2} {o2}." + msg += f"\n{s} {p} {o}." + for s, p, o in graph.triples((v.obj, None, None)): + msg += f"\n{s} {p} {o}." + raise Exception(msg) return v2 else: return v From 1563ee91b4081276a193e2d9c61a86f7cbdc2f0c Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Mon, 10 Oct 2022 17:00:42 +0200 Subject: [PATCH 4/5] yamlutils: print current object for debugging --- linkml_runtime/utils/yamlutils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linkml_runtime/utils/yamlutils.py b/linkml_runtime/utils/yamlutils.py index 1e30075d..2365f15e 100644 --- a/linkml_runtime/utils/yamlutils.py +++ b/linkml_runtime/utils/yamlutils.py @@ -45,7 +45,9 @@ def __post_init__(self, *args: List[str], **kwargs): for k in kwargs.keys(): v = repr(kwargs[k])[:40].replace('\n', '\\n') messages.append(f"{TypedNode.yaml_loc(k)} Unknown argument: {k} = {v}") - raise ValueError('\n'.join(messages)) + msg = f"Unknown arguments for: {self}\n" + msg += '\n'.join(messages) + raise ValueError(msg) def _default(self, obj, filtr: Callable[[dict], dict] = None): """ JSON serializer callback. 
From 48a1ae630b366bd909c162173ea3178031b01b05 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Mon, 10 Oct 2022 17:01:14 +0200 Subject: [PATCH 5/5] csvtests: use slots --- tests/test_loaders_dumpers/models/table.py | 26 +++++----- tests/test_loaders_dumpers/models/table.yaml | 50 ++++++++++++-------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/tests/test_loaders_dumpers/models/table.py b/tests/test_loaders_dumpers/models/table.py index 6c00dce1..bf2366f1 100644 --- a/tests/test_loaders_dumpers/models/table.py +++ b/tests/test_loaders_dumpers/models/table.py @@ -1,5 +1,5 @@ # Auto generated from table.yaml by pythongen.py version: 0.9.0 -# Generation date: 2022-09-29T09:11:32 +# Generation date: 2022-10-10T16:55:54 # Schema: table # # id: https://w3id.org/linkml/examples/table @@ -127,20 +127,20 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): class slots: pass -slots.object__name = Slot(uri=TABLE.name, name="object__name", curie=TABLE.curie('name'), - model_uri=TABLE.object__name, domain=None, range=Optional[str]) +slots.rows = Slot(uri=TABLE.rows, name="rows", curie=TABLE.curie('rows'), + model_uri=TABLE.rows, domain=None, range=Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]]) -slots.object__value = Slot(uri=TABLE.value, name="object__value", curie=TABLE.curie('value'), - model_uri=TABLE.object__value, domain=None, range=Optional[str]) +slots.columnA = Slot(uri=TABLE.columnA, name="columnA", curie=TABLE.curie('columnA'), + model_uri=TABLE.columnA, domain=None, range=URIRef) -slots.row__columnA = Slot(uri=TABLE.columnA, name="row__columnA", curie=TABLE.curie('columnA'), - model_uri=TABLE.row__columnA, domain=None, range=URIRef) +slots.objectB = Slot(uri=TABLE.objectB, name="objectB", curie=TABLE.curie('objectB'), + model_uri=TABLE.objectB, domain=None, range=Optional[Union[dict, Object]]) -slots.row__objectB = Slot(uri=TABLE.objectB, name="row__objectB", curie=TABLE.curie('objectB'), - 
model_uri=TABLE.row__objectB, domain=None, range=Optional[Union[dict, Object]]) +slots.columnC = Slot(uri=TABLE.columnC, name="columnC", curie=TABLE.curie('columnC'), + model_uri=TABLE.columnC, domain=None, range=Optional[str]) -slots.row__columnC = Slot(uri=TABLE.columnC, name="row__columnC", curie=TABLE.curie('columnC'), - model_uri=TABLE.row__columnC, domain=None, range=Optional[str]) +slots.name = Slot(uri=TABLE.name, name="name", curie=TABLE.curie('name'), + model_uri=TABLE.name, domain=None, range=Optional[str]) -slots.table__rows = Slot(uri=TABLE.rows, name="table__rows", curie=TABLE.curie('rows'), - model_uri=TABLE.table__rows, domain=None, range=Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]]) +slots.value = Slot(uri=TABLE.value, name="value", curie=TABLE.curie('value'), + model_uri=TABLE.value, domain=None, range=Optional[str]) diff --git a/tests/test_loaders_dumpers/models/table.yaml b/tests/test_loaders_dumpers/models/table.yaml index 1fa24379..3f3fcc92 100644 --- a/tests/test_loaders_dumpers/models/table.yaml +++ b/tests/test_loaders_dumpers/models/table.yaml @@ -16,33 +16,41 @@ classes: Object: description: |- An object (bnode) which needs embedding in a single row - attributes: - name: - range: string - value: - range: string + slots: + - name + - value Row: description: |- A single data point made up of columns. - attributes: - columnA: - range: uriorcurie - identifier: true - objectB: - range: Object - inlined: true - columnC: - range: string - multivalued: false + slots: + - columnA + - objectB + - columnC Table: description: |- Container of rows. 
tree_root: true - attributes: - rows: - range: Row - inlined: true - inlined_as_list: true - multivalued: true + slots: + - rows + +slots: + rows: + range: Row + inlined: true + inlined_as_list: true + multivalued: true + columnA: + range: uriorcurie + identifier: true + objectB: + range: Object + inlined: true + columnC: + range: string + multivalued: false + name: + range: string + value: + range: string