Skip to content

Commit

Permalink
import_entities_from_triplestore datatype and lang
Browse files: browse the repository at this point in the history
  • Loading branch information
arcangelo7 committed Sep 14, 2024
1 parent 08bf583 commit a61c485
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rdflib-ocdm"
version = "0.3.8"
version = "0.3.9"
description = ""
authors = ["arcangelo7 <[email protected]>"]
license = "ISC"
Expand Down
91 changes: 72 additions & 19 deletions rdflib_ocdm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
# SOFTWARE.
from __future__ import annotations

from typing import List
from typing import List, Union

from oc_ocdm.support.reporter import Reporter
from rdflib import ConjunctiveGraph, URIRef
from SPARQLWrapper import POST, XML, SPARQLWrapper
from rdflib import ConjunctiveGraph, Graph, Literal, URIRef
from SPARQLWrapper import JSON, POST, XML, SPARQLWrapper

from rdflib_ocdm.ocdm_graph import OCDMGraph
from rdflib_ocdm.ocdm_graph import OCDMConjunctiveGraph, OCDMGraph


class Reader(object):
Expand All @@ -37,20 +37,73 @@ def __init__(self, repok: Reporter = None, reperr: Reporter = None):
self.reperr: Reporter = reperr

@staticmethod
def import_entities_from_triplestore(ocdm_graph: Union[OCDMGraph, OCDMConjunctiveGraph], ts_url: str, res_list: List[URIRef]) -> None:
    """Load every statement about the given resources from a SPARQL endpoint.

    The statements whose subject is listed in ``res_list`` are fetched from
    the endpoint at ``ts_url`` and added to ``ocdm_graph``:

    * for an ``OCDMConjunctiveGraph`` a ``SELECT`` over named graphs is
      issued and the JSON bindings are turned into quads, preserving the
      named graph, the language tag and the datatype of each literal;
    * for an ``OCDMGraph`` a ``CONSTRUCT`` is issued and the returned
      triples are added as they are.

    :param ocdm_graph: the graph that receives the imported statements.
    :param ts_url: the URL of the SPARQL endpoint to query.
    :param res_list: the URIs of the subjects to import.
    :raises TypeError: if ``ocdm_graph`` is neither an ``OCDMGraph`` nor an
        ``OCDMConjunctiveGraph``.
    :raises ValueError: if ``res_list`` is empty or the endpoint returns no
        statement for the requested resources.
    """
    # The conjunctive check comes first, exactly as in the original
    # if/elif chain, so the precedence between the two types is unchanged.
    is_conjunctive = isinstance(ocdm_graph, OCDMConjunctiveGraph)
    if not is_conjunctive and not isinstance(ocdm_graph, OCDMGraph):
        raise TypeError("ocdm_graph must be either OCDMGraph or OCDMConjunctiveGraph")

    # An empty list would otherwise be rendered as "VALUES ?s {<>}", i.e. a
    # query about the empty relative IRI instead of "nothing to import".
    if not res_list:
        raise ValueError("No entities were found.")

    values: str = f"<{'> <'.join(res_list)}>"
    sparql: SPARQLWrapper = SPARQLWrapper(ts_url)
    sparql.setMethod(POST)

    if is_conjunctive:
        # BNode is not among the names imported at file level.
        from rdflib import BNode

        sparql.setQuery(f'''
            SELECT ?g ?s ?p ?o (LANG(?o) AS ?lang)
            WHERE {{
                GRAPH ?g {{
                    ?s ?p ?o.
                    VALUES ?s {{{values}}}
                }}
            }}
        ''')
        sparql.setReturnFormat(JSON)
        result = sparql.queryAndConvert()

        bindings = None
        if result and 'results' in result and 'bindings' in result['results']:
            bindings = result['results']['bindings']
        # A well-formed response with zero rows must be reported as
        # "not found" too, consistently with the OCDMGraph branch below.
        if not bindings:
            raise ValueError("No entities were found.")

        temp_graph = ConjunctiveGraph()
        for binding in bindings:
            graph_uri = URIRef(binding['g']['value'])
            subject = URIRef(binding['s']['value'])
            predicate = URIRef(binding['p']['value'])

            obj_data = binding['o']
            obj_type = obj_data['type']
            if obj_type == 'uri':
                obj = URIRef(obj_data['value'])
            elif obj_type == 'bnode':
                # Blank nodes must not be downgraded to plain literals.
                obj = BNode(obj_data['value'])
            else:
                value = obj_data['value']
                # Prefer the tag carried by the binding itself and fall
                # back to the explicitly projected ?lang variable.
                lang = obj_data.get('xml:lang') or binding.get('lang', {}).get('value')
                datatype = obj_data.get('datatype')
                if lang:
                    obj = Literal(value, lang=lang)
                elif datatype:
                    obj = Literal(value, datatype=URIRef(datatype))
                else:
                    obj = Literal(value)

            temp_graph.add((subject, predicate, obj, graph_uri))

        # Quads are handed over through a plain ConjunctiveGraph, as in the
        # original code, so ocdm_graph.add() receives them in the same form.
        for quad in temp_graph.quads():
            ocdm_graph.add(quad)
    else:
        sparql.setQuery(f'''
            CONSTRUCT {{
                ?s ?p ?o
            }}
            WHERE {{
                ?s ?p ?o.
                VALUES ?s {{{values}}}
            }}
        ''')
        sparql.setReturnFormat(XML)
        result: Graph = sparql.queryAndConvert()

        if result is None or len(result) == 0:
            raise ValueError("No entities were found.")
        for triple in result:
            ocdm_graph.add(triple)
5 changes: 3 additions & 2 deletions rdflib_ocdm/storer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from oc_ocdm.support.reporter import Reporter
from SPARQLWrapper import SPARQLWrapper

from rdflib_ocdm.ocdm_graph import OCDMGraph, OCDMGraphCommons
from rdflib_ocdm.ocdm_graph import (OCDMConjunctiveGraph, OCDMGraph,
OCDMGraphCommons)
from rdflib_ocdm.query_utils import get_update_query
from rdflib_ocdm.reader import Reader

Expand Down Expand Up @@ -97,7 +98,7 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
removed_statements: int = 0
skipped_queries: int = 0
result: bool = True
entity_type = 'graph' if isinstance(self.a_set, OCDMGraph) else 'prov'
entity_type = 'graph' if isinstance(self.a_set, OCDMGraph) or isinstance(self.a_set, OCDMConjunctiveGraph) else 'prov'
for idx, entity in enumerate(list(self.a_set.all_entities)):
update_query, n_added, n_removed = get_update_query(self.a_set, entity, entity_type)
if update_query == "":
Expand Down

0 comments on commit a61c485

Please sign in to comment.