Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for the new Hydra vocabulary
Browse files Browse the repository at this point in the history
seitenbau-govdata committed Mar 28, 2024
1 parent e2183dd commit ed9b9d0
Showing 4 changed files with 136 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...HEAD)

* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated.

## [v1.6.0](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...v1.6.0) - 2024-02-29

29 changes: 18 additions & 11 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
@@ -116,11 +116,15 @@ def next_page(self):
Returns the URL of the next page or None if there is no next page
'''
for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
# Try to find HYDRA.next first
for o in self.g.objects(pagination_node, HYDRA.next):
return str(o)

# If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
for o in self.g.objects(pagination_node, HYDRA.nextPage):
return str(o)
return None


def parse(self, data, _format=None):
'''
Parses an RDF graph serialization into the class graph
@@ -178,7 +182,6 @@ def datasets(self):

yield dataset_dict


class RDFSerializer(RDFProcessor):
'''
A CKAN to RDF serializer based on rdflib
@@ -209,19 +212,23 @@ def _add_pagination_triples(self, paging_info):
pagination_ref = BNode()
self.g.add((pagination_ref, RDF.type, HYDRA.PagedCollection))

# The predicates `nextPage`, `previousPage`, `firstPage`, `lastPage`
# and `itemsPerPage` are deprecated and will be removed in the future
items = [
('next', HYDRA.nextPage),
('previous', HYDRA.previousPage),
('first', HYDRA.firstPage),
('last', HYDRA.lastPage),
('count', HYDRA.totalItems),
('items_per_page', HYDRA.itemsPerPage),
('next', [HYDRA.nextPage, HYDRA.next]),
('previous', [HYDRA.previousPage, HYDRA.previous]),
('first', [HYDRA.firstPage, HYDRA.first]),
('last', [HYDRA.lastPage, HYDRA.last]),
('count', [HYDRA.totalItems]),
('items_per_page', [HYDRA.itemsPerPage]),
]

for item in items:
key, predicate = item
key, predicates = item
if paging_info.get(key):
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))
for predicate in predicates:
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))

return pagination_ref

46 changes: 45 additions & 1 deletion ckanext/dcat/tests/test_base_parser.py
Original file line number Diff line number Diff line change
@@ -140,7 +140,7 @@ def test_parse_data(self):

assert len(p.g) == 2

def test_parse_pagination_next_page(self):
def test_parse_pagination_next_page_deprecated_vocabulary_only(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
@@ -163,6 +163,50 @@ def test_parse_pagination_next_page(self):

assert p.next_page() == 'http://example.com/catalog.xml?page=2'

def test_parse_pagination_next_page_updated_vocabulary_only(self):
    '''
    A paged collection that only uses the current Hydra term
    (hydra:next) must still expose its next page URL
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=2</hydra:next>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    next_page_url = parser.next_page()

    assert next_page_url == 'http://example.com/catalog.xml?page=2'

def test_parse_pagination_next_page_both_vocabularies(self):
    '''
    When both hydra:next and the deprecated hydra:nextPage are present,
    the value of hydra:next is the one returned
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=next</hydra:next>
<hydra:nextPage>http://example.com/catalog.xml?page=nextPage</hydra:nextPage>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    next_page_url = parser.next_page()

    assert next_page_url == 'http://example.com/catalog.xml?page=next'

def test_parse_without_pagination(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
73 changes: 72 additions & 1 deletion ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
from ckantoolkit.tests import helpers, factories

from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
from ckanext.dcat.processors import RDFSerializer, HYDRA
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
DISTRIBUTION_LICENSE_FALLBACK_CONFIG)
@@ -1250,6 +1250,77 @@ def test_subcatalog(self):
assert len(dataset_title) == 1
assert str(dataset_title[0]) == dataset['title']

def test_catalog_pagination(self):
    '''
    Check that the pagination info of a catalog is serialized using both
    the current Hydra terms and their deprecated counterparts
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'}
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    expected_first = 'http://subcatalog.example?page=1'
    expected_next = 'http://subcatalog.example?page=2'
    expected_last = 'http://subcatalog.example?page=3'

    # No 'previous' key on purpose: the first page has no predecessor,
    # so no previous/previousPage triples are expected below
    pagination = {
        'count': 12,
        'items_per_page': 5,
        'current': expected_first,
        'first': expected_first,
        'last': expected_last,
        'next': expected_next,
    }

    s = RDFSerializer(profiles=['euro_dcat_ap'])
    g = s.g

    s.serialize_catalog(catalog_dict, dataset_dicts=[dataset],
                        pagination_info=pagination)

    paged_collection = list(g.subjects(RDF.type, HYDRA.PagedCollection))
    assert len(paged_collection) == 1
    pagination_node = paged_collection[0]

    def values_of(predicate):
        # All objects attached to the pagination node via `predicate`,
        # as plain strings so they can be compared against the inputs
        return [str(o) for o in g.objects(pagination_node, predicate)]

    # Pagination item: first
    assert values_of(HYDRA.first) == [expected_first]
    assert values_of(HYDRA.firstPage) == [expected_first]

    # Pagination item: next
    assert values_of(HYDRA.next) == [expected_next]
    assert values_of(HYDRA.nextPage) == [expected_next]

    # Pagination item: previous
    assert values_of(HYDRA.previousPage) == []
    assert values_of(HYDRA.previous) == []

    # Pagination item: last
    assert values_of(HYDRA.last) == [expected_last]
    assert values_of(HYDRA.lastPage) == [expected_last]

    # Pagination item: count
    assert values_of(HYDRA.totalItems) == ["12"]

    # Pagination item: items_per_page
    assert values_of(HYDRA.itemsPerPage) == ["5"]

@pytest.mark.ckan_config(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, 'true')
def test_set_missing_license_for_resource(self):
''' Check the behavior if param in config is set: Add license_id to the resource'''

0 comments on commit ed9b9d0

Please sign in to comment.