Skip to content

Commit

Permalink
Merge pull request #227 from dracor-org/225-sitelinks-update
Browse files Browse the repository at this point in the history
Update Wikipedia link count on play ingest
  • Loading branch information
cmil authored Dec 16, 2023
2 parents bb84b82 + 22ffdc0 commit 630a750
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 11 deletions.
2 changes: 2 additions & 0 deletions modules/api.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ function api:corpora-post-json($data) {
util:log-system-out($corpus),
xmldb:create-collection($config:data-root, $name),
xmldb:create-collection($config:metrics-root, $name),
xmldb:create-collection($config:sitelinks-root, $name),
xmldb:create-collection($config:rdf-root, $name),
xmldb:store($tei-dir, "corpus.xml", $corpus),
$json
Expand Down Expand Up @@ -626,6 +627,7 @@ function api:delete-corpus($corpusname, $auth) {
(
xmldb:remove($config:data-root || "/" || $corpusname),
xmldb:remove($config:metrics-root || "/" || $corpusname),
xmldb:remove($config:sitelinks-root || "/" || $corpusname),
xmldb:remove($config:rdf-root || "/" || $corpusname),
map {
"message": "corpus deleted",
Expand Down
51 changes: 40 additions & 11 deletions modules/metrics.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,54 @@ declare namespace trigger = "http://exist-db.org/xquery/trigger";
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
declare namespace tei = "http://www.tei-c.org/ns/1.0";

(:~
 : Query sitelinks for given Wikidata ID and store them to the sitelinks
 : collection of the given corpus.
 :
 : When no ID is given (empty sequence or empty string) nothing is queried or
 : stored and the empty sequence is returned.
 :
 : The corpus' sitelinks collection is created on demand, so the function also
 : works for corpora whose sitelinks collection has not been set up yet.
 :
 : @param $id Wikidata ID
 : @param $corpus Corpus name
 : @return the generated sitelinks element followed by the result of
 :   xmldb:store(), or the empty sequence when no ID was given
 :)
declare function metrics:update-sitelinks(
  $id as xs:string?,
  $corpus as xs:string
) {
  if ($id) then
    let $resource := $id || '.xml'
    let $collection := $config:sitelinks-root || '/' || $corpus
    (: xmldb:store() needs an existing target collection :)
    let $ensure-collection :=
      if (xmldb:collection-available($collection)) then ()
      else xmldb:create-collection("/", $collection)
    let $log := util:log-system-out('querying sitelinks for ' || $resource)
    let $sitelinks := <sitelinks id="{$id}" updated="{current-dateTime()}">{
      for $uri in wd:get-sitelinks($id)
      return <uri>{$uri}</uri>
    }</sitelinks>
    return (
      $sitelinks,
      xmldb:store($collection, $resource, $sitelinks)
    )
  else ()
};

(:~
 : Update sitelinks for Wikidata ID of play with given url.
 :
 : The play is resolved from the path components of the URL. If no Wikidata ID
 : can be determined for it, nothing is updated and the empty sequence is
 : returned; this avoids passing a missing ID on to the two-argument variant.
 :
 : @param $url Path to TEI file
 : @return the result of metrics:update-sitelinks#2, or the empty sequence when
 :   the play has no Wikidata ID
 :)
declare function metrics:update-sitelinks($url as xs:string) {
  let $p := dutil:filepaths($url)
  let $doc := dutil:get-doc($p?corpusname, $p?playname)
  (: NOTE(review): presumably empty for plays without a Wikidata ID; confirm
     against dutil:get-play-wikidata-id :)
  let $id := dutil:get-play-wikidata-id($doc/tei:TEI)
  return
    if ($id) then
      metrics:update-sitelinks($id, $p?corpusname)
    else ()
};

(:~
 : Collect sitelinks for each play in a given corpus from wikidata and store
 : them to the sitelinks collection.
 :
 : The sitelinks collection for the corpus is created first; querying and
 : storing the links of each individual Wikidata ID is delegated to
 : metrics:update-sitelinks#2.
 :
 : @param $corpus Corpus name
 : @return the concatenated results of metrics:update-sitelinks#2 for every
 :   Wikidata ID found in the corpus
 :)
declare function metrics:collect-sitelinks($corpus as xs:string) {
  let $log := util:log-system-out('collecting sitelinks for corpus ' || $corpus)
  (: make sure the target collection exists before anything is stored :)
  let $sitelinks-col := xmldb:create-collection(
    "/", $config:sitelinks-root || '/' || $corpus
  )
  for $id in dutil:get-play-wikidata-ids($corpus)
  return metrics:update-sitelinks($id, $corpus)
};

(:~
Expand Down
4 changes: 4 additions & 0 deletions modules/trigger.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ declare function trigger:after-create-document($url as xs:anyURI) {
(
util:log-system-out("running CREATION TRIGGER for " || $url),
metrics:update($url),
metrics:update-sitelinks($url),
drdf:update($url)
)
else (
Expand All @@ -28,6 +29,7 @@ declare function trigger:after-update-document($url as xs:anyURI) {
(
util:log-system-out("running UPDATE TRIGGER for " || $url),
metrics:update($url),
metrics:update-sitelinks($url),
drdf:update($url)
)
else (
Expand All @@ -38,7 +40,9 @@ declare function trigger:after-update-document($url as xs:anyURI) {
declare function trigger:before-delete-document($url as xs:anyURI) {
if (doc($url)/tei:TEI) then
let $paths := dutil:filepaths($url)
let $id := dutil:get-play-wikidata-id(doc($url)/tei:TEI)
return try {
if ($id) then xmldb:remove($paths?collections?sitelinks, $id || '.xml') else (),
xmldb:remove($paths?collections?metrics, $paths?filename),
xmldb:remove($paths?collections?rdf, $paths?playname || ".rdf.xml")
} catch * {
Expand Down
1 change: 1 addition & 0 deletions modules/util.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ declare function dutil:filepaths ($url as xs:string) as map() {
"collections": map {
"tei": $config:data-root || "/" || $corpusname,
"metrics": $config:metrics-root || "/" || $corpusname,
"sitelinks": $config:sitelinks-root || "/" || $corpusname,
"rdf": $config:rdf-root || "/" || $corpusname
},
"files": map {
Expand Down

0 comments on commit 630a750

Please sign in to comment.