Skip to content

Commit

Permalink
wdMismatchFinder report: update and add qualifiers support
Browse files Browse the repository at this point in the history
  • Loading branch information
jmkeil committed Nov 12, 2024
1 parent f733acc commit f3f3491
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]

### Fixed
* fix `wdMismatchFinder` report: adjust to [changed mismatches import file format](https://phabricator.wikimedia.org/T313467)

### Added
* extend `wdMismatchFinder` report: add reporting of value deviations for Wikidata qualifiers

## [3.0.1] - 2024-11-05

### Fixed
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<#ftl output_format="plainText">
item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url
item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url,type
<#list data as row>
"${row.item_id}","${row.statement_guid!}","${row.property_id}","${row.wikidata_value!}","${row.meta_wikidata_value!}","${row.external_value}","${row.external_url!}"
"${row.item_id}","${row.statement_guid!}","${row.property_id}","${row.wikidata_value!}","${row.meta_wikidata_value!}","${row.external_value}","${row.external_url!}","${row.type!}"
</#list>
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ PREFIX dqv: <http://www.w3.org/ns/dqv#>
PREFIX oa: <http://www.w3.org/ns/oa#>
PREFIX wikibase: <http://wikiba.se/ontology#>

SELECT
SELECT DISTINCT
# TODO: adapt if https://phabricator.wikimedia.org/T313469 (allow reporting of mismatches for labels, descriptions and aliases) is done
# TODO: adapt once reporting of qualifier omissions is supported by Mismatch Finder

# expected format: https://github.com/wmde/wikidata-mismatch-finder/blob/main/docs/UserGuide.md

(strAfter(str(?internal_url),"http://www.wikidata.org/entity/") AS ?item_id)
Expand All @@ -47,19 +50,41 @@ SELECT
(STR(?external_urlX) AS ?external_url)
# (Optional) A url or uri to the mismatching entity in the external database.

?type
# (Optional) A string that contains either the value 'statement' or the value 'qualifier' to indicate where the mismatch occurs.
# If left empty, a value of 'statement' will be assumed.

WHERE {
BIND (<http://www.wikidata.org/> AS ?dataset)
[ a av:AspectPattern ;
[] a av:AspectPattern ;
av:associatedDataset ?dataset ;
av:ofAspect ?aspect ;
av:hasVariablePath [ av:variableName ?variable ; av:propertyPath ?propertyPath ]
]
BIND (replace(str(?propertyPath),"<http://www.wikidata.org/prop/direct/(P\\d*)>","$1") AS ?property_id)
BIND (IRI(CONCAT("http://www.wikidata.org/prop/",?property_id)) AS ?p)
BIND (IRI(CONCAT("http://www.wikidata.org/prop/statement/",?property_id)) AS ?ps)
BIND (IRI(CONCAT("http://www.wikidata.org/prop/statement/value/",?property_id)) AS ?psv)
av:hasVariablePath [ av:variableName ?variable ; av:propertyPath ?propertyPath ] .
# Derive the Wikidata property id, the mismatch type and the lookup predicates
# from the textual property path of the aspect variable.
# NOTE(review): assumes ?propertyPath is a string literal such as
# "<http://www.wikidata.org/prop/direct/P123>" or
# "<http://www.wikidata.org/prop/P123>/<http://www.wikidata.org/prop/qualifier/P456>" - confirm.

# The greedy ".*" keeps only the id of the last property in the path,
# i.e. the qualifier property for a qualifier path.
BIND (REPLACE(?propertyPath,"^.*/(P\\d*)>$","$1") AS ?property_id)
# REPLACE returns its input unchanged if the pattern does not match,
# so this filter drops all paths that do not end in a property id.
FILTER REGEX(?property_id, "^P\\d*$", "i")

# REGEX already yields a boolean, no IF(..., true, false) wrapper needed.
BIND (REGEX(?propertyPath, "<http://www.wikidata.org/prop/P\\d*>/<http://www.wikidata.org/prop/qualifier/P\\d*>", "i") AS ?isQualifier)
BIND (IF(?isQualifier,"qualifier","statement") AS ?type)

# Predicate that links the item to its statement node.
# Bound to ?p, the variable used by the "?internal_url ?p ?statement" pattern;
# otherwise that pattern would match statements of any property.
# For qualifier paths the trailing ".*$" strips the qualifier step, so that only
# the IRI of the statement property is left.
BIND (IF(?isQualifier
    , IRI(REPLACE(?propertyPath,"^<(http://www.wikidata.org/prop/P\\d*)>.*$","$1"))
    , IRI(CONCAT("http://www.wikidata.org/prop/",?property_id))
  ) AS ?p)
# Former variable name, kept as an alias in case it is referenced in a part of the query not shown here.
BIND (?p AS ?q)

# Predicate from the statement node to the simple value (statement or qualifier value).
BIND (IF(?isQualifier
    , IRI(CONCAT("http://www.wikidata.org/prop/qualifier/",?property_id))
    , IRI(CONCAT("http://www.wikidata.org/prop/statement/",?property_id))
  ) AS ?ps_OR_pq)
# Predicate from the statement node to the full value node (used for the time calendar model).
BIND (IF(?isQualifier
    , IRI(CONCAT("http://www.wikidata.org/prop/qualifier/value/",?property_id))
    , IRI(CONCAT("http://www.wikidata.org/prop/statement/value/",?property_id))
  ) AS ?psv_OR_pqv)

?issueGraph av:associatedDataset ?dataset .
{
?sourceGraph av:associatedDataset ?dataset ;
a av:PrimaryDataGraph .

{ # Case: Deviation
GRAPH ?issueGraph {
[] a av:Deviation ;
^oa:hasBody [ oa:hasTarget ?internal_url ];
Expand All @@ -69,23 +94,16 @@ WHERE {
av:comparedToValue ?external_valueX ;
av:comparedToResource ?external_urlX .
}
# TODO: adapt if https://phabricator.wikimedia.org/T313467 (ability to report mismatches on qualifiers) is done
# TODO: adapt if https://phabricator.wikimedia.org/T313469 (allow reporting of mismatches for labels, descriptions and aliases) is done
GRAPH ?sourceGraph {
?internal_url ?p ?statement .

# value
?statement ?ps ?wikidata_valueX .
FILTER(regex(str(?statement),"http://www.wikidata.org/entity/statement/([0-9a-zA-Z]*)-(.*)"))

# time value meta data
?internal_url ?p ?statement .
?statement ?ps_OR_pq ?wikidata_valueX .
OPTIONAL {
?statement ?psv [ wikibase:timeCalendarModel ?meta_wikidata_valueX ] .
?statement ?psv_OR_pqv [ wikibase:timeCalendarModel ?meta_wikidata_valueX ] .
}
}
?sourceGraph a av:PrimaryDataGraph ;
av:associatedDataset ?dataset .
} UNION {
}
UNION
{ # Case: Omission
GRAPH ?issueGraph {
[] a av:ValueOmission ;
^oa:hasBody [ oa:hasTarget ?internal_url ];
Expand All @@ -95,7 +113,4 @@ WHERE {
av:comparedToResource ?external_urlX .
}
}
# TODO: adapt if https://phabricator.wikimedia.org/T313467 (ability to report mismatches on qualifiers) is done
FILTER REGEX(?property_id, "^P\\d*$", "i")
}
ORDER BY ?statement_guid ?property_id

0 comments on commit f3f3491

Please sign in to comment.