From e523d813530c66d15ace2d4814e2a68a4251f584 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Sun, 5 Jan 2025 16:22:26 +0100 Subject: [PATCH] Fix forms check to allow for preposition forms + Danish fix --- src/scribe_data/check/check_query_forms.py | 5 +++++ .../danish/prepositions/prepositions.sparql | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scribe_data/check/check_query_forms.py b/src/scribe_data/check/check_query_forms.py index 5435c844..af936b75 100644 --- a/src/scribe_data/check/check_query_forms.py +++ b/src/scribe_data/check/check_query_forms.py @@ -312,6 +312,7 @@ def validate_forms(query_text: str) -> str: select_vars = select_vars[2:] # Regex pattern to capture the variables in the WHERE clause. dt_pattern = r"WHERE\s*\{[^}]*?wikibase:lemma\s*\?\s*(\w+)\s*[;.]\s*" + potential_prep_case_pattern = r"caseForm rdfs:label.*[.]" forms_pattern = r"ontolex:representation \?([^ ;]+)" where_vars = [] @@ -323,6 +324,10 @@ def validate_forms(query_text: str) -> str: elif dt_match: where_vars.append(dt_match[0]) + potential_prep_case_match = re.findall(potential_prep_case_pattern, query_text) + if potential_prep_case_match and "grammaticalCase" in potential_prep_case_match[0]: + where_vars.append("grammaticalCase") + where_vars += re.findall(forms_pattern, query_text) # Handling labels provided by the labeling service like 'case' and 'gender' in the same order as in select_vars. diff --git a/src/scribe_data/wikidata/language_data_extraction/danish/prepositions/prepositions.sparql b/src/scribe_data/wikidata/language_data_extraction/danish/prepositions/prepositions.sparql index 37636c9f..b7b501a0 100644 --- a/src/scribe_data/wikidata/language_data_extraction/danish/prepositions/prepositions.sparql +++ b/src/scribe_data/wikidata/language_data_extraction/danish/prepositions/prepositions.sparql @@ -5,7 +5,6 @@ SELECT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition - ?grammaticalCase WHERE { ?lexeme dct:language wd:Q9035 ;