Skip to content

Commit

Permalink
Update usage of Wikidata entities to use shared dict
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Jan 11, 2025
1 parent f7b0569 commit 2fdd74b
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 9 deletions.
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ pre-commit install # install pre-commit hooks
# pre-commit run --all-files # lint and fix common problems in the codebase
```

> [!NOTE]
> If you are having issues with pre-commit and want to send along your changes regardless, you can ignore the pre-commit hooks via the following:
>
> ```bash
> git commit --no-verify -m "COMMIT_MESSAGE"
> ```

If you face any issues, consider reinstalling Scribe-data by running the following:
```bash
Expand Down
4 changes: 4 additions & 0 deletions src/scribe_data/resources/wikidata_qids_pids.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"instance_of": "P31",
"ietf_language_tag": "P305"
}
24 changes: 18 additions & 6 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

import ast
import contextlib
import requests
import json
import os
import re
Expand All @@ -33,6 +32,7 @@
from typing import Any, Optional

import questionary
import requests
from rich import print as rprint

# MARK: Utils Variables
Expand All @@ -55,6 +55,9 @@
LEXEME_FORM_METADATA_FILE = (
Path(__file__).parent / "resources" / "lexeme_form_metadata.json"
)
# Path to the bundled JSON resource that maps readable names to Wikidata
# identifiers (e.g. "instance_of" -> "P31", "ietf_language_tag" -> "P305").
WIKIDATA_QIDS_PIDS_FILE = (
Path(__file__).parent / "resources" / "wikidata_qids_pids.json"
)
DATA_DIR = Path(DEFAULT_JSON_EXPORT_DIR)

try:
Expand All @@ -79,6 +82,13 @@
except (IOError, json.JSONDecodeError) as e:
print(f"Error reading lexeme form metadata: {e}")

# Load the shared mapping of readable names to Wikidata identifiers
# (e.g. "instance_of" -> "P31") so that call sites do not hard-code them.
# A read or parse failure is reported rather than raised; in that case
# `wikidata_qids_pids` is left undefined.
try:
    with WIKIDATA_QIDS_PIDS_FILE.open("r", encoding="utf-8") as file:
        wikidata_qids_pids = json.load(file)

except (IOError, json.JSONDecodeError) as e:
    # Name the file that actually failed (this is not the language metadata).
    print(f"Error reading Wikidata QIDs and PIDs: {e}")


language_map = {}
language_to_qid = {}
Expand Down Expand Up @@ -762,8 +772,10 @@ def check_qid_is_language(qid: str):
request = requests.get(request_string, timeout=5)
request_result = request.json()

if request_result["statements"]["P31"]:
instance_of_values = request_result["statements"]["P31"]
if request_result["statements"][wikidata_qids_pids["instance_of"]]:
instance_of_values = request_result["statements"][
wikidata_qids_pids["instance_of"]
]
for val in instance_of_values:
if val["value"]["content"] == "Q34770":
print(f"{request_result['labels']['en']} ({qid}) is a language.\n")
Expand Down Expand Up @@ -796,9 +808,9 @@ def get_language_iso_code(qid: str):
response = requests.get(api_endpoint)
data = response.json()
try:
return data["entities"][qid]["claims"]["P305"][0]["mainsnak"]["datavalue"][
"value"
]
return data["entities"][qid]["claims"][wikidata_qids_pids["ietf_language_tag"]][
0
]["mainsnak"]["datavalue"]["value"]

except ValueError:
raise ValueError("The passed Wikidata QID is not a language.")
Expand Down
18 changes: 15 additions & 3 deletions tests/cli/test_total.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
-->
"""

import json
import unittest
from unittest.mock import MagicMock, call, patch

Expand All @@ -29,7 +30,14 @@
get_total_lexemes,
total_wrapper,
)
from scribe_data.utils import check_qid_is_language
from scribe_data.utils import WIKIDATA_QIDS_PIDS_FILE, check_qid_is_language

# Load the same name -> Wikidata identifier mapping the library uses so the
# mocked API responses below are keyed by the identifiers under test.
try:
    with WIKIDATA_QIDS_PIDS_FILE.open("r", encoding="utf-8") as file:
        wikidata_qids_pids = json.load(file)

except (IOError, json.JSONDecodeError) as e:
    # Name the file that actually failed (this is not the language metadata).
    print(f"Error reading Wikidata QIDs and PIDs: {e}")


class TestTotalLexemes(unittest.TestCase):
Expand Down Expand Up @@ -217,7 +225,9 @@ class TestCheckQidIsLanguage(unittest.TestCase):
def test_check_qid_is_language_valid(self, mock_get):
mock_response = MagicMock()
mock_response.json.return_value = {
"statements": {"P31": [{"value": {"content": "Q34770"}}]},
"statements": {
wikidata_qids_pids["instance_of"]: [{"value": {"content": "Q34770"}}]
},
"labels": {"en": "English"},
}
mock_get.return_value = mock_response
Expand All @@ -232,7 +242,9 @@ def test_check_qid_is_language_valid(self, mock_get):
def test_check_qid_is_language_invalid(self, mock_get):
mock_response = MagicMock()
mock_response.json.return_value = {
"statements": {"P31": [{"value": {"content": "Q5"}}]},
"statements": {
wikidata_qids_pids["instance_of"]: [{"value": {"content": "Q5"}}]
},
"labels": {"en": "Human"},
}
mock_get.return_value = mock_response
Expand Down

0 comments on commit 2fdd74b

Please sign in to comment.