Skip to content

Commit

Permalink
Add publication place to sentence context
Browse files Browse the repository at this point in the history
  • Loading branch information
mcollardanuy committed Jul 27, 2023
1 parent c3b4ea0 commit 4475547
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 47 deletions.
5 changes: 4 additions & 1 deletion experiments/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,10 @@ def linking_experiments(self) -> None:
self.mylinker.linking_resources["mentions_to_wikidata"],
)
if self.mylinker.rel_params["with_publication"]:
# If "publ", add an artificial publication entry:
# If "publ", add publication info to context and as new entry:
article_dataset = rel_utils.add_publication_in_context(
article_dataset
)
article_dataset = rel_utils.add_publication(article_dataset)
predicted = linking_model.predict(article_dataset)
if self.mylinker.rel_params["with_publication"]:
Expand Down
69 changes: 23 additions & 46 deletions resources/publication_metadata.json
Original file line number Diff line number Diff line change
@@ -1,162 +1,139 @@
{
"sn83030483": {
"publication_title": "Gazette of the United-States",
"publication_place": "New York",
"publication_ctxt": "New York",
"publication_place": "New York, New York",
"publication_dates": "1789-1793",
"wikidata_qid": "Q60"
},
"sn84026272": {
"publication_title": "Gazette of the United-States",
"publication_place": "Philadelphia",
"publication_ctxt": "Pennsylvania",
"publication_place": "Philadelphia, Pennsylvania",
"publication_dates": "1800-1801",
"wikidata_qid": "Q1345"
},
"sn82014385": {
"publication_title": "The Delaware gazette",
"publication_place": "Wilmington",
"publication_ctxt": "Delaware",
"publication_place": "Wilmington, Delaware",
"publication_dates": "1809-1810",
"wikidata_qid": "Q174224"
},
"sn83026170": {
"publication_title": "Alexandria Gazette",
"publication_place": "Alexandria",
"publication_ctxt": "Virginia",
"publication_place": "Alexandria, Virginia",
"publication_dates": "1817-1822",
"wikidata_qid": "Q88"
},
"sn83020874": {
"publication_title": "Cherokee Phoenix, and Indian's advocate",
"publication_place": "Echota",
"publication_ctxt": "Georgia",
"publication_place": "Echota, Georgia",
"publication_dates": "1829-1834",
"wikidata_qid": "Q7007061"
},
"sn84020750": {
"publication_title": "The North Carolinian",
"publication_place": "Fayetteville",
"publication_ctxt": "North Carolina",
"publication_place": "Fayetteville, North Carolina",
"publication_dates": "1839-1861",
"wikidata_qid": "Q331104"
},
"sn85042404": {
"publication_title": "Jamestown Alert",
"publication_place": "Jamestown",
"publication_ctxt": "North Dakota",
"publication_place": "Jamestown, North Dakota",
"publication_dates": "1878-1882",
"wikidata_qid": "Q1052658"
},
"sn88068010": {
"publication_title": "Chariton Courier",
"publication_place": "Keytesville",
"publication_ctxt": "Missouri",
"publication_place": "Keytesville, Missouri",
"publication_dates": "1878-current",
"wikidata_qid": "Q957297"
},
"sn86063397": {
"publication_title": "The Elk Mountain pilot",
"publication_place": "Irwin",
"publication_ctxt": "Colorado",
"publication_place": "Irwin, Colorado",
"publication_dates": "1880-19??",
"wikidata_qid": "Q592729"
},
"sn88085488": {
"publication_title": "Pullman Herald",
"publication_place": "Pullman",
"publication_ctxt": "Washington",
"publication_place": "Pullman, Washington",
"publication_dates": "1888-1989",
"wikidata_qid": "Q983540"
},
"sn89058133": {
"publication_title": "Putnam County Herald",
"publication_place": "Cookeville",
"publication_ctxt": "Tennessee",
"publication_place": "Cookeville, Tennessee",
"publication_dates": "1903-1922",
"wikidata_qid": "Q2456192"
},
"sn83025812": {
"publication_title": "The Independent",
"publication_place": "Elizabeth City",
"publication_ctxt": "North Carolina",
"publication_place": "Elizabeth City, North Carolina",
"publication_dates": "1908-1936",
"wikidata_qid": "Q1018467"
},
"sn92063852": {
"publication_title": "The Detroit Tribune",
"publication_place": "Detroit",
"publication_ctxt": "Michigan",
"publication_place": "Detroit, Michigan",
"publication_dates": "1935-1966",
"wikidata_qid": "Q12439"
},
"sn91068761": {
"publication_title": "Tabor City Tribune",
"publication_place": "Tabor City",
"publication_ctxt": "North Carolina",
"publication_place": "Tabor City, North Carolina",
"publication_dates": "1946-1991",
"wikidata_qid": "Q586130"
},
"0000408": {
"publication_title": "Dorset County Chronicle",
"publication_place": "Dorchester",
"publication_ctxt": "Dorset",
"publication_place": "Dorchester, Dorset",
"publication_dates": "1824-1884",
"wikidata_qid": "Q503331"
},
"0000206": {
"publication_title": "Manchester Courier and Lancashire General Advertiser.",
"publication_place": "Manchester",
"publication_ctxt": "Lancashire",
"publication_place": "Manchester, Lancashire",
"publication_dates": "1825-1916",
"wikidata_qid": "Q18125"
},
"0000968": {
"publication_title": "The Ashton Weekly Reporter, and Stalybridge and Dukinfield Chronicle",
"publication_place": "Ashton-under-Lyne",
"publication_ctxt": "Lancashire",
"publication_place": "Ashton-under-Lyne, Lancashire",
"publication_dates": "1855-",
"wikidata_qid": "Q659803"
},
"0000200": {
"publication_title": "The Manchester Mercury",
"publication_place": "Manchester",
"publication_ctxt": "Lancashire",
"publication_place": "Manchester, Lancashire",
"publication_dates": "1752-1830",
"wikidata_qid": "Q18125"
},
"0000201": {
"publication_title": "The Manchester Mercury",
"publication_place": "Manchester",
"publication_ctxt": "Lancashire",
"publication_place": "Manchester, Lancashire",
"publication_dates": "1752-1830",
"wikidata_qid": "Q18125"
},
"0000239": {
"publication_title": "The Manchester Mercury",
"publication_place": "Manchester",
"publication_ctxt": "Lancashire",
"publication_place": "Manchester, Lancashire",
"publication_dates": "1752-1830",
"wikidata_qid": "Q18125"
},
"0000240": {
"publication_title": "The Manchester Mercury",
"publication_place": "Manchester",
"publication_ctxt": "Lancashire",
"publication_place": "Manchester, Lancashire",
"publication_dates": "1752-1830",
"wikidata_qid": "Q18125"
},
"0000967": {
"publication_title": "Ashton and Stalybridge Reporter",
"publication_place": "Ashton-under-Lyne",
"publication_ctxt": "Lancashire",
"publication_place": "Ashton-under-Lyne, Lancashire",
"publication_dates": "1855-",
"wikidata_qid": "Q659803"
},
"0002325": {
"publication_title": "The Poole and South-Western Herald",
"publication_place": "Poole",
"publication_ctxt": "Dorset",
"publication_place": "Poole, Dorset",
"publication_dates": "1852-1889",
"wikidata_qid": "Q203349"
}
Expand Down
30 changes: 30 additions & 0 deletions utils/rel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,32 @@ def add_publication(
return new_json


def add_publication_in_context(rel_json: dict, publname: Optional[str] = "") -> dict:
    """
    Add publication information to the provided JSON data as context.

    For every key in ``rel_json``, the place of publication is appended
    (separated by a single space) to the second element of each mention's
    ``"context"`` entry. The place is read from the ``"place"`` field of the
    first mention of the article, falling back to ``publname``. For the
    special ``"linking"`` key, ``publname`` is always used.

    The input is left untouched: each mention dictionary and its
    ``"context"`` sequence are copied before being modified.

    Arguments:
        rel_json (dict): The JSON data containing articles and mention
            information. Each value is expected to be a list of mention
            dictionaries with a two-element ``"context"`` entry.
        publname (str, optional): The name of the publication place used
            when a mention carries no ``"place"`` field. Defaults to an
            empty string; ``None`` is treated as an empty string.

    Returns:
        dict: A new JSON dictionary with the added publication information.
    """
    # The signature allows None, which cannot be concatenated to a string.
    default_place = "" if publname is None else publname

    new_json = rel_json.copy()
    for article, mentions in rel_json.items():
        place = default_place
        # An article without mentions has no first mention to read from;
        # keep the default place instead of raising an IndexError.
        if article != "linking" and mentions:
            place = mentions[0].get("place", default_place)

        new_article = []
        for art_mention in mentions:
            # Copy the mention and its context so that the caller's data is
            # not modified through the shallow copy of ``rel_json``.
            new_mention = dict(art_mention)
            context = list(art_mention["context"])
            context[1] = context[1] + " " + place
            new_mention["context"] = context
            new_article.append(new_mention)
        new_json[article] = new_article
    return new_json


def prepare_rel_trainset(
df: pd.DataFrame,
rel_params,
Expand Down Expand Up @@ -333,6 +359,10 @@ def prepare_rel_trainset(
# If "publ" is taken into account for the disambiguation, add the place
# of publication as an additional already disambiguated entity per row:
if rel_params["with_publication"] == True:
rel_json = add_publication_in_context(
rel_json,
rel_params["default_publname"],
)
rel_json = add_publication(
rel_json,
rel_params["default_publname"],
Expand Down

0 comments on commit 4475547

Please sign in to comment.