diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8a00955c..274f6c6e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -50,3 +50,7 @@ jobs:
         shell: bash
         run: |
           python -m poetry install
+
+      - name: Test with pytest
+        run: |
+          poetry run pytest -m "not deezy"
\ No newline at end of file
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index c4a08fc6..2273ff4f 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -49,7 +49,7 @@ jobs:
       - name: Install dependencies
         shell: bash
         run: |
-          python -m poetry install --extras docs
+          python -m poetry install --with docs
 
       - name: Build documentation
         run: |
diff --git a/.gitignore b/.gitignore
index 0eac6891..54182d4c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,18 +129,18 @@ dmypy.json
 .pyre/
 
 
-outputs/
-resources/
+/experiments/outputs/
+/resources/
 poetry.lock
 .vscode/*
-evaluation/results/*
-evaluation/CLEF-HIPE-2020-scorer/
-experiments/tmp_*
+/evaluation/results/*
+/evaluation/HIPE-scorer/
+/experiments/tmp_*
 preprocessing/toponymmatching/experiments/
-experiments/REL/
-evaluation/results_table.pkl
-experiments/explore_data.ipynb
-experiments/examine_res.py
+/experiments/REL/
+/evaluation/results_table.pkl
+/experiments/explore_data.ipynb
+/experiments/examine_res.py
 
 # Docs
 _build
diff --git a/README.md b/README.md
index 120cda15..802a575c 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,9 @@ T-Res relies on several resources in the following directory structure:
 
 ```
 T-Res/
+├── t_res/
+│   ├── geoparser/
+│   └── utils/
 ├── app/
 ├── evaluation/
 ├── examples/
@@ -38,11 +41,10 @@ T-Res/
 │               ├── linking_df_split.tsv [*?]
 │               ├── ner_fine_dev.json [*+?]
 │               └── ner_fine_train.json [*+?]
-├── geoparser/
 ├── resources/
 │   ├── deezymatch/
 │   │   └── data/
-│   │       └── w2v_ocr_pairs.txt [*+?]
+│   │       └── w2v_ocr_pairs.txt [?]
 │   ├── models/
 │   ├── news_datasets/
 │   ├── rel_db/
@@ -53,8 +55,7 @@ T-Res/
 │       ├── mentions_to_wikidata.json [*]
 │       ├── wikidta_gazetteer.csv [*]
 │       └── wikidata_to_mentions_normalized.json [*]
-├── tests/
-└── utils/
+└── tests/
 ```
 
 These resources are described in detail in the documentation. A question mark (`?`) is used to indicate resources which are only required for some approaches (for example, the `rel_db/embeddings_database.db` file is only required by the REL-based disambiguation approaches). Note that an asterisk (`*`) next to the resource means that the path can be changed when instantiating the T-Res objects, and a plus sign (`+`) if the name of the file can be changed in the instantiation.
@@ -68,7 +69,7 @@ This is an example on how to use the default T-Res pipeline:
 ```python
-from geoparser import pipeline
+from t_res.geoparser import pipeline
 
-geoparser = pipeline.Pipeline()
+geoparser = pipeline.Pipeline(resources_path="./resources")
 
 output = geoparser.run_text("She was on a visit at Chippenham.")
 ```
diff --git a/app/app_template.py b/app/app_template.py
index a034b275..92e435ae 100644
--- a/app/app_template.py
+++ b/app/app_template.py
@@ -8,18 +8,9 @@
 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 
-if "toponym-resolution" in __file__:
-    root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-else:
-    root_path = os.path.dirname(os.path.abspath(__file__))
-experiments_path = Path(root_path, "experiments")
-sys.path.insert(0, str(root_path))
-sys.path.insert(0, str(experiments_path))
-os.chdir(experiments_path)
-
 from config import CONFIG as pipeline_config
 
-from geoparser import pipeline
+from t_res.geoparser import pipeline
 
 geoparser = pipeline.Pipeline(**pipeline_config)
 
diff --git a/app/configs/t-res_deezy_reldisamb-wpubl-wmtops.py b/app/configs/t-res_deezy_reldisamb-wpubl-wmtops.py
index 574fc777..e4e6468f 100644
--- a/app/configs/t-res_deezy_reldisamb-wpubl-wmtops.py
+++ b/app/configs/t-res_deezy_reldisamb-wpubl-wmtops.py
@@ -1,29 +1,26 @@
-import os
-import sys
 import sqlite3
 from pathlib import Path
 
-# sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import pipeline, ranking, linking
+from t_res.geoparser import linking, pipeline, ranking
 
 # --------------------------------------
 # Instantiate the ranker:
 myranker = ranking.Ranker(
     method="deezymatch",
-    resources_path="../resources/wikidata/",
+    resources_path="./resources/",
     strvar_parameters={
         # Parameters to create the string pair dataset:
         "ocr_threshold": 60,
         "top_threshold": 85,
         "min_len": 5,
         "max_len": 15,
-        "w2v_ocr_path": str(Path("../resources/models/w2v/").resolve()),
+        "w2v_ocr_path": str(Path("./resources/models/w2v/").resolve()),
         "w2v_ocr_model": "w2v_*_news",
         "overwrite_dataset": False,
     },
     deezy_parameters={
         # Paths and filenames of DeezyMatch models and data:
-        "dm_path": str(Path("../resources/deezymatch/").resolve()),
+        "dm_path": str(Path("./resources/deezymatch/").resolve()),
         "dm_cands": "wkdtalts",
         "dm_model": "w2v_ocr",
         "dm_output": "deezymatch_on_the_fly",
@@ -38,15 +35,16 @@
     },
 )
 
-with sqlite3.connect("../resources/rel_db/embeddings_database.db") as conn:
+with sqlite3.connect("./resources/rel_db/embeddings_database.db") as conn:
     cursor = conn.cursor()
     mylinker = linking.Linker(
         method="reldisamb",
-        resources_path="../resources/",
+        resources_path="./resources/",
+        experiments_path="./experiments/",
         linking_resources=dict(),
         rel_params={
-            "model_path": "../resources/models/disambiguation/",
-            "data_path": "outputs/data/lwm/",
+            "model_path": "./resources/models/disambiguation/",
+            "data_path": "./experiments/outputs/data/lwm/",
             "training_split": "originalsplit",
             "db_embeddings": cursor,
             "with_publication": True,
diff --git a/app/run_local_app.py b/app/run_local_app.py
index b5ebeb33..9afd86b0 100755
--- a/app/run_local_app.py
+++ b/app/run_local_app.py
@@ -1,33 +1,23 @@
+import importlib
 import os
 import sys
 import time
 from pathlib import Path
-from typing import Union, Optional, List
+from typing import List, Optional, Union
 
 import uvicorn
 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 
-if "toponym-resolution" in __file__:
-    root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-else:
-    root_path = os.path.dirname(os.path.abspath(__file__))
-experiments_path = Path(root_path, "experiments")
-sys.path.insert(0, str(root_path))
-sys.path.insert(0, str(experiments_path))
-os.chdir(experiments_path)
+from t_res.geoparser import pipeline
 
 os.environ["APP_CONFIG_NAME"] = "t-res_deezy_reldisamb-wpubl-wmtops"
-import importlib
 
 config_mod = importlib.import_module(
     ".t-res_deezy_reldisamb-wpubl-wmtops", "app.configs"
 )
 pipeline_config = config_mod.CONFIG
 
-
-from geoparser import pipeline
-
 geoparser = pipeline.Pipeline(**pipeline_config)
 
 
diff --git a/app/template.Dockerfile b/app/template.Dockerfile
index 9bd4fc25..b1484973 100644
--- a/app/template.Dockerfile
+++ b/app/template.Dockerfile
@@ -4,10 +4,11 @@ ARG APP_NAME
 WORKDIR /app
 
 COPY pyproject.toml /app/pyproject.toml
+COPY t_res /app/t_res
 
 RUN pip3 install poetry
 RUN poetry config virtualenvs.create false
-RUN poetry install --no-dev
+RUN poetry install
 
 ENV APP_CONFIG_NAME=${APP_NAME}
 COPY app/app_template.py /app/app.py
diff --git a/docs/source/experiments/index.rst b/docs/source/experiments/index.rst
index b08d24f4..a1deead1 100644
--- a/docs/source/experiments/index.rst
+++ b/docs/source/experiments/index.rst
@@ -6,7 +6,7 @@ Follow these steps to reproduce the experiments in our paper.
 1. Obtain the external resources
 --------------------------------
 
-Follow the instructions in the ":doc:`resources`" page in the documentation
+Follow the instructions in the ":doc:`/getting-started/resources`" page in the documentation
 to obtain the resources required for running the experiments.
 
 2. Preparing the data
@@ -17,7 +17,7 @@ run the following command from the ``./experiments/`` folder:
 
 .. code-block:: bash
 
-    $ python ./prepare_data.py
+    $ python ./prepare_data.py -p ../resources
 
 This script takes care of downloading the LwM and HIPE datasets and format them
 as needed in the experiments.
@@ -30,7 +30,7 @@ folder:
 
 .. code-block:: bash
 
-    $ python ./toponym_resolution.py
+    $ python ./toponym_resolution.py -p ../resources
 
 This script does runs for all different scenarios reported in the experiments in
 the paper.
diff --git a/docs/source/getting-started/complete-tour.rst b/docs/source/getting-started/complete-tour.rst
index bee8401a..bf48f8df 100644
--- a/docs/source/getting-started/complete-tour.rst
+++ b/docs/source/getting-started/complete-tour.rst
@@ -47,7 +47,9 @@ To instantiate the default T-Res pipeline, do:
 
-    from geoparser import pipeline
+    from t_res.geoparser import pipeline
 
-    geoparser = pipeline.Pipeline()
+    geoparser = pipeline.Pipeline(resources_path="../resources/")
+
+.. note:: You should update the resources path argument to reflect your setup.
 
 You can also instantiate a pipeline using a customised Recogniser, Ranker and
 Linker. To see the different options, refer to the sections on instantiating
@@ -603,7 +605,7 @@ and ``levenshtein`` respectively), instantiate it as follows, changing the
 
     myranker = ranking.Ranker(
         method="perfectmatch", # or "partialmatch" or "levenshtein"
-        resources_path="resources/wikidata/",
+        resources_path="resources/",
     )
 
 Note that ``resources_path`` should contain the path to the directory
@@ -668,7 +670,7 @@ The Ranker can then be instantiated as follows:
     myranker = ranking.Ranker(
         # Generic Ranker parameters:
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path="resources/",
         # Parameters to create the string pair dataset:
         strvar_parameters=dict(),
         # Parameters to train, load and use a DeezyMatch model:
@@ -757,7 +759,7 @@ The Ranker can then be instantiated as follows:
     myranker = ranking.Ranker(
         # Generic Ranker parameters:
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path="resources/",
         # Parameters to create the string pair dataset:
         strvar_parameters={
             "ocr_threshold": 60,
diff --git a/docs/source/getting-started/resources.rst b/docs/source/getting-started/resources.rst
index 6b72addc..8745d4c8 100644
--- a/docs/source/getting-started/resources.rst
+++ b/docs/source/getting-started/resources.rst
@@ -561,6 +561,9 @@ for the mentioned resources that are required in order to run the pipeline.
 ::
 
     T-Res/
+    ├── t_res/
+    │   ├── geoparser/
+    │   └── utils/
     ├── app/
     ├── evaluation/
     ├── examples/
@@ -571,7 +574,6 @@ for the mentioned resources that are required in order to run the pipeline.
     │               ├── linking_df_split.tsv [*?]
     │               ├── ner_fine_dev.json [*+?]
     │               └── ner_fine_train.json [*+?]
-    ├── geoparser/
     ├── resources/
     │   ├── deezymatch/
     │   │   └── data/
@@ -586,8 +588,7 @@ for the mentioned resources that are required in order to run the pipeline.
     │       ├── mentions_to_wikidata.json [*]
     │       ├── wikidta_gazetteer.csv [*]
     │       └── wikidata_to_mentions_normalized.json [*]
-    ├── tests/
-    └── utils/
+    └── tests/
 
 A question mark (``?``) is used to indicate resources which are only required
 for some approaches (for example, the ``rel_db/embeddings_database.db`` file
diff --git a/docs/source/reference/geoparser/linker.rst b/docs/source/reference/geoparser/linker.rst
index 26ee9990..e6bb8091 100644
--- a/docs/source/reference/geoparser/linker.rst
+++ b/docs/source/reference/geoparser/linker.rst
@@ -1,8 +1,8 @@
-``geoparser.linking.Linker``
+``t_res.geoparser.linking.Linker``
 ============================
 
-.. autoclass:: geoparser.linking.Linker
+.. autoclass:: t_res.geoparser.linking.Linker
     :members:
     :undoc-members:
 
-.. autoattribute:: geoparser.linking.RANDOM_SEED
\ No newline at end of file
+.. autoattribute:: t_res.geoparser.linking.RANDOM_SEED
\ No newline at end of file
diff --git a/docs/source/reference/geoparser/pipeline.rst b/docs/source/reference/geoparser/pipeline.rst
index 392610ae..95e68b45 100644
--- a/docs/source/reference/geoparser/pipeline.rst
+++ b/docs/source/reference/geoparser/pipeline.rst
@@ -1,6 +1,6 @@
-``geoparser.pipeline.Pipeline``
+``t_res.geoparser.pipeline.Pipeline``
 ===============================
 
-.. autoclass:: geoparser.pipeline.Pipeline
+.. autoclass:: t_res.geoparser.pipeline.Pipeline
     :members:
     :undoc-members:
diff --git a/docs/source/reference/geoparser/ranker.rst b/docs/source/reference/geoparser/ranker.rst
index 659f928a..c31dd884 100644
--- a/docs/source/reference/geoparser/ranker.rst
+++ b/docs/source/reference/geoparser/ranker.rst
@@ -1,6 +1,6 @@
-``geoparser.ranking. Ranker``
+``t_res.geoparser.ranking.Ranker``
 =============================
 
-.. autoclass:: geoparser.ranking.Ranker
+.. autoclass:: t_res.geoparser.ranking.Ranker
     :members:
     :undoc-members:
diff --git a/docs/source/reference/geoparser/recogniser.rst b/docs/source/reference/geoparser/recogniser.rst
index 5b4543ca..d437b140 100644
--- a/docs/source/reference/geoparser/recogniser.rst
+++ b/docs/source/reference/geoparser/recogniser.rst
@@ -1,6 +1,6 @@
-``geoparser.recogniser.Recogniser``
+``t_res.geoparser.recogniser.Recogniser``
 ===================================
 
-.. autoclass:: geoparser.recogniser.Recogniser
+.. autoclass:: t_res.geoparser.recogniser.Recogniser
     :members:
     :undoc-members:
diff --git a/docs/source/reference/utils/deezy_processing.rst b/docs/source/reference/utils/deezy_processing.rst
index 6f9ae76f..aa80e247 100644
--- a/docs/source/reference/utils/deezy_processing.rst
+++ b/docs/source/reference/utils/deezy_processing.rst
@@ -1,10 +1,10 @@
-``utils.deezy_processing`` module
+``t_res.utils.deezy_processing`` module
 =================================
 
-.. autofunction:: utils.deezy_processing.obtain_matches
+.. autofunction:: t_res.utils.deezy_processing.obtain_matches
 
-.. autofunction:: utils.deezy_processing.create_training_set
+.. autofunction:: t_res.utils.deezy_processing.create_training_set
 
-.. autofunction:: utils.deezy_processing.train_deezy_model
+.. autofunction:: t_res.utils.deezy_processing.train_deezy_model
 
-.. autofunction:: utils.deezy_processing.generate_candidates
\ No newline at end of file
+.. autofunction:: t_res.utils.deezy_processing.generate_candidates
\ No newline at end of file
diff --git a/docs/source/reference/utils/get_data.rst b/docs/source/reference/utils/get_data.rst
index f3edecb1..c3016cf1 100644
--- a/docs/source/reference/utils/get_data.rst
+++ b/docs/source/reference/utils/get_data.rst
@@ -1,6 +1,6 @@
-``utils.get_data`` module
+``t_res.utils.get_data`` module
 =========================
 
-.. autofunction:: utils.get_data.download_lwm_data
+.. autofunction:: t_res.utils.get_data.download_lwm_data
 
-.. autofunction:: utils.get_data.download_hipe_data
\ No newline at end of file
+.. autofunction:: t_res.utils.get_data.download_hipe_data
\ No newline at end of file
diff --git a/docs/source/reference/utils/ner.rst b/docs/source/reference/utils/ner.rst
index 363f5484..d8d3dc0b 100644
--- a/docs/source/reference/utils/ner.rst
+++ b/docs/source/reference/utils/ner.rst
@@ -1,18 +1,18 @@
-``utils.ner`` module
+``t_res.utils.ner`` module
 ====================
 
-.. autofunction:: utils.ner.training_tokenize_and_align_labels
+.. autofunction:: t_res.utils.ner.training_tokenize_and_align_labels
 
-.. autofunction:: utils.ner.collect_named_entities
+.. autofunction:: t_res.utils.ner.collect_named_entities
 
-.. autofunction:: utils.ner.aggregate_mentions
+.. autofunction:: t_res.utils.ner.aggregate_mentions
 
-.. autofunction:: utils.ner.fix_capitalization
+.. autofunction:: t_res.utils.ner.fix_capitalization
 
-.. autofunction:: utils.ner.fix_hyphens
+.. autofunction:: t_res.utils.ner.fix_hyphens
 
-.. autofunction:: utils.ner.fix_nested
+.. autofunction:: t_res.utils.ner.fix_nested
 
-.. autofunction:: utils.ner.fix_startEntity
+.. autofunction:: t_res.utils.ner.fix_startEntity
 
-.. autofunction:: utils.ner.aggregate_entities
\ No newline at end of file
+.. autofunction:: t_res.utils.ner.aggregate_entities
\ No newline at end of file
diff --git a/docs/source/reference/utils/preprocess_data.rst b/docs/source/reference/utils/preprocess_data.rst
index 938773a5..73c73d8b 100644
--- a/docs/source/reference/utils/preprocess_data.rst
+++ b/docs/source/reference/utils/preprocess_data.rst
@@ -1,20 +1,20 @@
-``utils.preprocess_data`` module
+``t_res.utils.preprocess_data`` module
 ================================
 
-.. automodule:: utils.preprocess_data
+.. automodule:: t_res.utils.preprocess_data
 
-.. autofunction:: utils.preprocess_data.turn_wikipedia2wikidata
+.. autofunction:: t_res.utils.preprocess_data.turn_wikipedia2wikidata
 
-.. autofunction:: utils.preprocess_data.reconstruct_sentences
+.. autofunction:: t_res.utils.preprocess_data.reconstruct_sentences
 
-.. autofunction:: utils.preprocess_data.process_lwm_for_ner
+.. autofunction:: t_res.utils.preprocess_data.process_lwm_for_ner
 
-.. autofunction:: utils.preprocess_data.process_lwm_for_linking
+.. autofunction:: t_res.utils.preprocess_data.process_lwm_for_linking
 
-.. autofunction:: utils.preprocess_data.aggregate_hipe_entities
+.. autofunction:: t_res.utils.preprocess_data.aggregate_hipe_entities
 
-.. autofunction:: utils.preprocess_data.process_hipe_for_linking
+.. autofunction:: t_res.utils.preprocess_data.process_hipe_for_linking
 
-.. autofunction:: utils.preprocess_data.process_tsv
+.. autofunction:: t_res.utils.preprocess_data.process_tsv
 
-.. autofunction:: utils.preprocess_data.fine_to_coarse
\ No newline at end of file
+.. autofunction:: t_res.utils.preprocess_data.fine_to_coarse
\ No newline at end of file
diff --git a/docs/source/reference/utils/process_data.rst b/docs/source/reference/utils/process_data.rst
index 1f5f2066..25798b04 100644
--- a/docs/source/reference/utils/process_data.rst
+++ b/docs/source/reference/utils/process_data.rst
@@ -1,20 +1,20 @@
-``utils.process_data`` module
+``t_res.utils.process_data`` module
 =============================
 
-.. autofunction:: utils.process_data.eval_with_exception
+.. autofunction:: t_res.utils.process_data.eval_with_exception
 
-.. autofunction:: utils.process_data.prepare_sents
+.. autofunction:: t_res.utils.process_data.prepare_sents
 
-.. autofunction:: utils.process_data.align_gold
+.. autofunction:: t_res.utils.process_data.align_gold
 
-.. autofunction:: utils.process_data.postprocess_predictions
+.. autofunction:: t_res.utils.process_data.postprocess_predictions
 
-.. autofunction:: utils.process_data.ner_and_process
+.. autofunction:: t_res.utils.process_data.ner_and_process
 
-.. autofunction:: utils.process_data.update_with_linking
+.. autofunction:: t_res.utils.process_data.update_with_linking
 
-.. autofunction:: utils.process_data.update_with_skyline
+.. autofunction:: t_res.utils.process_data.update_with_skyline
 
-.. autofunction:: utils.process_data.prepare_storing_links
+.. autofunction:: t_res.utils.process_data.prepare_storing_links
 
-.. autofunction:: utils.process_data.store_for_scorer
+.. autofunction:: t_res.utils.process_data.store_for_scorer
diff --git a/docs/source/reference/utils/process_wikipedia.rst b/docs/source/reference/utils/process_wikipedia.rst
index 69f7e686..807ef9ee 100644
--- a/docs/source/reference/utils/process_wikipedia.rst
+++ b/docs/source/reference/utils/process_wikipedia.rst
@@ -1,8 +1,8 @@
-``utils.process_wikipedia`` module
+``t_res.utils.process_wikipedia`` module
 ==================================
 
-.. autofunction:: utils.process_wikipedia.make_wikilinks_consistent
+.. autofunction:: t_res.utils.process_wikipedia.make_wikilinks_consistent
 
-.. autofunction:: utils.process_wikipedia.make_wikipedia2wikidata_consisent
+.. autofunction:: t_res.utils.process_wikipedia.make_wikipedia2wikidata_consisent
 
-.. autofunction:: utils.process_wikipedia.title_to_id
\ No newline at end of file
+.. autofunction:: t_res.utils.process_wikipedia.title_to_id
\ No newline at end of file
diff --git a/docs/source/reference/utils/rel/entity_disambiguation.rst b/docs/source/reference/utils/rel/entity_disambiguation.rst
index 1ace598e..9a690635 100644
--- a/docs/source/reference/utils/rel/entity_disambiguation.rst
+++ b/docs/source/reference/utils/rel/entity_disambiguation.rst
@@ -1,8 +1,8 @@
-``utils.REL.entity_disambiguation`` module
+``t_res.utils.REL.entity_disambiguation`` module
 ==========================================
 
-.. autoclass:: utils.REL.entity_disambiguation.EntityDisambiguation
+.. autoclass:: t_res.utils.REL.entity_disambiguation.EntityDisambiguation
     :members:
     :undoc-members:
 
-.. autoattribute:: utils.REL.entity_disambiguation.RANDOM_SEED
\ No newline at end of file
+.. autoattribute:: t_res.utils.REL.entity_disambiguation.RANDOM_SEED
\ No newline at end of file
diff --git a/docs/source/reference/utils/rel/mulrel_ranker.rst b/docs/source/reference/utils/rel/mulrel_ranker.rst
index 7e4e77ea..a7352632 100644
--- a/docs/source/reference/utils/rel/mulrel_ranker.rst
+++ b/docs/source/reference/utils/rel/mulrel_ranker.rst
@@ -1,10 +1,10 @@
-``utils.REL.mulrel_ranker`` module
+``t_res.utils.REL.mulrel_ranker`` module
 ==================================
 
-.. autoclass:: utils.REL.mulrel_ranker.PreRank
+.. autoclass:: t_res.utils.REL.mulrel_ranker.PreRank
     :members:
     :undoc-members:
 
-.. autoclass:: utils.REL.mulrel_ranker.MulRelRanker
+.. autoclass:: t_res.utils.REL.mulrel_ranker.MulRelRanker
     :members:
     :undoc-members:
diff --git a/docs/source/reference/utils/rel/utils.rst b/docs/source/reference/utils/rel/utils.rst
index 74641788..1597f964 100644
--- a/docs/source/reference/utils/rel/utils.rst
+++ b/docs/source/reference/utils/rel/utils.rst
@@ -1,10 +1,10 @@
-``utils.REL.utils`` module
+``t_res.utils.REL.utils`` module
 ==========================
 
-.. autofunction:: utils.REL.utils.flatten_list_of_lists
+.. autofunction:: t_res.utils.REL.utils.flatten_list_of_lists
 
-.. autofunction:: utils.REL.utils.make_equal_len
+.. autofunction:: t_res.utils.REL.utils.make_equal_len
 
-.. autofunction:: utils.REL.utils.is_important_word
+.. autofunction:: t_res.utils.REL.utils.is_important_word
 
-.. autoattribute:: utils.REL.utils.STOPWORDS
\ No newline at end of file
+.. autoattribute:: t_res.utils.REL.utils.STOPWORDS
\ No newline at end of file
diff --git a/docs/source/reference/utils/rel/vocabulary.rst b/docs/source/reference/utils/rel/vocabulary.rst
index 5ab8da92..3516423d 100644
--- a/docs/source/reference/utils/rel/vocabulary.rst
+++ b/docs/source/reference/utils/rel/vocabulary.rst
@@ -1,6 +1,6 @@
-``utils.REL.vocabulary`` module
+``t_res.utils.REL.vocabulary`` module
 ===============================
 
-.. autoclass:: utils.REL.vocabulary.Vocabulary
+.. autoclass:: t_res.utils.REL.vocabulary.Vocabulary
     :members:
     :undoc-members:
diff --git a/docs/source/reference/utils/rel_e2e.rst b/docs/source/reference/utils/rel_e2e.rst
index 7145d30c..64c3130a 100644
--- a/docs/source/reference/utils/rel_e2e.rst
+++ b/docs/source/reference/utils/rel_e2e.rst
@@ -1,16 +1,16 @@
-``utils.rel_e2e`` module
+``t_res.utils.rel_e2e`` module
 ========================
 
-.. autofunction:: utils.rel_e2e.rel_end_to_end
+.. autofunction:: t_res.utils.rel_e2e.rel_end_to_end
 
-.. autofunction:: utils.rel_e2e.get_rel_from_api
+.. autofunction:: t_res.utils.rel_e2e.get_rel_from_api
 
-.. autofunction:: utils.rel_e2e.match_wikipedia_to_wikidata
+.. autofunction:: t_res.utils.rel_e2e.match_wikipedia_to_wikidata
 
-.. autofunction:: utils.rel_e2e.match_ent
+.. autofunction:: t_res.utils.rel_e2e.match_ent
 
-.. autofunction:: utils.rel_e2e.postprocess_rel
+.. autofunction:: t_res.utils.rel_e2e.postprocess_rel
 
-.. autofunction:: utils.rel_e2e.store_rel
+.. autofunction:: t_res.utils.rel_e2e.store_rel
 
-.. autofunction:: utils.rel_e2e.run_rel_experiments
\ No newline at end of file
+.. autofunction:: t_res.utils.rel_e2e.run_rel_experiments
\ No newline at end of file
diff --git a/docs/source/reference/utils/rel_utils.rst b/docs/source/reference/utils/rel_utils.rst
index d3fb3638..0ce4cb52 100644
--- a/docs/source/reference/utils/rel_utils.rst
+++ b/docs/source/reference/utils/rel_utils.rst
@@ -1,14 +1,14 @@
-``utils.rel_utils`` module
+``t_res.utils.rel_utils`` module
 ==========================
 
-.. autofunction:: utils.rel_utils.get_db_emb
+.. autofunction:: t_res.utils.rel_utils.get_db_emb
 
-.. autofunction:: utils.rel_utils.eval_with_exception
+.. autofunction:: t_res.utils.rel_utils.eval_with_exception
 
-.. autofunction:: utils.rel_utils.prepare_initial_data
+.. autofunction:: t_res.utils.rel_utils.prepare_initial_data
 
-.. autofunction:: utils.rel_utils.rank_candidates
+.. autofunction:: t_res.utils.rel_utils.rank_candidates
 
-.. autofunction:: utils.rel_utils.add_publication
+.. autofunction:: t_res.utils.rel_utils.add_publication
 
-.. autofunction:: utils.rel_utils.prepare_rel_trainset
\ No newline at end of file
+.. autofunction:: t_res.utils.rel_utils.prepare_rel_trainset
\ No newline at end of file
diff --git a/evaluation/README.md b/evaluation/README.md
index d4f44b2c..ad1f4b19 100644
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -4,7 +4,7 @@ First, clone the [CLEF-HIPE-2020-scorer](https://github.com/impresso/CLEF-HIPE-2
 
 ```
-git clone https://github.com/impresso/CLEF-HIPE-2020-scorer.git
+git clone https://github.com/impresso/CLEF-HIPE-2020-scorer.git HIPE-scorer
-cd CLEF-HIPE-2020-scorer
+cd HIPE-scorer
 git checkout ac5c876eba58065195024cff550c2b5056986f7b
 ```
 
@@ -12,10 +12,10 @@ Then, to run the script:
 
 To assess the performance on toponym recognition:
 ```bash
-python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nerc_coarse --outdir results/
+python HIPE-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nerc_coarse --outdir results/
 ```
 
 To assess the performance on toponym resolution:
 ```bash
-python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nel --outdir results/
+python HIPE-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nel --outdir results/
 ```
diff --git a/evaluation/display_results.py b/evaluation/display_results.py
index e42718cd..67e32af0 100644
--- a/evaluation/display_results.py
+++ b/evaluation/display_results.py
@@ -9,8 +9,8 @@
     "ignore", category=FutureWarning
 )  # To fix properly in the future
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath("HIPE-scorer/"))
+# Add "./HIPE-scorer" to path to import clef_evaluation
+sys.path.insert(0, os.path.abspath("./HIPE-scorer/"))
 import clef_evaluation
 
 dApprNames = dict()
diff --git a/examples/load_use_ner_model.ipynb b/examples/load_use_ner_model.ipynb
index 6890cfd9..c4be7f47 100644
--- a/examples/load_use_ner_model.ipynb
+++ b/examples/load_use_ner_model.ipynb
@@ -21,8 +21,8 @@
     "import os\n",
     "import sys\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import recogniser"
+    "\n",
+    "from t_res.geoparser import recogniser"
    ]
   },
   {
@@ -135,7 +135,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/run_pipeline_basic.ipynb b/examples/run_pipeline_basic.ipynb
index 17417872..aa8aba10 100644
--- a/examples/run_pipeline_basic.ipynb
+++ b/examples/run_pipeline_basic.ipynb
@@ -19,8 +19,7 @@
     "import os\n",
     "import sys\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline"
+    "from t_res.geoparser import pipeline"
    ]
   },
   {
@@ -37,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "geoparser = pipeline.Pipeline()"
+    "geoparser = pipeline.Pipeline(resources_path=\"../resources/\")"
    ]
   },
   {
@@ -134,6 +133,20 @@
     "disamb_output = geoparser.run_disambiguation(mentions, candidates)\n",
     "print(disamb_output)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -152,7 +165,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/run_pipeline_deezy_mostpopular.ipynb b/examples/run_pipeline_deezy_mostpopular.ipynb
index 8392dd90..9129b8b3 100644
--- a/examples/run_pipeline_deezy_mostpopular.ipynb
+++ b/examples/run_pipeline_deezy_mostpopular.ipynb
@@ -17,8 +17,8 @@
     "import os\n",
     "import sys\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -31,7 +31,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    strvar_parameters={\n",
     "        # Parameters to create the string pair dataset:\n",
     "        \"ocr_threshold\": 60,\n",
@@ -91,6 +91,13 @@
     "for r in resolved:\n",
     "    print(r)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -109,7 +116,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/run_pipeline_deezy_reldisamb+wmtops.ipynb b/examples/run_pipeline_deezy_reldisamb+wmtops.ipynb
index b79ec83d..8f89e400 100644
--- a/examples/run_pipeline_deezy_reldisamb+wmtops.ipynb
+++ b/examples/run_pipeline_deezy_reldisamb+wmtops.ipynb
@@ -20,8 +20,8 @@
     "import sys\n",
     "import sqlite3\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -34,7 +34,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    strvar_parameters=dict(),\n",
     "    deezy_parameters={\n",
     "        # Paths and filenames of DeezyMatch models and data:\n",
diff --git a/examples/run_pipeline_deezy_reldisamb+wpubl+wmtops.ipynb b/examples/run_pipeline_deezy_reldisamb+wpubl+wmtops.ipynb
index f7b9ec99..6e74593f 100644
--- a/examples/run_pipeline_deezy_reldisamb+wpubl+wmtops.ipynb
+++ b/examples/run_pipeline_deezy_reldisamb+wpubl+wmtops.ipynb
@@ -20,8 +20,8 @@
     "import sys\n",
     "import sqlite3\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -34,7 +34,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    strvar_parameters={\n",
     "        # Parameters to create the string pair dataset:\n",
     "        \"ocr_threshold\": 60,\n",
diff --git a/examples/run_pipeline_deezy_reldisamb+wpubl.ipynb b/examples/run_pipeline_deezy_reldisamb+wpubl.ipynb
index 3375ce41..688a81de 100644
--- a/examples/run_pipeline_deezy_reldisamb+wpubl.ipynb
+++ b/examples/run_pipeline_deezy_reldisamb+wpubl.ipynb
@@ -20,8 +20,8 @@
     "import sys\n",
     "import sqlite3\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -34,7 +34,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    strvar_parameters=dict(),\n",
     "    deezy_parameters={\n",
     "        # Paths and filenames of DeezyMatch models and data:\n",
diff --git a/examples/run_pipeline_deezy_reldisamb.ipynb b/examples/run_pipeline_deezy_reldisamb.ipynb
index 4d7bf262..445c1a7d 100644
--- a/examples/run_pipeline_deezy_reldisamb.ipynb
+++ b/examples/run_pipeline_deezy_reldisamb.ipynb
@@ -20,8 +20,8 @@
     "import sys\n",
     "import sqlite3\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -34,7 +34,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    mentions_to_wikidata=dict(),\n",
     "    wikidata_to_mentions=dict(),\n",
     "    strvar_parameters={\n",
@@ -125,6 +125,13 @@
     "for r in resolved:\n",
     "    print(r)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -143,7 +150,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/run_pipeline_modular.ipynb b/examples/run_pipeline_modular.ipynb
index 091422b9..40e5aac1 100644
--- a/examples/run_pipeline_modular.ipynb
+++ b/examples/run_pipeline_modular.ipynb
@@ -10,8 +10,8 @@
     "import sys\n",
     "import sqlite3\n",
     "from pathlib import Path\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "\n",
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -24,7 +24,7 @@
     "# Instantiate the ranker:\n",
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     "    strvar_parameters={\n",
     "        # Parameters to create the string pair dataset:\n",
     "        \"ocr_threshold\": 60,\n",
@@ -135,6 +135,13 @@
    "source": [
     "output_disamb"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -153,7 +160,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/run_pipeline_perfect_mostpopular.ipynb b/examples/run_pipeline_perfect_mostpopular.ipynb
index 7f6cae50..4a11aa63 100644
--- a/examples/run_pipeline_perfect_mostpopular.ipynb
+++ b/examples/run_pipeline_perfect_mostpopular.ipynb
@@ -17,8 +17,7 @@
     "import os\n",
     "import sys\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import pipeline, ranking, linking"
+    "from t_res.geoparser import pipeline, ranking, linking"
    ]
   },
   {
@@ -29,7 +28,7 @@
    "source": [
     "myranker = ranking.Ranker(\n",
     "    method=\"perfectmatch\",\n",
-    "    resources_path=\"../resources/wikidata/\",\n",
+    "    resources_path=\"../resources/\",\n",
     ")\n"
    ]
   },
@@ -64,6 +63,13 @@
     "for r in resolved:\n",
     "    print(r)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -82,7 +88,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/train_use_deezy_model_1.ipynb b/examples/train_use_deezy_model_1.ipynb
index e2ce98f9..f379360a 100644
--- a/examples/train_use_deezy_model_1.ipynb
+++ b/examples/train_use_deezy_model_1.ipynb
@@ -52,8 +52,7 @@
     "import sys\n",
     "from pathlib import Path\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import ranking"
+    "from t_res.geoparser import ranking"
    ]
   },
   {
@@ -72,7 +71,7 @@
    "source": [
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\", # Here we're telling the ranker to use DeezyMatch.\n",
-    "    resources_path=\"../resources/wikidata/\", # Here, the path to the Wikidata resources.\n",
+    "    resources_path=\"../resources/\", # Here, the path to the resources directory.\n",
     "    # Parameters to create the string pair dataset:\n",
     "    strvar_parameters={\n",
     "        \"ocr_threshold\": 60,\n",
@@ -154,8 +153,26 @@
    "source": [
     "# Find candidates given a toponym:\n",
     "toponym = \"Manchefter\"\n",
-    "print(myranker.find_candidates([{\"mention\": toponym}])[0][toponym])"
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Find candidates given a toponym:\n",
+    "toponym = \"Londen\"\n",
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -174,7 +191,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/train_use_deezy_model_2.ipynb b/examples/train_use_deezy_model_2.ipynb
index 5045329e..4214457f 100644
--- a/examples/train_use_deezy_model_2.ipynb
+++ b/examples/train_use_deezy_model_2.ipynb
@@ -47,8 +47,7 @@
     "import sys\n",
     "from pathlib import Path\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import ranking"
+    "from t_res.geoparser import ranking"
    ]
   },
   {
@@ -67,7 +66,7 @@
    "source": [
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\", # Here we're telling the ranker to use DeezyMatch.\n",
-    "    resources_path=\"../resources/wikidata/\", # Here, the path to the Wikidata resources.\n",
+    "    resources_path=\"../resources/\", # Here, the path to the resources directory.\n",
     "    # Parameters to create the string pair dataset:\n",
     "    strvar_parameters={\n",
     "        \"overwrite_dataset\": False,\n",
@@ -124,7 +123,7 @@
    "outputs": [],
    "source": [
     "# Train a DeezyMatch model if needed:\n",
-    "myranker.train()"
+    "myranker.mentions_to_wikidata = myranker.train()"
    ]
   },
   {
@@ -143,8 +142,26 @@
    "source": [
     "# Find candidates given a toponym:\n",
     "toponym = \"Manchefter\"\n",
-    "print(myranker.find_candidates([{\"mention\": toponym}])[0][toponym])"
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Find candidates given a toponym:\n",
+    "toponym = \"Londen\"\n",
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -163,7 +180,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/train_use_deezy_model_3.ipynb b/examples/train_use_deezy_model_3.ipynb
index 3b9a0c35..28aa9f78 100644
--- a/examples/train_use_deezy_model_3.ipynb
+++ b/examples/train_use_deezy_model_3.ipynb
@@ -49,8 +49,7 @@
     "import sys\n",
     "from pathlib import Path\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import ranking"
+    "from t_res.geoparser import ranking"
    ]
   },
   {
@@ -69,7 +68,7 @@
    "source": [
     "myranker = ranking.Ranker(\n",
     "    method=\"deezymatch\", # Here we're telling the ranker to use DeezyMatch.\n",
-    "    resources_path=\"../resources/wikidata/\", # Here, the path to the Wikidata resources.\n",
+    "    resources_path=\"../resources/\", # Here, the path to the resources directory.\n",
     "    # Parameters to create the string pair dataset:\n",
     "    strvar_parameters={\n",
     "        \"overwrite_dataset\": False,\n",
@@ -127,7 +126,7 @@
    "source": [
     "# Find candidates given a toponym:\n",
     "toponym = \"Ashton-cnderLyne\"\n",
-    "print(myranker.find_candidates([{\"mention\": toponym}])[0][toponym])"
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
    ]
   },
   {
@@ -138,8 +137,15 @@
    "source": [
     "# Find candidates given a toponym:\n",
     "toponym = \"Shefiield\"\n",
-    "print(myranker.find_candidates([{\"mention\": toponym}])[0][toponym])"
+    "print(myranker.find_candidates([{\"mention\": toponym}])[toponym])"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -158,7 +164,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/examples/train_use_ner_model.ipynb b/examples/train_use_ner_model.ipynb
index c0e3542e..aa54766f 100644
--- a/examples/train_use_ner_model.ipynb
+++ b/examples/train_use_ner_model.ipynb
@@ -21,8 +21,7 @@
     "import os\n",
     "import sys\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.pardir))\n",
-    "from geoparser import recogniser"
+    "from t_res.geoparser import recogniser"
    ]
   },
   {
@@ -138,6 +137,13 @@
     "predictions = myner.ner_predict(sentence)\n",
     "print([pred for pred in predictions if pred[\"entity\"] != \"O\"]) # Note that, if you've trained the model in the test mode, the model will probably not identify \"Sheffield\" as a location."
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -156,7 +162,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
diff --git a/experiments/README.md b/experiments/README.md
index 1195cc03..bc7a3111 100644
--- a/experiments/README.md
+++ b/experiments/README.md
@@ -20,16 +20,22 @@ You will also need the [word2vec embeddings](TODO: add link) trained from 19th C
 
 To create the datasets that we use in the experiments presented in the paper, run the following command:
 ```bash
-python prepare_data.py
+python prepare_data.py -p ../resources
 ```
+
+> **_NOTE:_** Use the ``-p`` flag to indicate the path to your resources directory.
+
 This script takes care of downloading the LwM and HIPE datasets and format them as needed in the experiments.
 
 ### 3. Running the experiments
 
 To run the experiments, run the following script:
 ```bash
-python toponym_resolution.py
+python toponym_resolution.py -p ../resources
 ```
+
+> **_NOTE:_** Use the ``-p`` flag to indicate the path to your resources directory.
+
 This script does runs for all different scenarios reported in the experiments in the paper.
 
 ### 4. Evaluate
diff --git a/geoparser/__init__.py b/experiments/__init__.py
similarity index 100%
rename from geoparser/__init__.py
rename to experiments/__init__.py
diff --git a/experiments/experiment.py b/experiments/experiment.py
index 2e758586..1ab1412d 100644
--- a/experiments/experiment.py
+++ b/experiments/experiment.py
@@ -7,9 +7,8 @@
 import pandas as pd
 from tqdm import tqdm
 
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import linking, ranking, recogniser
-from utils import process_data, rel_utils
+from t_res.geoparser import linking, ranking, recogniser
+from t_res.utils import process_data, rel_utils
 
 
 class Experiment:
@@ -467,10 +466,12 @@ def create_mentions_df(self) -> pd.DataFrame:
             data=rows,
         )
 
+        print(f"Saving to {os.path.join(self.data_path,self.dataset,f'{self.myner.model}_{cand_approach}')}")
         output_path = (
-            self.data_path + self.dataset + "/" + self.myner.model + "_" + cand_approach
+            os.path.join(self.data_path,self.dataset,f"{self.myner.model}_{cand_approach}")
         )
 
+
         # List of columns to merge (i.e. columns where we have indicated
         # out data splits), and "article_id", the columns on which we
         # will merge the data:
@@ -808,6 +809,6 @@ def linking_experiments(self) -> None:
         # -----------------------------------------------
         # Run end-to-end REL experiments:
         if self.rel_experiments == True:
-            from utils import rel_e2e
+            from t_res.utils import rel_e2e
 
             rel_e2e.run_rel_experiments(self)
diff --git a/experiments/prepare_data.py b/experiments/prepare_data.py
index c84cbe83..89ff300a 100644
--- a/experiments/prepare_data.py
+++ b/experiments/prepare_data.py
@@ -1,24 +1,36 @@
-import os
-import sys
-
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
 import json
 import os
 import random
+import sys
+from argparse import ArgumentParser
 from pathlib import Path
 
 import pandas as pd
 from sklearn.model_selection import train_test_split
 
-from utils import get_data, preprocess_data
+from t_res.utils import get_data, preprocess_data
 
 RANDOM_SEED = 42
 random.seed(RANDOM_SEED)
 
-resources = "../resources/"  # path to resources
-output_path_lwm = "../experiments/outputs/data/lwm/"
-output_path_hipe = "../experiments/outputs/data/hipe/"
+parser = ArgumentParser()
+parser.add_argument(
+    "-p",
+    "--path",
+    dest="path",
+    help="path to resources directory",
+    action="store",
+    type=str,
+)
+
+args = parser.parse_args()
+
+resources_dir = args.path
+
+current_dir = Path(__file__).parent.resolve()
+output_path_lwm = os.path.join(current_dir, "outputs/data/lwm/")
+output_path_hipe = os.path.join(current_dir, "outputs/data/hipe/")
+
 # Create output folders for processed data if they do not exist:
 Path(output_path_lwm).mkdir(parents=True, exist_ok=True)
 Path(output_path_hipe).mkdir(parents=True, exist_ok=True)
@@ -32,7 +44,7 @@
 gazetteer_ids = set(
     list(
         pd.read_csv(
-            os.path.join(resources, "wikidata", "wikidata_gazetteer.csv"),
+            os.path.join(resources_dir, "wikidata", "wikidata_gazetteer.csv"),
             low_memory=False,
         )["wikidata_id"].unique()
     )
@@ -44,7 +56,7 @@
 # ------------------------------------------------------
 
 # Load publication metadata
-with open(os.path.join(f"{resources}", "publication_metadata.json")) as jsonfile:
+with open(os.path.join(resources_dir, "publication_metadata.json")) as jsonfile:
     df_metadata = json.load(jsonfile)
 
 dict_titles = {k: df_metadata[k]["publication_title"] for k in df_metadata}
@@ -57,20 +69,18 @@
 # ------------------------------------------------------
 
 # Path of the manually annotated data:
-news_path = os.path.join(f"{resources}", "news_datasets")
+news_path = os.path.join(resources_dir, "news_datasets")
 
 # Download the annotated data from the BL repository:
 get_data.download_lwm_data(news_path)
 
 # Training data from the manually annotated data:
 topres_path_train = os.path.join(
-    f"{resources}", "news_datasets", "topRes19th_v2", "train"
+    resources_dir, "news_datasets", "topRes19th_v2", "train"
 )
 
 # Test data from the manually annotated data:
-topres_path_test = os.path.join(
-    f"{resources}", "news_datasets", "topRes19th_v2", "test"
-)
+topres_path_test = os.path.join(resources_dir, "news_datasets", "topRes19th_v2", "test")
 
 # Process data for training a named entity recognition model:
 lwm_df = preprocess_data.process_lwm_for_ner(topres_path_train)
@@ -104,8 +114,12 @@
 )
 
 # Process data for the resolution experiments:
-lwm_train_df = preprocess_data.process_lwm_for_linking(topres_path_train, gazetteer_ids)
-lwm_test_df = preprocess_data.process_lwm_for_linking(topres_path_test, gazetteer_ids)
+lwm_train_df = preprocess_data.process_lwm_for_linking(
+    resources_dir, topres_path_train, gazetteer_ids
+)
+lwm_test_df = preprocess_data.process_lwm_for_linking(
+    resources_dir, topres_path_test, gazetteer_ids
+)
 
 # Split train set into train and dev set, by article:
 lwm_train_df, lwm_dev_df = train_test_split(
diff --git a/experiments/toponym_resolution.py b/experiments/toponym_resolution.py
index 0fb55bd8..dee4af56 100644
--- a/experiments/toponym_resolution.py
+++ b/experiments/toponym_resolution.py
@@ -1,14 +1,28 @@
 import os
 import sqlite3
 import sys
+from argparse import ArgumentParser
 from pathlib import Path
 
+import experiment
 import pandas as pd
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from experiments import experiment
-from geoparser import linking, ranking, recogniser
+from t_res.geoparser import linking, ranking, recogniser
+
+parser = ArgumentParser()
+parser.add_argument(
+    "-p",
+    "--path",
+    dest="path",
+    help="path to resources directory",
+    action="store",
+    type=str,
+)
+
+args = parser.parse_args()
+
+resources_dir = args.path
+current_dir = Path(__file__).parent.resolve()
 
 # Choose test scenario:
 # * "dev" while developing and experimenting,
@@ -52,10 +66,12 @@
     # Instantiate the recogniser:
     myner = recogniser.Recogniser(
         model="blb_lwm-ner-" + granularity,
-        train_dataset="../experiments/outputs/data/lwm/ner_"
+        train_dataset=str(current_dir)
+        + "/outputs/data/lwm/ner_"
         + granularity
         + "_train.json",  # Path to the json file containing the training set (see note above).
-        test_dataset="../experiments/outputs/data/lwm/ner_"
+        test_dataset=str(current_dir)
+        + "/outputs/data/lwm/ner_"
         + granularity
         + "_dev.json",  # Path to the json file containing the test set (see note above).
         pipe=None,  # We'll store the NER pipeline here, leave this empty.
@@ -65,7 +81,9 @@
         # https://huggingface.co/Livingwithmachines/bert_1760_1900). You can
         # chose any other model from the HuggingFace hub, as long as it's
         # trained on the "Fill-Mask" objective (filter by task).
-        model_path="../resources/models/",  # Path where the NER model will be stored.
+        model_path=os.path.join(
+            resources_dir, "models/"
+        ),  # Path where the NER model will be stored.
         training_args={
             "batch_size": 8,
             "num_train_epochs": 10,
@@ -81,7 +99,7 @@
     # Instantiate the ranker:
     myranker = ranking.Ranker(
         method=cand_select_method,
-        resources_path="../resources/wikidata/",
+        resources_path=resources_dir,
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -90,13 +108,13 @@
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("../resources/models/w2v/").resolve()),
+            "w2v_ocr_path": os.path.join(resources_dir, "models/w2v/"),
             "w2v_ocr_model": "w2v_*_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("../resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(resources_dir, "deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -113,15 +131,17 @@
 
     # --------------------------------------
     # Instantiate the linker:
-    with sqlite3.connect("../resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(
+        os.path.join(resources_dir, "rel_db/embeddings_database.db")
+    ) as conn:
         cursor = conn.cursor()
         mylinker = linking.Linker(
             method=top_res_method,
-            resources_path="../resources/",
+            resources_path=resources_dir,
             linking_resources=dict(),
             rel_params={
-                "model_path": "../resources/models/disambiguation/",
-                "data_path": "../experiments/outputs/data/lwm/",
+                "model_path": os.path.join(resources_dir, "models/disambiguation/"),
+                "data_path": os.path.join(current_dir, "outputs/data/lwm/"),
                 "training_split": "",
                 "db_embeddings": cursor,
                 "with_publication": wpubl,
@@ -137,9 +157,9 @@
     # Instantiate the experiment:
     myexperiment = experiment.Experiment(
         dataset=dataset,
-        data_path="outputs/data/",
+        data_path=os.path.join(current_dir, "outputs/data/"),
         dataset_df=pd.DataFrame(),
-        results_path="outputs/results/",
+        results_path=os.path.join(current_dir, "outputs/results/"),
         myner=myner,
         myranker=myranker,
         mylinker=mylinker,
diff --git a/pyproject.toml b/pyproject.toml
index 20fd53e8..e1dd0e53 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,11 +1,11 @@
 [tool.poetry]
-name = "lwm_t_res"
+name = "t_res"
 version = "0.1.0"
 description = ""
 authors = ["Federico Nanni <nanni.federico@gmail.com>"]
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = ">=3.9, <4.0"
 tqdm = "^4.62.3"
 bs4 = "^0.0.1"
 pandas = "^1.3.4"
@@ -34,20 +34,24 @@ fastapi = "^0.87.0"
 uvicorn = {extras = ["standard"], version = "^0.20.0"}
 ipykernel = "^6.21.3"
 python-levenshtein = "^0.20.9"
-Sphinx = { version = "4.2.0", optional = true }
-sphinx-rtd-theme = { version = "1.0.0", optional = true }
-sphinxcontrib-napoleon = { version = "0.7", optional = true }
-torch = "1.13.1"
-accelerate = "^0.21.0"
+torch = "^1.13.1"
+accelerate = "^0.27.2"
+scipy = "<=1.11.0"
 
-[tool.poetry.dev-dependencies]
-pytest = "^5.2"
+[tool.poetry.group.dev.dependencies]
+pytest = "^7"
 jupyter = "^1.0.0"
 black = "^22.3.0"
 flake8 = "^6.0.0"
 isort = "^5.12.0"
 pre-commit = "^3.3.1"
 
+[tool.poetry.group.docs.dependencies]
+Sphinx = "^5.0.0"
+sphinx-rtd-theme = "^1.0.0"
+sphinxcontrib-napoleon = "^0.7"
+sphinx-copybutton = "^0.5.2"
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
@@ -59,5 +63,7 @@ include = '\.pyi?$'
 [tool.isort]
 profile = "black"
 
-[tool.poetry.extras]
-docs = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-napoleon"]
+[tool.pytest.ini_options]
+markers = [
+    "deezy: tests which need a deezy model",
+]
diff --git a/utils/REL/__init__.py b/t_res/__init__.py
similarity index 100%
rename from utils/REL/__init__.py
rename to t_res/__init__.py
diff --git a/utils/__init__.py b/t_res/geoparser/__init__.py
similarity index 100%
rename from utils/__init__.py
rename to t_res/geoparser/__init__.py
diff --git a/geoparser/linking.py b/t_res/geoparser/linking.py
similarity index 90%
rename from geoparser/linking.py
rename to t_res/geoparser/linking.py
index 6295f5f7..bb21749b 100644
--- a/geoparser/linking.py
+++ b/t_res/geoparser/linking.py
@@ -14,12 +14,9 @@
 RANDOM_SEED = 42
 np.random.seed(RANDOM_SEED)
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-
-from geoparser import ranking
-from utils import rel_utils
-from utils.REL import entity_disambiguation
+from ..utils import rel_utils
+from ..utils.REL import entity_disambiguation
+from . import ranking
 
 
 class Linker:
@@ -31,7 +28,9 @@ class Linker:
     Arguments:
         method (Literal["mostpopular", "reldisamb", "bydistance"]): The
             linking method to use.
-        resources_path (str, optional): The path to the linking resources.
+        resources_path (str): The path to the linking resources.
+        experiments_path (str, optional): The path to the experiments
+            directory. Default is "../experiments".
         linking_resources (dict, optional): Dictionary containing the
             necessary linking resources. Defaults to ``dict()`` (an empty
             dictionary).
@@ -48,7 +47,8 @@ class Linker:
 
        linker = Linker(
          method="mostpopular",
-         resources_path="/path/to/linking/resources/",
+         resources_path="/path/to/resources/",
+         experiments_path="/path/to/experiments/",
          linking_resources={},
          overwrite_training=True,
          rel_params={"with_publication": True, "do_test": True}
@@ -68,6 +68,7 @@ class Linker:
              mylinker = linking.Linker(
              method="reldisamb",
              resources_path="../resources/",
+             experiments_path="../experiments/",
              linking_resources=dict(),
              rel_params={
                "model_path": "../resources/models/disambiguation/",
@@ -107,28 +108,36 @@ def __init__(
         self,
         method: Literal["mostpopular", "reldisamb", "bydistance"],
         resources_path: str,
+        experiments_path: Optional[str] = "../experiments",
         linking_resources: Optional[dict] = dict(),
         overwrite_training: Optional[bool] = False,
-        rel_params: Optional[dict] = {
-            "model_path": "../resources/models/disambiguation/",
-            "data_path": "../experiments/outputs/data/lwm/",
-            "training_split": "originalsplit",
-            "db_embeddings": None,  # The cursor to the embeddings database.
-            "with_publication": True,
-            "without_microtoponyms": True,
-            "do_test": False,
-            "default_publname": "United Kingdom",
-            "default_publwqid": "Q145",
-        },
+        rel_params: Optional[dict] = None,
+        rel_device: Optional[str] = None, 
     ):
         """
         Initialises a Linker object.
         """
         self.method = method
         self.resources_path = resources_path
+        self.experiments_path = experiments_path
         self.linking_resources = linking_resources
         self.overwrite_training = overwrite_training
+
+        if rel_params is None:
+            rel_params = {
+                "model_path": os.path.join(resources_path, "models/disambiguation/"),
+                "data_path": os.path.join(experiments_path, "outputs/data/lwm/"),
+                "training_split": "originalsplit",
+                "db_embeddings": None,  # The cursor to the embeddings database.
+                "with_publication": True,
+                "without_microtoponyms": True,
+                "do_test": False,
+                "default_publname": "United Kingdom",
+                "default_publwqid": "Q145",
+            }
+
         self.rel_params = rel_params
+        self.rel_device = rel_device
 
     def __str__(self) -> str:
         """
@@ -156,12 +165,14 @@ def load_resources(self) -> dict:
 
         # Load Wikidata mentions-to-QID with absolute counts:
         print("  > Loading mentions to wikidata mapping.")
-        with open(self.resources_path + "wikidata/mentions_to_wikidata.json", "r") as f:
+        with open(
+            os.path.join(self.resources_path, "wikidata/mentions_to_wikidata.json"), "r"
+        ) as f:
             self.linking_resources["mentions_to_wikidata"] = json.load(f)
 
         print("  > Loading gazetteer.")
         gaz = pd.read_csv(
-            f"{self.resources_path}wikidata/wikidata_gazetteer.csv",
+            os.path.join(self.resources_path, "wikidata/wikidata_gazetteer.csv"),
             usecols=["wikidata_id", "latitude", "longitude"],
         )
         gaz["latitude"] = gaz["latitude"].astype(float)
@@ -177,7 +188,9 @@ def load_resources(self) -> dict:
 
         # The entity2class.txt file is created as the last step in
         # wikipedia processing:
-        with open(f"{self.resources_path}wikidata/entity2class.txt", "r") as f:
+        with open(
+            os.path.join(self.resources_path, "wikidata/entity2class.txt"), "r"
+        ) as f:
             self.linking_resources["entity2class"] = json.load(f)
 
         print("*** Linking resources loaded!\n")
@@ -444,6 +457,8 @@ def train_load_model(
                     "mode": "train",
                     "model_path": os.path.join(linker_name, "model"),
                 }
+                if self.rel_device is not None:
+                    config_rel["device"] = self.rel_device
 
                 # Instantiate the entity disambiguation model:
                 model = entity_disambiguation.EntityDisambiguation(
@@ -465,6 +480,8 @@ def train_load_model(
                     "mode": "eval",
                     "model_path": os.path.join(linker_name, "model"),
                 }
+                if self.rel_device is not None:
+                    config_rel["device"] = self.rel_device
 
                 model = entity_disambiguation.EntityDisambiguation(
                     self.rel_params["db_embeddings"],
diff --git a/geoparser/pipeline.py b/t_res/geoparser/pipeline.py
similarity index 96%
rename from geoparser/pipeline.py
rename to t_res/geoparser/pipeline.py
index dc0d095d..2d3230cf 100644
--- a/geoparser/pipeline.py
+++ b/t_res/geoparser/pipeline.py
@@ -5,10 +5,8 @@
 
 from sentence_splitter import split_text_into_sentences
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import linking, ranking, recogniser
-from utils import ner, rel_utils
+from ..utils import ner, rel_utils
+from . import linking, ranking, recogniser
 
 
 class Pipeline:
@@ -28,6 +26,9 @@ class Pipeline:
         mylinker (linking.Linker, optional): The ``Linker`` object to use in
             the pipeline. If None, the default ``Linker`` will be instantiated.
             For the default settings, see Notes below.
+        resources_path (str, optional): The path to your resources directory.
+        experiments_path (str, optional): The path to the experiments directory.
+            Default is "../experiments".
 
     Example:
         >>> # Instantiate the Pipeline object with a default setup
@@ -57,7 +58,7 @@ class Pipeline:
 
             ranking.Ranker(
                 method="perfectmatch",
-                resources_path="../resources/wikidata/",
+                resources_path=resources_path,
             )
 
         * The default settings for the ``Linker``:
@@ -66,7 +67,7 @@ class Pipeline:
 
             linking.Linker(
                 method="mostpopular",
-                resources_path="../resources/",
+                resources_path=resources_path,
             )
     """
 
@@ -75,6 +76,9 @@ def __init__(
         myner: Optional[recogniser.Recogniser] = None,
         myranker: Optional[ranking.Ranker] = None,
         mylinker: Optional[linking.Linker] = None,
+        resources_path: Optional[str] = None,
+        experiments_path: Optional[str] = None,
+        ner_device: Optional[str] = None,
     ):
         """
         Instantiates a Pipeline object.
@@ -89,21 +93,34 @@ def __init__(
             self.myner = recogniser.Recogniser(
                 model="Livingwithmachines/toponym-19thC-en",
                 load_from_hub=True,
+                device=ner_device,
             )
 
         # If myranker is None, instantiate the default Ranker.
         if not self.myranker:
+            if not resources_path:
+                raise ValueError("[ERROR] Please specify path to resources directory.")
             self.myranker = ranking.Ranker(
                 method="perfectmatch",
-                resources_path="../resources/wikidata/",
+                resources_path=resources_path,
             )
 
         # If mylinker is None, instantiate the default Linker.
         if not self.mylinker:
-            self.mylinker = linking.Linker(
-                method="mostpopular",
-                resources_path="../resources/",
-            )
+            if not resources_path:
+                raise ValueError("[ERROR] Please specify path to resources directory.")
+
+            if experiments_path:
+                self.mylinker = linking.Linker(
+                    method="mostpopular",
+                    resources_path=resources_path,
+                    experiments_path=experiments_path,
+                )
+            else:
+                self.mylinker = linking.Linker(
+                    method="mostpopular",
+                    resources_path=resources_path,
+                )
 
         # -----------------------------------------
         # NER training and creating pipeline:
@@ -135,9 +152,6 @@ def __init__(
             self.myranker
         )
 
-        # Check we've actually loaded the mentions2wikidata dictionary:
-        assert self.myranker.mentions_to_wikidata["London"] is not None
-
     def run_sentence(
         self,
         sentence: str,
diff --git a/geoparser/ranking.py b/t_res/geoparser/ranking.py
similarity index 93%
rename from geoparser/ranking.py
rename to t_res/geoparser/ranking.py
index 63bdd3b6..a314003c 100644
--- a/geoparser/ranking.py
+++ b/t_res/geoparser/ranking.py
@@ -9,9 +9,7 @@
 from pandarallel import pandarallel
 from pyxdameraulevenshtein import normalized_damerau_levenshtein_distance
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from utils import deezy_processing
+from ..utils import deezy_processing
 
 
 class Ranker:
@@ -111,31 +109,8 @@ def __init__(
         resources_path: str,
         mentions_to_wikidata: Optional[dict] = dict(),
         wikidata_to_mentions: Optional[dict] = dict(),
-        strvar_parameters: Optional[dict] = {
-            # Parameters to create the string pair dataset:
-            "ocr_threshold": 60,
-            "top_threshold": 85,
-            "min_len": 5,
-            "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
-            "overwrite_dataset": False,
-        },
-        deezy_parameters: Optional[dict] = {
-            # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
-            "dm_cands": "wkdtalts",
-            "dm_model": "w2v_ocr",
-            "dm_output": "deezymatch_on_the_fly",
-            # Ranking measures:
-            "ranking_metric": "faiss",
-            "selection_threshold": 50,
-            "num_candidates": 1,
-            "verbose": False,
-            # DeezyMatch training:
-            "overwrite_training": False,
-            "do_test": False,
-        },
+        strvar_parameters: Optional[dict] = None,
+        deezy_parameters: Optional[dict] = None,
         already_collected_cands: Optional[dict] = dict(),
     ):
         """
@@ -145,6 +120,37 @@ def __init__(
         self.resources_path = resources_path
         self.mentions_to_wikidata = mentions_to_wikidata
         self.wikidata_to_mentions = wikidata_to_mentions
+
+        # set paths based on resources path
+        if strvar_parameters is None:
+            strvar_parameters = {
+                # Parameters to create the string pair dataset:
+                "ocr_threshold": 60,
+                "top_threshold": 85,
+                "min_len": 5,
+                "max_len": 15,
+                "w2v_ocr_path": os.path.join(resources_path, "models/w2v/"),
+                "w2v_ocr_model": "w2v_*_news",
+                "overwrite_dataset": False,
+            }
+
+        if deezy_parameters is None:
+            deezy_parameters = {
+                # Paths and filenames of DeezyMatch models and data:
+                "dm_path": os.path.join(resources_path, "deezymatch/"),
+                "dm_cands": "wkdtalts",
+                "dm_model": "w2v_ocr",
+                "dm_output": "deezymatch_on_the_fly",
+                # Ranking measures:
+                "ranking_metric": "faiss",
+                "selection_threshold": 50,
+                "num_candidates": 1,
+                "verbose": False,
+                # DeezyMatch training:
+                "overwrite_training": False,
+                "do_test": False,
+            }
+
         self.strvar_parameters = strvar_parameters
         self.deezy_parameters = deezy_parameters
         self.already_collected_cands = already_collected_cands
@@ -203,8 +209,12 @@ def load_resources(self) -> dict:
 
         # Load files
         files = {
-            "mentions_to_wikidata": f"{self.resources_path}mentions_to_wikidata_normalized.json",
-            "wikidata_to_mentions": f"{self.resources_path}wikidata_to_mentions_normalized.json",
+            "mentions_to_wikidata": os.path.join(
+                self.resources_path, "wikidata/mentions_to_wikidata_normalized.json"
+            ),
+            "wikidata_to_mentions": os.path.join(
+                self.resources_path, "wikidata/wikidata_to_mentions_normalized.json"
+            ),
         }
 
         with open(files["mentions_to_wikidata"], "r") as f:
@@ -275,7 +285,9 @@ def train(self) -> None:
             if self.deezy_parameters["do_test"] == True:
                 self.deezy_parameters["dm_model"] += "_test"
                 self.deezy_parameters["dm_cands"] += "_test"
-            deezy_processing.train_deezy_model(self.deezy_parameters, self.strvar_parameters, self.wikidata_to_mentions)
+            deezy_processing.train_deezy_model(
+                self.deezy_parameters, self.strvar_parameters, self.wikidata_to_mentions
+            )
             deezy_processing.generate_candidates(
                 self.deezy_parameters, self.mentions_to_wikidata
             )
@@ -490,7 +502,7 @@ def deezy_on_the_fly(self, queries: List[str]) -> Tuple[dict, dict]:
 
         Example:
             >>> ranker = Ranker(...)
-            >>> ranker.mentions_to_wikidata = ranker.load_resources()
+            >>> ranker.load_resources()
             >>> queries = ['London', 'Shefrield']
             >>> candidates, already_collected = ranker.deezy_on_the_fly(queries)
             >>> print(candidates)
@@ -583,7 +595,7 @@ def run(self, queries: List[str]) -> Tuple[dict, dict]:
 
         Example:
             >>> myranker = Ranker(method="perfectmatch", ...)
-            >>> myranker.mentions_to_wikidata = myranker.load_resources()
+            >>> myranker.load_resources()
             >>> queries = ['London', 'Barcelona', 'Bologna']
             >>> candidates, already_collected = myranker.run(queries)
             >>> print(candidates)
diff --git a/geoparser/recogniser.py b/t_res/geoparser/recogniser.py
similarity index 96%
rename from geoparser/recogniser.py
rename to t_res/geoparser/recogniser.py
index 54d24b2a..214a4f0a 100644
--- a/geoparser/recogniser.py
+++ b/t_res/geoparser/recogniser.py
@@ -16,9 +16,7 @@
     pipeline,
 )
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from utils import ner
+from ..utils import ner
 
 
 class Recogniser:
@@ -96,6 +94,7 @@ def __init__(
         overwrite_training: Optional[bool] = False,
         do_test: Optional[bool] = False,
         load_from_hub: Optional[bool] = False,
+        device: Optional[str] = None,
     ):
         """
         Initialises a Recogniser object.
@@ -110,6 +109,7 @@ def __init__(
         self.overwrite_training = overwrite_training
         self.do_test = do_test
         self.load_from_hub = load_from_hub
+        self.device = device
 
         # Add "_test" to the model name if do_test is True, unless
         # the model is downloaded from Huggingface, in which case
@@ -167,7 +167,7 @@ def train(self) -> None:
             return None
 
         # If model exists and overwrite is set to False, skip training:
-        model_path = f"{self.model_path}{self.model}.model"
+        model_path = os.path.join(self.model_path,f"{self.model}.model")
         if Path(model_path).exists() and self.overwrite_training == False:
             s = "\n** Note: Model "
             s += f"{model_path} is already trained.\n"
@@ -272,7 +272,7 @@ def compute_metrics(p: Tuple[list, list]) -> dict:
         training_args = TrainingArguments(
             output_dir=self.model_path,
             evaluation_strategy="epoch",
-            logging_dir=self.model_path + "runs/" + self.model,
+            logging_dir=os.path.join(self.model_path,"runs/",self.model),
             learning_rate=self.training_args["learning_rate"],
             per_device_train_batch_size=self.training_args["batch_size"],
             per_device_eval_batch_size=self.training_args["batch_size"],
@@ -297,7 +297,7 @@ def compute_metrics(p: Tuple[list, list]) -> dict:
         trainer.evaluate()
 
         # Save the model:
-        trainer.save_model(self.model_path + self.model + ".model")
+        trainer.save_model(os.path.join(self.model_path,f"{self.model}.model"))
 
     # -------------------------------------------------------------
     def create_pipeline(self) -> Pipeline:
@@ -324,11 +324,11 @@ def create_pipeline(self) -> Pipeline:
         # If the model is local (has not been obtained from the hub),
         # pre-append the model path and the extension of the model
         # to obtain the model name.
-        if self.load_from_hub == False:
-            model_name = self.model_path + self.model + ".model"
+        if self.load_from_hub is False:
+            model_name = os.path.join(self.model_path, f"{self.model}.model")
 
         # Load a NER pipeline:
-        self.pipe = pipeline("ner", model=model_name, ignore_labels=[])
+        self.pipe = pipeline("ner", model=model_name, ignore_labels=[], device=self.device)
         return self.pipe
 
     # -------------------------------------------------------------
diff --git a/t_res/utils/REL/__init__.py b/t_res/utils/REL/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utils/REL/entity_disambiguation.py b/t_res/utils/REL/entity_disambiguation.py
similarity index 99%
rename from utils/REL/entity_disambiguation.py
rename to t_res/utils/REL/entity_disambiguation.py
index 26a147de..68a1c2cf 100644
--- a/utils/REL/entity_disambiguation.py
+++ b/t_res/utils/REL/entity_disambiguation.py
@@ -14,11 +14,10 @@
 from sklearn.linear_model import LogisticRegression
 from torch.autograd import Variable
 
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-import utils.REL.utils as utils
-from utils import rel_utils
-from utils.REL.mulrel_ranker import MulRelRanker, PreRank
-from utils.REL.vocabulary import Vocabulary
+from . import utils
+from .. import rel_utils
+from .mulrel_ranker import MulRelRanker, PreRank
+from .vocabulary import Vocabulary
 
 RANDOM_SEED = 42
 random.seed(RANDOM_SEED)
@@ -68,7 +67,7 @@ def __init__(self, db_embs, user_config, reset_embeddings=False):
         self.config = self.__get_config(user_config)
 
         # Use CPU if cuda is not available:
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.device = self.config.get("device", "cuda" if torch.cuda.is_available() else "cpu")
         self.prerank_model = None
         self.model = None
         self.reset_embeddings = reset_embeddings
@@ -162,7 +161,7 @@ def __get_config(self, user_config):
         }
 
         config = default_config
-        print("Model path:", config["model_path"], config["mode"])
+        print("Model path:", os.path.abspath(config["model_path"]), config["mode"])
 
         return config
 
diff --git a/utils/REL/mulrel_ranker.py b/t_res/utils/REL/mulrel_ranker.py
similarity index 100%
rename from utils/REL/mulrel_ranker.py
rename to t_res/utils/REL/mulrel_ranker.py
diff --git a/utils/REL/utils.py b/t_res/utils/REL/utils.py
similarity index 100%
rename from utils/REL/utils.py
rename to t_res/utils/REL/utils.py
diff --git a/utils/REL/vocabulary.py b/t_res/utils/REL/vocabulary.py
similarity index 100%
rename from utils/REL/vocabulary.py
rename to t_res/utils/REL/vocabulary.py
diff --git a/t_res/utils/__init__.py b/t_res/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utils/deezy_processing.py b/t_res/utils/deezy_processing.py
similarity index 100%
rename from utils/deezy_processing.py
rename to t_res/utils/deezy_processing.py
diff --git a/utils/get_data.py b/t_res/utils/get_data.py
similarity index 100%
rename from utils/get_data.py
rename to t_res/utils/get_data.py
diff --git a/utils/ner.py b/t_res/utils/ner.py
similarity index 100%
rename from utils/ner.py
rename to t_res/utils/ner.py
diff --git a/utils/preprocess_data.py b/t_res/utils/preprocess_data.py
similarity index 98%
rename from utils/preprocess_data.py
rename to t_res/utils/preprocess_data.py
index 975df05c..d8f5c785 100644
--- a/utils/preprocess_data.py
+++ b/t_res/utils/preprocess_data.py
@@ -11,31 +11,30 @@
 
 import pandas as pd
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from utils import process_wikipedia
+from . import process_wikipedia
 
-# Path to Wikipedia resources (where the wiki2wiki mapper is located):
-path_to_wikipedia = "../resources/wikipedia/"
 
-
-def turn_wikipedia2wikidata(wikipedia_title: str) -> Optional[str]:
+def turn_wikipedia2wikidata(
+    wikipedia_title: str,
+    wikipedia_path: str,
+) -> Optional[str]:
     """
     Convert a Wikipedia title to its corresponding Wikidata ID.
 
     Arguments:
         wikipedia_title (str): The title of the Wikipedia page.
+        wikipedia_path (str): The path to your wikipedia directory.
 
     Returns:
         Optional[str]:
             The corresponding Wikidata ID if available, or None if not.
 
     Example:
-        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Colosseum")
+        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Colosseum", "../resources/wikipedia/")
         'Q10285'
-        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Ancient_Egypt")
+        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Ancient_Egypt", "../resources/wikipedia/")
         'Q11768'
-        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Invalid_Location")
+        >>> turn_wikipedia2wikidata("https://en.wikipedia.org/wiki/Invalid_Location", "../resources/wikipedia/")
         Warning: invalid_location is not in wikipedia2wikidata, the wkdt_qid will be None.
     """
     if not wikipedia_title == "NIL" and not wikipedia_title == "*":
@@ -46,7 +45,7 @@ def turn_wikipedia2wikidata(wikipedia_title: str) -> Optional[str]:
         )
         linked_wqid = process_wikipedia.title_to_id(
             processed_wikipedia_title,
-            path_to_db=os.path.join(path_to_wikipedia, "index_enwiki-latest.db"),
+            path_to_db=os.path.join(wikipedia_path, "index_enwiki-latest.db"),
             lower=True,
         )
         if not linked_wqid:
@@ -234,7 +233,7 @@ def process_lwm_for_ner(tsv_topres_path: str):
 
 
 def process_lwm_for_linking(
-    tsv_topres_path: str, gazetteer_ids: List[str]
+    resources_dir: str, tsv_topres_path: str, gazetteer_ids: List[str]
 ) -> pd.DataFrame:
     """
     Process LwM data for performing entity linking.
@@ -243,6 +242,7 @@ def process_lwm_for_linking(
     Each row includes the annotation and resolution information of the toponym.
 
     Arguments:
+        resources_dir (str): The path to the resources directory.
         tsv_topres_path (str): The path to the top-level directory containing the annotated TSV files.
         gazetteer_ids (list): The set of Wikidata IDs in the gazetteer.
 
@@ -327,8 +327,9 @@ def process_lwm_for_linking(
                 # Clean Wikidata URL:
                 wkpd = wkpd.replace("\\", "")
 
+                wikipedia_path = os.path.join(resources_dir, "wikipedia/")
                 # Get Wikidata ID:
-                wkdt = turn_wikipedia2wikidata(wkpd)
+                wkdt = turn_wikipedia2wikidata(wkpd, wikipedia_path)
 
                 # In mentions attached to next token through a dash,
                 # keep only the true mention (this has to do with
diff --git a/utils/process_data.py b/t_res/utils/process_data.py
similarity index 99%
rename from utils/process_data.py
rename to t_res/utils/process_data.py
index a7b80893..c8da8cd5 100644
--- a/utils/process_data.py
+++ b/t_res/utils/process_data.py
@@ -8,11 +8,10 @@
 import pandas as pd
 from tqdm import tqdm
 
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from utils import ner
+from . import ner
 
 if TYPE_CHECKING:
-    from geoparser import recogniser
+    from ..geoparser import recogniser
 
 
 def eval_with_exception(str2parse: str, in_case: Optional[Any] = "") -> Any:
diff --git a/utils/process_wikipedia.py b/t_res/utils/process_wikipedia.py
similarity index 100%
rename from utils/process_wikipedia.py
rename to t_res/utils/process_wikipedia.py
diff --git a/utils/rel_e2e.py b/t_res/utils/rel_e2e.py
similarity index 95%
rename from utils/rel_e2e.py
rename to t_res/utils/rel_e2e.py
index 33dcb190..1d0beca4 100644
--- a/utils/rel_e2e.py
+++ b/t_res/utils/rel_e2e.py
@@ -6,11 +6,11 @@
 import requests
 from tqdm import tqdm
 
-# Import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from utils import process_data, process_wikipedia
-from experiments import experiment
+from . import process_data, process_wikipedia
 
+# Add "../../experiments/" to path to import experiments
+sys.path.insert(0, os.path.abspath("../../experiments/"))
+from experiments import experiment
 
 def rel_end_to_end(sent: str) -> dict:
     """
@@ -57,12 +57,16 @@ def get_rel_from_api(dSentences: dict, rel_end2end_path: str) -> None:
                 rel_preds = json.load(f)
 
 
-def match_wikipedia_to_wikidata(wiki_title: str) -> str:
+def match_wikipedia_to_wikidata(
+    wiki_title: str,
+    path_to_db: str,
+    ) -> str:
     """
     Retrieve the Wikidata ID corresponding to a Wikipedia title.
 
     Arguments:
         wiki_title (str): A Wikipedia title in underscore-separated format.
+        path_to_db (str): The path to your wikipedia database (e.g. "../resources/wikipedia/index_enwiki-latest.db").
 
     Returns:
         str:
@@ -72,7 +76,7 @@ def match_wikipedia_to_wikidata(wiki_title: str) -> str:
     wqid = process_wikipedia.title_to_id(
         wiki_title,
         lower=False,
-        path_to_db="../resources/wikipedia/index_enwiki-latest.db",
+        path_to_db=path_to_db,
     )
     if not wqid:
         wqid = "NIL"
diff --git a/utils/rel_utils.py b/t_res/utils/rel_utils.py
similarity index 98%
rename from utils/rel_utils.py
rename to t_res/utils/rel_utils.py
index 7589924f..6cc38bee 100644
--- a/utils/rel_utils.py
+++ b/t_res/utils/rel_utils.py
@@ -9,8 +9,7 @@
 import numpy as np
 import pandas as pd
 
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import ranking
+from ..geoparser import ranking
 
 RANDOM_SEED = 42
 """Constant representing the random seed used for generating pseudo-random
@@ -323,7 +322,7 @@ def prepare_rel_trainset(
     # Format the mentions are required by the ranker:
     all_mentions = [{"mention": mention} for mention in all_mentions]
     # Use the ranker to find candidates:
-    wk_cands, myranker.already_collected_cands = myranker.find_candidates(all_mentions)
+    wk_cands = myranker.find_candidates(all_mentions)
     # Rank the candidates:
     rel_json = rank_candidates(
         rel_json,
diff --git a/tests/sample_files/experiments/outputs/data/lwm/linking_df_split.tsv b/tests/sample_files/experiments/outputs/data/lwm/linking_df_split.tsv
new file mode 100644
index 00000000..f066ee1d
--- /dev/null
+++ b/tests/sample_files/experiments/outputs/data/lwm/linking_df_split.tsv
@@ -0,0 +1,15 @@
+article_id	sentences	annotations	place	decade	year	ocr_quality_mean	ocr_quality_sd	publication_title	publication_code	place_wqid	originalsplit	apply	withouttest	Ashton1860	Dorchester1820	Dorchester1830	Dorchester1860	Manchester1780	Manchester1800	Manchester1820	Manchester1830	Manchester1860	Poole1860
+12670	[{'sentence_pos': 1, 'sentence_text': 'NOTICE.  '}, {'sentence_pos': 2, 'sentence_text': 'THE STAR, Political, Naval, Military, I Literary, and Commercial Intelligencer, and General Advertiser. '}, {'sentence_pos': 3, 'sentence_text': 'Established in 1813.'}, {'sentence_pos': 4, 'sentence_text': '—This paper which is published every Tuesday, Thursday, and Saturday evening, contains, in addition to ectracts from the British and Foreign newspapers of articles of political and general intelligence, copious and carefully-selected information as to all naval, military, and commercial affairs, and especially that which relates to the trade and commerce of the Channel Islands. '}, {'sentence_pos': 5, 'sentence_text': 'The following are the subscription prices to The Star, payable quarterly :-For three numbers per week For two do. do. '}, {'sentence_pos': 6, 'sentence_text': 'For one do. do. 2s. do. '}, {'sentence_pos': 7, 'sentence_text': 'Single numbers, 2d. each. '}, {'sentence_pos': 8, 'sentence_text': 'Subscribers in the United Kingdom will be charged on the following scale, in British money, including pre-payment and posting, payable in advance : For three numbers per week....9s. 9d. per quarter For two do. do. ....Bs. '}, {'sentence_pos': 9, 'sentence_text': 'Bd. do. '}, {'sentence_pos': 10, 'sentence_text': 'For one do. do. ....3s. 4d. do. '}, {'sentence_pos': 11, 'sentence_text': 'The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address—No. 
10, Bordage-Street, Guernsey.'}]	[{'mention_pos': 0, 'mention': 'Channel Islands', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Channel_Islands', 'wkdt_qid': 'Q42314', 'mention_start': 364, 'mention_end': 379, 'sent_pos': 4}, {'mention_pos': 1, 'mention': 'United Kingdom', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/United_Kingdom', 'wkdt_qid': 'Q145', 'mention_start': 19, 'mention_end': 33, 'sent_pos': 8}, {'mention_pos': 2, 'mention': 'Bordage-Street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 177, 'mention_end': 191, 'sent_pos': 11}, {'mention_pos': 3, 'mention': 'Guernsey', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Guernsey', 'wkdt_qid': 'Q3311985', 'mention_start': 193, 'mention_end': 201, 'sent_pos': 11}]	Poole	1860	1860	0.8953	0.1361	The Poole and South-Western Herald, etc.	2325	Q203349	dev	train	test	train	train	train	train	train	train	train	dev	train	test
+8189322	[{'sentence_pos': 1, 'sentence_text': 'The earl of Stamford and WARRINGTON’S RENTS. '}, {'sentence_pos': 2, 'sentence_text': 'NOTICE IS HEREBY GIVEN, that an AUDIT for the Receipt of the RESERVED and CHIEF RENTS due to the Earl of Stamford and Warrington at Lady Day last for the Manor and Parish of Ashtonnnder-Lyne, in the county of Lancaster, will be holden at the Pitt and Nelson Inn, in Ashton-under-Lyne, on Monday, Tuesday, and Wednesday, the 4th, sth, and 6th days of May next, between the hours of Nine and Pour o’clock each day. '}, {'sentence_pos': 3, 'sentence_text': 'And that an AUDIT for the RESERVED and CHIEF RENTS for the Manor of Stayley, in the county of Chester, will be holden at the Eagle Inn, in Stalybridge, on Thursday, the 7th day of May next, between the hours of Eleven and Two o clock, on which days the tenants are requested to pay their rents. '}, {'sentence_pos': 4, 'sentence_text': 'The Court Leet and View of Frank Pledge of the said Earl, for the Manor of Ashton-under-Lyne, will be held at the Old Court House there, on Wednesday, the 6th day of May next, at Ten o’clock in the forenoon; and the Court Baron for the Manor of Stayley, on Thursday, the 7th day of May next, at One o’clock in the afternoon. '}, {'sentence_pos': 5, 'sentence_text': 'ARTHUR FREDERICK PAYNE, Agent to the said Earl. 
'}, {'sentence_pos': 6, 'sentence_text': 'Ashton-under-Lyne, April 23rd, 1863.'}]	[{'mention_pos': 0, 'mention': 'Ashtonnnder-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 174, 'mention_end': 190, 'sent_pos': 2}, {'mention_pos': 1, 'mention': 'county of Lancaster', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Lancashire', 'wkdt_qid': 'Q23077', 'mention_start': 199, 'mention_end': 218, 'sent_pos': 2}, {'mention_pos': 2, 'mention': 'Pitt and Nelson Inn', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 242, 'mention_end': 261, 'sent_pos': 2}, {'mention_pos': 3, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 266, 'mention_end': 283, 'sent_pos': 2}, {'mention_pos': 4, 'mention': 'Manor of Stayley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 59, 'mention_end': 75, 'sent_pos': 3}, {'mention_pos': 5, 'mention': 'county of Chester', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Cheshire', 'wkdt_qid': 'Q23064', 'mention_start': 84, 'mention_end': 101, 'sent_pos': 3}, {'mention_pos': 6, 'mention': 'Eagle Inn', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 125, 'mention_end': 134, 'sent_pos': 3}, {'mention_pos': 7, 'mention': 'Stalybridge', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 139, 'mention_end': 150, 'sent_pos': 3}, {'mention_pos': 8, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 75, 'mention_end': 92, 'sent_pos': 4}, {'mention_pos': 9, 'mention': 'Old Court House', 'entity_type': 'BUILDING', 'wkpd_url': '*', 
'wkdt_qid': 'NIL', 'mention_start': 114, 'mention_end': 129, 'sent_pos': 4}, {'mention_pos': 10, 'mention': 'Manor of Stayley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 236, 'mention_end': 252, 'sent_pos': 4}, {'mention_pos': 11, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 0, 'mention_end': 17, 'sent_pos': 6}]	Ashton-under-Lyne	1860	1863	0.8837	0.1619	Ashton and Stalybridge Reporter, etc.	967	Q659803	test	dev	left_out	test	dev	train	train	train	train	train	train	train	train
+3938653	[{'sentence_pos': 1, 'sentence_text': 'THE COURT AND GOVERNMENT.  '}, {'sentence_pos': 2, 'sentence_text': 'OSEORNE, THURSDA . '}, {'sentence_pos': 3, 'sentence_text': 'The Qaeen, accompanied by Princess Helena and Prince Albert Victor, drove oat in the grounds yesterday morning Princess Helena rode on horseback in the afternoon, attended by the Hon. '}, {'sentence_pos': 4, 'sentence_text': 'Emma. '}, {'sentence_pos': 5, 'sentence_text': 'Lascelles. '}, {'sentence_pos': 6, 'sentence_text': 'THE PRINCE AND PRINCESS OF WALES. '}, {'sentence_pos': 7, 'sentence_text': 'Plymouth, Thursday Evenhji;. '}, {'sentence_pos': 8, 'sentence_text': 'A dejeuner was given to-day at Mount Edgecumbe to about 100 of the elite of the neighbourhood, invited to meet the Prince and Princess of Wales. '}, {'sentence_pos': 9, 'sentence_text': 'In the afternoon their Royal Highnesses went a short yachting excursion in the Earl of Edgecunibes yacht; and in the evening the Prince dined with Viscount Templetown, Commander-inChief of the Western District, at the Government House, Mo ant Wise. '}, {'sentence_pos': 10, 'sentence_text': 'The party consisted principally of the army and navy officers in commission at the port, andthe officers •f the French squadron lying in the Sound. '}, {'sentence_pos': 11, 'sentence_text': 'After dinner the Prince adjourned with the company to a grand ball given by the united services at the Royal William Yard, Stonehouse. '}, {'sentence_pos': 12, 'sentence_text': 'Mr. '}, {'sentence_pos': 13, 'sentence_text': 'Frederick Peel ia in improved health, and has been gradually getting better since Tuesday. '}, {'sentence_pos': 14, 'sentence_text': 'The right hon. 
gentleman is still confined to his chamber.'}]	[{'mention_pos': 0, 'mention': 'OSEORNE', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 0, 'mention_end': 7, 'sent_pos': 2}, {'mention_pos': 1, 'mention': 'Plymouth', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Plymouth', 'wkdt_qid': 'Q43382', 'mention_start': 0, 'mention_end': 8, 'sent_pos': 7}, {'mention_pos': 2, 'mention': 'Mount Edgecumbe', 'entity_type': 'BUILDING', 'wkpd_url': 'https://en.wikipedia.org/wiki/Mount_Edgcumbe_House', 'wkdt_qid': 'Q6920546', 'mention_start': 31, 'mention_end': 46, 'sent_pos': 8}, {'mention_pos': 3, 'mention': 'Government House', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 218, 'mention_end': 234, 'sent_pos': 9}, {'mention_pos': 4, 'mention': 'Mo ant Wise', 'entity_type': 'BUILDING', 'wkpd_url': 'https://en.wikipedia.org/wiki/Mount_Wise,_Plymouth', 'wkdt_qid': 'Q14912878', 'mention_start': 236, 'mention_end': 247, 'sent_pos': 9}, {'mention_pos': 5, 'mention': 'Sound', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Plymouth_Sound', 'wkdt_qid': 'Q2333061', 'mention_start': 141, 'mention_end': 146, 'sent_pos': 10}, {'mention_pos': 6, 'mention': 'Royal William Yard', 'entity_type': 'BUILDING', 'wkpd_url': 'https://en.wikipedia.org/wiki/Royal_William_Victualling_Yard', 'wkdt_qid': 'Q7375014', 'mention_start': 103, 'mention_end': 121, 'sent_pos': 11}, {'mention_pos': 7, 'mention': 'Stonehouse', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stonehouse,_Plymouth', 'wkdt_qid': 'Q7619235', 'mention_start': 123, 'mention_end': 133, 'sent_pos': 11}]	Manchester	1860	1865	0.821	0.2563	Manchester Courier and Lancashire General Advertiser.	206	Q18125	train	train	dev	train	train	train	train	train	train	train	train	test	train
+4938614	[{'sentence_pos': 1, 'sentence_text': 'DUKINFIELD.  '}, {'sentence_pos': 2, 'sentence_text': 'Knutsford Sessions.'}, {'sentence_pos': 3, 'sentence_text': '—The servant girl, Eliza Ann Byrom, who stole a quantity of clothes from the house where she lodged, in Dukiafield, was sentenced to two months’ imprisonment. '}, {'sentence_pos': 4, 'sentence_text': 'Martha Wilde, who was sent from the Dukinfield court for obtaining money under false pretences by representing at two pawnshops in Dukinfield that a spurious composition called coraline beads were real coral was discharged. '}, {'sentence_pos': 5, 'sentence_text': 'Mr. '}, {'sentence_pos': 6, 'sentence_text': 'Brandt appeared for the prisoner. '}, {'sentence_pos': 7, 'sentence_text': 'Accident in a Factory.'}, {'sentence_pos': 8, 'sentence_text': '—Whilst a boy named Edwin _ Diggle, 14 years of age, a pieoer at Mr. Chadwick’s factory'}, {'sentence_pos': 9, 'sentence_text': '. '}, {'sentence_pos': 10, 'sentence_text': 'Tame Valley, was engaged, on Wednesday, in cleaning some jennies in one of the rooms he met with a severe accident to his head._ The spinner, who is rather deaf, not knowing that the lad was amongst the machinery, set the jennies going, and being unable to hear the shouts of the lad on account of the defect in his hearing, did not stop them until another man made signs to him to stop, which he did instantly. '}, {'sentence_pos': 11, 'sentence_text': 'The poor lad had, however, been severely hurt on the head, although no limbs were injured. 
'}, {'sentence_pos': 12, 'sentence_text': 'He was immediately conveyed in a cab to the Infirmary, where we understand he is progressing favourably.'}]	[{'mention_pos': 0, 'mention': 'DUKINFIELD', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dukinfield', 'wkdt_qid': 'Q1976179', 'mention_start': 0, 'mention_end': 10, 'sent_pos': 1}, {'mention_pos': 1, 'mention': 'Knutsford', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Knutsford', 'wkdt_qid': 'Q1470791', 'mention_start': 0, 'mention_end': 9, 'sent_pos': 2}, {'mention_pos': 2, 'mention': 'Dukiafield', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dukinfield', 'wkdt_qid': 'Q1976179', 'mention_start': 104, 'mention_end': 114, 'sent_pos': 3}, {'mention_pos': 3, 'mention': 'Dukinfield', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dukinfield', 'wkdt_qid': 'Q1976179', 'mention_start': 36, 'mention_end': 46, 'sent_pos': 4}, {'mention_pos': 4, 'mention': 'Dukinfield', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dukinfield', 'wkdt_qid': 'Q1976179', 'mention_start': 131, 'mention_end': 141, 'sent_pos': 4}, {'mention_pos': 5, 'mention': 'Mr. Chadwick’s factory', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 65, 'mention_end': 87, 'sent_pos': 8}]	Ashton-under-Lyne	1860	1869	0.9047	0.1623	The Ashton Reporter.	968	Q659803	train	train	dev	test	dev	train	train	train	train	train	train	train	train
+8189322	[{'sentence_pos': 1, 'sentence_text': 'The earl of Stamford and WARRINGTON’S RENTS. '}, {'sentence_pos': 2, 'sentence_text': 'NOTICE IS HEREBY GIVEN, that an AUDIT for the Receipt of the RESERVED and CHIEF RENTS due to the Earl of Stamford and Warrington at Lady Day last for the Manor and Parish of Ashtonnnder-Lyne, in the county of Lancaster, will be holden at the Pitt and Nelson Inn, in Ashton-under-Lyne, on Monday, Tuesday, and Wednesday, the 4th, sth, and 6th days of May next, between the hours of Nine and Pour o’clock each day. '}, {'sentence_pos': 3, 'sentence_text': 'And that an AUDIT for the RESERVED and CHIEF RENTS for the Manor of Stayley, in the county of Chester, will be holden at the Eagle Inn, in Stalybridge, on Thursday, the 7th day of May next, between the hours of Eleven and Two o clock, on which days the tenants are requested to pay their rents. '}, {'sentence_pos': 4, 'sentence_text': 'The Court Leet and View of Frank Pledge of the said Earl, for the Manor of Ashton-under-Lyne, will be held at the Old Court House there, on Wednesday, the 6th day of May next, at Ten o’clock in the forenoon; and the Court Baron for the Manor of Stayley, on Thursday, the 7th day of May next, at One o’clock in the afternoon. '}, {'sentence_pos': 5, 'sentence_text': 'ARTHUR FREDERICK PAYNE, Agent to the said Earl. 
'}, {'sentence_pos': 6, 'sentence_text': 'Ashton-under-Lyne, April 23rd, 1863.'}]	[{'mention_pos': 0, 'mention': 'Ashtonnnder-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 174, 'mention_end': 190, 'sent_pos': 2}, {'mention_pos': 1, 'mention': 'county of Lancaster', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Lancashire', 'wkdt_qid': 'Q23077', 'mention_start': 199, 'mention_end': 218, 'sent_pos': 2}, {'mention_pos': 2, 'mention': 'Pitt and Nelson Inn', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 242, 'mention_end': 261, 'sent_pos': 2}, {'mention_pos': 3, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 266, 'mention_end': 283, 'sent_pos': 2}, {'mention_pos': 4, 'mention': 'Manor of Stayley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 59, 'mention_end': 75, 'sent_pos': 3}, {'mention_pos': 5, 'mention': 'county of Chester', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Cheshire', 'wkdt_qid': 'Q23064', 'mention_start': 84, 'mention_end': 101, 'sent_pos': 3}, {'mention_pos': 6, 'mention': 'Eagle Inn', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 125, 'mention_end': 134, 'sent_pos': 3}, {'mention_pos': 7, 'mention': 'Stalybridge', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 139, 'mention_end': 150, 'sent_pos': 3}, {'mention_pos': 8, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 75, 'mention_end': 92, 'sent_pos': 4}, {'mention_pos': 9, 'mention': 'Old Court House', 'entity_type': 'BUILDING', 'wkpd_url': '*', 
'wkdt_qid': 'NIL', 'mention_start': 114, 'mention_end': 129, 'sent_pos': 4}, {'mention_pos': 10, 'mention': 'Manor of Stayley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 236, 'mention_end': 252, 'sent_pos': 4}, {'mention_pos': 11, 'mention': 'Ashton-under-Lyne', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 0, 'mention_end': 17, 'sent_pos': 6}]	Ashton-under-Lyne	1860	1863	0.8837	0.1619	Ashton and Stalybridge Reporter, etc.	967	Q659803	test	dev	left_out	test	dev	train	train	train	train	train	train	train	train
+4939308	[{'sentence_pos': 1, 'sentence_text': 'THE STALYBRIDGE “EMPORIUM'}, {'sentence_pos': 2, 'sentence_text': '.'}, {'sentence_pos': 3, 'sentence_text': '”  * 4 During the last year two shops have been erected in Melbourne-street by Mr. '}, {'sentence_pos': 4, 'sentence_text': 'Napoleon Ives, who give them the above name. '}, {'sentence_pos': 5, 'sentence_text': 'On Thursday evening the workmen employed thereon, and others, to the number of forty, sat dowu to a spread of geese, turkey, and other good things, at the house of Mr. '}, {'sentence_pos': 6, 'sentence_text': 'Bray, Dog and Partridge, Market-street. '}, {'sentence_pos': 7, 'sentence_text': 'Afterwards Mr. '}, {'sentence_pos': 8, 'sentence_text': 'Napoleon Ives, who paid for the dinner, occupied the chair, and Mr. '}, {'sentence_pos': 9, 'sentence_text': 'Haigh France the vice-chair.  '}, {'sentence_pos': 10, 'sentence_text': 'The Chairman, in his opening remarks, said they had assembled in accordance with a good old custom, which brought together all who had been engaged in the erection of buildings. '}, {'sentence_pos': 11, 'sentence_text': 'It was one which had been almost extinguished, but he had always thought if ever it should be his fortune to erect any building, he would give a supper to the workmen engaged in its erection. '}, {'sentence_pos': 12, 'sentence_text': 'After complimenting the workmen on having done their duty to the two shops, he alluded to the dangers connected with the building trade from the excavator to the slater, and said that no buildings in town surpassed those which had led to that night’s gathering in drainage and other sanitory arrangements. '}, {'sentence_pos': 13, 'sentence_text': 'Every aperture was properly trapped, an important considei ation, seeing that the medical men of all large towns declared that the great causes of disease were impure water and unwholesome stenches. 
'}, {'sentence_pos': 14, 'sentence_text': 'Another important matter ip connection with houses was baths, without which no home could be considered complete. '}, {'sentence_pos': 15, 'sentence_text': 'He had, therefore, erected baths in each of the shops, which could be supplied with either hot or cold water. '}, {'sentence_pos': 16, 'sentence_text': 'In conclusion, he proposed the toast of “ The Queen,” and said he hoped she might live long to reign over them, and that her future might be as bright if not more brilliant than the past. '}, {'sentence_pos': 17, 'sentence_text': 'The toast was received with musical honours. '}, {'sentence_pos': 18, 'sentence_text': 'Mr. '}, {'sentence_pos': 19, 'sentence_text': 'Joseph Turner proposed “The Prince and Princess of Wales, and the rest of the royal family,” after which Mr. '}, {'sentence_pos': 20, 'sentence_text': 'Chabnock gave, with an appropriate address, “The Army, Navy, and Volunteers.” '}, {'sentence_pos': 21, 'sentence_text': 'He said Englishmen were proud of their army and navy, and often referred to their deeds in the most patriotic language. '}, {'sentence_pos': 22, 'sentence_text': 'He felt sure that if the services of the volunteers should be required, they would worthily emulate the deeds of old. '}, {'sentence_pos': 23, 'sentence_text': 'In conclusion, he gave a composition of his own, entitled “Defence, not defiance.” '}, {'sentence_pos': 24, 'sentence_text': 'The toast was well received, the Chairman speaking very highly of the volunteers of the country, and pointing out that, if properly managed, they would very much tend to a decrease in the annual military and naval expenditure of the country. '}, {'sentence_pos': 25, 'sentence_text': 'Mr, Henbt Jeffreys proposed “Prosperity to the town and trade of Stalybridge, with a few appropriate remarks. '}, {'sentence_pos': 26, 'sentence_text': 'After it had been duly drunk, the Chairman responded. 
'}, {'sentence_pos': 27, 'sentence_text': 'He referred to bad trade in general, and to that ef the bmild*'}]	[{'mention_pos': 0, 'mention': 'STALYBRIDGE', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 4, 'mention_end': 15, 'sent_pos': 1}, {'mention_pos': 1, 'mention': 'Melbourne-street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 59, 'mention_end': 75, 'sent_pos': 3}, {'mention_pos': 2, 'mention': 'Dog and Partridge', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 6, 'mention_end': 23, 'sent_pos': 6}, {'mention_pos': 3, 'mention': 'Market-street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 25, 'mention_end': 38, 'sent_pos': 6}, {'mention_pos': 4, 'mention': 'Stalybridge', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Stalybridge', 'wkdt_qid': 'Q1398653', 'mention_start': 65, 'mention_end': 76, 'sent_pos': 25}]	Ashton-under-Lyne	1860	1869	0.89	0.1803	The Ashton Reporter.	968	Q659803	train	train	train	test	dev	train	train	train	train	train	train	train	train
+8199709	[{'sentence_pos': 1, 'sentence_text': 'NOTICE.'}, {'sentence_pos': 2, 'sentence_text': '—REGULAR WEEKLY ATTENDANCE, TEETH. '}, {'sentence_pos': 3, 'sentence_text': 'TEETH.  '}, {'sentence_pos': 4, 'sentence_text': 'Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o’clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. '}, {'sentence_pos': 5, 'sentence_text': 'Bostock, Chemist.  '}, {'sentence_pos': 6, 'sentence_text': 'They continue to suppy their unrivalled MINERAL TEETH and ARTIFICIAL GUMS, which restore both the appearance of natural teeth and their usefulness in mastication. '}, {'sentence_pos': 7, 'sentence_text': 'Their method of fixing them defies detection and dispenses with all painful operations. '}, {'sentence_pos': 8, 'sentence_text': 'Mersrs. '}, {'sentence_pos': 9, 'sentence_text': 'Molloy’s method of applying the PATENT GUMCOLOURED VULCANITE restores the deficiency of the gums and teeth, the natural appearance of the featu-es, and is particularly applicable in those cases in which from old age or long loss of the teeth the gums have become sarnnk or wasted. '}, {'sentence_pos': 10, 'sentence_text': 'By theuseof this perfectly incorrodible, light, and flexible material, from one to a com pie e set of aitifioial teeth may be worn with the greatest comfort, perfo-ming all the functions of natu-ai teeth without causing the slightest pain or inconvenience, and can be removed and replaced with the greatest ease. '}, {'sentence_pos': 11, 'sentence_text': 'Tneir WHITE ENAMEL restores black and decayed teeth to tneir original whiteness, prevents toothache, and makes a hollow tooth sound and useful for many years. '}, {'sentence_pos': 12, 'sentence_text': 'Terms—A single teoth from ss.  
'}, {'sentence_pos': 13, 'sentence_text': 'Attendance every Saturday at 231, Stamford-street, Ashton'}]	[{'mention_pos': 0, 'mention': 'Rusholme-road', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 40, 'mention_end': 53, 'sent_pos': 4}, {'mention_pos': 1, 'mention': 'All Saints', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 60, 'mention_end': 70, 'sent_pos': 4}, {'mention_pos': 2, 'mention': 'Manchester', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Manchester', 'wkdt_qid': 'Q18125', 'mention_start': 73, 'mention_end': 83, 'sent_pos': 4}, {'mention_pos': 3, 'mention': 'Stamford-slreet', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 154, 'mention_end': 169, 'sent_pos': 4}, {'mention_pos': 4, 'mention': 'Ashton', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 171, 'mention_end': 177, 'sent_pos': 4}, {'mention_pos': 5, 'mention': 'Stamford-street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 34, 'mention_end': 49, 'sent_pos': 13}, {'mention_pos': 6, 'mention': 'Ashton', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ashton-under-Lyne', 'wkdt_qid': 'Q659803', 'mention_start': 51, 'mention_end': 57, 'sent_pos': 13}]	Ashton-under-Lyne	1860	1862	0.8747	0.1962	Ashton and Stalybridge Reporter, etc.	967	Q659803	train	train	train	test	dev	train	train	train	train	train	train	train	train
+3580760	"[{'sentence_pos': 1, 'sentence_text': 'Postscript.  '}, {'sentence_pos': 2, 'sentence_text': 'LONDON, THURSDAY, SEPTEMBER 27. '}, {'sentence_pos': 3, 'sentence_text': 'City, Twelve oClock.'}, {'sentence_pos': 4, 'sentence_text': '—The consol market opened this morning at 84A lor the account, but has since been heavy, fluctuating between 83| and 84, and has now declined to 83£ for money, and 83$ J for the account. '}, {'sentence_pos': 5, 'sentence_text': 'Four oJClock—Consols for Account, 83J. '}, {'sentence_pos': 6, 'sentence_text': 'By the Romona, steam boat, we have received accounts from Oporto to the 21st; and from the fleet, which was in latitude 37. 21., longitude 11. 37., to the 18th instant. '}, {'sentence_pos': 7, 'sentence_text': 'The two fleets remained in sight of each other, and it was expected would come to an engagement as soon as Sartorius had been joined by the vessels which he was then expecting at Oporto. '}, {'sentence_pos': 8, 'sentence_text': 'Affairs remained in nearly the same state as when the last accounts left. '}, {'sentence_pos': 9, 'sentence_text': 'He have seen a letter, of which the following is an extract, from Lieutenant-Colonel Hodges:—""On the 16th instant we had a brilliant affair. '}, {'sentence_pos': 10, 'sentence_text': 'We drove the Miguelites betore us from all parts. '}, {'sentence_pos': 11, 'sentence_text': 'My little British band crowned themselves with glory. '}, {'sentence_pos': 12, 'sentence_text': 'Never was there any thing more brave than their repulsing the enemy from the heights . they actually fled before us, and lost one field officer and 50 men. '}, {'sentence_pos': 13, 'sentence_text': 'Santa Martha commanded in person against my part of the line, and had three regiments of the line, one of volunteers, and one of militia, against 200 British and 150 Portuguese. 
1 lost one officer, Colonel Staunton, one officer wounded, two men killed, and 17 wounded."" '}, {'sentence_pos': 14, 'sentence_text': 'Frankfort Fair—Sept. 14—The first week of our corn fair this year has not otTered any satisfactory result, and we have to add thatour autumn crop must be reckoned among the worst that we have yet had. '}, {'sentence_pos': 15, 'sentence_text': 'No important transactions have taken place as yet in any article. '}, {'sentence_pos': 16, 'sentence_text': 'We no longer observe that spirit of activity which used to reign in our streets, especially at harvest time. '}, {'sentence_pos': 17, 'sentence_text': 'The inns alone have benefited by the presence of foreign travellers, and of those who have come to our town through business. '}, {'sentence_pos': 18, 'sentence_text': 'Notwithstanding the sad state of our trade in this circumstance, it cannot be said to be more wretched than that of Offenbach, only as our merchants transport their goods from that town, when they effect the sale personally, the principal mass of money entering, consequently, into their coffers, gave a little more animation to the fair. '}, {'sentence_pos': 19, 'sentence_text': 'Smuggling is going on actively; it is even said that there are companies who insure that kind of speculation. '}, {'sentence_pos': 20, 'sentence_text': 'The value of houses and other fixtures decreases considerably in our town, while it rises at Offenbach. '}, {'sentence_pos': 21, 'sentence_text': 'The number of poor increases daily, as well on account of the bad harvest as of the exorbitant taxes which they are obliged to pay to the customs for the introduction of their merchandize. 
'}, {'sentence_pos': 22, 'sentence_text': 'Their progressive increase has engaged our Senate to take measures for the improvement of the asylums which are destined for them, for the keeping up of which the subscriptions of the philanthropic citizens are no Longer sufficient Suabian Mercury,'}]"	[{'mention_pos': 0, 'mention': 'LONDON', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/London', 'wkdt_qid': 'Q84', 'mention_start': 0, 'mention_end': 6, 'sent_pos': 2}, {'mention_pos': 1, 'mention': 'City', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/City_of_London', 'wkdt_qid': 'Q23311', 'mention_start': 0, 'mention_end': 4, 'sent_pos': 3}, {'mention_pos': 2, 'mention': 'Oporto', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Porto', 'wkdt_qid': 'Q36433', 'mention_start': 58, 'mention_end': 64, 'sent_pos': 6}, {'mention_pos': 3, 'mention': 'Oporto', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Porto', 'wkdt_qid': 'Q36433', 'mention_start': 179, 'mention_end': 185, 'sent_pos': 7}, {'mention_pos': 4, 'mention': 'Frankfort', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Frankfurt', 'wkdt_qid': 'Q1794', 'mention_start': 0, 'mention_end': 9, 'sent_pos': 14}, {'mention_pos': 5, 'mention': 'Offenbach', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Offenbach_am_Main', 'wkdt_qid': 'Q3042', 'mention_start': 116, 'mention_end': 125, 'sent_pos': 18}, {'mention_pos': 6, 'mention': 'Offenbach', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Offenbach_am_Main', 'wkdt_qid': 'Q3042', 'mention_start': 93, 'mention_end': 102, 'sent_pos': 20}]	Manchester	1830	1832	0.8929	0.1848	Manchester Courier and Lancashire General Advertiser.	206	Q18125	train	train	train	train	train	train	train	train	train	train	test	train	train
+8263166	"[{'sentence_pos': 1, 'sentence_text': 'MONTHLY AGRICULTURAL REPORT.  '}, {'sentence_pos': 2, 'sentence_text': 'The mild open weather through the month has revived those plants ot Wheat, uhieh on very light soils appeared much injured by the severity of the late frost. llieir general appearance is promising. '}, {'sentence_pos': 3, 'sentence_text': 'The markets for all kindsof Com continue depressed. '}, {'sentence_pos': 4, 'sentence_text': 'The deficiency of Turnips is severely felt in Norfolk, and odier counties, where thev depend so much upon them to top their spring beasts. 1 n the Northern districts they are generally good Coleseed for a crop is doing well. '}, {'sentence_pos': 5, 'sentence_text': 'Winter Tares and Rye. for early feed, in most parts look kindly. '}, {'sentence_pos': 6, 'sentence_text': 'Clover seed is expected to turn out a fair average crop. '}, {'sentence_pos': 7, 'sentence_text': 'On dry roils Beaus are getting in, and some early Peas alxi; bu; the heavy rains which fell in die mhidle of the mouth, will retard tlieir souring on strong lands— I/can 1 leasts are scarce, and somewhat dearer; lx it Store Sheep continue low priced. '}, {'sentence_pos': 8, 'sentence_text': 'South field for the last two or three weeks has been but thinly supplied with prime Beef, but with plenty of good"" Mutton ; House Lamb is rather scarce, aud at present dear. '}, {'sentence_pos': 9, 'sentence_text': 'Pork is reasonable. 
'}, {'sentence_pos': 10, 'sentence_text': 'Hops continue low and dull in silo, and the Wool market is rather more brisk for both long and short fleeces; but with little variation from the last mouths prices.'}]"	[{'mention_pos': 0, 'mention': 'Norfolk', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Norfolk', 'wkdt_qid': 'Q23109', 'mention_start': 46, 'mention_end': 53, 'sent_pos': 4}]	Manchester	1820	1821	0.8485	0.2334	The Manchester Mercury and Harrops General Advertiser	239	Q18125	train	train	train	train	train	train	train	train	train	test	train	train	dev
+10734579	[{'sentence_pos': 1, 'sentence_text': 'NOTICE.'}, {'sentence_pos': 2, 'sentence_text': '—How delightful to see a Lady or Gentleman’s beautiful black shining Hoot or Shoe reflecting every surrounding object in TURNER’S UNRIVALLED BLACKING. '}, {'sentence_pos': 3, 'sentence_text': 'This invaluable composition may behad at the Dorset County Chronicle Office, and of all the respectable shop, keepers in Dorchester and its vicinity, in stone bottles, at 6u.t Is., and 18d. each.  '}, {'sentence_pos': 4, 'sentence_text': 'A CH A t.I.r.NG R. 1 challenge the world to produce, if it can, A Blacking that’s equal to Turner’s Japan : For like crystal it shines, while it softens the leather. '}, {'sentence_pos': 5, 'sentence_text': 'And makes your boots proof gainst the wind and.the weather. '}, {'sentence_pos': 6, 'sentence_text': 'Why the praises of Turners Japan should I tell. '}, {'sentence_pos': 7, 'sentence_text': 'When those who have used it must know it so well ? '}, {'sentence_pos': 8, 'sentence_text': 'Why should coachmen still brush at old harness in vain, When this blacking can give it fresh lustre again! '}, {'sentence_pos': 9, 'sentence_text': 'For, without second sight, I can prophecy soon, That you will see it adopted by Foot and Dragoon ; For you scarcely can tell, when drawn up on parade, Which glitters the brightest, the boot or the blade. '}, {'sentence_pos': 10, 'sentence_text': 'The Gentlemen too, who would boast their attire. '}, {'sentence_pos': 11, 'sentence_text': 'And feel for respect so profound a desire. '}, {'sentence_pos': 12, 'sentence_text': 'May hear the fair Ladies, if Turners they uae, Cry, How charmingly polished his manners and shoes* Vc Authors and Poets who gladly engage To reform and instruct this degenerate age. '}, {'sentence_pos': 13, 'sentence_text': 'Use Turners Japan and your fame will take root. '}, {'sentence_pos': 14, 'sentence_text': 'Then your genius will shine as well as your boot. 
1 hough Davy and Home have astonished the world. '}, {'sentence_pos': 15, 'sentence_text': 'And the chemical volume of nature unfurl’d, Yet in chemistry surely there something was lacking, TUI Turner discover’d his wonderful Blacking. '}, {'sentence_pos': 16, 'sentence_text': 'Who do not feel pride in a Wellingtons name. '}, {'sentence_pos': 17, 'sentence_text': 'When the whole of the universe rings with his fame 1 So arc Turner and Wellinoion famous afar,  One the hero of Blacking, the other of War!! '}, {'sentence_pos': 18, 'sentence_text': 'Gentlemen may observe that this Composition, when wed for their Gig and Carriage Harness, after one or two application* will produce a brilliant, rich, glossy black lustre, and it the same time act as a preserver of the leather, RICHARD IX RNER.'}]	[{'mention_pos': 0, 'mention': 'Dorset', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dorset', 'wkdt_qid': 'Q23159', 'mention_start': 45, 'mention_end': 51, 'sent_pos': 3}, {'mention_pos': 1, 'mention': 'Dorchester', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Dorchester,_Dorset', 'wkdt_qid': 'Q503331', 'mention_start': 121, 'mention_end': 131, 'sent_pos': 3}]	Dorchester	1820	1825	0.8665	0.221	Dorset County Chronicle, etc.	408	Q503331	train	train	train	train	test	train	train	train	train	dev	train	dev	train
+3896074	[{'sentence_pos': 1, 'sentence_text': 'A REMONSTRANCE. '}, {'sentence_pos': 2, 'sentence_text': 'What bard art thou so apt to grace. '}, {'sentence_pos': 3, 'sentence_text': 'With poetrys pure breath, The icatterer of the human race ! '}, {'sentence_pos': 4, 'sentence_text': 'The trafficker in death ! '}, {'sentence_pos': 5, 'sentence_text': 'Are plunderd shrine, and midnight chain— A shrieking citys blaze— And beauty, in polution slain, The themes for poets praise ? '}, {'sentence_pos': 6, 'sentence_text': 'Marengos hero then recall! '}, {'sentence_pos': 7, 'sentence_text': 'Dethrone the crowned of earth ! '}, {'sentence_pos': 8, 'sentence_text': 'Shake oer a weeping world deaths pallCheer Horrors brood to birth ! '}, {'sentence_pos': 9, 'sentence_text': 'While startled bones of millions rise All ghastly as they stood When thunderous battle mock d the skies, And lained down human blood! '}, {'sentence_pos': 10, 'sentence_text': 'Whilst hoa y heads, all stark and gashed, Throng shore and town remote, As when the Gallic armies plashed In carnage to the throat! '}, {'sentence_pos': 11, 'sentence_text': 'His meteor-sceptre pledge once more; Napoleon to the van I Come, quaff the reeking cup of gore, And shout for slaughter !'}, {'sentence_pos': 12, 'sentence_text': '—Man. '}, {'sentence_pos': 13, 'sentence_text': 'Another Austerlitz demand, Another Jena claim; And desolate the groaning lan 1 To write one despots fame ! '}, {'sentence_pos': 14, 'sentence_text': 'Let cen the pyramids afford A verse for his renown : But speak not of brave Sidneys sword, That swept the invader down I Sing not of Moscows flaming tide— The fiery brands which hurled The chainer of the nations wide, The scourfer of the world. '}, {'sentence_pos': 15, 'sentence_text': 'With rout and havoc from their shore ! '}, {'sentence_pos': 16, 'sentence_text': 'To Cossack hate consigned :— Famine and frozen wastes before, Bones and the wolf behind. 
'}, {'sentence_pos': 17, 'sentence_text': 'Let net Trafalgars chief, who died, A moments thought beguile; Nor laud with British heart and pride The Baltic and the Nile! '}, {'sentence_pos': 18, 'sentence_text': 'Leave Nelsons glorious flag unsung, And Wellington unwreathed; Their fame with which all Europe rung, For his— whod best neer breathed ! '}, {'sentence_pos': 19, 'sentence_text': 'And gloze the tyrants guilty mood, And wail his hapless cause: That Sylla—in his thirst for blood ! '}, {'sentence_pos': 20, 'sentence_text': 'That Draco—in his laws ! '}, {'sentence_pos': 21, 'sentence_text': 'Mourn him who to the conscript gave HeHrt-broken France a prey; And sill could outraged nature brave With homicidal sway ! '}, {'sentence_pos': 22, 'sentence_text': 'Yes, wake the throb of sympathyBid maudlin tears reveal How much men grieve for Europe freeHow miss the tyrants steel! '}, {'sentence_pos': 23, 'sentence_text': 'And make the conscious sea blush gore In shame for Nelsons land ; Make earth, revolting, lift once more Her blood-accusing hand ! 
'}, {'sentence_pos': 24, 'sentence_text': 'CHARLES SWAIN.'}]	[{'mention_pos': 0, 'mention': 'Austerlitz', 'entity_type': 'OTHER', 'wkpd_url': 'https://en.wikipedia.org/wiki/Battle_of_Austerlitz', 'wkdt_qid': 'Q134114', 'mention_start': 8, 'mention_end': 18, 'sent_pos': 13}, {'mention_pos': 1, 'mention': 'Jena', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Jena', 'wkdt_qid': 'Q3150', 'mention_start': 35, 'mention_end': 39, 'sent_pos': 13}, {'mention_pos': 2, 'mention': 'Moscows', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Moscow', 'wkdt_qid': 'Q649', 'mention_start': 133, 'mention_end': 140, 'sent_pos': 14}, {'mention_pos': 3, 'mention': 'Trafalgars', 'entity_type': 'OTHER', 'wkpd_url': 'https://en.wikipedia.org/wiki/Battle_of_Trafalgar', 'wkdt_qid': 'Q171416', 'mention_start': 8, 'mention_end': 18, 'sent_pos': 17}, {'mention_pos': 4, 'mention': 'The Baltic', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Baltic_Sea', 'wkdt_qid': 'Q545', 'mention_start': 101, 'mention_end': 111, 'sent_pos': 17}, {'mention_pos': 5, 'mention': 'Nile', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Nile', 'wkdt_qid': 'Q3392', 'mention_start': 120, 'mention_end': 124, 'sent_pos': 17}, {'mention_pos': 6, 'mention': 'Europe', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Europe', 'wkdt_qid': 'Q46', 'mention_start': 89, 'mention_end': 95, 'sent_pos': 18}, {'mention_pos': 7, 'mention': 'France', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/France', 'wkdt_qid': 'Q142', 'mention_start': 49, 'mention_end': 55, 'sent_pos': 21}, {'mention_pos': 8, 'mention': 'Europe', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Europe', 'wkdt_qid': 'Q46', 'mention_start': 80, 'mention_end': 86, 'sent_pos': 22}]	Manchester	1830	1839	0.883	0.1934	Manchester Courier and Lancashire General Advertiser.	
206	Q18125	train	train	train	train	train	train	train	train	train	train	test	train	train
+3691199	[{'sentence_pos': 1, 'sentence_text': 'CHURCHES AND CHAPELS.  '}, {'sentence_pos': 2, 'sentence_text': 'The incorporated society for promoting the enlargement, building, and repairing of churches and chapels, held their nineteenth annual general committee on Friday week, at their chambers in St. Martins-place'}, {'sentence_pos': 3, 'sentence_text': '; the Archbishop of Canterbury in the chair. '}, {'sentence_pos': 4, 'sentence_text': 'The Bishop of Durham, the Bishop of Winchester, the Bishop of Gloucester and Bristol, the Bishop of Hereford, the Bishop of Bangor, the Dean of Norwich, the Rev. '}, {'sentence_pos': 5, 'sentence_text': 'Sir Henry Dukenfield, Bart., Lord Kenyon, and many other distinguished clergymen and laymen, were also present. '}, {'sentence_pos': 6, 'sentence_text': 'The report of the committee stated that they felt great satisfaction and thankfulness towards the Giver of all Good in being able to announce that, during the past year, the society had exerted itself with undiminished activity and success. '}, {'sentence_pos': 7, 'sentence_text': 'Their exhausted funds had been replenished to a degree even beyond their anticipations by the collections made throughout the country under the authority of the kings letter. '}, {'sentence_pos': 8, 'sentence_text': 'All the returns had not yet been received, but those which had, had advised them of contributions to the amount of £34,000, which exceeded the collection under the same authority in 1834, by £3,000, and which was still more gratifying, as, during the last year, £116,000 had been subscribed for the erection of additional churches in the metropolis. '}, {'sentence_pos': 9, 'sentence_text': 'During the last year 188 applications had been made, and the grants had been 108, both exceeding those of any former year, the latter by 35. '}, {'sentence_pos': 10, 'sentence_text': 'The money granted had been £21,872, being £700 above any former year. 
'}, {'sentence_pos': 11, 'sentence_text': 'The sittings granted had been 41,710, being 7,000 above any precedent; and of those 28,872 were free and unappropriated. '}, {'sentence_pos': 12, 'sentence_text': 'The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. '}, {'sentence_pos': 13, 'sentence_text': 'Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. '}, {'sentence_pos': 14, 'sentence_text': 'AmjtJgst their contributions was the munificent sum of 18,000 from Mr. '}, {'sentence_pos': 15, 'sentence_text': 'George Davenport, of Lime-street, London. '}, {'sentence_pos': 16, 'sentence_text': 'Since their institution in 1818 the society had expended £220,731, thus insuring 354,925 additional sittings, of which 262,366 were free and unappropriated. '}, {'sentence_pos': 17, 'sentence_text': 'The society, therefore, looked with joy to the past, and to the future with the confident hope that they might go on and prosper.'}]	[{'mention_pos': 0, 'mention': 'St. 
Martins-place', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 189, 'mention_end': 206, 'sent_pos': 2}, {'mention_pos': 1, 'mention': 'Bath', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Bath,_Somerset', 'wkdt_qid': 'Q22889', 'mention_start': 62, 'mention_end': 66, 'sent_pos': 13}, {'mention_pos': 2, 'mention': 'Wells', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Wells,_Somerset', 'wkdt_qid': 'Q212283', 'mention_start': 71, 'mention_end': 76, 'sent_pos': 13}, {'mention_pos': 3, 'mention': 'Winchester', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Winchester', 'wkdt_qid': 'Q172157', 'mention_start': 78, 'mention_end': 88, 'sent_pos': 13}, {'mention_pos': 4, 'mention': 'Exeter', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Exeter', 'wkdt_qid': 'Q134672', 'mention_start': 90, 'mention_end': 96, 'sent_pos': 13}, {'mention_pos': 5, 'mention': 'Cleveland', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Cleveland,_England', 'wkdt_qid': 'Q24651706', 'mention_start': 102, 'mention_end': 111, 'sent_pos': 13}, {'mention_pos': 6, 'mention': 'England', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/England', 'wkdt_qid': 'Q21', 'mention_start': 229, 'mention_end': 236, 'sent_pos': 13}, {'mention_pos': 7, 'mention': 'Lime-street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 21, 'mention_end': 32, 'sent_pos': 15}, {'mention_pos': 8, 'mention': 'London', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/London', 'wkdt_qid': 'Q84', 'mention_start': 34, 'mention_end': 40, 'sent_pos': 15}]	Manchester	1830	1837	0.92	0.1529	Manchester Courier and Lancashire General Advertiser.	206	Q18125	train	train	train	train	train	train	train	train	train	train	test	train	train
+12275	[{'sentence_pos': 1, 'sentence_text': 'LYND H UR ST.  '}, {'sentence_pos': 2, 'sentence_text': 'CAPITAL RUN WITH THE New FOREST Fox HOUND. '}, {'sentence_pos': 3, 'sentence_text': '—On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. '}, {'sentence_pos': 4, 'sentence_text': '%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. '}, {'sentence_pos': 5, 'sentence_text': 'He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction • distance of five miles to Over Fields ; then took • backward direction to Holme !Jill'}, {'sentence_pos': 6, 'sentence_text': ', thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. '}, {'sentence_pos': 7, 'sentence_text': 'RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.— We regret to announce the resignation of Mr. fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. 
'}, {'sentence_pos': 8, 'sentence_text': 'It should be remembered that Mr. '}, {'sentence_pos': 9, 'sentence_text': 'Timson accepted the mastership at a time when no other gentleman could be prevailed upon to do so ; it is therefore to be hoped that the members of the hunt will testify their feeling of gratitude and respect either in the shape of a testimonial or farewell banquet. '}, {'sentence_pos': 10, 'sentence_text': 'Mr. '}, {'sentence_pos': 11, 'sentence_text': 'Timson will be succeeded by Captain W. '}, {'sentence_pos': 12, 'sentence_text': 'Fdornat.  '}, {'sentence_pos': 13, 'sentence_text': 'SHIPPING INTELLIGENCr:.  '}, {'sentence_pos': 14, 'sentence_text': 'POOLE.'}]	[{'mention_pos': 0, 'mention': 'LYND H UR ST', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Lyndhurst,_Hampshire', 'wkdt_qid': 'Q3182986', 'mention_start': 0, 'mention_end': 12, 'sent_pos': 1}, {'mention_pos': 1, 'mention': 'New FOREST', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/New_Forest', 'wkdt_qid': 'Q277755', 'mention_start': 21, 'mention_end': 31, 'sent_pos': 2}, {'mention_pos': 2, 'mention': 'New Forest', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/New_Forest', 'wkdt_qid': 'Q277755', 'mention_start': 35, 'mention_end': 45, 'sent_pos': 3}, {'mention_pos': 3, 'mention': 'Boldrewood', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Bolderwood,_Hampshire', 'wkdt_qid': 'Q4939103', 'mention_start': 64, 'mention_end': 74, 'sent_pos': 3}, {'mention_pos': 4, 'mention': 'Ringwood', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Ringwood', 'wkdt_qid': 'Q1248943', 'mention_start': 188, 'mention_end': 196, 'sent_pos': 3}, {'mention_pos': 5, 'mention': 'Church Moor', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 41, 'mention_end': 52, 'sent_pos': 4}, {'mention_pos': 6, 'mention': 'Burley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Burley,_Hampshire', 
'wkdt_qid': 'Q3195509', 'mention_start': 84, 'mention_end': 90, 'sent_pos': 4}, {'mention_pos': 7, 'mention': 'Burley', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Burley,_Hampshire', 'wkdt_qid': 'Q3195509', 'mention_start': 101, 'mention_end': 107, 'sent_pos': 4}, {'mention_pos': 8, 'mention': 'Beech Beds', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 147, 'mention_end': 157, 'sent_pos': 4}, {'mention_pos': 9, 'mention': 'Oakley', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 175, 'mention_end': 181, 'sent_pos': 4}, {'mention_pos': 10, 'mention': 'Buldre. wood', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Bolderwood,_Hampshire', 'wkdt_qid': 'Q4939103', 'mention_start': 30, 'mention_end': 42, 'sent_pos': 5}, {'mention_pos': 11, 'mention': 'Gillett Inclosure', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 51, 'mention_end': 68, 'sent_pos': 5}, {'mention_pos': 12, 'mention': 'Hulme Hill', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 82, 'mention_end': 92, 'sent_pos': 5}, {'mention_pos': 13, 'mention': 'Thrifty Beeches', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 97, 'mention_end': 112, 'sent_pos': 5}, {'mention_pos': 14, 'mention': 'Over Fields', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 216, 'mention_end': 227, 'sent_pos': 5}, {'mention_pos': 15, 'mention': 'Holme !Jill', 'entity_type': 'LOC', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 264, 'mention_end': 275, 'sent_pos': 5}, {'mention_pos': 16, 'mention': 'Emery Down', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Emery_Down', 'wkdt_qid': 'Q5370984', 'mention_start': 12, 'mention_end': 22, 'sent_pos': 6}, {'mention_pos': 17, 'mention': 'Minesteed Manor', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 34, 'mention_end': 49, 
'sent_pos': 6}, {'mention_pos': 18, 'mention': 'Notherwood', 'entity_type': 'BUILDING', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 75, 'mention_end': 85, 'sent_pos': 6}, {'mention_pos': 19, 'mention': 'New FOREST', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/New_Forest', 'wkdt_qid': 'Q277755', 'mention_start': 42, 'mention_end': 52, 'sent_pos': 7}, {'mention_pos': 20, 'mention': 'New Forest', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/New_Forest', 'wkdt_qid': 'Q277755', 'mention_start': 146, 'mention_end': 156, 'sent_pos': 7}, {'mention_pos': 21, 'mention': 'POOLE', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Poole', 'wkdt_qid': 'Q203349', 'mention_start': 0, 'mention_end': 5, 'sent_pos': 14}]	Poole	1860	1860	0.879	0.1578	The Poole and South-Western Herald, etc.	2325	Q203349	train	train	train	train	train	train	train	train	train	train	dev	train	test
+12670	[{'sentence_pos': 1, 'sentence_text': 'NOTICE.  '}, {'sentence_pos': 2, 'sentence_text': 'THE STAR, Political, Naval, Military, I Literary, and Commercial Intelligencer, and General Advertiser. '}, {'sentence_pos': 3, 'sentence_text': 'Established in 1813.'}, {'sentence_pos': 4, 'sentence_text': '—This paper which is published every Tuesday, Thursday, and Saturday evening, contains, in addition to ectracts from the British and Foreign newspapers of articles of political and general intelligence, copious and carefully-selected information as to all naval, military, and commercial affairs, and especially that which relates to the trade and commerce of the Channel Islands. '}, {'sentence_pos': 5, 'sentence_text': 'The following are the subscription prices to The Star, payable quarterly :-For three numbers per week For two do. do. '}, {'sentence_pos': 6, 'sentence_text': 'For one do. do. 2s. do. '}, {'sentence_pos': 7, 'sentence_text': 'Single numbers, 2d. each. '}, {'sentence_pos': 8, 'sentence_text': 'Subscribers in the United Kingdom will be charged on the following scale, in British money, including pre-payment and posting, payable in advance : For three numbers per week....9s. 9d. per quarter For two do. do. ....Bs. '}, {'sentence_pos': 9, 'sentence_text': 'Bd. do. '}, {'sentence_pos': 10, 'sentence_text': 'For one do. do. ....3s. 4d. do. '}, {'sentence_pos': 11, 'sentence_text': 'The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address—No. 
10, Bordage-Street, Guernsey.'}]	[{'mention_pos': 0, 'mention': 'Channel Islands', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Channel_Islands', 'wkdt_qid': 'Q42314', 'mention_start': 364, 'mention_end': 379, 'sent_pos': 4}, {'mention_pos': 1, 'mention': 'United Kingdom', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/United_Kingdom', 'wkdt_qid': 'Q145', 'mention_start': 19, 'mention_end': 33, 'sent_pos': 8}, {'mention_pos': 2, 'mention': 'Bordage-Street', 'entity_type': 'STREET', 'wkpd_url': '*', 'wkdt_qid': 'NIL', 'mention_start': 177, 'mention_end': 191, 'sent_pos': 11}, {'mention_pos': 3, 'mention': 'Guernsey', 'entity_type': 'LOC', 'wkpd_url': 'https://en.wikipedia.org/wiki/Guernsey', 'wkdt_qid': 'Q3311985', 'mention_start': 193, 'mention_end': 201, 'sent_pos': 11}]	Poole	1860	1860	0.8953	0.1361	The Poole and South-Western Herald, etc.	2325	Q203349	dev	train	test	train	train	train	train	train	train	train	dev	train	test
diff --git a/tests/sample_files/experiments/outputs/data/lwm/ner_fine_dev.json b/tests/sample_files/experiments/outputs/data/lwm/ner_fine_dev.json
new file mode 100644
index 00000000..3532a1aa
--- /dev/null
+++ b/tests/sample_files/experiments/outputs/data/lwm/ner_fine_dev.json
@@ -0,0 +1,41 @@
+{"id":"3896074_13","ner_tags":["O","B-OTHER","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Another","Austerlitz","demand",",","Another","Jena","claim",";","And","desolate","the","groaning","lan","1","To","write","one","despots","fame","!"]}
+{"id":"3896074_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Whilst","hoa","y","heads",",","all","stark","and","gashed",",","Throng","shore","and","town","remote",",","As","when","the","Gallic","armies","plashed","In","carnage","to","the","throat","!"]}
+{"id":"3580760_19","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Smuggling","is","going","on","actively",";","it","is","even","said","that","there","are","companies","who","insure","that","kind","of","speculation","."]}
+{"id":"12275_11","ner_tags":["O","O","O","O","O","O","O","O"],"tokens":["Timson","will","be","succeeded","by","Captain","W","."]}
+{"id":"12275_5","ner_tags":["O","O","O","O","O","O","O","B-LOC","I-LOC","I-LOC","O","O","B-LOC","I-LOC","O","O","O","B-LOC","I-LOC","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","B-LOC","I-LOC","I-LOC"],"tokens":["He","then","made","a","turn","back","over","Buldre",".","wood","Hill","to","Gillett","Inclosure",",","and","through","Hulme","Hill",",","to","Thrifty","Beeches",";","here","he","again","turned","to","the","lett",",","sod","ran","almost","in","a","straight","direction","\u2022","distance","of","five","miles","to","Over","Fields",";","then","took","\u2022","backward","direction","to","Holme","!","Jill"]}
+{"id":"3691199_7","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Their","exhausted","funds","had","been","replenished","to","a","degree","even","beyond","their","anticipations","by","the","collections","made","throughout","the","country","under","the","authority","of","the","kings","letter","."]}
+{"id":"3896074_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Are","plunderd","shrine",",","and","midnight","chain","\u2014","A","shrieking","citys","blaze","\u2014","And","beauty",",","in","polution","slain",",","The","themes","for","poets","praise","?"]}
+{"id":"4939308_12","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["After","complimenting","the","workmen","on","having","done","their","duty","to","the","two","shops",",","he","alluded","to","the","dangers","connected","with","the","building","trade","from","the","excavator","to","the","slater",",","and","said","that","no","buildings","in","town","surpassed","those","which","had","led","to","that","night","\u2019","s","gathering","in","drainage","and","other","sanitory","arrangements","."]}
+{"id":"12670_7","ner_tags":["O","O","O","O","O","O","O"],"tokens":["Single","numbers",",","2d",".","each","."]}
+{"id":"12275_3","ner_tags":["O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O"],"tokens":["\u2014","On","Thursday",",","the","9th","instant",",","the","New","Forest","fox","hounds","met","at","Boldrewood",",","arid","drew","Holme","Hill","Inclousure",",","where","the","famous","bitch","pock","soon","winded","a","Inc","old","fox",",","who","stole","away","over","the","Ringwood","rued","to","Gillett","Inclusure","."]}
+{"id":"3896074_7","ner_tags":["O","O","O","O","O","O"],"tokens":["Dethrone","the","crowned","of","earth","!"]}
+{"id":"3938653_12","ner_tags":["O","O"],"tokens":["Mr","."]}
+{"id":"3580760_5","ner_tags":["O","O","O","O","O","O","O"],"tokens":["Four","oJClock\u2014Consols","for","Account",",","83J","."]}
+{"id":"4939308_13","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Every","aperture","was","properly","trapped",",","an","important","considei","ation",",","seeing","that","the","medical","men","of","all","large","towns","declared","that","the","great","causes","of","disease","were","impure","water","and","unwholesome","stenches","."]}
+{"id":"12670_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["For","one","do",".","do",".",".",".",".",".3s",".","4d",".","do","."]}
+{"id":"4939308_23","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["In","conclusion",",","he","gave","a","composition","of","his","own",",","entitled","\u201c","Defence",",","not","defiance",".","\u201d"]}
+{"id":"12670_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-STREET","O","B-LOC","O"],"tokens":["The","Star","(","or","three","numbers","folded","together",")","can","be","forwarded","by","post",",","to","any","part","of","the","United","Kingdom","or","France",",","on","allicing","a","postage","stamp","of","one","PENNI",":","sir","Address\u2014No",".","10",",","Bordage-Street",",","Guernsey","."]}
+{"id":"10734579_11","ner_tags":["O","O","O","O","O","O","O","O","O"],"tokens":["And","feel","for","respect","so","profound","a","desire","."]}
+{"id":"12275_14","ner_tags":["B-LOC","O"],"tokens":["POOLE","."]}
+{"id":"3691199_13","ner_tags":["O","O","O","O","O","O","O","O","B-LOC","O","B-LOC","O","B-LOC","O","B-LOC","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O"],"tokens":["Contributions","had","been","received","from","diocesan","associations","in","Bath","and","Wells",",","Winchester",",","Exeter",",","and","Cleveland",",","which","were","gratifying","proofs","of","the","increasing","desire","to","relieve","the","spiritual","wants","of","the","people","of","the","Church","of","England","."]}
+{"id":"8199709_8","ner_tags":["O","O"],"tokens":["Mersrs","."]}
+{"id":"8263166_2","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","mild","open","weather","through","the","month","has","revived","those","plants","ot","Wheat",",","uhieh","on","very","light","soils","appeared","much","injured","by","the","severity","of","the","late","frost",".","llieir","general","appearance","is","promising","."]}
+{"id":"4939308_24","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","toast","was","well","received",",","the","Chairman","speaking","very","highly","of","the","volunteers","of","the","country",",","and","pointing","out","that",",","if","properly","managed",",","they","would","very","much","tend","to","a","decrease","in","the","annual","military","and","naval","expenditure","of","the","country","."]}
+{"id":"3896074_6","ner_tags":["O","O","O","O","O"],"tokens":["Marengos","hero","then","recall","!"]}
+{"id":"3580760_20","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O"],"tokens":["The","value","of","houses","and","other","fixtures","decreases","considerably","in","our","town",",","while","it","rises","at","Offenbach","."]}
+{"id":"10734579_2","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["\u2014","How","delightful","to","see","a","Lady","or","Gentleman","\u2019","s","beautiful","black","shining","Hoot","or","Shoe","reflecting","every","surrounding","object","in","TURNER","\u2019","S","UNRIVALLED","BLACKING","."]}
+{"id":"8263166_9","ner_tags":["O","O","O","O"],"tokens":["Pork","is","reasonable","."]}
+{"id":"3896074_14","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Let","cen","the","pyramids","afford","A","verse","for","his","renown",":","But","speak","not","of","brave","Sidneys","sword",",","That","swept","the","invader","down","I","Sing","not","of","Moscows","flaming","tide","\u2014","The","fiery","brands","which","hurled","The","chainer","of","the","nations","wide",",","The","scourfer","of","the","world","."]}
+{"id":"3691199_12","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","grants","of","last","year","were","deemed","particularly","important",",","as","while","the","society","had","contributed","towards","the","increase","of","accommodation","in","many","of","the","present","churches","they","had","contributed","to","the","erection","of","forty-one","additional","churches","and","chapels",",","and","the","rebuilding","of","sixteen","others","."]}
+{"id":"12670_6","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["For","one","do",".","do",".","2s",".","do","."]}
+{"id":"10734579_3","ner_tags":["O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["This","invaluable","composition","may","behad","at","the","Dorset","County","Chronicle","Office",",","and","of","all","the","respectable","shop",",","keepers","in","Dorchester","and","its","vicinity",",","in","stone","bottles",",","at","6u.t","Is",".",",","and","18d",".","each","."]}
+{"id":"12670_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","following","are","the","subscription","prices","to","The","Star",",","payable","quarterly",":","-","For","three","numbers","per","week","For","two","do",".","do","."]}
+{"id":"10734579_12","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["May","hear","the","fair","Ladies",",","if","Turners","they","uae",",","Cry",",","How","charmingly","polished","his","manners","and","shoes","*","Vc","Authors","and","Poets","who","gladly","engage","To","reform","and","instruct","this","degenerate","age","."]}
+{"id":"4939308_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["On","Thursday","evening","the","workmen","employed","thereon",",","and","others",",","to","the","number","of","forty",",","sat","dowu","to","a","spread","of","geese",",","turkey",",","and","other","good","things",",","at","the","house","of","Mr","."]}
+{"id":"8263166_6","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Clover","seed","is","expected","to","turn","out","a","fair","average","crop","."]}
+{"id":"3580760_6","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["By","the","Romona",",","steam","boat",",","we","have","received","accounts","from","Oporto","to","the","21st",";","and","from","the","fleet",",","which","was","in","latitude","37",".","21",".",",","longitude","11",".","37",".",",","to","the","18th","instant","."]}
+{"id":"3938653_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-BUILDING","I-BUILDING","O","B-BUILDING","I-BUILDING","I-BUILDING","O"],"tokens":["In","the","afternoon","their","Royal","Highnesses","went","a","short","yachting","excursion","in","the","Earl","of","Edgecunibes","yacht",";","and","in","the","evening","the","Prince","dined","with","Viscount","Templetown",",","Commander-inChief","of","the","Western","District",",","at","the","Government","House",",","Mo","ant","Wise","."]}
+{"id":"8263166_3","ner_tags":["O","O","O","O","O","O","O","O","O"],"tokens":["The","markets","for","all","kindsof","Com","continue","depressed","."]}
+{"id":"10734579_15","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["And","the","chemical","volume","of","nature","unfurl","\u2019","d",",","Yet","in","chemistry","surely","there","something","was","lacking",",","TUI","Turner","discover","\u2019","d","his","wonderful","Blacking","."]}
+{"id":"10734579_10","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["The","Gentlemen","too",",","who","would","boast","their","attire","."]}
+{"id":"8199709_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Molloy","\u2019","s","method","of","applying","the","PATENT","GUMCOLOURED","VULCANITE","restores","the","deficiency","of","the","gums","and","teeth",",","the","natural","appearance","of","the","featu-es",",","and","is","particularly","applicable","in","those","cases","in","which","from","old","age","or","long","loss","of","the","teeth","the","gums","have","become","sarnnk","or","wasted","."]}
diff --git a/tests/sample_files/experiments/outputs/data/lwm/ner_fine_train.json b/tests/sample_files/experiments/outputs/data/lwm/ner_fine_train.json
new file mode 100644
index 00000000..bcc2c70d
--- /dev/null
+++ b/tests/sample_files/experiments/outputs/data/lwm/ner_fine_train.json
@@ -0,0 +1,141 @@
+{"id":"4939308_7","ner_tags":["O","O","O"],"tokens":["Afterwards","Mr","."]}
+{"id":"8263166_7","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["On","dry","roils","Beaus","are","getting","in",",","and","some","early","Peas","alxi",";","bu",";","the","heavy","rains","which","fell","in","die","mhidle","of","the","mouth",",","will","retard","tlieir","souring","on","strong","lands","\u2014","I","\/","can","1","leasts","are","scarce",",","and","somewhat","dearer",";","lx","it","Store","Sheep","continue","low","priced","."]}
+{"id":"3691199_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["During","the","last","year","188","applications","had","been","made",",","and","the","grants","had","been","108",",","both","exceeding","those","of","any","former","year",",","the","latter","by","35","."]}
+{"id":"3896074_4","ner_tags":["O","O","O","O","O"],"tokens":["The","trafficker","in","death","!"]}
+{"id":"8199709_12","ner_tags":["O","O","O","O","O","O"],"tokens":["Terms\u2014A","single","teoth","from","ss","."]}
+{"id":"12670_2","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["THE","STAR",",","Political",",","Naval",",","Military",",","I","Literary",",","and","Commercial","Intelligencer",",","and","General","Advertiser","."]}
+{"id":"8263166_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["South","field","for","the","last","two","or","three","weeks","has","been","but","thinly","supplied","with","prime","Beef",",","but","with","plenty","of","good","\"","Mutton",";","House","Lamb","is","rather","scarce",",","aud","at","present","dear","."]}
+{"id":"3896074_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Shake","oer","a","weeping","world","deaths","pallCheer","Horrors","brood","to","birth","!"]}
+{"id":"3691199_17","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","society",",","therefore",",","looked","with","joy","to","the","past",",","and","to","the","future","with","the","confident","hope","that","they","might","go","on","and","prosper","."]}
+{"id":"4938614_4","ner_tags":["O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Martha","Wilde",",","who","was","sent","from","the","Dukinfield","court","for","obtaining","money","under","false","pretences","by","representing","at","two","pawnshops","in","Dukinfield","that","a","spurious","composition","called","coraline","beads","were","real","coral","was","discharged","."]}
+{"id":"4938614_12","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","was","immediately","conveyed","in","a","cab","to","the","Infirmary",",","where","we","understand","he","is","progressing","favourably","."]}
+{"id":"4939308_25","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O"],"tokens":["Mr",",","Henbt","Jeffreys","proposed","\u201c","Prosperity","to","the","town","and","trade","of","Stalybridge",",","with","a","few","appropriate","remarks","."]}
+{"id":"3938653_4","ner_tags":["O","O"],"tokens":["Emma","."]}
+{"id":"3896074_24","ner_tags":["O","O","O"],"tokens":["CHARLES","SWAIN","."]}
+{"id":"3896074_15","ner_tags":["O","O","O","O","O","O","O","O"],"tokens":["With","rout","and","havoc","from","their","shore","!"]}
+{"id":"10734579_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Why","should","coachmen","still","brush","at","old","harness","in","vain",",","When","this","blacking","can","give","it","fresh","lustre","again","!"]}
+{"id":"3896074_23","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["And","make","the","conscious","sea","blush","gore","In","shame","for","Nelsons","land",";","Make","earth",",","revolting",",","lift","once","more","Her","blood-accusing","hand","!"]}
+{"id":"3580760_21","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","number","of","poor","increases","daily",",","as","well","on","account","of","the","bad","harvest","as","of","the","exorbitant","taxes","which","they","are","obliged","to","pay","to","the","customs","for","the","introduction","of","their","merchandize","."]}
+{"id":"12275_10","ner_tags":["O","O"],"tokens":["Mr","."]}
+{"id":"8199709_5","ner_tags":["O","O","O","O"],"tokens":["Bostock",",","Chemist","."]}
+{"id":"4938614_1","ner_tags":["B-LOC","O"],"tokens":["DUKINFIELD","."]}
+{"id":"4938614_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Tame","Valley",",","was","engaged",",","on","Wednesday",",","in","cleaning","some","jennies","in","one","of","the","rooms","he","met","with","a","severe","accident","to","his","head",".","_","The","spinner",",","who","is","rather","deaf",",","not","knowing","that","the","lad","was","amongst","the","machinery",",","set","the","jennies","going",",","and","being","unable","to","hear","the","shouts","of","the","lad","on","account","of","the","defect","in","his","hearing",",","did","not","stop","them","until","another","man","made","signs","to","him","to","stop",",","which","he","did","instantly","."]}
+{"id":"4939308_6","ner_tags":["O","O","B-BUILDING","I-BUILDING","I-BUILDING","O","B-STREET","O"],"tokens":["Bray",",","Dog","and","Partridge",",","Market-street","."]}
+{"id":"12670_3","ner_tags":["O","O","O","O"],"tokens":["Established","in","1813","."]}
+{"id":"3580760_1","ner_tags":["O","O"],"tokens":["Postscript","."]}
+{"id":"3938653_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O"],"tokens":["The","party","consisted","principally","of","the","army","and","navy","officers","in","commission","at","the","port",",","andthe","officers","\u2022","f","the","French","squadron","lying","in","the","Sound","."]}
+{"id":"4938614_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","poor","lad","had",",","however",",","been","severely","hurt","on","the","head",",","although","no","limbs","were","injured","."]}
+{"id":"12275_8","ner_tags":["O","O","O","O","O","O","O"],"tokens":["It","should","be","remembered","that","Mr","."]}
+{"id":"3691199_16","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Since","their","institution","in","1818","the","society","had","expended","\u00a3220,731",",","thus","insuring","354,925","additional","sittings",",","of","which","262,366","were","free","and","unappropriated","."]}
+{"id":"3691199_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Sir","Henry","Dukenfield",",","Bart",".",",","Lord","Kenyon",",","and","many","other","distinguished","clergymen","and","laymen",",","were","also","present","."]}
+{"id":"3580760_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Affairs","remained","in","nearly","the","same","state","as","when","the","last","accounts","left","."]}
+{"id":"4939308_22","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","felt","sure","that","if","the","services","of","the","volunteers","should","be","required",",","they","would","worthily","emulate","the","deeds","of","old","."]}
+{"id":"4939308_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Napoleon","Ives",",","who","paid","for","the","dinner",",","occupied","the","chair",",","and","Mr","."]}
+{"id":"12275_2","ner_tags":["O","O","O","O","B-LOC","I-LOC","O","O","O"],"tokens":["CAPITAL","RUN","WITH","THE","New","FOREST","Fox","HOUND","."]}
+{"id":"3938653_8","ner_tags":["O","O","O","O","O","O","B-BUILDING","I-BUILDING","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["A","dejeuner","was","given","to-day","at","Mount","Edgecumbe","to","about","100","of","the","elite","of","the","neighbourhood",",","invited","to","meet","the","Prince","and","Princess","of","Wales","."]}
+{"id":"3896074_22","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O"],"tokens":["Yes",",","wake","the","throb","of","sympathyBid","maudlin","tears","reveal","How","much","men","grieve","for","Europe","freeHow","miss","the","tyrants","steel","!"]}
+{"id":"3691199_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","sittings","granted","had","been","41,710",",","being","7,000","above","any","precedent",";","and","of","those","28,872","were","free","and","unappropriated","."]}
+{"id":"3691199_2","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-BUILDING","I-BUILDING","I-BUILDING"],"tokens":["The","incorporated","society","for","promoting","the","enlargement",",","building",",","and","repairing","of","churches","and","chapels",",","held","their","nineteenth","annual","general","committee","on","Friday","week",",","at","their","chambers","in","St",".","Martins-place"]}
+{"id":"10734579_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O"],"tokens":["And","makes","your","boots","proof","gainst","the","wind","and.the","weather","."]}
+{"id":"3691199_3","ner_tags":["O","O","O","O","O","O","O","O","O"],"tokens":[";","the","Archbishop","of","Canterbury","in","the","chair","."]}
+{"id":"4938614_2","ner_tags":["B-LOC","O","O"],"tokens":["Knutsford","Sessions","."]}
+{"id":"3896074_20","ner_tags":["O","O","O","O","O"],"tokens":["That","Draco\u2014in","his","laws","!"]}
+{"id":"8199709_1","ner_tags":["O","O"],"tokens":["NOTICE","."]}
+{"id":"4939308_26","ner_tags":["O","O","O","O","O","O","O","O","O","O","O"],"tokens":["After","it","had","been","duly","drunk",",","the","Chairman","responded","."]}
+{"id":"3691199_1","ner_tags":["O","O","O","O"],"tokens":["CHURCHES","AND","CHAPELS","."]}
+{"id":"3691199_6","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","report","of","the","committee","stated","that","they","felt","great","satisfaction","and","thankfulness","towards","the","Giver","of","all","Good","in","being","able","to","announce","that",",","during","the","past","year",",","the","society","had","exerted","itself","with","undiminished","activity","and","success","."]}
+{"id":"8199709_7","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Their","method","of","fixing","them","defies","detection","and","dispenses","with","all","painful","operations","."]}
+{"id":"4939308_15","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","had",",","therefore",",","erected","baths","in","each","of","the","shops",",","which","could","be","supplied","with","either","hot","or","cold","water","."]}
+{"id":"3896074_17","ner_tags":["O","O","B-OTHER","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","B-LOC","O"],"tokens":["Let","net","Trafalgars","chief",",","who","died",",","A","moments","thought","beguile",";","Nor","laud","with","British","heart","and","pride","The","Baltic","and","the","Nile","!"]}
+{"id":"3580760_2","ner_tags":["B-LOC","O","O","O","O","O","O"],"tokens":["LONDON",",","THURSDAY",",","SEPTEMBER","27","."]}
+{"id":"3896074_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["His","meteor-sceptre","pledge","once","more",";","Napoleon","to","the","van","I","Come",",","quaff","the","reeking","cup","of","gore",",","And","shout","for","slaughter","!"]}
+{"id":"12275_7","ner_tags":["O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["RESIGNATION","Of","Elll","TIMSON",",","MASTEI","OF","THE","New","FOREST","Fox","HOUNDS",".","\u2014","We","regret","to","announce","the","resignation","of","Mr",".","fimson",",","the","esteemed","master","of","the","New","Forest","hourids",",","who","has","hunted","the","country","for","the","last","five","seasons",",","to","the","entire","satisfaction","of","those","gentlemen",",","members","of","the","hunt","and","others",",","who","have","been","fertunate","enough","to","witness","some","of","the","splendid","sport","which","he","has","so","often","afforded","during","his","career","as","such",";","and","although","he","relinquishes","the","mastership",",","we","earnestly","hope","that","a","continuance","of","good","health","will","enahle","Mr",".","f","imson","to","take","is","usual","prominent","place","in","the","bunting","field","for","many","years","to","come","."]}
+{"id":"12275_12","ner_tags":["O","O"],"tokens":["Fdornat","."]}
+{"id":"3896074_19","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["And","gloze","the","tyrants","guilty","mood",",","And","wail","his","hapless","cause",":","That","Sylla\u2014in","his","thirst","for","blood","!"]}
+{"id":"10734579_4","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["A","CH","A","t.I.r.NG","R",".","1","challenge","the","world","to","produce",",","if","it","can",",","A","Blacking","that","\u2019","s","equal","to","Turner","\u2019","s","Japan",":","For","like","crystal","it","shines",",","while","it","softens","the","leather","."]}
+{"id":"8199709_3","ner_tags":["O","O"],"tokens":["TEETH","."]}
+{"id":"4938614_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-BUILDING","I-BUILDING","I-BUILDING","I-BUILDING","I-BUILDING","I-BUILDING"],"tokens":["\u2014","Whilst","a","boy","named","Edwin","_","Diggle",",","14","years","of","age",",","a","pieoer","at","Mr",".","Chadwick","\u2019","s","factory"]}
+{"id":"3938653_6","ner_tags":["O","O","O","O","O","O","O"],"tokens":["THE","PRINCE","AND","PRINCESS","OF","WALES","."]}
+{"id":"4939308_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["It","was","one","which","had","been","almost","extinguished",",","but","he","had","always","thought","if","ever","it","should","be","his","fortune","to","erect","any","building",",","he","would","give","a","supper","to","the","workmen","engaged","in","its","erection","."]}
+{"id":"3580760_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","have","seen","a","letter",",","of","which","the","following","is","an","extract",",","from","Lieutenant-Colonel","Hodges",":","\u2014","\"","On","the","16th","instant","we","had","a","brilliant","affair","."]}
+{"id":"4938614_9","ner_tags":["O"],"tokens":["."]}
+{"id":"4938614_3","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O"],"tokens":["\u2014","The","servant","girl",",","Eliza","Ann","Byrom",",","who","stole","a","quantity","of","clothes","from","the","house","where","she","lodged",",","in","Dukiafield",",","was","sentenced","to","two","months","\u2019","imprisonment","."]}
+{"id":"10734579_13","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["Use","Turners","Japan","and","your","fame","will","take","root","."]}
+{"id":"10734579_18","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Gentlemen","may","observe","that","this","Composition",",","when","wed","for","their","Gig","and","Carriage","Harness",",","after","one","or","two","application","*","will","produce","a","brilliant",",","rich",",","glossy","black","lustre",",","and","it","the","same","time","act","as","a","preserver","of","the","leather",",","RICHARD","IX","RNER","."]}
+{"id":"10734579_7","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["When","those","who","have","used","it","must","know","it","so","well","?"]}
+{"id":"4939308_18","ner_tags":["O","O"],"tokens":["Mr","."]}
+{"id":"3896074_18","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O"],"tokens":["Leave","Nelsons","glorious","flag","unsung",",","And","Wellington","unwreathed",";","Their","fame","with","which","all","Europe","rung",",","For","his","\u2014","whod","best","neer","breathed","!"]}
+{"id":"4938614_6","ner_tags":["O","O","O","O","O","O"],"tokens":["Brandt","appeared","for","the","prisoner","."]}
+{"id":"12275_6","ner_tags":["O","O","O","B-LOC","I-LOC","O","O","O","B-BUILDING","I-BUILDING","O","O","O","O","O","O","B-BUILDING","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":[",","thence","to","Emery","Down",",","crowing","to","Minesteed","Manor",";","he","ther","tacked","back","to","Notherwood",",","and","from","thence","back","again","to","the","Manor",",","where",",","after","a","brilliant","run","(","Arnie","hour","and","forty-five","minutes",",","Reynold","was","compelled","to","succumb","to","his","pursuers","."]}
+{"id":"3580760_7","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O"],"tokens":["The","two","fleets","remained","in","sight","of","each","other",",","and","it","was","expected","would","come","to","an","engagement","as","soon","as","Sartorius","had","been","joined","by","the","vessels","which","he","was","then","expecting","at","Oporto","."]}
+{"id":"3896074_2","ner_tags":["O","O","O","O","O","O","O","O","O"],"tokens":["What","bard","art","thou","so","apt","to","grace","."]}
+{"id":"3938653_7","ner_tags":["B-LOC","O","O","O","O","O"],"tokens":["Plymouth",",","Thursday","Evenhji",";","."]}
+{"id":"8199709_4","ner_tags":["O","O","O","O","O","O","O","O","O","B-STREET","O","O","B-BUILDING","I-BUILDING","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-STREET","O","B-LOC","O","O","O","O","O","O"],"tokens":["Messrs","molloy",",","surgeon","dentists",",","of","112",",","Rusholme-road","(","near","All","Saints",")",",","Manchester",",","rm-y","be","consulted","every","Saturday",",","from","Ten","till","Five","o","\u2019","clock",",","at","931","Stamford-slreet",",","Ashton",",","tee","residence","of","Mr","."]}
+{"id":"4939308_3","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","B-STREET","O","O","O"],"tokens":["\u201d","*","4","During","the","last","year","two","shops","have","been","erected","in","Melbourne-street","by","Mr","."]}
+{"id":"3691199_4","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","Bishop","of","Durham",",","the","Bishop","of","Winchester",",","the","Bishop","of","Gloucester","and","Bristol",",","the","Bishop","of","Hereford",",","the","Bishop","of","Bangor",",","the","Dean","of","Norwich",",","the","Rev","."]}
+{"id":"4938614_5","ner_tags":["O","O"],"tokens":["Mr","."]}
+{"id":"3938653_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-BUILDING","I-BUILDING","I-BUILDING","O","B-LOC","O"],"tokens":["After","dinner","the","Prince","adjourned","with","the","company","to","a","grand","ball","given","by","the","united","services","at","the","Royal","William","Yard",",","Stonehouse","."]}
+{"id":"4939308_19","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Joseph","Turner","proposed","\u201c","The","Prince","and","Princess","of","Wales",",","and","the","rest","of","the","royal","family",",","\u201d","after","which","Mr","."]}
+{"id":"3938653_3","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","Qaeen",",","accompanied","by","Princess","Helena","and","Prince","Albert","Victor",",","drove","oat","in","the","grounds","yesterday","morning","Princess","Helena","rode","on","horseback","in","the","afternoon",",","attended","by","the","Hon","."]}
+{"id":"3580760_17","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","inns","alone","have","benefited","by","the","presence","of","foreign","travellers",",","and","of","those","who","have","come","to","our","town","through","business","."]}
+{"id":"3691199_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","money","granted","had","been","\u00a321,872",",","being","\u00a3700","above","any","former","year","."]}
+{"id":"3896074_21","ner_tags":["O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Mourn","him","who","to","the","conscript","gave","HeHrt-broken","France","a","prey",";","And","sill","could","outraged","nature","brave","With","homicidal","sway","!"]}
+{"id":"12670_4","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O"],"tokens":["\u2014","This","paper","which","is","published","every","Tuesday",",","Thursday",",","and","Saturday","evening",",","contains",",","in","addition","to","ectracts","from","the","British","and","Foreign","newspapers","of","articles","of","political","and","general","intelligence",",","copious","and","carefully-selected","information","as","to","all","naval",",","military",",","and","commercial","affairs",",","and","especially","that","which","relates","to","the","trade","and","commerce","of","the","Channel","Islands","."]}
+{"id":"3896074_12","ner_tags":["O","O","O"],"tokens":["\u2014","Man","."]}
+{"id":"12275_13","ner_tags":["O","O","O","O"],"tokens":["SHIPPING","INTELLIGENCr",":","."]}
+{"id":"12275_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Timson","accepted","the","mastership","at","a","time","when","no","other","gentleman","could","be","prevailed","upon","to","do","so",";","it","is","therefore","to","be","hoped","that","the","members","of","the","hunt","will","testify","their","feeling","of","gratitude","and","respect","either","in","the","shape","of","a","testimonial","or","farewell","banquet","."]}
+{"id":"8263166_4","ner_tags":["O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","deficiency","of","Turnips","is","severely","felt","in","Norfolk",",","and","odier","counties",",","where","thev","depend","so","much","upon","them","to","top","their","spring","beasts",".","1","n","the","Northern","districts","they","are","generally","good","Coleseed","for","a","crop","is","doing","well","."]}
+{"id":"3896074_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["While","startled","bones","of","millions","rise","All","ghastly","as","they","stood","When","thunderous","battle","mock","d","the","skies",",","And","lained","down","human","blood","!"]}
+{"id":"10734579_1","ner_tags":["O","O"],"tokens":["NOTICE","."]}
+{"id":"8199709_11","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Tneir","WHITE","ENAMEL","restores","black","and","decayed","teeth","to","tneir","original","whiteness",",","prevents","toothache",",","and","makes","a","hollow","tooth","sound","and","useful","for","many","years","."]}
+{"id":"4939308_14","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Another","important","matter","ip","connection","with","houses","was","baths",",","without","which","no","home","could","be","considered","complete","."]}
+{"id":"3580760_10","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["We","drove","the","Miguelites","betore","us","from","all","parts","."]}
+{"id":"3938653_5","ner_tags":["O","O"],"tokens":["Lascelles","."]}
+{"id":"3896074_16","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["To","Cossack","hate","consigned",":","\u2014","Famine","and","frozen","wastes","before",",","Bones","and","the","wolf","behind","."]}
+{"id":"10734579_17","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["When","the","whole","of","the","universe","rings","with","his","fame","1","So","arc","Turner","and","Wellinoion","famous","afar",",","One","the","hero","of","Blacking",",","the","other","of","War","!","!"]}
+{"id":"4939308_21","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","said","Englishmen","were","proud","of","their","army","and","navy",",","and","often","referred","to","their","deeds","in","the","most","patriotic","language","."]}
+{"id":"10734579_9","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["For",",","without","second","sight",",","I","can","prophecy","soon",",","That","you","will","see","it","adopted","by","Foot","and","Dragoon",";","For","you","scarcely","can","tell",",","when","drawn","up","on","parade",",","Which","glitters","the","brightest",",","the","boot","or","the","blade","."]}
+{"id":"12670_1","ner_tags":["O","O"],"tokens":["NOTICE","."]}
+{"id":"10734579_16","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["Who","do","not","feel","pride","in","a","Wellingtons","name","."]}
+{"id":"8199709_13","ner_tags":["O","O","O","O","O","O","B-STREET","O","B-LOC"],"tokens":["Attendance","every","Saturday","at","231",",","Stamford-street",",","Ashton"]}
+{"id":"10734579_14","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Then","your","genius","will","shine","as","well","as","your","boot",".","1","hough","Davy","and","Home","have","astonished","the","world","."]}
+{"id":"3580760_13","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Santa","Martha","commanded","in","person","against","my","part","of","the","line",",","and","had","three","regiments","of","the","line",",","one","of","volunteers",",","and","one","of","militia",",","against","200","British","and","150","Portuguese",".","1","lost","one","officer",",","Colonel","Staunton",",","one","officer","wounded",",","two","men","killed",",","and","17","wounded",".","\""]}
+{"id":"3580760_15","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["No","important","transactions","have","taken","place","as","yet","in","any","article","."]}
+{"id":"4939308_1","ner_tags":["O","B-LOC","O","O"],"tokens":["THE","STALYBRIDGE","\u201c","EMPORIUM"]}
+{"id":"4939308_27","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["He","referred","to","bad","trade","in","general",",","and","to","that","ef","the","bmild","*"]}
+{"id":"4939308_9","ner_tags":["O","O","O","O","O"],"tokens":["Haigh","France","the","vice-chair","."]}
+{"id":"4939308_4","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["Napoleon","Ives",",","who","give","them","the","above","name","."]}
+{"id":"3580760_4","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["\u2014","The","consol","market","opened","this","morning","at","84A","lor","the","account",",","but","has","since","been","heavy",",","fluctuating","between","83","|","and","84",",","and","has","now","declined","to","83","\u00a3","for","money",",","and","83","$","J","for","the","account","."]}
+{"id":"3938653_1","ner_tags":["O","O","O","O","O"],"tokens":["THE","COURT","AND","GOVERNMENT","."]}
+{"id":"4939308_20","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Chabnock","gave",",","with","an","appropriate","address",",","\u201c","The","Army",",","Navy",",","and","Volunteers",".","\u201d"]}
+{"id":"8199709_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["By","theuseof","this","perfectly","incorrodible",",","light",",","and","flexible","material",",","from","one","to","a","com","pie","e","set","of","aitifioial","teeth","may","be","worn","with","the","greatest","comfort",",","perfo-ming","all","the","functions","of","natu-ai","teeth","without","causing","the","slightest","pain","or","inconvenience",",","and","can","be","removed","and","replaced","with","the","greatest","ease","."]}
+{"id":"12670_8","ner_tags":["O","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Subscribers","in","the","United","Kingdom","will","be","charged","on","the","following","scale",",","in","British","money",",","including","pre-payment","and","posting",",","payable","in","advance",":","For","three","numbers","per","week",".",".",".",".9s",".","9d",".","per","quarter","For","two","do",".","do",".",".",".",".",".","Bs","."]}
+{"id":"12275_1","ner_tags":["B-LOC","I-LOC","I-LOC","I-LOC","O"],"tokens":["LYND","H","UR","ST","."]}
+{"id":"8263166_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Hops","continue","low","and","dull","in","silo",",","and","the","Wool","market","is","rather","more","brisk","for","both","long","and","short","fleeces",";","but","with","little","variation","from","the","last","mouths","prices","."]}
+{"id":"3938653_14","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","right","hon",".","gentleman","is","still","confined","to","his","chamber","."]}
+{"id":"3580760_3","ner_tags":["B-LOC","O","O","O","O"],"tokens":["City",",","Twelve","oClock","."]}
+{"id":"3580760_18","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Notwithstanding","the","sad","state","of","our","trade","in","this","circumstance",",","it","cannot","be","said","to","be","more","wretched","than","that","of","Offenbach",",","only","as","our","merchants","transport","their","goods","from","that","town",",","when","they","effect","the","sale","personally",",","the","principal","mass","of","money","entering",",","consequently",",","into","their","coffers",",","gave","a","little","more","animation","to","the","fair","."]}
+{"id":"8263166_5","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Winter","Tares","and","Rye",".","for","early","feed",",","in","most","parts","look","kindly","."]}
+{"id":"3580760_16","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["We","no","longer","observe","that","spirit","of","activity","which","used","to","reign","in","our","streets",",","especially","at","harvest","time","."]}
+{"id":"3691199_14","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["AmjtJgst","their","contributions","was","the","munificent","sum","of","18,000","from","Mr","."]}
+{"id":"3938653_2","ner_tags":["B-LOC","O","O","O"],"tokens":["OSEORNE",",","THURSDA","."]}
+{"id":"3691199_15","ner_tags":["O","O","O","O","B-STREET","O","B-LOC","O"],"tokens":["George","Davenport",",","of","Lime-street",",","London","."]}
+{"id":"3580760_14","ner_tags":["B-LOC","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Frankfort","Fair\u2014Sept",".","14","\u2014","The","first","week","of","our","corn","fair","this","year","has","not","otTered","any","satisfactory","result",",","and","we","have","to","add","thatour","autumn","crop","must","be","reckoned","among","the","worst","that","we","have","yet","had","."]}
+{"id":"3896074_3","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["With","poetrys","pure","breath",",","The","icatterer","of","the","human","race","!"]}
+{"id":"3691199_8","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["All","the","returns","had","not","yet","been","received",",","but","those","which","had",",","had","advised","them","of","contributions","to","the","amount","of","\u00a334,000",",","which","exceeded","the","collection","under","the","same","authority","in","1834",",","by","\u00a33,000",",","and","which","was","still","more","gratifying",",","as",",","during","the","last","year",",","\u00a3116,000","had","been","subscribed","for","the","erection","of","additional","churches","in","the","metropolis","."]}
+{"id":"3580760_12","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Never","was","there","any","thing","more","brave","than","their","repulsing","the","enemy","from","the","heights",".","they","actually","fled","before","us",",","and","lost","one","field","officer","and","50","men","."]}
+{"id":"12670_9","ner_tags":["O","O","O","O"],"tokens":["Bd",".","do","."]}
+{"id":"10734579_6","ner_tags":["O","O","O","O","O","O","O","O","O","O"],"tokens":["Why","the","praises","of","Turners","Japan","should","I","tell","."]}
+{"id":"8263166_1","ner_tags":["O","O","O","O"],"tokens":["MONTHLY","AGRICULTURAL","REPORT","."]}
+{"id":"8199709_6","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["They","continue","to","suppy","their","unrivalled","MINERAL","TEETH","and","ARTIFICIAL","GUMS",",","which","restore","both","the","appearance","of","natural","teeth","and","their","usefulness","in","mastication","."]}
+{"id":"3580760_22","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Their","progressive","increase","has","engaged","our","Senate","to","take","measures","for","the","improvement","of","the","asylums","which","are","destined","for","them",",","for","the","keeping","up","of","which","the","subscriptions","of","the","philanthropic","citizens","are","no","Longer","sufficient","Suabian","Mercury",","]}
+{"id":"3580760_11","ner_tags":["O","O","O","O","O","O","O","O","O"],"tokens":["My","little","British","band","crowned","themselves","with","glory","."]}
+{"id":"4939308_10","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["The","Chairman",",","in","his","opening","remarks",",","said","they","had","assembled","in","accordance","with","a","good","old","custom",",","which","brought","together","all","who","had","been","engaged","in","the","erection","of","buildings","."]}
+{"id":"4939308_16","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["In","conclusion",",","he","proposed","the","toast","of","\u201c","The","Queen",",","\u201d","and","said","he","hoped","she","might","live","long","to","reign","over","them",",","and","that","her","future","might","be","as","bright","if","not","more","brilliant","than","the","past","."]}
+{"id":"3896074_1","ner_tags":["O","O","O"],"tokens":["A","REMONSTRANCE","."]}
+{"id":"4939308_2","ner_tags":["O"],"tokens":["."]}
+{"id":"4938614_7","ner_tags":["O","O","O","O","O"],"tokens":["Accident","in","a","Factory","."]}
+{"id":"12275_4","ner_tags":["O","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","O","O","O","O","B-LOC","O","O","B-LOC","O","O","O","O","O","O","O","O","B-LOC","I-LOC","O","O","O","O","B-LOC","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["%","here","he","turned","to","the","right","and","crossed","Church","Moor",",","thence","at","a","clipping","pace",",","to","Burley",";","leaving","Burley","to","the","left",",","he","pushed","forward","through","Beech","Beds","to","the","earths","at","Oakley",",","where",",","to","his","aurprse",",","he","found","no","admission","."]}
+{"id":"4939308_17","ner_tags":["O","O","O","O","O","O","O","O"],"tokens":["The","toast","was","received","with","musical","honours","."]}
+{"id":"8199709_2","ner_tags":["O","O","O","O","O","O","O"],"tokens":["\u2014","REGULAR","WEEKLY","ATTENDANCE",",","TEETH","."]}
+{"id":"3938653_13","ner_tags":["O","O","O","O","O","O","O","O","O","O","O","O","O","O","O","O"],"tokens":["Frederick","Peel","ia","in","improved","health",",","and","has","been","gradually","getting","better","since","Tuesday","."]}
diff --git a/tests/sample_files/experiments/outputs/data/lwm/rel_dev.json b/tests/sample_files/experiments/outputs/data/lwm/rel_dev.json
new file mode 100644
index 00000000..57526d2f
--- /dev/null
+++ b/tests/sample_files/experiments/outputs/data/lwm/rel_dev.json
@@ -0,0 +1 @@
+{"12670_4": [{"mention": "Channel Islands", "sent_idx": 4, "sentence": "\u2014This paper which is published every Tuesday, Thursday, and Saturday evening, contains, in addition to ectracts from the British and Foreign newspapers of articles of political and general intelligence, copious and carefully-selected information as to all naval, military, and commercial affairs, and especially that which relates to the trade and commerce of the Channel Islands. ", "ngram": "Channel Islands", "context": ["Established in 1813.", "The following are the subscription prices to The Star, payable quarterly :-For three numbers per week For two do. do. "], "pos": 364, "end_pos": 379, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q42314"]}, {"mention": "Channel Islands", "sent_idx": 4, "sentence": "\u2014This paper which is published every Tuesday, Thursday, and Saturday evening, contains, in addition to ectracts from the British and Foreign newspapers of articles of political and general intelligence, copious and carefully-selected information as to all naval, military, and commercial affairs, and especially that which relates to the trade and commerce of the Channel Islands. ", "ngram": "Channel Islands", "context": ["Established in 1813.", "The following are the subscription prices to The Star, payable quarterly :-For three numbers per week For two do. do. "], "pos": 364, "end_pos": 379, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q42314"]}], "12670_8": [{"mention": "United Kingdom", "sent_idx": 8, "sentence": "Subscribers in the United Kingdom will be charged on the following scale, in British money, including pre-payment and posting, payable in advance : For three numbers per week....9s. 9d. per quarter For two do. do. ....Bs. ", "ngram": "United Kingdom", "context": ["Single numbers, 2d. each. ", "Bd. do. 
"], "pos": 19, "end_pos": 33, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q145"]}, {"mention": "United Kingdom", "sent_idx": 8, "sentence": "Subscribers in the United Kingdom will be charged on the following scale, in British money, including pre-payment and posting, payable in advance : For three numbers per week....9s. 9d. per quarter For two do. do. ....Bs. ", "ngram": "United Kingdom", "context": ["Single numbers, 2d. each. ", "Bd. do. "], "pos": 19, "end_pos": 33, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q145"]}], "12670_11": [{"mention": "Bordage-Street", "sent_idx": 11, "sentence": "The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address\u2014No. 10, Bordage-Street, Guernsey.", "ngram": "Bordage-Street", "context": ["For one do. do. ....3s. 4d. do. ", ""], "pos": 177, "end_pos": 191, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "Guernsey", "sent_idx": 11, "sentence": "The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address\u2014No. 10, Bordage-Street, Guernsey.", "ngram": "Guernsey", "context": ["For one do. do. ....3s. 4d. do. ", ""], "pos": 193, "end_pos": 201, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q3311985"]}, {"mention": "Bordage-Street", "sent_idx": 11, "sentence": "The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address\u2014No. 10, Bordage-Street, Guernsey.", "ngram": "Bordage-Street", "context": ["For one do. do. ....3s. 4d. do. 
", ""], "pos": 177, "end_pos": 191, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "Guernsey", "sent_idx": 11, "sentence": "The Star (or three numbers folded together) can be forwarded by post, to any part of the United Kingdom or France, on allicing a postage stamp of one PENNI: sir Address\u2014No. 10, Bordage-Street, Guernsey.", "ngram": "Guernsey", "context": ["For one do. do. ....3s. 4d. do. ", ""], "pos": 193, "end_pos": 201, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q3311985"]}]}
\ No newline at end of file
diff --git a/tests/sample_files/experiments/outputs/data/lwm/rel_train.json b/tests/sample_files/experiments/outputs/data/lwm/rel_train.json
new file mode 100644
index 00000000..92e0cf03
--- /dev/null
+++ b/tests/sample_files/experiments/outputs/data/lwm/rel_train.json
@@ -0,0 +1 @@
+{"3938653_2": [{"mention": "OSEORNE", "sent_idx": 2, "sentence": "OSEORNE, THURSDA . ", "ngram": "OSEORNE", "context": ["THE COURT AND GOVERNMENT.  ", "The Qaeen, accompanied by Princess Helena and Prince Albert Victor, drove oat in the grounds yesterday morning Princess Helena rode on horseback in the afternoon, attended by the Hon. "], "pos": 0, "end_pos": 7, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": "NIL"}], "3938653_7": [{"mention": "Plymouth", "sent_idx": 7, "sentence": "Plymouth, Thursday Evenhji;. ", "ngram": "Plymouth", "context": ["THE PRINCE AND PRINCESS OF WALES. ", "A dejeuner was given to-day at Mount Edgecumbe to about 100 of the elite of the neighbourhood, invited to meet the Prince and Princess of Wales. "], "pos": 0, "end_pos": 8, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q43382"]}], "3938653_8": [{"mention": "Mount Edgecumbe", "sent_idx": 8, "sentence": "A dejeuner was given to-day at Mount Edgecumbe to about 100 of the elite of the neighbourhood, invited to meet the Prince and Princess of Wales. ", "ngram": "Mount Edgecumbe", "context": ["Plymouth, Thursday Evenhji;. ", "In the afternoon their Royal Highnesses went a short yachting excursion in the Earl of Edgecunibes yacht; and in the evening the Prince dined with Viscount Templetown, Commander-inChief of the Western District, at the Government House, Mo ant Wise. "], "pos": 31, "end_pos": 46, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "BUILDING", "gold": ["Q6920546"]}], "3938653_9": [{"mention": "Government House", "sent_idx": 9, "sentence": "In the afternoon their Royal Highnesses went a short yachting excursion in the Earl of Edgecunibes yacht; and in the evening the Prince dined with Viscount Templetown, Commander-inChief of the Western District, at the Government House, Mo ant Wise. 
", "ngram": "Government House", "context": ["A dejeuner was given to-day at Mount Edgecumbe to about 100 of the elite of the neighbourhood, invited to meet the Prince and Princess of Wales. ", "The party consisted principally of the army and navy officers in commission at the port, andthe officers \u2022f the French squadron lying in the Sound. "], "pos": 218, "end_pos": 234, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}, {"mention": "Mo ant Wise", "sent_idx": 9, "sentence": "In the afternoon their Royal Highnesses went a short yachting excursion in the Earl of Edgecunibes yacht; and in the evening the Prince dined with Viscount Templetown, Commander-inChief of the Western District, at the Government House, Mo ant Wise. ", "ngram": "Mo ant Wise", "context": ["A dejeuner was given to-day at Mount Edgecumbe to about 100 of the elite of the neighbourhood, invited to meet the Prince and Princess of Wales. ", "The party consisted principally of the army and navy officers in commission at the port, andthe officers \u2022f the French squadron lying in the Sound. "], "pos": 236, "end_pos": 247, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "BUILDING", "gold": ["Q14912878"]}], "3938653_10": [{"mention": "Sound", "sent_idx": 10, "sentence": "The party consisted principally of the army and navy officers in commission at the port, andthe officers \u2022f the French squadron lying in the Sound. ", "ngram": "Sound", "context": ["In the afternoon their Royal Highnesses went a short yachting excursion in the Earl of Edgecunibes yacht; and in the evening the Prince dined with Viscount Templetown, Commander-inChief of the Western District, at the Government House, Mo ant Wise. ", "After dinner the Prince adjourned with the company to a grand ball given by the united services at the Royal William Yard, Stonehouse. 
"], "pos": 141, "end_pos": 146, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q2333061"]}], "3938653_11": [{"mention": "Royal William Yard", "sent_idx": 11, "sentence": "After dinner the Prince adjourned with the company to a grand ball given by the united services at the Royal William Yard, Stonehouse. ", "ngram": "Royal William Yard", "context": ["The party consisted principally of the army and navy officers in commission at the port, andthe officers \u2022f the French squadron lying in the Sound. ", "Mr. "], "pos": 103, "end_pos": 121, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "BUILDING", "gold": ["Q7375014"]}, {"mention": "Stonehouse", "sent_idx": 11, "sentence": "After dinner the Prince adjourned with the company to a grand ball given by the united services at the Royal William Yard, Stonehouse. ", "ngram": "Stonehouse", "context": ["The party consisted principally of the army and navy officers in commission at the port, andthe officers \u2022f the French squadron lying in the Sound. ", "Mr. "], "pos": 123, "end_pos": 133, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q7619235"]}], "4938614_1": [{"mention": "DUKINFIELD", "sent_idx": 1, "sentence": "DUKINFIELD.  ", "ngram": "DUKINFIELD", "context": ["", "Knutsford Sessions."], "pos": 0, "end_pos": 10, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1976179"]}], "4938614_2": [{"mention": "Knutsford", "sent_idx": 2, "sentence": "Knutsford Sessions.", "ngram": "Knutsford", "context": ["DUKINFIELD.  ", "\u2014The servant girl, Eliza Ann Byrom, who stole a quantity of clothes from the house where she lodged, in Dukiafield, was sentenced to two months\u2019 imprisonment. 
"], "pos": 0, "end_pos": 9, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1470791"]}], "4938614_3": [{"mention": "Dukiafield", "sent_idx": 3, "sentence": "\u2014The servant girl, Eliza Ann Byrom, who stole a quantity of clothes from the house where she lodged, in Dukiafield, was sentenced to two months\u2019 imprisonment. ", "ngram": "Dukiafield", "context": ["Knutsford Sessions.", "Martha Wilde, who was sent from the Dukinfield court for obtaining money under false pretences by representing at two pawnshops in Dukinfield that a spurious composition called coraline beads were real coral was discharged. "], "pos": 104, "end_pos": 114, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1976179"]}], "4938614_4": [{"mention": "Dukinfield", "sent_idx": 4, "sentence": "Martha Wilde, who was sent from the Dukinfield court for obtaining money under false pretences by representing at two pawnshops in Dukinfield that a spurious composition called coraline beads were real coral was discharged. ", "ngram": "Dukinfield", "context": ["\u2014The servant girl, Eliza Ann Byrom, who stole a quantity of clothes from the house where she lodged, in Dukiafield, was sentenced to two months\u2019 imprisonment. ", "Mr. "], "pos": 36, "end_pos": 46, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1976179"]}, {"mention": "Dukinfield", "sent_idx": 4, "sentence": "Martha Wilde, who was sent from the Dukinfield court for obtaining money under false pretences by representing at two pawnshops in Dukinfield that a spurious composition called coraline beads were real coral was discharged. ", "ngram": "Dukinfield", "context": ["\u2014The servant girl, Eliza Ann Byrom, who stole a quantity of clothes from the house where she lodged, in Dukiafield, was sentenced to two months\u2019 imprisonment. ", "Mr. 
"], "pos": 131, "end_pos": 141, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1976179"]}], "4938614_8": [{"mention": "Mr. Chadwick\u2019s factory", "sent_idx": 8, "sentence": "\u2014Whilst a boy named Edwin _ Diggle, 14 years of age, a pieoer at Mr. Chadwick\u2019s factory", "ngram": "Mr. Chadwick\u2019s factory", "context": ["Accident in a Factory.", ". "], "pos": 65, "end_pos": 87, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}], "4939308_1": [{"mention": "STALYBRIDGE", "sent_idx": 1, "sentence": "THE STALYBRIDGE \u201cEMPORIUM", "ngram": "STALYBRIDGE", "context": ["", "."], "pos": 4, "end_pos": 15, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1398653"]}], "4939308_3": [{"mention": "Melbourne-street", "sent_idx": 3, "sentence": "\u201d  * 4 During the last year two shops have been erected in Melbourne-street by Mr. ", "ngram": "Melbourne-street", "context": [".", "Napoleon Ives, who give them the above name. "], "pos": 59, "end_pos": 75, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "STREET", "gold": "NIL"}], "4939308_6": [{"mention": "Dog and Partridge", "sent_idx": 6, "sentence": "Bray, Dog and Partridge, Market-street. ", "ngram": "Dog and Partridge", "context": ["On Thursday evening the workmen employed thereon, and others, to the number of forty, sat dowu to a spread of geese, turkey, and other good things, at the house of Mr. ", "Afterwards Mr. "], "pos": 6, "end_pos": 23, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}, {"mention": "Market-street", "sent_idx": 6, "sentence": "Bray, Dog and Partridge, Market-street. 
", "ngram": "Market-street", "context": ["On Thursday evening the workmen employed thereon, and others, to the number of forty, sat dowu to a spread of geese, turkey, and other good things, at the house of Mr. ", "Afterwards Mr. "], "pos": 25, "end_pos": 38, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "STREET", "gold": "NIL"}], "4939308_25": [{"mention": "Stalybridge", "sent_idx": 25, "sentence": "Mr, Henbt Jeffreys proposed \u201cProsperity to the town and trade of Stalybridge, with a few appropriate remarks. ", "ngram": "Stalybridge", "context": ["The toast was well received, the Chairman speaking very highly of the volunteers of the country, and pointing out that, if properly managed, they would very much tend to a decrease in the annual military and naval expenditure of the country. ", "After it had been duly drunk, the Chairman responded. "], "pos": 65, "end_pos": 76, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q1398653"]}], "8199709_4": [{"mention": "Rusholme-road", "sent_idx": 4, "sentence": "Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o\u2019clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. ", "ngram": "Rusholme-road", "context": ["TEETH.  ", "Bostock, Chemist.  "], "pos": 40, "end_pos": 53, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "All Saints", "sent_idx": 4, "sentence": "Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o\u2019clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. ", "ngram": "All Saints", "context": ["TEETH.  ", "Bostock, Chemist.  
"], "pos": 60, "end_pos": 70, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}, {"mention": "Manchester", "sent_idx": 4, "sentence": "Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o\u2019clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. ", "ngram": "Manchester", "context": ["TEETH.  ", "Bostock, Chemist.  "], "pos": 73, "end_pos": 83, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q18125"]}, {"mention": "Stamford-slreet", "sent_idx": 4, "sentence": "Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o\u2019clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. ", "ngram": "Stamford-slreet", "context": ["TEETH.  ", "Bostock, Chemist.  "], "pos": 154, "end_pos": 169, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "Ashton", "sent_idx": 4, "sentence": "Messrs molloy,surgeon dentists, of 112, Rusholme-road (near All Saints), Manchester, rm-y be consulted every Saturday, from Ten till Five o\u2019clock, at 931 Stamford-slreet, Ashton, tee residence of Mr. ", "ngram": "Ashton", "context": ["TEETH.  ", "Bostock, Chemist.  "], "pos": 171, "end_pos": 177, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q659803"]}], "8199709_13": [{"mention": "Stamford-street", "sent_idx": 13, "sentence": "Attendance every Saturday at 231, Stamford-street, Ashton", "ngram": "Stamford-street", "context": ["Terms\u2014A single teoth from ss.  
", ""], "pos": 34, "end_pos": 49, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "Ashton", "sent_idx": 13, "sentence": "Attendance every Saturday at 231, Stamford-street, Ashton", "ngram": "Ashton", "context": ["Terms\u2014A single teoth from ss.  ", ""], "pos": 51, "end_pos": 57, "place": "Ashton-under-Lyne", "place_wqid": "Q659803", "candidates": [], "ner_label": "LOC", "gold": ["Q659803"]}], "3580760_2": [{"mention": "LONDON", "sent_idx": 2, "sentence": "LONDON, THURSDAY, SEPTEMBER 27. ", "ngram": "LONDON", "context": ["Postscript.  ", "City, Twelve oClock."], "pos": 0, "end_pos": 6, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q84"]}], "3580760_3": [{"mention": "City", "sent_idx": 3, "sentence": "City, Twelve oClock.", "ngram": "City", "context": ["LONDON, THURSDAY, SEPTEMBER 27. ", "\u2014The consol market opened this morning at 84A lor the account, but has since been heavy, fluctuating between 83| and 84, and has now declined to 83\u00a3 for money, and 83$ J for the account. "], "pos": 0, "end_pos": 4, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q23311"]}], "3580760_6": [{"mention": "Oporto", "sent_idx": 6, "sentence": "By the Romona, steam boat, we have received accounts from Oporto to the 21st; and from the fleet, which was in latitude 37. 21., longitude 11. 37., to the 18th instant. ", "ngram": "Oporto", "context": ["Four oJClock\u2014Consols for Account, 83J. ", "The two fleets remained in sight of each other, and it was expected would come to an engagement as soon as Sartorius had been joined by the vessels which he was then expecting at Oporto. 
"], "pos": 58, "end_pos": 64, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q36433"]}], "3580760_7": [{"mention": "Oporto", "sent_idx": 7, "sentence": "The two fleets remained in sight of each other, and it was expected would come to an engagement as soon as Sartorius had been joined by the vessels which he was then expecting at Oporto. ", "ngram": "Oporto", "context": ["By the Romona, steam boat, we have received accounts from Oporto to the 21st; and from the fleet, which was in latitude 37. 21., longitude 11. 37., to the 18th instant. ", "Affairs remained in nearly the same state as when the last accounts left. "], "pos": 179, "end_pos": 185, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q36433"]}], "3580760_14": [{"mention": "Frankfort", "sent_idx": 14, "sentence": "Frankfort Fair\u2014Sept. 14\u2014The first week of our corn fair this year has not otTered any satisfactory result, and we have to add thatour autumn crop must be reckoned among the worst that we have yet had. ", "ngram": "Frankfort", "context": ["Santa Martha commanded in person against my part of the line, and had three regiments of the line, one of volunteers, and one of militia, against 200 British and 150 Portuguese. 1 lost one officer, Colonel Staunton, one officer wounded, two men killed, and 17 wounded.\" ", "No important transactions have taken place as yet in any article. 
"], "pos": 0, "end_pos": 9, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q1794"]}], "3580760_18": [{"mention": "Offenbach", "sent_idx": 18, "sentence": "Notwithstanding the sad state of our trade in this circumstance, it cannot be said to be more wretched than that of Offenbach, only as our merchants transport their goods from that town, when they effect the sale personally, the principal mass of money entering, consequently, into their coffers, gave a little more animation to the fair. ", "ngram": "Offenbach", "context": ["The inns alone have benefited by the presence of foreign travellers, and of those who have come to our town through business. ", "Smuggling is going on actively; it is even said that there are companies who insure that kind of speculation. "], "pos": 116, "end_pos": 125, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q3042"]}], "3580760_20": [{"mention": "Offenbach", "sent_idx": 20, "sentence": "The value of houses and other fixtures decreases considerably in our town, while it rises at Offenbach. ", "ngram": "Offenbach", "context": ["Smuggling is going on actively; it is even said that there are companies who insure that kind of speculation. ", "The number of poor increases daily, as well on account of the bad harvest as of the exorbitant taxes which they are obliged to pay to the customs for the introduction of their merchandize. "], "pos": 93, "end_pos": 102, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q3042"]}], "8263166_4": [{"mention": "Norfolk", "sent_idx": 4, "sentence": "The deficiency of Turnips is severely felt in Norfolk, and odier counties, where thev depend so much upon them to top their spring beasts. 1 n the Northern districts they are generally good Coleseed for a crop is doing well. ", "ngram": "Norfolk", "context": ["The markets for all kindsof Com continue depressed. 
", "Winter Tares and Rye. for early feed, in most parts look kindly. "], "pos": 46, "end_pos": 53, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q23109"]}], "10734579_3": [{"mention": "Dorset", "sent_idx": 3, "sentence": "This invaluable composition may behad at the Dorset County Chronicle Office, and of all the respectable shop, keepers in Dorchester and its vicinity, in stone bottles, at 6u.t Is., and 18d. each.  ", "ngram": "Dorset", "context": ["\u2014How delightful to see a Lady or Gentleman\u2019s beautiful black shining Hoot or Shoe reflecting every surrounding object in TURNER\u2019S UNRIVALLED BLACKING. ", "A CH A t.I.r.NG R. 1 challenge the world to produce, if it can, A Blacking that\u2019s equal to Turner\u2019s Japan : For like crystal it shines, while it softens the leather. "], "pos": 45, "end_pos": 51, "place": "Dorchester", "place_wqid": "Q503331", "candidates": [], "ner_label": "LOC", "gold": ["Q23159"]}, {"mention": "Dorchester", "sent_idx": 3, "sentence": "This invaluable composition may behad at the Dorset County Chronicle Office, and of all the respectable shop, keepers in Dorchester and its vicinity, in stone bottles, at 6u.t Is., and 18d. each.  ", "ngram": "Dorchester", "context": ["\u2014How delightful to see a Lady or Gentleman\u2019s beautiful black shining Hoot or Shoe reflecting every surrounding object in TURNER\u2019S UNRIVALLED BLACKING. ", "A CH A t.I.r.NG R. 1 challenge the world to produce, if it can, A Blacking that\u2019s equal to Turner\u2019s Japan : For like crystal it shines, while it softens the leather. "], "pos": 121, "end_pos": 131, "place": "Dorchester", "place_wqid": "Q503331", "candidates": [], "ner_label": "LOC", "gold": ["Q503331"]}], "3896074_13": [{"mention": "Austerlitz", "sent_idx": 13, "sentence": "Another Austerlitz demand, Another Jena claim; And desolate the groaning lan 1 To write one despots fame ! ", "ngram": "Austerlitz", "context": ["\u2014Man. 
", "Let cen the pyramids afford A verse for his renown : But speak not of brave Sidneys sword, That swept the invader down I Sing not of Moscows flaming tide\u2014 The fiery brands which hurled The chainer of the nations wide, The scourfer of the world. "], "pos": 8, "end_pos": 18, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "OTHER", "gold": ["Q134114"]}, {"mention": "Jena", "sent_idx": 13, "sentence": "Another Austerlitz demand, Another Jena claim; And desolate the groaning lan 1 To write one despots fame ! ", "ngram": "Jena", "context": ["\u2014Man. ", "Let cen the pyramids afford A verse for his renown : But speak not of brave Sidneys sword, That swept the invader down I Sing not of Moscows flaming tide\u2014 The fiery brands which hurled The chainer of the nations wide, The scourfer of the world. "], "pos": 35, "end_pos": 39, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q3150"]}], "3896074_14": [{"mention": "Moscows", "sent_idx": 14, "sentence": "Let cen the pyramids afford A verse for his renown : But speak not of brave Sidneys sword, That swept the invader down I Sing not of Moscows flaming tide\u2014 The fiery brands which hurled The chainer of the nations wide, The scourfer of the world. ", "ngram": "Moscows", "context": ["Another Austerlitz demand, Another Jena claim; And desolate the groaning lan 1 To write one despots fame ! ", "With rout and havoc from their shore ! "], "pos": 133, "end_pos": 140, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q649"]}], "3896074_17": [{"mention": "Trafalgars", "sent_idx": 17, "sentence": "Let net Trafalgars chief, who died, A moments thought beguile; Nor laud with British heart and pride The Baltic and the Nile! ", "ngram": "Trafalgars", "context": ["To Cossack hate consigned :\u2014 Famine and frozen wastes before, Bones and the wolf behind. 
", "Leave Nelsons glorious flag unsung, And Wellington unwreathed; Their fame with which all Europe rung, For his\u2014 whod best neer breathed ! "], "pos": 8, "end_pos": 18, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "OTHER", "gold": ["Q171416"]}, {"mention": "The Baltic", "sent_idx": 17, "sentence": "Let net Trafalgars chief, who died, A moments thought beguile; Nor laud with British heart and pride The Baltic and the Nile! ", "ngram": "The Baltic", "context": ["To Cossack hate consigned :\u2014 Famine and frozen wastes before, Bones and the wolf behind. ", "Leave Nelsons glorious flag unsung, And Wellington unwreathed; Their fame with which all Europe rung, For his\u2014 whod best neer breathed ! "], "pos": 101, "end_pos": 111, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q545"]}, {"mention": "Nile", "sent_idx": 17, "sentence": "Let net Trafalgars chief, who died, A moments thought beguile; Nor laud with British heart and pride The Baltic and the Nile! ", "ngram": "Nile", "context": ["To Cossack hate consigned :\u2014 Famine and frozen wastes before, Bones and the wolf behind. ", "Leave Nelsons glorious flag unsung, And Wellington unwreathed; Their fame with which all Europe rung, For his\u2014 whod best neer breathed ! "], "pos": 120, "end_pos": 124, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q3392"]}], "3896074_18": [{"mention": "Europe", "sent_idx": 18, "sentence": "Leave Nelsons glorious flag unsung, And Wellington unwreathed; Their fame with which all Europe rung, For his\u2014 whod best neer breathed ! ", "ngram": "Europe", "context": ["Let net Trafalgars chief, who died, A moments thought beguile; Nor laud with British heart and pride The Baltic and the Nile! ", "And gloze the tyrants guilty mood, And wail his hapless cause: That Sylla\u2014in his thirst for blood ! 
"], "pos": 89, "end_pos": 95, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q46"]}], "3896074_21": [{"mention": "France", "sent_idx": 21, "sentence": "Mourn him who to the conscript gave HeHrt-broken France a prey; And sill could outraged nature brave With homicidal sway ! ", "ngram": "France", "context": ["That Draco\u2014in his laws ! ", "Yes, wake the throb of sympathyBid maudlin tears reveal How much men grieve for Europe freeHow miss the tyrants steel! "], "pos": 49, "end_pos": 55, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q142"]}], "3896074_22": [{"mention": "Europe", "sent_idx": 22, "sentence": "Yes, wake the throb of sympathyBid maudlin tears reveal How much men grieve for Europe freeHow miss the tyrants steel! ", "ngram": "Europe", "context": ["Mourn him who to the conscript gave HeHrt-broken France a prey; And sill could outraged nature brave With homicidal sway ! ", "And make the conscious sea blush gore In shame for Nelsons land ; Make earth, revolting, lift once more Her blood-accusing hand ! "], "pos": 80, "end_pos": 86, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q46"]}], "3691199_2": [{"mention": "St. Martins-place", "sent_idx": 2, "sentence": "The incorporated society for promoting the enlargement, building, and repairing of churches and chapels, held their nineteenth annual general committee on Friday week, at their chambers in St. Martins-place", "ngram": "St. Martins-place", "context": ["CHURCHES AND CHAPELS.  ", "; the Archbishop of Canterbury in the chair. 
"], "pos": 189, "end_pos": 206, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}], "3691199_13": [{"mention": "Bath", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "Bath", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. "], "pos": 62, "end_pos": 66, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q22889"]}, {"mention": "Wells", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "Wells", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. 
"], "pos": 71, "end_pos": 76, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q212283"]}, {"mention": "Winchester", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "Winchester", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. "], "pos": 78, "end_pos": 88, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q172157"]}, {"mention": "Exeter", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "Exeter", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. 
"], "pos": 90, "end_pos": 96, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q134672"]}, {"mention": "Cleveland", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "Cleveland", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. "], "pos": 102, "end_pos": 111, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q24651706"]}, {"mention": "England", "sent_idx": 13, "sentence": "Contributions had been received from diocesan associations in Bath and Wells, Winchester, Exeter, and Cleveland, which were gratifying proofs of the increasing desire to relieve the spiritual wants of the people of the Church of England. ", "ngram": "England", "context": ["The grants of last year were deemed particularly important, as while the society had contributed towards the increase of accommodation in many of the present churches they had contributed to the erection of forty-one additional churches and chapels, and the rebuilding of sixteen others. ", "AmjtJgst their contributions was the munificent sum of 18,000 from Mr. "], "pos": 229, "end_pos": 236, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q21"]}], "3691199_15": [{"mention": "Lime-street", "sent_idx": 15, "sentence": "George Davenport, of Lime-street, London. 
", "ngram": "Lime-street", "context": ["AmjtJgst their contributions was the munificent sum of 18,000 from Mr. ", "Since their institution in 1818 the society had expended \u00a3220,731, thus insuring 354,925 additional sittings, of which 262,366 were free and unappropriated. "], "pos": 21, "end_pos": 32, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "STREET", "gold": "NIL"}, {"mention": "London", "sent_idx": 15, "sentence": "George Davenport, of Lime-street, London. ", "ngram": "London", "context": ["AmjtJgst their contributions was the munificent sum of 18,000 from Mr. ", "Since their institution in 1818 the society had expended \u00a3220,731, thus insuring 354,925 additional sittings, of which 262,366 were free and unappropriated. "], "pos": 34, "end_pos": 40, "place": "Manchester", "place_wqid": "Q18125", "candidates": [], "ner_label": "LOC", "gold": ["Q84"]}], "12275_1": [{"mention": "LYND H UR ST", "sent_idx": 1, "sentence": "LYND H UR ST.  ", "ngram": "LYND H UR ST", "context": ["", "CAPITAL RUN WITH THE New FOREST Fox HOUND. "], "pos": 0, "end_pos": 12, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q3182986"]}], "12275_2": [{"mention": "New FOREST", "sent_idx": 2, "sentence": "CAPITAL RUN WITH THE New FOREST Fox HOUND. ", "ngram": "New FOREST", "context": ["LYND H UR ST.  ", "\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. 
"], "pos": 21, "end_pos": 31, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q277755"]}], "12275_3": [{"mention": "New Forest", "sent_idx": 3, "sentence": "\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "ngram": "New Forest", "context": ["CAPITAL RUN WITH THE New FOREST Fox HOUND. ", "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. "], "pos": 35, "end_pos": 45, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q277755"]}, {"mention": "Boldrewood", "sent_idx": 3, "sentence": "\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "ngram": "Boldrewood", "context": ["CAPITAL RUN WITH THE New FOREST Fox HOUND. ", "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. "], "pos": 64, "end_pos": 74, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q4939103"]}, {"mention": "Ringwood", "sent_idx": 3, "sentence": "\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "ngram": "Ringwood", "context": ["CAPITAL RUN WITH THE New FOREST Fox HOUND. 
", "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. "], "pos": 188, "end_pos": 196, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q1248943"]}], "12275_4": [{"mention": "Church Moor", "sent_idx": 4, "sentence": "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", "ngram": "Church Moor", "context": ["\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill"], "pos": 41, "end_pos": 52, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Burley", "sent_idx": 4, "sentence": "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", "ngram": "Burley", "context": ["\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "He then made a turn back over Buldre. 
wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill"], "pos": 84, "end_pos": 90, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q3195509"]}, {"mention": "Burley", "sent_idx": 4, "sentence": "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", "ngram": "Burley", "context": ["\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill"], "pos": 101, "end_pos": 107, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q3195509"]}, {"mention": "Beech Beds", "sent_idx": 4, "sentence": "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", "ngram": "Beech Beds", "context": ["\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "He then made a turn back over Buldre. 
wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill"], "pos": 147, "end_pos": 157, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Oakley", "sent_idx": 4, "sentence": "%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", "ngram": "Oakley", "context": ["\u2014On Thursday, the 9th instant, the New Forest fox hounds met at Boldrewood, arid drew Holme Hill Inclousure, where the famous bitch pock soon winded a Inc old fox, who stole away over the Ringwood rued to Gillett Inclusure. ", "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill"], "pos": 175, "end_pos": 181, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}], "12275_5": [{"mention": "Buldre. wood", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Buldre. wood", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. 
", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 30, "end_pos": 42, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q4939103"]}, {"mention": "Gillett Inclosure", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Gillett Inclosure", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 51, "end_pos": 68, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Hulme Hill", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Hulme Hill", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. 
", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 82, "end_pos": 92, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Thrifty Beeches", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Thrifty Beeches", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 97, "end_pos": 112, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Over Fields", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Over Fields", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. 
", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 216, "end_pos": 227, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}, {"mention": "Holme !Jill", "sent_idx": 5, "sentence": "He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "ngram": "Holme !Jill", "context": ["%here he turned to the right and crossed Church Moor, thence at a clipping pace, to Burley ; leaving Burley to the left, he pushed forward through Beech Beds to the earths at Oakley, where, to his aurprse, he found no admission. ", ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. "], "pos": 264, "end_pos": 275, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": "NIL"}], "12275_6": [{"mention": "Emery Down", "sent_idx": 6, "sentence": ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. ", "ngram": "Emery Down", "context": ["He then made a turn back over Buldre. 
wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.\u2014 We regret to announce the resignation of Mr. fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. "], "pos": 12, "end_pos": 22, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q5370984"]}, {"mention": "Minesteed Manor", "sent_idx": 6, "sentence": ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. ", "ngram": "Minesteed Manor", "context": ["He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.\u2014 We regret to announce the resignation of Mr. 
fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. "], "pos": 34, "end_pos": 49, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}, {"mention": "Notherwood", "sent_idx": 6, "sentence": ", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. ", "ngram": "Notherwood", "context": ["He then made a turn back over Buldre. wood Hill to Gillett Inclosure, and through Hulme Hill, to Thrifty Beeches ; here he again turned to the lett, sod ran almost in a straight direction \u2022 distance of five miles to Over Fields ; then took \u2022 backward direction to Holme !Jill", "RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.\u2014 We regret to announce the resignation of Mr. fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. 
"], "pos": 75, "end_pos": 85, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "BUILDING", "gold": "NIL"}], "12275_7": [{"mention": "New FOREST", "sent_idx": 7, "sentence": "RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.\u2014 We regret to announce the resignation of Mr. fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. ", "ngram": "New FOREST", "context": [", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. ", "It should be remembered that Mr. "], "pos": 42, "end_pos": 52, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q277755"]}, {"mention": "New Forest", "sent_idx": 7, "sentence": "RESIGNATION Of Elll TIMSON, MASTEI OF THE New FOREST Fox HOUNDS.\u2014 We regret to announce the resignation of Mr. fimson, the esteemed master of the New Forest hourids, who has hunted the country for the last five seasons, to the entire satisfaction of those gentlemen, members of the hunt and others, who have been fertunate enough to witness some of the splendid sport which he has so often afforded during his career as such ; and although he relinquishes the mastership, we earnestly hope that a continuance of good health will enahle Mr. f imson to take is usual prominent place in the bunting field for many years to come. 
", "ngram": "New Forest", "context": [", thence to Emery Down,crowing to Minesteed Manor ; he ther tacked back to Notherwood, and from thence back again to the Manor, where, after a brilliant run (Arnie hour and forty-five minutes, Reynold was compelled to succumb to his pursuers. ", "It should be remembered that Mr. "], "pos": 146, "end_pos": 156, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q277755"]}], "12275_14": [{"mention": "POOLE", "sent_idx": 14, "sentence": "POOLE.", "ngram": "POOLE", "context": ["SHIPPING INTELLIGENCr:.  ", ""], "pos": 0, "end_pos": 5, "place": "Poole", "place_wqid": "Q203349", "candidates": [], "ner_label": "LOC", "gold": ["Q203349"]}]}
\ No newline at end of file
diff --git a/tests/sample_files/resources/deezymatch/data/w2v_ocr_pairs_test.txt b/tests/sample_files/resources/deezymatch/data/w2v_ocr_pairs_test.txt
new file mode 100644
index 00000000..7278aedf
--- /dev/null
+++ b/tests/sample_files/resources/deezymatch/data/w2v_ocr_pairs_test.txt
@@ -0,0 +1,200 @@
+russia	france	FALSE
+woman	child	FALSE
+determined	vigourously	FALSE
+determined	reconstitute	FALSE
+determined	resolves	FALSE
+determined	proposes	FALSE
+determined	resolute	FALSE
+labour	earnings	FALSE
+labour	industry	FALSE
+labour	cheapen	FALSE
+labour	drudgery	FALSE
+labour	idleness	FALSE
+labour	disables	FALSE
+labour	sampling	FALSE
+superior	quality	FALSE
+superior	finest	FALSE
+superior	excellence	FALSE
+superior	duality	FALSE
+superior	excellent	FALSE
+superior	champagnes	FALSE
+superior	refractive	FALSE
+superior	pliability	FALSE
+superior	keying	FALSE
+superior	overlays	FALSE
+superior	cheapness	FALSE
+share	profits	FALSE
+share	profit	FALSE
+share	deposit	FALSE
+share	outbid	FALSE
+share	broking	FALSE
+proceed	embark	FALSE
+proceed	depart	FALSE
+proceed	disembark	FALSE
+proceed	assemble	FALSE
+proceed	arrive	FALSE
+proceed	accompany	FALSE
+proceed	resume	FALSE
+dinner	banquet	FALSE
+language	expression	FALSE
+language	epithets	FALSE
+language	soberness	FALSE
+language	fluently	FALSE
+language	epithet	FALSE
+language	couched	FALSE
+language	dialect	FALSE
+major	lieut	FALSE
+refused	alleging	FALSE
+refused	compel	FALSE
+refused	allow	FALSE
+refused	unable	FALSE
+refused	unwilling	FALSE
+refused	traynor	FALSE
+refused	demanding	FALSE
+refused	comply	FALSE
+refused	ruhman	FALSE
+judge	denman	FALSE
+judge	empanel	FALSE
+judge	bolland	FALSE
+introduced	referral	FALSE
+introduced	codification	FALSE
+introduced	analagous	FALSE
+introduced	validating	FALSE
+introduced	eulogizes	FALSE
+introduced	adaption	FALSE
+facts	truth	FALSE
+facts	lucidly	FALSE
+facts	deduce	FALSE
+direct	ratifies	FALSE
+direct	positive	FALSE
+direct	tchad	FALSE
+direct	override	FALSE
+direct	falters	FALSE
+popular	democracy	FALSE
+increased	sixfold	FALSE
+increased	diminish	FALSE
+increased	tenfold	FALSE
+increased	augment	FALSE
+increased	lessening	FALSE
+increased	fivefold	FALSE
+increased	augmenting	FALSE
+increased	subtract	FALSE
+increased	fourfold	FALSE
+opposed	hostile	FALSE
+opposed	crotchety	FALSE
+opposed	reformist	FALSE
+opposed	variance	FALSE
+opposed	support	FALSE
+opposed	resist	FALSE
+opposed	inimical	FALSE
+opposed	favour	FALSE
+opposed	layout	FALSE
+guilty	wilful	FALSE
+guilty	offence	FALSE
+guilty	pleaded	FALSE
+guilty	accused	FALSE
+guilty	heinous	FALSE
+guilty	abetting	FALSE
+guilty	murder	FALSE
+guilty	crime	FALSE
+guilty	offense	FALSE
+dakotah	dakotah	TRUE
+dakotah	dacotah	TRUE
+dakotah	dakotah	TRUE
+dakotah	dacotab	TRUE
+dakotah	dakotah	TRUE
+charioteer	charioteer	TRUE
+gutman	gutman	TRUE
+loquitur	loquitur	TRUE
+quesada	quesada	TRUE
+deaconry	deaconry	TRUE
+incontinent	incontinent	TRUE
+campeche	campeche	TRUE
+propter	propter	TRUE
+antia	antia	TRUE
+flavigny	flavigny	TRUE
+hightail	hightail	TRUE
+sucrose	sucrose	TRUE
+amsden	amsden	TRUE
+amsden	rameden	TRUE
+amsden	amsden	TRUE
+boozy	boozy	TRUE
+grayrigg	grayrigg	TRUE
+harmsworth	harmsworth	TRUE
+gottfried	gottfried	TRUE
+avalos	avalos	TRUE
+stainland	stainland	TRUE
+taton	taton	TRUE
+vinos	vinos	TRUE
+giselle	giselle	TRUE
+emphases	emphases	TRUE
+emphases	emplanes	TRUE
+emphases	emphases	TRUE
+flatiron	flatiron	TRUE
+daggs	daggs	TRUE
+unconventional	unconventional	TRUE
+unconventional	unpoetical	TRUE
+unconventional	unconventional	TRUE
+precociously	precociously	TRUE
+mayville	mayville	TRUE
+pralines	pralines	TRUE
+birtles	birtles	TRUE
+potchefstroom	potchefstroom	TRUE
+saporta	saporta	TRUE
+ostracise	ostracise	TRUE
+ciento	ciento	TRUE
+effenberg	effenberg	TRUE
+magnusson	magnusson	TRUE
+chipstead	chipstead	TRUE
+meols	meols	TRUE
+skunks	skunks	TRUE
+rayners	rayners	TRUE
+suleiman	suleiman	TRUE
+kocher	kocher	TRUE
+mandrell	mandrell	TRUE
+purveyed	purveyed	TRUE
+roxana	roxana	TRUE
+lookup	lookup	TRUE
+laborie	laborie	TRUE
+laborie	labedoyere	TRUE
+laborie	laborie	TRUE
+materiality	materiality	TRUE
+valter	valter	TRUE
+valter	waltor	TRUE
+valter	valter	TRUE
+embolism	embolism	TRUE
+mazzetti	mazzetti	TRUE
+digesters	digesters	TRUE
+hindes	hindes	TRUE
+stopgap	stopgap	TRUE
+classon	classon	TRUE
+meiji	meiji	TRUE
+plett	plett	TRUE
+skylarks	skylarks	TRUE
+zadok	zadok	TRUE
+nicols	nicols	TRUE
+nicols	nickolls	TRUE
+nicols	nicols	TRUE
+burdock	burdock	TRUE
+combahee	combahee	TRUE
+managua	managua	TRUE
+pepped	pepped	TRUE
+cantar	cantar	TRUE
+adorably	adorably	TRUE
+mencius	mencius	TRUE
+ogaden	ogaden	TRUE
+timia	timia	TRUE
+uncorrupt	uncorrupt	TRUE
+garron	garron	TRUE
+paribas	paribas	TRUE
+dinkins	dinkins	TRUE
+kindl	kindl	TRUE
+bartholemew	bartholemew	TRUE
+bartholemew	bartholomews	TRUE
+bartholemew	bartholemew	TRUE
+lyttle	lyttle	TRUE
+lyttle	lyttel	TRUE
+lyttle	lyttle	TRUE
+visioned	visioned	TRUE
+orthopaedic	orthopaedic	TRUE
+dunkerton	dunkerton	TRUE
\ No newline at end of file
diff --git a/tests/sample_files/resources/deezymatch/models/w2v_ocr/input_dfm.yaml b/tests/sample_files/resources/deezymatch/models/w2v_ocr/input_dfm.yaml
new file mode 100644
index 00000000..b777d226
--- /dev/null
+++ b/tests/sample_files/resources/deezymatch/models/w2v_ocr/input_dfm.yaml
@@ -0,0 +1,111 @@
+general:
+  use_gpu: False    # only if available
+  # specify CUDA device, these are 0-indexed, e.g.,
+  #   cuda:0, cuda:1 or others. "cuda" is the default CUDA device
+  gpu_device: "cuda"
+  # Parent dir to save trained models
+  models_dir: "../resources/deezymatch/models"
+
+preprocessing:
+  # normalizing text to the ASCII encoding standard
+  uni2ascii: True
+  lowercase: True
+  # removing both the leading and the trailing empty characters
+  strip: True
+  only_latin_letters: False
+  # Accepted proportion of characters in a string that are not present in our vocabulary, i.e.,
+  # String is accepted if:
+  # (number of its characters found in the vocabulary)/(total number of its characters) >= missing_char_threshold
+  missing_char_threshold: 0.5
+  # read a list of characters and add to the vocabulary
+  read_list_chars: "../resources/deezymatch/inputs/characters_v001.vocab"
+
+# --- RNN/GRU/LSTM architecture/misc info
+gru_lstm:
+  main_architecture: "gru"    # rnn, gru, lstm
+  mode:    # Tokenization mode
+    # choices: "char", "ngram", "word"
+    # for example: tokenize: ["char", "ngram", "word"] or ["char", "word"]
+    tokenize: ["char"]
+    # ONLY if "char" or "ngram" are selected in tokenize, the following arg will be used:
+    # Strings in the inputs will be prefix + string + suffix:
+    # NOTE: please use only STRINGS in prefix_suffix list,
+    #       if you want only prefix or suffix, enter the other as an empty string ""
+    #       e.g., ["|", ""] would add | as prefix and no suffix
+    prefix_suffix: ["", ""]
+    # ONLY if "ngram" is selected in tokenize, the following args will be used:
+    min_gram: 4
+    max_gram: 5
+    # ONLY if "word" is selected in tokenize, the following arg will be used:
+    # Characters according to which the string will split into tokens:
+    # Accepted inputs are either a string of delimiters (e.g. ", -!?:()") or
+    # "default", in which case delimiters will be the white space and any
+    # punctuation mark:
+    token_sep: "default"
+  bidirectional: True    # if True, becomes a bidirectional RNN/GRU/LSTM
+  # num_layers
+  # number of recurrent layers. e.g., setting num_layers=2 means stacking two
+  #   RNN/GRU/LSTMs together to form a stacked RNN/GRU/LSTM,
+  #   with the second RNN/GRU/LSTM taking in outputs of the first RNN/GRU/LSTM and computing the final results.
+  num_layers: 2
+  # number of dimensions of the first fully connected network
+  fc1_out_dim: 120
+  # pooling_mode:
+  # hstates_layers_simple, hstates_layers, hstates
+  # hstates_subtract, hstates_l2_distance, hstates_cosine
+  # average, max
+  # attention
+  pooling_mode: 'average'
+  # rnn_dropout:
+  # if non-zero, introduces a Dropout layer on the outputs of each RNN/LSTM/GRU layer except the last layer,
+  #   with dropout probability equal to rnn_dropout.
+  rnn_dropout: 0.1
+  # fully-connected layers dropout depends on the number of fc layers (currently there are two)
+  fc_dropout: [0.1, 0.1]
+  # attention layer dropout depends on the number of attention layers (currently there are two)
+  att_dropout: [0.1, 0.1]
+  # Add bias to all learnable parameters
+  bias: True
+
+  rnn_hidden_dim: 60
+  max_seq_len: 120
+  embedding_dim: 60
+  output_dim: 2
+
+  learning_rate: 0.005
+  optimizer: adam
+  epochs: 5
+  batch_size: 32
+  # shuffle when creating DataLoader
+  dl_shuffle: True
+  random_seed: 123
+  # Early stopping:
+  # Number of epochs with no improvement after which training will be stopped and
+  #   the model with the least validation loss will be saved
+  # If 0 or negative, early stopping will be deactivated
+  early_stopping_patience: 1
+
+  # if -1 or 1, perform the validation step in every epoch;
+  # if 0, no validation will be done
+  # otherwise, specify the interval (integer)
+  validation: 1
+  # split dataset
+  train_proportion: 0.85
+  val_proportion: 0.15
+  test_proportion: 0
+
+  # False or path to a directory to create tensor-board
+  #create_tensor_board: "./tb_gru_test"
+  create_tensor_board: False
+
+  # Layers to freeze during fine-tuning
+  layers_to_freeze: ["emb", "rnn_1", "attn"]
+
+inference:
+ # Output predictions and save the results in output_preds_file
+ output_preds: True
+ # either a path or "default"
+ #   "default" saves the prediction output inside the model directory
+ output_preds_file: "default"
+ # change it to true to have Mean Average Precision as an eval metric. Note that this would have an impact on computational time
+ eval_map_metric: False
diff --git a/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model
new file mode 100644
index 00000000..d9071a77
Binary files /dev/null and b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model differ
diff --git a/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model_state_dict b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model_state_dict
new file mode 100644
index 00000000..574e0b14
Binary files /dev/null and b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.model_state_dict differ
diff --git a/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.vocab b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.vocab
new file mode 100644
index 00000000..68721ef8
Binary files /dev/null and b/tests/sample_files/resources/deezymatch/models/w2v_ocr/w2v_ocr.vocab differ
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/.DS_Store b/tests/sample_files/resources/news_datasets/topRes19th_v2/.DS_Store
new file mode 100644
index 00000000..afa651e9
Binary files /dev/null and b/tests/sample_files/resources/news_datasets/topRes19th_v2/.DS_Store differ
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/README.md b/tests/sample_files/resources/news_datasets/topRes19th_v2/README.md
new file mode 100644
index 00000000..00274f6e
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/README.md
@@ -0,0 +1,121 @@
+# A Dataset for Toponym Resolution in Nineteenth-Century English Newspapers
+
+## Description
+
+We present a new dataset (version 2) for the task of toponym resolution in digitised historical newspapers in English. It consists of 455 annotated articles from newspapers based in four different locations in England (Manchester, Ashton-under-Lyne, Poole and Dorchester), published between 1780 and 1870. The articles have been manually annotated with mentions of places, which are linked---whenever possible---to their corresponding entry on Wikipedia. The dataset is published on the British Library shared research repository, and is especially of interest to researchers working on improving semantic access to historical newspaper content.
+
+We share the 455 annotated files (one file per article) in the WebAnno TSV file format version 3.2, a CoNLL-based file format. The files are split into a train and test set. For each split, we additionally provide a TSV file with metadata at the article level. We also provide the original annotation guidelines.
+
+## Directory structure
+
+```
+topRes19th_v2/
+├── README.md
+├── train/
+│   ├── annotated_tsv/
+│   │   ├── 1218_Poole1860.tsv
+│   │   ├── ...
+│   │   └── 10877685_Dorchester1830.tsv
+│   └── metadata.tsv
+├── test/
+│   ├── annotated_tsv/
+│   │   ├── 9144_Poole1860.tsv
+│   │   ├── ...
+│   │   └── 10860796_Dorchester1860.tsv
+│   └── metadata.tsv
+└── original_guidelines.md
+```
+
+## Data description
+
+### `[split]/annotated_tsv/*.tsv`
+
+Each WebAnno TSV file in `annotated_tsv/` corresponds to an article. The file names (e.g. `1218_Poole1860.tsv`) consist of three elements: an internal Living with Machines identifier of the article (`1218`), the place of publication (`Poole`) and the decade of publication (`1860`). The WebAnno TSV format is a CoNLL-based file format, which has a header, is sentence-separated (by a blank line), and lists one token per line, with the different layers of annotations separated with tabs. See an example:
+```
+#FORMAT=WebAnno TSV 3.2
+#T_SP=webanno.custom.Customentity|identifiier|value
+
+
+#Text=THE POOLE AND SOUTH-WESTERN HERALD, THURSDAY, OCTOBER 20, 1864.
+1-1	0-3	THE	_	_	
+1-2	4-9	POOLE	_	_	
+1-3	10-13	AND	_	_	
+1-4	14-27	SOUTH-WESTERN	_	_	
+1-5	28-34	HERALD	_	_	
+1-6	34-35	,	_	_	
+1-7	36-44	THURSDAY	_	_	
+1-8	44-45	,	_	_	
+1-9	46-53	OCTOBER	_	_	
+1-10	54-56	20	_	_	
+1-11	56-57	,	_	_	
+1-12	58-62	1864	_	_	
+1-13	62-63	.	_	_	
+
+#Text=POOLE TOWN COUNCIL.
+2-1	65-70	POOLE	https://en.wikipedia.org/wiki/Poole	LOC
+2-2	71-75	TOWN	_	_	
+2-3	76-83	COUNCIL	_	_	
+2-4	83-84	.	_	_	
+```
+
+This example has two full sentences, preceded by `#Text=`, and split with one token per line. Now we look at one line in more detail:
+```
+2-1	65-70	POOLE	https://en.wikipedia.org/wiki/Poole	LOC	
+```
+The tab-separated elements are:
+* `2-1`: the indices of the sentence in the document and the token in the sentence.
+* `65-70`: start and end character positions of the token in the document.
+* `POOLE`: the token.
+* `https://en.wikipedia.org/wiki/Poole`: the Wikipedia url (if linked).
+* `LOC`: the toponym class.
+
+Toponyms are annotated with the following classes:
+* `BUILDING`: names of buildings, such as the 'British Museum'.
+* `STREET`: streets, roads, and other odonyms, such as 'Great Russell St'.
+* `LOC`: any other real world places regardless of type or scale, such as 'Bloomsbury', 'London' or 'Great Britain'.
+* `ALIEN`: extraterrestrial locations, such as 'Venus'.
+* `FICTION`: fictional or mythical places, such as 'Hell'.
+* `OTHER`: other types of entities with coordinates, such as events, like the 'Battle of Waterloo'.
+
+
+### `metadata.tsv`
+
+The `metadata.tsv` file links each annotated tsv file to its metadata. It consists of a header and one row per article, with the following fields:
+* `fname`: name of the annotated file, without the extension (e.g. `1218_Poole1860`)
+* `word_count`: number of words in the article.
+* `ocr_quality_mean`: OCR quality mean, calculated as the mean of the per-word OCR confidence scores reported in the source metadata.
+* `ocr_quality_sd`: OCR quality standard deviation.
+* `issue_date`: date of publication of the article.
+* `publication_code`: publication code (internal).
+* `publication_title`: name of the newspaper publication.
+* `decade`: decade of publication of the article.
+* `place_publication`: place of publication.
+* `annotation_batch`: each article is assigned to one annotation batch. All annotation batches are similarly-distributed in terms of place and decade of publication.
+
+## License
+
+The dataset is released under open license CC-BY-NC-SA, available at https://creativecommons.org/licenses/by-nc-sa/4.0/.
+
+## Copyright notice
+
+Newspaper data has been provided by Findmypast Limited from the British Newspaper Archive, a partnership between the British Library and Findmypast (https://www.britishnewspaperarchive.co.uk/).
+
+## Funding statement
+
+This work was supported by Living with Machines (AHRC grant AH/S01179X/1) and The Alan Turing Institute (EPSRC grant EP/N510129/1).
+
+## Dataset creators
+
+Mariona Coll Ardanuy (conceptualization, data curation, formal analysis, project management, writing), David Beavan (resources, software, writing), Kaspar Beelen (resources, data curation, writing), Kasra Hosseini (resources, software), Jon Lawrence (conceptualization, data curation, project management), Katherine McDonough (conceptualization, data curation, writing), Federico Nanni (validation, writing), Daniel van Strien (resources, software), Daniel C.S. Wilson (conceptualization, data curation, writing).
+
+## Version changes
+
+**Version 2:**
+
+* Annotations (`annotated_tsv/*.tsv`):
+    - The toponyms that were annotated as "LOCWiki" are now annotated as "LOC".
+    - "UNKNOWN" has been removed from all data fields; instances of this class have been reclassified into the other classes (mostly "LOC").
+* Metadata (`metadata.tsv`):
+    - Column "publication_location" removed.
+    - Column "annotation_decade" renamed to "decade".
+    - Column "annotation_location" renamed to "place_publication".
\ No newline at end of file
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/original_guidelines.md b/tests/sample_files/resources/news_datasets/topRes19th_v2/original_guidelines.md
new file mode 100644
index 00000000..17534aee
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/original_guidelines.md
@@ -0,0 +1,47 @@
+# Annotation guidelines
+
+**Note:** These are the original annotation guidelines. The annotations in the current version of the dataset have since been refined; the main changes are described in the accompanying README file (under `Version changes`).
+
+## Toponym resolution annotations
+
+This document contains the annotation guidelines for marking up and georeferencing locations mentioned in historical newspaper articles. The task of the annotator is to recognize each location mentioned in the text and map it to the URL of the Wikipedia article that refers to it.
+
+### The task
+
+Place names are often highly ambiguous. There are, for instance, more than 20 different places named Paris all over the world, as well as different instances of records relating to Paris, France. The task of toponym resolution can be similar to word sense disambiguation: in most scenarios, the most commonly used sense (or place) is the correct one. However, our data is mostly composed of historical local and regional newspapers, and the world view that is represented in these texts is skewed towards the knowledge expected of their intended average, regional reader. It is therefore important that annotators take into account the date and place(s) of newspaper publication/circulation during the annotation process.
+
+### What to annotate
+
+Location: any named entity of a location that is static and can be defined according to a pair of static world coordinates (including metonyms, as in 'France signed the deal.'). If there is an OCR error, we will annotate the location if we can recognise it because of context clues in the word itself or in the surrounding text (for example, we would link "iHancfjrcter" to https://en.wikipedia.org/wiki/Manchester). We will not perform any additional post-correction of the OCRed text.
+
+### How to annotate
+
+The annotator should map each location found in the text with the URL of the Wikipedia article that refers to it.
+
+To do so:
+* Make sure you have selected the Layer `Custom entity` (if you don't see it, make sure you are in a 'Toponym resolution' project).
+* Select with the mouse the span of text you want to annotate (e.g. 'West Laviogton') and select `LOCWiki` from the dropdown menu.
+
+In this task, the custom entity `LOCWiki` refers to a real world place regardless of scale (region, city, neighborhood) with the exception of the additional, separate categories listed below:
+  * `BUILDING`: Names of buildings (e.g. schools, hospitals, factories, palaces, etc.). Optional link to Wikipedia article if it exists.
+  * `STREET`: Streets, squares, etc. Optional link to Wikipedia article if it exists.
+  * `ALIEN`: Extraterrestrial locations (e.g. the moon). Optional link to Wikipedia article if it exists.
+  * `OTHER`: Others, as in famous trees (https://en.wikipedia.org/wiki/Lone_Cypress) or battlefields (https://en.wikipedia.org/wiki/Battle_of_Waterloo). Optional link to Wikipedia article if it exists.
+  * `UNKNOWN`: If the location has no Wikipedia entry OR if you cannot determine what place it is, but are confident that it is a place. No link to Wikipedia.
+  * `FICTION`: If it is a fictional/mythical place (e.g. Lilliput). Optional link to Wikipedia article if it exists.
+
+* How to annotate with Wikipedia links:
+  * Go to Wikipedia (English version).
+  * Find the correct article corresponding to the place mentioned in the text (e.g. `https://en.wikipedia.org/wiki/West_Lavington,_Wiltshire`).
+  * Copy the full URL and paste it to the identifier box.
+* To delete an annotation, click on it and click on `Delete` in the Annotation box.
+
+The article title will give you an indication of the place of publication of the article, to help you disambiguate the toponyms in the article (e.g. `10713959_Dorchester1820.txt` is an article published in Dorchester, Dorset, in the 1820s---the date refers to the decade, not the year, of publication).
+
+Some annotation considerations:
+* Choose 'historical county' record over 'ceremonial county' for county place names.
+* Do not include places that are not referred to by proper names (e.g. 'the park').
+* Always favour a geo-coded link even if it is less perfect.
+  > For example: Bengal---a province of British Colonial India---has a wiki page but it is not geo-coded because it is an historic term for places now in India (West Bengal) and Bangladesh. The latter has been linked since it represents the bulk of British Bengal and is geo-coded.
+* Do not geocode the place if it's part of a person's title ("the Earl of Warwick").
+* Company stocks and shares named after places (e.g. Westminster Bank, Devon Great Consols (mine)) should NOT be linked, as each is a commercial credit note linked to a trading entity. It isn't a place as such.
\ No newline at end of file
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/test/.DS_Store b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/.DS_Store
new file mode 100644
index 00000000..bfeec556
Binary files /dev/null and b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/.DS_Store differ
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/test/annotated_tsv/9144_Poole1860.tsv b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/annotated_tsv/9144_Poole1860.tsv
new file mode 100644
index 00000000..7efc6e50
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/annotated_tsv/9144_Poole1860.tsv
@@ -0,0 +1,430 @@
+#FORMAT=WebAnno TSV 3.2
+#T_SP=webanno.custom.Customentity|identifiier|value
+
+
+#Text=WAREHAM TURNPIKE, 
+#Text=NOTICE is hereby given, —That the TOLLS arising at the undermentioned TOLL GATES will be LET BY AUCTION, to the best Bidden, at the Towx HALL, in WADEHAY, on TUESDAY, the 15th day of MAY next, between the hours of twelve and three, in the manner directed by an Act ionised in the third year of his Majesty King George the Fourth" for regulating Turnpike Roads," which Tolls produced last year the sum of £892 15s. 6d. above the expense of collecting them, and will be put up in the following Lots, at such sums as the Trustees present shall think fit, and to be Let from noon on the Ist July next, for such term as shall at the said Meeting be determined 
+#Text=Lot.
+1-1	0-7	WAREHAM	https://en.wikipedia.org/wiki/Wareham,\_Dorset	LOC	
+1-2	8-16	TURNPIKE	_	_	
+1-3	16-17	,	_	_	
+1-4	19-25	NOTICE	_	_	
+1-5	26-28	is	_	_	
+1-6	29-35	hereby	_	_	
+1-7	36-41	given	_	_	
+1-8	41-42	,	_	_	
+1-9	43-44	—	_	_	
+1-10	44-48	That	_	_	
+1-11	49-52	the	_	_	
+1-12	53-58	TOLLS	_	_	
+1-13	59-66	arising	_	_	
+1-14	67-69	at	_	_	
+1-15	70-73	the	_	_	
+1-16	74-88	undermentioned	_	_	
+1-17	89-93	TOLL	_	_	
+1-18	94-99	GATES	_	_	
+1-19	100-104	will	_	_	
+1-20	105-107	be	_	_	
+1-21	108-111	LET	_	_	
+1-22	112-114	BY	_	_	
+1-23	115-122	AUCTION	_	_	
+1-24	122-123	,	_	_	
+1-25	124-126	to	_	_	
+1-26	127-130	the	_	_	
+1-27	131-135	best	_	_	
+1-28	136-142	Bidden	_	_	
+1-29	142-143	,	_	_	
+1-30	144-146	at	_	_	
+1-31	147-150	the	_	_	
+1-32	151-155	Towx	*[1]	BUILDING[1]	
+1-33	156-160	HALL	*[1]	BUILDING[1]	
+1-34	160-161	,	_	_	
+1-35	162-164	in	_	_	
+1-36	165-172	WADEHAY	https://en.wikipedia.org/wiki/Wareham,\_Dorset	LOC	
+1-37	172-173	,	_	_	
+1-38	174-176	on	_	_	
+1-39	177-184	TUESDAY	_	_	
+1-40	184-185	,	_	_	
+1-41	186-189	the	_	_	
+1-42	190-194	15th	_	_	
+1-43	195-198	day	_	_	
+1-44	199-201	of	_	_	
+1-45	202-205	MAY	_	_	
+1-46	206-210	next	_	_	
+1-47	210-211	,	_	_	
+1-48	212-219	between	_	_	
+1-49	220-223	the	_	_	
+1-50	224-229	hours	_	_	
+1-51	230-232	of	_	_	
+1-52	233-239	twelve	_	_	
+1-53	240-243	and	_	_	
+1-54	244-249	three	_	_	
+1-55	249-250	,	_	_	
+1-56	251-253	in	_	_	
+1-57	254-257	the	_	_	
+1-58	258-264	manner	_	_	
+1-59	265-273	directed	_	_	
+1-60	274-276	by	_	_	
+1-61	277-279	an	_	_	
+1-62	280-283	Act	_	_	
+1-63	284-291	ionised	_	_	
+1-64	292-294	in	_	_	
+1-65	295-298	the	_	_	
+1-66	299-304	third	_	_	
+1-67	305-309	year	_	_	
+1-68	310-312	of	_	_	
+1-69	313-316	his	_	_	
+1-70	317-324	Majesty	_	_	
+1-71	325-329	King	_	_	
+1-72	330-336	George	_	_	
+1-73	337-340	the	_	_	
+1-74	341-347	Fourth	_	_	
+1-75	347-348	"	_	_	
+1-76	349-352	for	_	_	
+1-77	353-363	regulating	_	_	
+1-78	364-372	Turnpike	_	_	
+1-79	373-378	Roads	_	_	
+1-80	378-379	,	_	_	
+1-81	379-380	"	_	_	
+1-82	381-386	which	_	_	
+1-83	387-392	Tolls	_	_	
+1-84	393-401	produced	_	_	
+1-85	402-406	last	_	_	
+1-86	407-411	year	_	_	
+1-87	412-415	the	_	_	
+1-88	416-419	sum	_	_	
+1-89	420-422	of	_	_	
+1-90	423-427	£892	_	_	
+1-91	428-431	15s	_	_	
+1-92	431-432	.	_	_	
+1-93	433-435	6d	_	_	
+1-94	435-436	.	_	_	
+1-95	437-442	above	_	_	
+1-96	443-446	the	_	_	
+1-97	447-454	expense	_	_	
+1-98	455-457	of	_	_	
+1-99	458-468	collecting	_	_	
+1-100	469-473	them	_	_	
+1-101	473-474	,	_	_	
+1-102	475-478	and	_	_	
+1-103	479-483	will	_	_	
+1-104	484-486	be	_	_	
+1-105	487-490	put	_	_	
+1-106	491-493	up	_	_	
+1-107	494-496	in	_	_	
+1-108	497-500	the	_	_	
+1-109	501-510	following	_	_	
+1-110	511-515	Lots	_	_	
+1-111	515-516	,	_	_	
+1-112	517-519	at	_	_	
+1-113	520-524	such	_	_	
+1-114	525-529	sums	_	_	
+1-115	530-532	as	_	_	
+1-116	533-536	the	_	_	
+1-117	537-545	Trustees	_	_	
+1-118	546-553	present	_	_	
+1-119	554-559	shall	_	_	
+1-120	560-565	think	_	_	
+1-121	566-569	fit	_	_	
+1-122	569-570	,	_	_	
+1-123	571-574	and	_	_	
+1-124	575-577	to	_	_	
+1-125	578-580	be	_	_	
+1-126	581-584	Let	_	_	
+1-127	585-589	from	_	_	
+1-128	590-594	noon	_	_	
+1-129	595-597	on	_	_	
+1-130	598-601	the	_	_	
+1-131	602-605	Ist	_	_	
+1-132	606-610	July	_	_	
+1-133	611-615	next	_	_	
+1-134	615-616	,	_	_	
+1-135	617-620	for	_	_	
+1-136	621-625	such	_	_	
+1-137	626-630	term	_	_	
+1-138	631-633	as	_	_	
+1-139	634-639	shall	_	_	
+1-140	640-642	at	_	_	
+1-141	643-646	the	_	_	
+1-142	647-651	said	_	_	
+1-143	652-659	Meeting	_	_	
+1-144	660-662	be	_	_	
+1-145	663-673	determined	_	_	
+1-146	675-678	Lot	_	_	
+1-147	678-679	.	_	_	
+
+#Text=I.
+2-1	680-681	I	_	_	
+2-2	681-682	.	_	_	
+
+#Text=—Stoborough and Side Gates, 2.
+3-1	682-683	—	_	_	
+3-2	683-693	Stoborough	https://en.wikipedia.org/wiki/Stoborough	LOC	
+3-3	694-697	and	_	_	
+3-4	698-702	Side	_	_	
+3-5	703-708	Gates	_	_	
+3-6	708-709	,	_	_	
+3-7	710-711	2	_	_	
+3-8	711-712	.	_	_	
+
+#Text=—Heston and Ulwell Gates, S.
+4-1	712-713	—	_	_	
+4-2	713-719	Heston	https://en.wikipedia.org/wiki/Herston,\_Dorset	LOC	
+4-3	720-723	and	_	_	
+4-4	724-730	Ulwell	*	LOC	
+4-5	731-736	Gates	_	_	
+4-6	736-737	,	_	_	
+4-7	738-739	S	_	_	
+4-8	739-740	.	_	_	
+
+#Text=—Westport Gate, 4.
+5-1	740-741	—	_	_	
+5-2	741-749	Westport	*	LOC	
+5-3	750-754	Gate	_	_	
+5-4	754-755	,	_	_	
+5-5	756-757	4	_	_	
+5-6	757-758	.	_	_	
+
+#Text=—Wortbport Gates and Side Gate.
+6-1	758-759	—	_	_	
+6-2	759-768	Wortbport	*	LOC	
+6-3	769-774	Gates	_	_	
+6-4	775-778	and	_	_	
+6-5	779-783	Side	_	_	
+6-6	784-788	Gate	_	_	
+6-7	788-789	.	_	_	
+
+#Text=Whoever happen+ to be the best bidder must at the same time pay one month in advance of the rent at which such Tolls may be let, and give security, with sufficient sureties, to the satisfaction of the Trustees, for payment of the rest of the money monthly.
+7-1	790-797	Whoever	_	_	
+7-2	798-804	happen	_	_	
+7-3	804-805	+	_	_	
+7-4	806-808	to	_	_	
+7-5	809-811	be	_	_	
+7-6	812-815	the	_	_	
+7-7	816-820	best	_	_	
+7-8	821-827	bidder	_	_	
+7-9	828-832	must	_	_	
+7-10	833-835	at	_	_	
+7-11	836-839	the	_	_	
+7-12	840-844	same	_	_	
+7-13	845-849	time	_	_	
+7-14	850-853	pay	_	_	
+7-15	854-857	one	_	_	
+7-16	858-863	month	_	_	
+7-17	864-866	in	_	_	
+7-18	867-874	advance	_	_	
+7-19	875-877	of	_	_	
+7-20	878-881	the	_	_	
+7-21	882-886	rent	_	_	
+7-22	887-889	at	_	_	
+7-23	890-895	which	_	_	
+7-24	896-900	such	_	_	
+7-25	901-906	Tolls	_	_	
+7-26	907-910	may	_	_	
+7-27	911-913	be	_	_	
+7-28	914-917	let	_	_	
+7-29	917-918	,	_	_	
+7-30	919-922	and	_	_	
+7-31	923-927	give	_	_	
+7-32	928-936	security	_	_	
+7-33	936-937	,	_	_	
+7-34	938-942	with	_	_	
+7-35	943-953	sufficient	_	_	
+7-36	954-962	sureties	_	_	
+7-37	962-963	,	_	_	
+7-38	964-966	to	_	_	
+7-39	967-970	the	_	_	
+7-40	971-983	satisfaction	_	_	
+7-41	984-986	of	_	_	
+7-42	987-990	the	_	_	
+7-43	991-999	Trustees	_	_	
+7-44	999-1000	,	_	_	
+7-45	1001-1004	for	_	_	
+7-46	1005-1012	payment	_	_	
+7-47	1013-1015	of	_	_	
+7-48	1016-1019	the	_	_	
+7-49	1020-1024	rest	_	_	
+7-50	1025-1027	of	_	_	
+7-51	1028-1031	the	_	_	
+7-52	1032-1037	money	_	_	
+7-53	1038-1045	monthly	_	_	
+7-54	1045-1046	.	_	_	
+
+#Text=Persons intending to bid, and whose sureties shall not be present at the time of Letting must come prepared with a written consent from two persons to become such sureties, and the Trustee+ particularly desire the bidders to take notice that such sureties must be strictly responsible to the full extent of the rent.
+8-1	1047-1054	Persons	_	_	
+8-2	1055-1064	intending	_	_	
+8-3	1065-1067	to	_	_	
+8-4	1068-1071	bid	_	_	
+8-5	1071-1072	,	_	_	
+8-6	1073-1076	and	_	_	
+8-7	1077-1082	whose	_	_	
+8-8	1083-1091	sureties	_	_	
+8-9	1092-1097	shall	_	_	
+8-10	1098-1101	not	_	_	
+8-11	1102-1104	be	_	_	
+8-12	1105-1112	present	_	_	
+8-13	1113-1115	at	_	_	
+8-14	1116-1119	the	_	_	
+8-15	1120-1124	time	_	_	
+8-16	1125-1127	of	_	_	
+8-17	1128-1135	Letting	_	_	
+8-18	1136-1140	must	_	_	
+8-19	1141-1145	come	_	_	
+8-20	1146-1154	prepared	_	_	
+8-21	1155-1159	with	_	_	
+8-22	1160-1161	a	_	_	
+8-23	1162-1169	written	_	_	
+8-24	1170-1177	consent	_	_	
+8-25	1178-1182	from	_	_	
+8-26	1183-1186	two	_	_	
+8-27	1187-1194	persons	_	_	
+8-28	1195-1197	to	_	_	
+8-29	1198-1204	become	_	_	
+8-30	1205-1209	such	_	_	
+8-31	1210-1218	sureties	_	_	
+8-32	1218-1219	,	_	_	
+8-33	1220-1223	and	_	_	
+8-34	1224-1227	the	_	_	
+8-35	1228-1235	Trustee	_	_	
+8-36	1235-1236	+	_	_	
+8-37	1237-1249	particularly	_	_	
+8-38	1250-1256	desire	_	_	
+8-39	1257-1260	the	_	_	
+8-40	1261-1268	bidders	_	_	
+8-41	1269-1271	to	_	_	
+8-42	1272-1276	take	_	_	
+8-43	1277-1283	notice	_	_	
+8-44	1284-1288	that	_	_	
+8-45	1289-1293	such	_	_	
+8-46	1294-1302	sureties	_	_	
+8-47	1303-1307	must	_	_	
+8-48	1308-1310	be	_	_	
+8-49	1311-1319	strictly	_	_	
+8-50	1320-1331	responsible	_	_	
+8-51	1332-1334	to	_	_	
+8-52	1335-1338	the	_	_	
+8-53	1339-1343	full	_	_	
+8-54	1344-1350	extent	_	_	
+8-55	1351-1353	of	_	_	
+8-56	1354-1357	the	_	_	
+8-57	1358-1362	rent	_	_	
+8-58	1362-1363	.	_	_	
+
+#Text=THOMAS P HIP PARD, Clerk to the Trustees.
+9-1	1364-1370	THOMAS	_	_	
+9-2	1371-1372	P	_	_	
+9-3	1373-1376	HIP	_	_	
+9-4	1377-1381	PARD	_	_	
+9-5	1381-1382	,	_	_	
+9-6	1383-1388	Clerk	_	_	
+9-7	1389-1391	to	_	_	
+9-8	1392-1395	the	_	_	
+9-9	1396-1404	Trustees	_	_	
+9-10	1404-1405	.	_	_	
+
+#Text=Dated Wareham, 15th March, 1880.
+10-1	1406-1411	Dated	_	_	
+10-2	1412-1419	Wareham	https://en.wikipedia.org/wiki/Wareham,\_Dorset	LOC	
+10-3	1419-1420	,	_	_	
+10-4	1421-1425	15th	_	_	
+10-5	1426-1431	March	_	_	
+10-6	1431-1432	,	_	_	
+10-7	1433-1437	1880	_	_	
+10-8	1437-1438	.	_	_	
+
+#Text=Potatoes !
+11-1	1440-1448	Potatoes	_	_	
+11-2	1449-1450	!	_	_	
+
+#Text=Pota•oee !
+12-1	1451-1455	Pota	_	_	
+12-2	1455-1456	•	_	_	
+12-3	1456-1459	oee	_	_	
+12-4	1460-1461	!	_	_	
+
+#Text=!
+13-1	1462-1463	!	_	_	
+
+#Text=Potatoes !!
+14-1	1464-1472	Potatoes	_	_	
+14-2	1473-1474	!	_	_	
+14-3	1474-1475	!	_	_	
+
+#Text=!
+15-1	1476-1477	!	_	_	
+
+#Text=JUST ARRIVED.
+16-1	1478-1482	JUST	_	_	
+16-2	1483-1490	ARRIVED	_	_	
+16-3	1490-1491	.	_	_	
+
+#Text=It.PRIME SAMPLE of Red Scotch and Regent POTATOES, Warranted excellent in quality for eating, and well adapted as a change fur Seed, are NOW SELLING at the Potatoe Stores of Mr.
+17-1	1492-1500	It.PRIME	_	_	
+17-2	1501-1507	SAMPLE	_	_	
+17-3	1508-1510	of	_	_	
+17-4	1511-1514	Red	_	_	
+17-5	1515-1521	Scotch	_	_	
+17-6	1522-1525	and	_	_	
+17-7	1526-1532	Regent	_	_	
+17-8	1533-1541	POTATOES	_	_	
+17-9	1541-1542	,	_	_	
+17-10	1543-1552	Warranted	_	_	
+17-11	1553-1562	excellent	_	_	
+17-12	1563-1565	in	_	_	
+17-13	1566-1573	quality	_	_	
+17-14	1574-1577	for	_	_	
+17-15	1578-1584	eating	_	_	
+17-16	1584-1585	,	_	_	
+17-17	1586-1589	and	_	_	
+17-18	1590-1594	well	_	_	
+17-19	1595-1602	adapted	_	_	
+17-20	1603-1605	as	_	_	
+17-21	1606-1607	a	_	_	
+17-22	1608-1614	change	_	_	
+17-23	1615-1618	fur	_	_	
+17-24	1619-1623	Seed	_	_	
+17-25	1623-1624	,	_	_	
+17-26	1625-1628	are	_	_	
+17-27	1629-1632	NOW	_	_	
+17-28	1633-1640	SELLING	_	_	
+17-29	1641-1643	at	_	_	
+17-30	1644-1647	the	_	_	
+17-31	1648-1655	Potatoe	_	_	
+17-32	1656-1662	Stores	_	_	
+17-33	1663-1665	of	_	_	
+17-34	1666-1668	Mr	_	_	
+17-35	1668-1669	.	_	_	
+
+#Text=JOHN BLUNDELL, Poole Arms Inn, Quay, Poole, at Bs. per cwt. or le. 4.1. per peck.
+18-1	1670-1674	JOHN	_	_	
+18-2	1675-1683	BLUNDELL	_	_	
+18-3	1683-1684	,	_	_	
+18-4	1685-1690	Poole	https://en.wikipedia.org/wiki/Poole	LOC	
+18-5	1691-1695	Arms	*[2]	BUILDING[2]	
+18-6	1696-1699	Inn	*[2]	BUILDING[2]	
+18-7	1699-1700	,	_	_	
+18-8	1701-1705	Quay	*	STREET	
+18-9	1705-1706	,	_	_	
+18-10	1707-1712	Poole	https://en.wikipedia.org/wiki/Poole	LOC	
+18-11	1712-1713	,	_	_	
+18-12	1714-1716	at	_	_	
+18-13	1717-1719	Bs	_	_	
+18-14	1719-1720	.	_	_	
+18-15	1721-1724	per	_	_	
+18-16	1725-1728	cwt	_	_	
+18-17	1728-1729	.	_	_	
+18-18	1730-1732	or	_	_	
+18-19	1733-1735	le	_	_	
+18-20	1735-1736	.	_	_	
+18-21	1737-1740	4.1	_	_	
+18-22	1740-1741	.	_	_	
+18-23	1742-1745	per	_	_	
+18-24	1746-1750	peck	_	_	
+18-25	1750-1751	.	_	_	
+
+#Text=May Bth. 4e60.
+19-1	1752-1755	May	_	_	
+19-2	1756-1759	Bth	_	_	
+19-3	1759-1760	.	_	_	
+19-4	1761-1765	4e60	_	_	
+19-5	1765-1766	.	_	_	
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/test/metadata.tsv b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/metadata.tsv
new file mode 100644
index 00000000..c7e7f325
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/test/metadata.tsv
@@ -0,0 +1,2 @@
+fname	word_count	ocr_quality_mean	ocr_quality_sd	issue_date	publication_code	publication_title	decade	place_publication	annotation_batch
+9144_Poole1860	319	0.9113	0.1226	1860-05-10	2325	The Poole and South-Western Herald, etc.	1860	Poole	3
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/train/.DS_Store b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/.DS_Store
new file mode 100644
index 00000000..72992317
Binary files /dev/null and b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/.DS_Store differ
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/train/annotated_tsv/1218_Poole1860.tsv b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/annotated_tsv/1218_Poole1860.tsv
new file mode 100644
index 00000000..c18c088a
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/annotated_tsv/1218_Poole1860.tsv
@@ -0,0 +1,488 @@
+#FORMAT=WebAnno TSV 3.2
+#T_SP=webanno.custom.Customentity|identifiier|value
+
+
+#Text=THE POOLE AND SOUTH-WESTERN HERALD, THURSDAY, OCTOBER 20, 1864.
+1-1	0-3	THE	_	_	
+1-2	4-9	POOLE	_	_	
+1-3	10-13	AND	_	_	
+1-4	14-27	SOUTH-WESTERN	_	_	
+1-5	28-34	HERALD	_	_	
+1-6	34-35	,	_	_	
+1-7	36-44	THURSDAY	_	_	
+1-8	44-45	,	_	_	
+1-9	46-53	OCTOBER	_	_	
+1-10	54-56	20	_	_	
+1-11	56-57	,	_	_	
+1-12	58-62	1864	_	_	
+1-13	62-63	.	_	_	
+
+#Text=POOLE TOWN COUNCIL.
+2-1	65-70	POOLE	https://en.wikipedia.org/wiki/Poole	LOC	
+2-2	71-75	TOWN	_	_	
+2-3	76-83	COUNCIL	_	_	
+2-4	83-84	.	_	_	
+
+#Text=On Friday last, a meeting of the Town Cowmen was held at the Guildhall. the Mayor (G.
+3-1	86-88	On	_	_	
+3-2	89-95	Friday	_	_	
+3-3	96-100	last	_	_	
+3-4	100-101	,	_	_	
+3-5	102-103	a	_	_	
+3-6	104-111	meeting	_	_	
+3-7	112-114	of	_	_	
+3-8	115-118	the	_	_	
+3-9	119-123	Town	_	_	
+3-10	124-130	Cowmen	_	_	
+3-11	131-134	was	_	_	
+3-12	135-139	held	_	_	
+3-13	140-142	at	_	_	
+3-14	143-146	the	_	_	
+3-15	147-156	Guildhall	*	BUILDING	
+3-16	156-157	.	_	_	
+3-17	158-161	the	_	_	
+3-18	162-167	Mayor	_	_	
+3-19	168-169	(	_	_	
+3-20	169-170	G	_	_	
+3-21	170-171	.	_	_	
+
+#Text=Belben, jun.
+4-1	172-178	Belben	_	_	
+4-2	178-179	,	_	_	
+4-3	180-183	jun	_	_	
+4-4	183-184	.	_	_	
+
+#Text=Esq.,) in the chair.
+5-1	185-188	Esq	_	_	
+5-2	188-189	.	_	_	
+5-3	189-190	,	_	_	
+5-4	190-191	)	_	_	
+5-5	192-194	in	_	_	
+5-6	195-198	the	_	_	
+5-7	199-204	chair	_	_	
+5-8	204-205	.	_	_	
+
+#Text=There were also present: Ald J.
+6-1	206-211	There	_	_	
+6-2	212-216	were	_	_	
+6-3	217-221	also	_	_	
+6-4	222-229	present	_	_	
+6-5	229-230	:	_	_	
+6-6	231-234	Ald	_	_	
+6-7	235-236	J	_	_	
+6-8	236-237	.	_	_	
+
+#Text=Adey, J.
+7-1	238-242	Adey	_	_	
+7-2	242-243	,	_	_	
+7-3	244-245	J	_	_	
+7-4	245-246	.	_	_	
+
+#Text=Gomm, and W.
+8-1	247-251	Gomm	_	_	
+8-2	251-252	,	_	_	
+8-3	253-256	and	_	_	
+8-4	257-258	W	_	_	
+8-5	258-259	.	_	_	
+
+#Text=Pearce, and Councillel Frampton, J.
+9-1	260-266	Pearce	_	_	
+9-2	266-267	,	_	_	
+9-3	268-271	and	_	_	
+9-4	272-282	Councillel	_	_	
+9-5	283-291	Frampton	_	_	
+9-6	291-292	,	_	_	
+9-7	293-294	J	_	_	
+9-8	294-295	.	_	_	
+
+#Text=Harker, H.
+10-1	296-302	Harker	_	_	
+10-2	302-303	,	_	_	
+10-3	304-305	H	_	_	
+10-4	305-306	.	_	_	
+
+#Text=Harris, F.
+11-1	307-313	Harris	_	_	
+11-2	313-314	,	_	_	
+11-3	315-316	F	_	_	
+11-4	316-317	.	_	_	
+
+#Text=Btyring. and Wood.
+12-1	318-325	Btyring	_	_	
+12-2	325-326	.	_	_	
+12-3	327-330	and	_	_	
+12-4	331-335	Wood	_	_	
+12-5	335-336	.	_	_	
+
+#Text=The TOWN CLERK having read the minutes of the last meeting,, orders were made upon the borough treasurer to the amount of £SB Bs.
+13-1	337-340	The	_	_	
+13-2	341-345	TOWN	_	_	
+13-3	346-351	CLERK	_	_	
+13-4	352-358	having	_	_	
+13-5	359-363	read	_	_	
+13-6	364-367	the	_	_	
+13-7	368-375	minutes	_	_	
+13-8	376-378	of	_	_	
+13-9	379-382	the	_	_	
+13-10	383-387	last	_	_	
+13-11	388-395	meeting	_	_	
+13-12	395-396	,	_	_	
+13-13	396-397	,	_	_	
+13-14	398-404	orders	_	_	
+13-15	405-409	were	_	_	
+13-16	410-414	made	_	_	
+13-17	415-419	upon	_	_	
+13-18	420-423	the	_	_	
+13-19	424-431	borough	_	_	
+13-20	432-441	treasurer	_	_	
+13-21	442-444	to	_	_	
+13-22	445-448	the	_	_	
+13-23	449-455	amount	_	_	
+13-24	456-458	of	_	_	
+13-25	459-460	£	_	_	
+13-26	460-462	SB	_	_	
+13-27	463-465	Bs	_	_	
+13-28	465-466	.	_	_	
+
+#Text=Id.
+14-1	467-469	Id	_	_	
+14-2	469-470	.	_	_	
+
+#Text=Dimmers AND FAIRS.
+15-1	472-479	Dimmers	_	_	
+15-2	480-483	AND	_	_	
+15-3	484-489	FAIRS	_	_	
+15-4	489-490	.	_	_	
+
+#Text=The next leceincas was to receive the tenders for taking the markets and fairs from the 25th of December next, and to declare the accepted tender.
+16-1	491-494	The	_	_	
+16-2	495-499	next	_	_	
+16-3	500-509	leceincas	_	_	
+16-4	510-513	was	_	_	
+16-5	514-516	to	_	_	
+16-6	517-524	receive	_	_	
+16-7	525-528	the	_	_	
+16-8	529-536	tenders	_	_	
+16-9	537-540	for	_	_	
+16-10	541-547	taking	_	_	
+16-11	548-551	the	_	_	
+16-12	552-559	markets	_	_	
+16-13	560-563	and	_	_	
+16-14	564-569	fairs	_	_	
+16-15	570-574	from	_	_	
+16-16	575-578	the	_	_	
+16-17	579-583	25th	_	_	
+16-18	584-586	of	_	_	
+16-19	587-595	December	_	_	
+16-20	596-600	next	_	_	
+16-21	600-601	,	_	_	
+16-22	602-605	and	_	_	
+16-23	606-608	to	_	_	
+16-24	609-616	declare	_	_	
+16-25	617-620	the	_	_	
+16-26	621-629	accepted	_	_	
+16-27	630-636	tender	_	_	
+16-28	636-637	.	_	_	
+
+#Text=Mr.
+17-1	638-640	Mr	_	_	
+17-2	640-641	.	_	_	
+
+#Text=William Gould, the late knee, offered £7O for the next year ; Mr.
+18-1	642-649	William	_	_	
+18-2	650-655	Gould	_	_	
+18-3	655-656	,	_	_	
+18-4	657-660	the	_	_	
+18-5	661-665	late	_	_	
+18-6	666-670	knee	_	_	
+18-7	670-671	,	_	_	
+18-8	672-679	offered	_	_	
+18-9	680-683	£7O	_	_	
+18-10	684-687	for	_	_	
+18-11	688-691	the	_	_	
+18-12	692-696	next	_	_	
+18-13	697-701	year	_	_	
+18-14	702-703	;	_	_	
+18-15	704-706	Mr	_	_	
+18-16	706-707	.	_	_	
+
+#Text=Cary, of Ramsey, Hants, sent in a tender offering £lO more than Mr.
+19-1	708-712	Cary	_	_	
+19-2	712-713	,	_	_	
+19-3	714-716	of	_	_	
+19-4	717-723	Ramsey	https://en.wikipedia.org/wiki/Romsey	LOC	
+19-5	723-724	,	_	_	
+19-6	725-730	Hants	https://en.wikipedia.org/wiki/Hampshire	LOC	
+19-7	730-731	,	_	_	
+19-8	732-736	sent	_	_	
+19-9	737-739	in	_	_	
+19-10	740-741	a	_	_	
+19-11	742-748	tender	_	_	
+19-12	749-757	offering	_	_	
+19-13	758-759	£	_	_	
+19-14	759-761	lO	_	_	
+19-15	762-766	more	_	_	
+19-16	767-771	than	_	_	
+19-17	772-774	Mr	_	_	
+19-18	774-775	.	_	_	
+
+#Text=Gould, a proposal which elicited some laughter and Mr.
+20-1	776-781	Gould	_	_	
+20-2	781-782	,	_	_	
+20-3	783-784	a	_	_	
+20-4	785-793	proposal	_	_	
+20-5	794-799	which	_	_	
+20-6	800-808	elicited	_	_	
+20-7	809-813	some	_	_	
+20-8	814-822	laughter	_	_	
+20-9	823-826	and	_	_	
+20-10	827-829	Mr	_	_	
+20-11	829-830	.	_	_	
+
+#Text=Gooden, of Poole, offered £95.
+21-1	831-837	Gooden	_	_	
+21-2	837-838	,	_	_	
+21-3	839-841	of	_	_	
+21-4	842-847	Poole	https://en.wikipedia.org/wiki/Poole	LOC	
+21-5	847-848	,	_	_	
+21-6	849-856	offered	_	_	
+21-7	857-860	£95	_	_	
+21-8	860-861	.	_	_	
+
+#Text=After a little conversation Mr.
+22-1	862-867	After	_	_	
+22-2	868-869	a	_	_	
+22-3	870-876	little	_	_	
+22-4	877-889	conversation	_	_	
+22-5	890-892	Mr	_	_	
+22-6	892-893	.	_	_	
+
+#Text=Wood proposed. and Mr.
+23-1	894-898	Wood	_	_	
+23-2	899-907	proposed	_	_	
+23-3	907-908	.	_	_	
+23-4	909-912	and	_	_	
+23-5	913-915	Mr	_	_	
+23-6	915-916	.	_	_	
+
+#Text=Styring seconded the motion that Mr.
+24-1	917-924	Styring	_	_	
+24-2	925-933	seconded	_	_	
+24-3	934-937	the	_	_	
+24-4	938-944	motion	_	_	
+24-5	945-949	that	_	_	
+24-6	950-952	Mr	_	_	
+24-7	952-953	.	_	_	
+
+#Text=Goodeds tender be received for one year.
+25-1	954-961	Goodeds	_	_	
+25-2	962-968	tender	_	_	
+25-3	969-971	be	_	_	
+25-4	972-980	received	_	_	
+25-5	981-984	for	_	_	
+25-6	985-988	one	_	_	
+25-7	989-993	year	_	_	
+25-8	993-994	.	_	_	
+
+#Text=UNOCCUPIED LAND TO LET.
+26-1	995-1005	UNOCCUPIED	_	_	
+26-2	1006-1010	LAND	_	_	
+26-3	1011-1013	TO	_	_	
+26-4	1014-1017	LET	_	_	
+26-5	1017-1018	.	_	_	
+
+#Text=The TOWN CLERK read • letter from Mr.
+27-1	1019-1022	The	_	_	
+27-2	1023-1027	TOWN	_	_	
+27-3	1028-1033	CLERK	_	_	
+27-4	1034-1038	read	_	_	
+27-5	1039-1040	•	_	_	
+27-6	1041-1047	letter	_	_	
+27-7	1048-1052	from	_	_	
+27-8	1053-1055	Mr	_	_	
+27-9	1055-1056	.	_	_	
+
+#Text=James Davis, offering two guineas per annum for the right of sporting over certain land at Kinson.
+28-1	1057-1062	James	_	_	
+28-2	1063-1068	Davis	_	_	
+28-3	1068-1069	,	_	_	
+28-4	1070-1078	offering	_	_	
+28-5	1079-1082	two	_	_	
+28-6	1083-1090	guineas	_	_	
+28-7	1091-1094	per	_	_	
+28-8	1095-1100	annum	_	_	
+28-9	1101-1104	for	_	_	
+28-10	1105-1108	the	_	_	
+28-11	1109-1114	right	_	_	
+28-12	1115-1117	of	_	_	
+28-13	1118-1126	sporting	_	_	
+28-14	1127-1131	over	_	_	
+28-15	1132-1139	certain	_	_	
+28-16	1140-1144	land	_	_	
+28-17	1145-1147	at	_	_	
+28-18	1148-1154	Kinson	https://en.wikipedia.org/wiki/Kinson	LOC	
+28-19	1154-1155	.	_	_	
+
+#Text=Some conversation ensued relative to the land in question, Mr.
+29-1	1156-1160	Some	_	_	
+29-2	1161-1173	conversation	_	_	
+29-3	1174-1180	ensued	_	_	
+29-4	1181-1189	relative	_	_	
+29-5	1190-1192	to	_	_	
+29-6	1193-1196	the	_	_	
+29-7	1197-1201	land	_	_	
+29-8	1202-1204	in	_	_	
+29-9	1205-1213	question	_	_	
+29-10	1213-1214	,	_	_	
+29-11	1215-1217	Mr	_	_	
+29-12	1217-1218	.	_	_	
+
+#Text=STYRING remarking that if he were allowed a little time to consider the matter, he should probably make an offer to take the land for a term of years.
+30-1	1219-1226	STYRING	_	_	
+30-2	1227-1236	remarking	_	_	
+30-3	1237-1241	that	_	_	
+30-4	1242-1244	if	_	_	
+30-5	1245-1247	he	_	_	
+30-6	1248-1252	were	_	_	
+30-7	1253-1260	allowed	_	_	
+30-8	1261-1262	a	_	_	
+30-9	1263-1269	little	_	_	
+30-10	1270-1274	time	_	_	
+30-11	1275-1277	to	_	_	
+30-12	1278-1286	consider	_	_	
+30-13	1287-1290	the	_	_	
+30-14	1291-1297	matter	_	_	
+30-15	1297-1298	,	_	_	
+30-16	1299-1301	he	_	_	
+30-17	1302-1308	should	_	_	
+30-18	1309-1317	probably	_	_	
+30-19	1318-1322	make	_	_	
+30-20	1323-1325	an	_	_	
+30-21	1326-1331	offer	_	_	
+30-22	1332-1334	to	_	_	
+30-23	1335-1339	take	_	_	
+30-24	1340-1343	the	_	_	
+30-25	1344-1348	land	_	_	
+30-26	1349-1352	for	_	_	
+30-27	1353-1354	a	_	_	
+30-28	1355-1359	term	_	_	
+30-29	1360-1362	of	_	_	
+30-30	1363-1368	years	_	_	
+30-31	1368-1369	.	_	_	
+
+#Text=He was of opinion that the land might be brought into a state of cultivation.
+31-1	1370-1372	He	_	_	
+31-2	1373-1376	was	_	_	
+31-3	1377-1379	of	_	_	
+31-4	1380-1387	opinion	_	_	
+31-5	1388-1392	that	_	_	
+31-6	1393-1396	the	_	_	
+31-7	1397-1401	land	_	_	
+31-8	1402-1407	might	_	_	
+31-9	1408-1410	be	_	_	
+31-10	1411-1418	brought	_	_	
+31-11	1419-1423	into	_	_	
+31-12	1424-1425	a	_	_	
+31-13	1426-1431	state	_	_	
+31-14	1432-1434	of	_	_	
+31-15	1435-1446	cultivation	_	_	
+31-16	1446-1447	.	_	_	
+
+#Text=The TOWN CLERK thought it would be very unwise to let the laud for a long term, as its value might before long be greatly enhanced.
+32-1	1448-1451	The	_	_	
+32-2	1452-1456	TOWN	_	_	
+32-3	1457-1462	CLERK	_	_	
+32-4	1463-1470	thought	_	_	
+32-5	1471-1473	it	_	_	
+32-6	1474-1479	would	_	_	
+32-7	1480-1482	be	_	_	
+32-8	1483-1487	very	_	_	
+32-9	1488-1494	unwise	_	_	
+32-10	1495-1497	to	_	_	
+32-11	1498-1501	let	_	_	
+32-12	1502-1505	the	_	_	
+32-13	1506-1510	laud	_	_	
+32-14	1511-1514	for	_	_	
+32-15	1515-1516	a	_	_	
+32-16	1517-1521	long	_	_	
+32-17	1522-1526	term	_	_	
+32-18	1526-1527	,	_	_	
+32-19	1528-1530	as	_	_	
+32-20	1531-1534	its	_	_	
+32-21	1535-1540	value	_	_	
+32-22	1541-1546	might	_	_	
+32-23	1547-1553	before	_	_	
+32-24	1554-1558	long	_	_	
+32-25	1559-1561	be	_	_	
+32-26	1562-1569	greatly	_	_	
+32-27	1570-1578	enhanced	_	_	
+32-28	1578-1579	.	_	_	
+
+#Text=He alluded particularly to the promised prosperity of Weabonrne, in which be had great faith.
+33-1	1580-1582	He	_	_	
+33-2	1583-1590	alluded	_	_	
+33-3	1591-1603	particularly	_	_	
+33-4	1604-1606	to	_	_	
+33-5	1607-1610	the	_	_	
+33-6	1611-1619	promised	_	_	
+33-7	1620-1630	prosperity	_	_	
+33-8	1631-1633	of	_	_	
+33-9	1634-1643	Weabonrne	https://en.wikipedia.org/wiki/Westbourne,\_Dorset	LOC	
+33-10	1643-1644	,	_	_	
+33-11	1645-1647	in	_	_	
+33-12	1648-1653	which	_	_	
+33-13	1654-1656	be	_	_	
+33-14	1657-1660	had	_	_	
+33-15	1661-1666	great	_	_	
+33-16	1667-1672	faith	_	_	
+33-17	1672-1673	.	_	_	
+
+#Text=The MAYOR advised that the land should be advertised to be let This suggestion was adopted, and the Town Clerk was directed to advertise the land in question to be let by tender or otherwise.
+34-1	1674-1677	The	_	_	
+34-2	1678-1683	MAYOR	_	_	
+34-3	1684-1691	advised	_	_	
+34-4	1692-1696	that	_	_	
+34-5	1697-1700	the	_	_	
+34-6	1701-1705	land	_	_	
+34-7	1706-1712	should	_	_	
+34-8	1713-1715	be	_	_	
+34-9	1716-1726	advertised	_	_	
+34-10	1727-1729	to	_	_	
+34-11	1730-1732	be	_	_	
+34-12	1733-1736	let	_	_	
+34-13	1737-1741	This	_	_	
+34-14	1742-1752	suggestion	_	_	
+34-15	1753-1756	was	_	_	
+34-16	1757-1764	adopted	_	_	
+34-17	1764-1765	,	_	_	
+34-18	1766-1769	and	_	_	
+34-19	1770-1773	the	_	_	
+34-20	1774-1778	Town	_	_	
+34-21	1779-1784	Clerk	_	_	
+34-22	1785-1788	was	_	_	
+34-23	1789-1797	directed	_	_	
+34-24	1798-1800	to	_	_	
+34-25	1801-1810	advertise	_	_	
+34-26	1811-1814	the	_	_	
+34-27	1815-1819	land	_	_	
+34-28	1820-1822	in	_	_	
+34-29	1823-1831	question	_	_	
+34-30	1832-1834	to	_	_	
+34-31	1835-1837	be	_	_	
+34-32	1838-1841	let	_	_	
+34-33	1842-1844	by	_	_	
+34-34	1845-1851	tender	_	_	
+34-35	1852-1854	or	_	_	
+34-36	1855-1864	otherwise	_	_	
+34-37	1864-1865	.	_	_	
+
+#Text=There was no other business before the Council.
+35-1	1866-1871	There	_	_	
+35-2	1872-1875	was	_	_	
+35-3	1876-1878	no	_	_	
+35-4	1879-1884	other	_	_	
+35-5	1885-1893	business	_	_	
+35-6	1894-1900	before	_	_	
+35-7	1901-1904	the	_	_	
+35-8	1905-1912	Council	_	_	
+35-9	1912-1913	.	_	_	
diff --git a/tests/sample_files/resources/news_datasets/topRes19th_v2/train/metadata.tsv b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/metadata.tsv
new file mode 100644
index 00000000..1250256a
--- /dev/null
+++ b/tests/sample_files/resources/news_datasets/topRes19th_v2/train/metadata.tsv
@@ -0,0 +1,2 @@
+fname	word_count	ocr_quality_mean	ocr_quality_sd	issue_date	publication_code	publication_title	decade	place_publication	annotation_batch
+1218_Poole1860	347	0.8804	0.1506	1864-10-20	2325	The Poole and South-Western Herald, etc.	1860	Poole	2
diff --git a/tests/sample_files/resources/publication_metadata.json b/tests/sample_files/resources/publication_metadata.json
new file mode 100644
index 00000000..a9daace2
--- /dev/null
+++ b/tests/sample_files/resources/publication_metadata.json
@@ -0,0 +1,163 @@
+{
+    "sn83030483": {
+        "publication_title": "Gazette of the United-States",
+        "publication_place": "New York",
+        "publication_ctxt": "New York",
+        "publication_dates": "1789-1793",
+        "wikidata_qid": "Q60"
+    },
+    "sn84026272": {
+        "publication_title": "Gazette of the United-States",
+        "publication_place": "Philadelphia",
+        "publication_ctxt": "Pennsylvania",
+        "publication_dates": "1800-1801",
+        "wikidata_qid": "Q1345"
+    },
+    "sn82014385": {
+        "publication_title": "The Delaware gazette",
+        "publication_place": "Wilmington",
+        "publication_ctxt": "Delaware",
+        "publication_dates": "1809-1810",
+        "wikidata_qid": "Q174224"
+    },
+    "sn83026170": {
+        "publication_title": "Alexandria Gazette",
+        "publication_place": "Alexandria",
+        "publication_ctxt": "Virginia",
+        "publication_dates": "1817-1822",
+        "wikidata_qid": "Q88"
+    },
+    "sn83020874": {
+        "publication_title": "Cherokee Phoenix, and Indian's advocate",
+        "publication_place": "Echota",
+        "publication_ctxt": "Georgia",
+        "publication_dates": "1829-1834",
+        "wikidata_qid": "Q7007061"
+    },
+    "sn84020750": {
+        "publication_title": "The North Carolinian",
+        "publication_place": "Fayetteville",
+        "publication_ctxt": "North Carolina",
+        "publication_dates": "1839-1861",
+        "wikidata_qid": "Q331104"
+    },
+    "sn85042404": {
+        "publication_title": "Jamestown Alert",
+        "publication_place": "Jamestown",
+        "publication_ctxt": "North Dakota",
+        "publication_dates": "1878-1882",
+        "wikidata_qid": "Q1052658"
+    },
+    "sn88068010": {
+        "publication_title": "Chariton Courier",
+        "publication_place": "Keytesville",
+        "publication_ctxt": "Missouri",
+        "publication_dates": "1878-current",
+        "wikidata_qid": "Q957297"
+    },
+    "sn86063397": {
+        "publication_title": "The Elk Mountain pilot",
+        "publication_place": "Irwin",
+        "publication_ctxt": "Colorado",
+        "publication_dates": "1880-19??",
+        "wikidata_qid": "Q592729"
+    },
+    "sn88085488": {
+        "publication_title": "Pullman Herald",
+        "publication_place": "Pullman",
+        "publication_ctxt": "Washington",
+        "publication_dates": "1888-1989",
+        "wikidata_qid": "Q983540"
+    },
+    "sn89058133": {
+        "publication_title": "Putnam County Herald",
+        "publication_place": "Cookeville",
+        "publication_ctxt": "Tennessee",
+        "publication_dates": "1903-1922",
+        "wikidata_qid": "Q2456192"
+    },
+    "sn83025812": {
+        "publication_title": "The Independent",
+        "publication_place": "Elizabeth City",
+        "publication_ctxt": "North Carolina",
+        "publication_dates": "1908-1936",
+        "wikidata_qid": "Q1018467"
+    },
+    "sn92063852": {
+        "publication_title": "The Detroit Tribune",
+        "publication_place": "Detroit",
+        "publication_ctxt": "Michigan",
+        "publication_dates": "1935-1966",
+        "wikidata_qid": "Q12439"
+    },
+    "sn91068761": {
+        "publication_title": "Tabor City Tribune",
+        "publication_place": "Tabor City",
+        "publication_ctxt": "North Carolina",
+        "publication_dates": "1946-1991",
+        "wikidata_qid": "Q586130"
+    },
+    "0000408": {
+        "publication_title": "Dorset County Chronicle",
+        "publication_place": "Dorchester",
+        "publication_ctxt": "Dorset",
+        "publication_dates": "1824-1884",
+        "wikidata_qid": "Q503331"
+    },
+    "0000206": {
+        "publication_title": "Manchester Courier and Lancashire General Advertiser.",
+        "publication_place": "Manchester",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1825-1916",
+        "wikidata_qid": "Q18125"
+    },
+    "0000968": {
+        "publication_title": "The Ashton Weekly Reporter, and Stalybridge and Dukinfield Chronicle",
+        "publication_place": "Ashton-under-Lyne",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1855-",
+        "wikidata_qid": "Q659803"
+    },
+    "0000200": {
+        "publication_title": "The Manchester Mercury",
+        "publication_place": "Manchester",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1752-1830",
+        "wikidata_qid": "Q18125"
+    },
+    "0000201": {
+        "publication_title": "The Manchester Mercury",
+        "publication_place": "Manchester",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1752-1830",
+        "wikidata_qid": "Q18125"
+    },
+    "0000239": {
+        "publication_title": "The Manchester Mercury",
+        "publication_place": "Manchester",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1752-1830",
+        "wikidata_qid": "Q18125"
+    },
+    "0000240": {
+        "publication_title": "The Manchester Mercury",
+        "publication_place": "Manchester",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1752-1830",
+        "wikidata_qid": "Q18125"
+    },
+    "0000967": {
+        "publication_title": "Ashton and Stalybridge Reporter",
+        "publication_place": "Ashton-under-Lyne",
+        "publication_ctxt": "Lancashire",
+        "publication_dates": "1855-",
+        "wikidata_qid": "Q659803"
+    },
+    "0002325": {
+        "publication_title": "The Poole and South-Western Herald",
+        "publication_place": "Poole",
+        "publication_ctxt": "Dorset",
+        "publication_dates": "1852-1889",
+        "wikidata_qid": "Q203349"
+    }
+}
diff --git a/tests/sample_files/resources/wikidata/entity2class.txt b/tests/sample_files/resources/wikidata/entity2class.txt
new file mode 100644
index 00000000..ba7ec9bb
--- /dev/null
+++ b/tests/sample_files/resources/wikidata/entity2class.txt
@@ -0,0 +1 @@
+{"Q8577": "Q159821", "Q79348": "Q1093829", "Q83609": "Q3957", "Q170027": "Q3918", "Q482468": "Q15127012", "Q503516": "Q13410447", "Q547824": "Q852190", "Q729177": "Q570116", "Q801124": "Q55488", "Q871138": "Q3146899", "Q985210": "Q55488", "Q1545354": "Q44782", "Q1950928": "Q2154459", "Q2018322": "Q79007", "Q2460124": "Q532", "Q5645763": "Q494829", "Q7492719": "Q3252927", "Q7746609": "Q811979", "Q14875251": "Q4204495", "Q22059065": "Q17343829", "Q108940076": "Q498162", "Q8712": "Q94993988", "Q20075": "Q5503", "Q23311": "Q515", "Q42182": "Q570116", "Q122744": "Q2755753", "Q130206": "Q537127", "Q219867": "Q55488", "Q503424": "Q5341295", "Q734547": "Q7631958", "Q741640": "Q202570", "Q772421": "Q494230", "Q795678": "Q55488", "Q927198": "Q2202509", "Q1137312": "Q19953632", "Q1359589": "Q11635", "Q1394500": "Q7631958", "Q1399178": "Q32815", "Q1415441": "Q1248784", "Q1988417": "Q123705", "Q2354215": "Q82794", "Q3577611": "Q105731", "Q6669870": "Q11483816", "Q7443327": "Q2380335", "Q7492567": "Q56885635", "Q7492775": "Q9035798", "Q7492778": "Q55488", "Q15242653": "Q33506", "Q27985411": "Q123705", "Q8699": "Q94993988", "Q26888": "Q7897276", "Q209266": "Q15127012", "Q220198": "Q45400320", "Q518864": "Q751708", "Q774015": "Q1002812", "Q835031": "Q3917681", "Q897533": "Q1154710", "Q1431914": "Q644371", "Q1466941": "Q55488", "Q1749569": "Q2940297", "Q2306176": "Q15127012", "Q2365261": "Q1093829", "Q3061911": "Q1093829", "Q7492607": "Q738570", "Q7721041": "Q123705", "Q17509255": "Q79007", "Q17643392": "Q811979", "Q8703": "Q94993988", "Q42448": "Q515", "Q79869": "Q1093829", "Q123738": "Q22698", "Q193196": "Q4671277", "Q194209": "Q26132862", "Q649419": "Q55488", "Q733210": "Q26132862", "Q1184547": "Q498162", "Q1449564": "Q55488", "Q1666958": "Q59861107", "Q2277715": "Q3957", "Q2477346": "Q486972", "Q3228965": "Q383092", "Q4763489": "Q18917976", "Q4834838": "Q14350", "Q5038252": "Q486972", "Q5338273": "Q476028", "Q6515927": "Q618123", "Q6670323": "Q19953632", "Q24896243": 
"Q483110", "Q8709": "Q94993988", "Q10818": "Q217327", "Q23306": "Q180673", "Q123885": "Q45400320", "Q279459": "Q494829", "Q489255": "Q1093829", "Q951830": "Q464780", "Q1001456": "Q1093829", "Q1323689": "Q220505", "Q1488404": "Q811979", "Q1984238": "Q751708", "Q6515866": "Q55488", "Q6669759": "Q123705", "Q6671078": "Q938381", "Q6900329": "Q2380335", "Q7492566": "Q3257686", "Q84": "Q515", "Q8111": "Q159821", "Q55018": "Q24354", "Q124234": "Q2755753", "Q160302": "Q45400320", "Q195436": "Q207694", "Q238587": "Q207694", "Q278054": "Q3146899", "Q720102": "Q55488", "Q756819": "Q79007", "Q795691": "Q55485", "Q800751": "Q55488", "Q800753": "Q55488", "Q1137962": "Q868557", "Q4523493": "Q702492", "Q5011830": "Q14350", "Q5177618": "Q1137272", "Q6515805": "Q17343829", "Q6515934": "Q494829", "Q7492686": "Q494829", "Q14710970": "Q17343829", "Q60578265": "Q27990982", "Q79568": "Q1093829", "Q148349": "Q2755753", "Q205679": "Q7897276", "Q212883": "Q26132862", "Q214788": "Q55488", "Q216185": "Q4989906", "Q565521": "Q1802963", "Q1128631": "Q476028", "Q1187032": "Q1076486", "Q1402606": "Q842402", "Q2716505": "Q2755753", "Q3028626": "Q18917976", "Q3461415": "Q17343829", "Q4642035": "Q41176", "Q4801470": "Q2418495", "Q4834918": "Q14350", "Q4871546": "Q178561", "Q7594521": "Q1088552", "Q15179170": "Q5367899", "Q8691": "Q94993988", "Q8982": "Q1248784", "Q23298": "Q180673", "Q39121": "Q515", "Q92561": "Q515", "Q171240": "Q11691", "Q186309": "Q667018", "Q578794": "Q18608583", "Q743535": "Q1115575", "Q746876": "Q23413", "Q801125": "Q55488", "Q823917": "Q5341295", "Q1862179": "Q55488", "Q2422792": "Q1907114", "Q3365926": "Q486972", "Q4834926": "Q14350", "Q6669738": "Q17343829", "Q7242790": "Q132241", "Q7492565": "Q532", "Q7492568": "Q17343829", "Q7492570": "Q17343829", "Q12956644": "Q1002812", "Q14946379": "Q18917976", "Q20657974": "Q17343829", "Q21061609": "Q483110"}
\ No newline at end of file
diff --git a/tests/sample_files/resources/wikidata/mentions_to_wikidata.json b/tests/sample_files/resources/wikidata/mentions_to_wikidata.json
new file mode 100644
index 00000000..c4fa7b81
--- /dev/null
+++ b/tests/sample_files/resources/wikidata/mentions_to_wikidata.json
@@ -0,0 +1,183 @@
+{
+    "Edinburgh University": {
+        "Q160302": 202,
+        "Q5338273": 3
+    },
+    "London": {
+        "Q170027": 49,
+        "Q84": 15091,
+        "Q800751": 16,
+        "Q214788": 9,
+        "Q1488404": 2,
+        "Q14946379": 20,
+        "Q23311": 58,
+        "Q6900329": 6,
+        "Q92561": 288,
+        "Q171240": 13,
+        "Q279459": 5,
+        "Q795691": 2,
+        "Q734547": 1,
+        "Q2477346": 11,
+        "Q3061911": 27,
+        "Q8577": 6,
+        "Q1137312": 12,
+        "Q6670323": 2,
+        "Q8691": 23,
+        "Q1545354": 18,
+        "Q338466": 4,
+        "Q1988417": 4,
+        "Q578794": 6,
+        "Q1415441": 1,
+        "Q8111": 2,
+        "Q6669759": 6,
+        "Q985210": 6,
+        "Q219867": 9,
+        "Q795678": 1,
+        "Q7242790": 2,
+        "Q216185": 1,
+        "Q2018322": 1,
+        "Q720102": 6,
+        "Q23306": 39,
+        "Q42182": 1,
+        "Q1449564": 5,
+        "Q733210": 1,
+        "Q14710970": 8,
+        "Q2422792": 2,
+        "Q1001456": 17,
+        "Q503516": 2,
+        "Q8982": 1,
+        "Q22059065": 3,
+        "Q8712": 2,
+        "Q20657974": 4,
+        "Q565521": 1,
+        "Q238587": 1,
+        "Q2716505": 1,
+        "Q927198": 3,
+        "Q122744": 1,
+        "Q123738": 1,
+        "Q8703": 2,
+        "Q15179170": 1,
+        "Q10818": 2,
+        "Q1359589": 8,
+        "Q649419": 2,
+        "Q15242653": 1,
+        "Q20075": 6,
+        "Q6669738": 6,
+        "Q756819": 1,
+        "Q2354215": 2,
+        "Q7443327": 1,
+        "Q123885": 3,
+        "Q55018": 1,
+        "Q130206": 2,
+        "Q4642035": 1,
+        "Q729177": 2,
+        "Q1399178": 1,
+        "Q5645763": 1,
+        "Q194209": 1,
+        "Q801124": 6,
+        "Q7737135": 1,
+        "Q4834838": 1,
+        "Q17509255": 1,
+        "Q951830": 1,
+        "Q800753": 1,
+        "Q6671078": 3,
+        "Q186309": 1,
+        "Q148349": 1,
+        "Q212883": 1,
+        "Q195436": 1,
+        "Q5038252": 1,
+        "Q743535": 1,
+        "Q83609": 1,
+        "Q79348": 11,
+        "Q193196": 1,
+        "Q4801470": 1,
+        "Q220198": 1,
+        "Q124234": 1,
+        "Q23298": 1,
+        "Q1431914": 1,
+        "Q835031": 1,
+        "Q1323689": 2,
+        "Q7594521": 1,
+        "Q26888": 1,
+        "Q8709": 2,
+        "Q1402606": 1,
+        "Q278054": 1,
+        "Q801125": 1,
+        "Q205679": 1,
+        "Q6669870": 1,
+        "Q1666958": 1,
+        "Q5011830": 1,
+        "Q1394500": 1,
+        "Q772421": 1,
+        "Q1749569": 1,
+        "Q60578265": 2
+    },
+    "Leeds": {
+        "Q39121": 1503,
+        "Q1466941": 140,
+        "Q1128631": 21,
+        "Q774015": 18,
+        "Q503424": 30,
+        "Q1137962": 2,
+        "Q6515934": 3,
+        "Q4834918": 2,
+        "Q7721041": 1,
+        "Q482468": 14,
+        "Q2460124": 10,
+        "Q79869": 14,
+        "Q746876": 4,
+        "Q6515805": 4,
+        "Q3461415": 10,
+        "Q2365261": 15,
+        "Q21061609": 12,
+        "Q7746609": 1,
+        "Q14875251": 2,
+        "Q6515927": 1,
+        "Q5177618": 1,
+        "Q8699": 1,
+        "Q27985411": 1,
+        "Q6515866": 1,
+        "Q871138": 2,
+        "Q4763489": 1,
+        "Q4871546": 1,
+        "Q24896243": 1,
+        "Q1187032": 3,
+        "Q489255": 2,
+        "Q3228965": 1,
+        "Q209266": 8
+    },
+    "Sheffield": {
+        "Q42448": 1288,
+        "Q7492778": 10,
+        "Q7492565": 6,
+        "Q1862179": 116,
+        "Q823917": 24,
+        "Q4834926": 3,
+        "Q17643392": 1,
+        "Q2306176": 31,
+        "Q897533": 5,
+        "Q7492570": 5,
+        "Q1950928": 11,
+        "Q2277715": 11,
+        "Q79568": 16,
+        "Q518864": 8,
+        "Q7492591": 3,
+        "Q7492775": 1,
+        "Q741640": 1,
+        "Q7492686": 1,
+        "Q3577611": 1,
+        "Q12956644": 11,
+        "Q547824": 1,
+        "Q7492719": 1,
+        "Q7492566": 6,
+        "Q7492567": 3,
+        "Q4523493": 1,
+        "Q3028626": 1,
+        "Q7492607": 1,
+        "Q3365926": 4,
+        "Q7492568": 4,
+        "Q108940076": 3,
+        "Q1184547": 9,
+        "Q1984238": 8
+    }
+}
\ No newline at end of file
diff --git a/tests/sample_files/resources/wikidata/mentions_to_wikidata_normalized.json b/tests/sample_files/resources/wikidata/mentions_to_wikidata_normalized.json
new file mode 100644
index 00000000..22c19c13
--- /dev/null
+++ b/tests/sample_files/resources/wikidata/mentions_to_wikidata_normalized.json
@@ -0,0 +1,183 @@
+{
+    "Edinburgh University": {
+        "Q160302": 0.12235009085402786,
+        "Q5338273": 0.75
+    },
+    "London": {
+        "Q170027": 0.04149026248941575,
+        "Q84": 0.9822311897943244,
+        "Q800751": 0.042105263157894736,
+        "Q214788": 0.02122641509433962,
+        "Q1488404": 0.058823529411764705,
+        "Q14946379": 0.18518518518518517,
+        "Q23311": 0.033603707995365,
+        "Q6900329": 0.007211538461538462,
+        "Q92561": 0.3769633507853403,
+        "Q171240": 0.01906158357771261,
+        "Q279459": 0.07936507936507936,
+        "Q795691": 0.0045045045045045045,
+        "Q734547": 0.0028089887640449437,
+        "Q2477346": 0.8461538461538463,
+        "Q3061911": 0.6923076923076923,
+        "Q8577": 0.00495458298926507,
+        "Q1137312": 0.058252427184466014,
+        "Q6670323": 0.2222222222222222,
+        "Q8691": 0.026713124274099883,
+        "Q1545354": 0.18181818181818182,
+        "Q338466": 0.13333333333333333,
+        "Q1988417": 0.0975609756097561,
+        "Q578794": 0.03870967741935484,
+        "Q1415441": 0.014285714285714285,
+        "Q8111": 0.007352941176470588,
+        "Q6669759": 1.0,
+        "Q985210": 0.01759530791788856,
+        "Q219867": 0.025423728813559324,
+        "Q795678": 0.03333333333333333,
+        "Q7242790": 0.06896551724137931,
+        "Q216185": 0.002421307506053269,
+        "Q2018322": 0.03225806451612903,
+        "Q720102": 0.015789473684210527,
+        "Q23306": 0.04216216216216216,
+        "Q42182": 0.0010256410256410256,
+        "Q1449564": 0.7142857142857142,
+        "Q733210": 0.09090909090909091,
+        "Q14710970": 0.7272727272727273,
+        "Q2422792": 0.025974025974025976,
+        "Q1001456": 0.6071428571428571,
+        "Q503516": 0.07142857142857142,
+        "Q8982": 0.007352941176470588,
+        "Q22059065": 1.0,
+        "Q8712": 0.012121212121212121,
+        "Q20657974": 1.0,
+        "Q565521": 0.012345679012345678,
+        "Q238587": 0.0026595744680851063,
+        "Q2716505": 0.017857142857142856,
+        "Q927198": 0.01948051948051948,
+        "Q122744": 0.007462686567164179,
+        "Q123738": 0.0017667844522968198,
+        "Q8703": 0.0047169811320754715,
+        "Q15179170": 0.14285714285714285,
+        "Q10818": 0.008097165991902834,
+        "Q1359589": 0.004781829049611476,
+        "Q649419": 0.013157894736842105,
+        "Q15242653": 0.06666666666666667,
+        "Q20075": 0.004062288422477996,
+        "Q6669738": 1.0,
+        "Q756819": 0.003125,
+        "Q2354215": 0.004140786749482402,
+        "Q7443327": 0.0625,
+        "Q123885": 0.001547987616099071,
+        "Q55018": 0.0012531328320802004,
+        "Q130206": 0.007272727272727273,
+        "Q4642035": 0.16666666666666666,
+        "Q729177": 0.11764705882352941,
+        "Q1399178": 0.125,
+        "Q5645763": 0.14285714285714285,
+        "Q194209": 0.06666666666666667,
+        "Q801124": 0.015463917525773196,
+        "Q7737135": 0.16666666666666666,
+        "Q4834838": 0.006211180124223602,
+        "Q17509255": 0.09090909090909091,
+        "Q951830": 0.0041841004184100415,
+        "Q800753": 0.01020408163265306,
+        "Q6671078": 0.12,
+        "Q186309": 0.005813953488372093,
+        "Q148349": 0.0038314176245210726,
+        "Q212883": 0.3333333333333333,
+        "Q195436": 0.004366812227074236,
+        "Q5038252": 1.0,
+        "Q743535": 0.0017699115044247787,
+        "Q83609": 0.008,
+        "Q79348": 1.0,
+        "Q193196": 0.0008116883116883117,
+        "Q4801470": 0.024390243902439025,
+        "Q220198": 0.0055248618784530384,
+        "Q124234": 0.009615384615384616,
+        "Q23298": 0.0004050222762251924,
+        "Q1431914": 0.011904761904761904,
+        "Q835031": 0.0625,
+        "Q1323689": 0.016666666666666666,
+        "Q7594521": 0.16666666666666666,
+        "Q26888": 0.006756756756756757,
+        "Q8709": 0.007751937984496124,
+        "Q1402606": 0.0625,
+        "Q278054": 0.14285714285714285,
+        "Q801125": 0.005291005291005291,
+        "Q205679": 0.0036900369003690036,
+        "Q6669870": 0.05555555555555555,
+        "Q1666958": 0.03571428571428571,
+        "Q5011830": 0.25,
+        "Q1394500": 0.0026595744680851063,
+        "Q772421": 0.025,
+        "Q1749569": 0.16666666666666666,
+        "Q60578265": 0.2857142857142857
+    },
+    "Leeds": {
+        "Q39121": 0.9868680236375573,
+        "Q1466941": 0.8484848484848485,
+        "Q1128631": 0.020114942528735632,
+        "Q774015": 0.20930232558139533,
+        "Q503424": 0.06382978723404255,
+        "Q1137962": 0.004310344827586207,
+        "Q6515934": 0.375,
+        "Q4834918": 0.06060606060606061,
+        "Q7721041": 1.0,
+        "Q482468": 1.0,
+        "Q2460124": 0.6666666666666666,
+        "Q79869": 0.5833333333333333,
+        "Q746876": 0.08333333333333333,
+        "Q6515805": 0.5,
+        "Q3461415": 1.0,
+        "Q2365261": 1.0,
+        "Q21061609": 0.058536585365853655,
+        "Q7746609": 0.05263157894736842,
+        "Q14875251": 0.2,
+        "Q6515927": 0.1111111111111111,
+        "Q5177618": 0.14285714285714285,
+        "Q8699": 0.013888888888888888,
+        "Q27985411": 1.0,
+        "Q6515866": 0.3333333333333333,
+        "Q871138": 0.15384615384615385,
+        "Q4763489": 0.029411764705882353,
+        "Q4871546": 1.0,
+        "Q24896243": 0.25,
+        "Q1187032": 0.06382978723404255,
+        "Q489255": 0.007462686567164179,
+        "Q3228965": 0.022727272727272728,
+        "Q209266": 0.6666666666666666
+    },
+    "Sheffield": {
+        "Q42448": 0.9401459854014598,
+        "Q7492778": 0.3448275862068966,
+        "Q7492565": 1.0,
+        "Q1862179": 0.7341772151898734,
+        "Q823917": 0.0851063829787234,
+        "Q4834926": 0.08571428571428572,
+        "Q17643392": 0.05263157894736842,
+        "Q2306176": 0.5740740740740741,
+        "Q897533": 0.03731343283582089,
+        "Q7492570": 1.0,
+        "Q1950928": 0.6470588235294118,
+        "Q2277715": 0.7857142857142857,
+        "Q79568": 0.48484848484848486,
+        "Q518864": 0.7272727272727273,
+        "Q7492591": 0.30000000000000004,
+        "Q7492775": 0.2,
+        "Q741640": 0.25,
+        "Q7492686": 0.125,
+        "Q3577611": 0.1,
+        "Q12956644": 0.34375,
+        "Q547824": 0.047619047619047616,
+        "Q7492719": 1.0,
+        "Q7492566": 1.0,
+        "Q7492567": 1.0,
+        "Q4523493": 0.125,
+        "Q3028626": 0.08333333333333333,
+        "Q7492607": 0.030303030303030304,
+        "Q3365926": 0.5714285714285714,
+        "Q7492568": 1.0,
+        "Q108940076": 0.75,
+        "Q1184547": 0.8181818181818182,
+        "Q1984238": 0.6153846153846154
+    }
+}
\ No newline at end of file
diff --git a/tests/sample_files/resources/wikidata/wikidata_gazetteer.csv b/tests/sample_files/resources/wikidata/wikidata_gazetteer.csv
new file mode 100644
index 00000000..62f5ad99
--- /dev/null
+++ b/tests/sample_files/resources/wikidata/wikidata_gazetteer.csv
@@ -0,0 +1,174 @@
+wikidata_id,english_label,instance_of,alias_dict,nativelabel,hcounties,countries,latitude,longitude
+Q160302,University of Edinburgh,"['Q875538', 'Q45400320', 'Q2667285']","{'pl': ['Uniwersytet Edynburski', 'Uniwersytet w Edynburgu', 'University of Edinburgh'], 'gd': ['Oilthigh Dhùn Éideann', 'Oilthigh Dùn Eideann', 'Oilthigh Dun Eideann', 'University of Edinburgh', 'Oilthigh Dhùn Eideann', 'Oilthigh Dhùn Èideann'], 'es': ['Universidad de Edinburgo', 'Universidad de Edimburgo la Escuela de Medicina', 'Universidad de Edimburgo, la Escuela de Medicina', 'Universidad de Edinburgh', 'Escuela de Medicina de la Universidad de Edimburgo', 'University of Edinburgh', 'Universidad de Edimburgo'], 'ga': ['Ollscoil Dún Éideann', 'Ollscoil Dhún Éideann'], 'nl': ['University of Edinburgh', 'Edinburgh University', 'Universiteit van Edinburgh'], 'pt': ['University of Edinburgh', 'Oilthigh Dhùn Èideann', 'Universidade de Edinburgh', 'Universidade de Edimburgo'], 'tr': ['University of Edinburgh', 'Edinburg Üniversitesi', 'Edinburgh Üniversitesi'], 'fr': [""Université d'Edimbourg"", ""Université d'Édinbourg"", ""L'Université d'Édimbourg"", ""Université d'Edinburgh"", 'Université d’Édimbourg', ""Universite d'Edimbourg"", 'University of Edinburgh', 'Edinburgh University', ""université d'Édimbourg""], 'it': ['University of Edinburgh', 'Università di Edimburgo'], 'de': ['Edinburgh University', 'Universität Edinburgh', 'Universität von Edinburgh'], 'ro': ['University of Edinburgh', 'Universitatea Edinburgh', 'Universitatea din Edinburgh'], 'en': ['Edinburgh University', 'The University of Edinburgh', 'University of Edinburgh'], 'uk': ['Единбурзький університет'], 'sco': ['Varsity o Edinburgh'], 'cy': ['Prifysgol Caeredin'], 'en-ca': ['University of Edinburgh'], 'en-gb': ['University of Edinburgh'], 'kw': ['Pennskol Karedin']}","['University of Edinburgh', 'University o Edinburgh', 'Oilthigh Dhùn Èideann']",['Q67317221'],"{'Q145': ('', '')}",55.947389,-3.187194
+Q5338273,Edinburgh University A.F.C.,['Q476028'],"{'en': ['Edinburgh University A.F.C.'], 'es': ['Edinburgh University A.F.C.'], 'nl': ['Edinburgh University A.F.C.'], 'fr': ['Edinburgh University Association Football Club']}",,['Q67317221'],"{'Q145': ('', '')}",55.93175,-3.149911
+Q170027,University of London,"['Q3918', 'Q38723', 'Q45400320', 'Q5341295']","{'pl': ['Uniwersytet Londyński', 'University of London'], 'es': ['University of London', 'Universidad de Londres'], 'nl': ['University of london', 'London University', 'Universiteit van Londen'], 'pt': ['University of London', 'University college london', 'Universidade de Londres'], 'tr': ['University of London', 'Londra Üniversitesi'], 'fr': ['Universite de Londres', 'University of London', 'université de Londres'], 'it': ['London University', 'University of London', 'Università di Londra'], 'de': ['Uni London', 'London University', 'Universität von London', 'University of London'], 'ro': ['University of London', 'Universitatea din Londra'], 'en': ['London University', 'Lond.', 'University of London'], 'gd': ['Oilthigh Lunnainn'], 'uk': ['Лондонський університет'], 'cy': ['Prifysgol Llundain'], 'sco': ['Varsity o Lunnon'], 'ga': ['Ollscoil Londan'], 'en-ca': ['University of London'], 'en-gb': ['University of London'], 'en-us': ['University of London']}",['University of London'],['Q19186'],"{'Q145': ('', '')}",51.521111,-0.128889
+Q84,London,"['Q200250', 'Q1066984', 'Q515', 'Q1637706', 'Q208511', 'Q5119', 'Q174844', 'Q51929311']","{'en-gb': ['London, UK', 'London, United Kingdom', 'London, England', 'London'], 'en': ['London, UK', 'London, United Kingdom', 'London, England', 'London UK', 'London U.K.', 'Greater London', 'Londinium', 'Loñ', 'Lundenwic', 'Londinio', 'Londini', 'Londiniensium', 'Augusta', 'Trinovantum', 'Kaerlud', 'Karelundein', 'Lunden', 'Big Smoke', 'the Big Smoke', 'Lundenburh', 'Lundenburgh', 'Llyn Dain', 'Llan Dian', 'Londinion', 'Loniniensi', 'Lon.', 'Loñ.', 'Lond.', 'London'], 'es': ['Londres (Reino Unido)', 'Londres (Inglaterra)', 'Greater London', 'London, UK', 'Londres'], 'fr': ['London', 'Londres'], 'nl': ['Londen, VK', 'Londen, Verenigd Koninkrijk', 'Londen, Engeland', 'Londen'], 'ga': ['Doirelondain', 'Londain'], 'pt': ['Londres, Reino Unido', 'Londres, Inglaterra', 'Londres, UK', 'Londres, GBR', 'Londres'], 'it': ['Londra'], 'pl': ['Londyn'], 'de': ['London'], 'en-ca': ['London'], 'sco': ['Lunnon'], 'cy': ['Llundain'], 'gd': ['Lunnainn'], 'kw': ['Loundres'], 'ro': ['Londra'], 'tr': ['Londra'], 'uk': ['Лондон']}","['London', 'Llundain', 'Lunnainn', 'Lunnon']","['Q19186', 'Q67443130', 'Q67479626', 'Q67442940', 'Q67532100']","{'Q2277': ('0047', '0410'), 'Q110888': ('0500', '0730'), 'Q105092': ('0730', '0918'), 'Q105313': ('0918', '0927'), 'Q179876': ('0927', '1707'), 'Q161885': ('1707', '1800'), 'Q174193': ('1801', '1922'), 'Q145': ('1922', '')}",51.507222,-0.1275
+Q800751,Euston Station,"['Q55488', 'Q55485']","{'pt': ['Estação de Euston (Metro de Londres)', 'Estação de Euston', 'Euston (Metropolitano de Londres)'], 'de': ['Euston station', 'Euston'], 'it': ['Stazione di Euston', 'stazione di Londra Euston'], 'en': ['London Euston', 'Euston railway station', 'Euston station', 'Euston Station'], 'es': ['Estacion de Euston', 'Estación de Euston'], 'ro': ['Londra Euston', 'Gara Londra Euston', 'gara Euston'], 'fr': ['Euston'], 'nl': ['Station London Euston'], 'cy': ['Gorsaf reilffordd Euston'], 'pl': ['Euston Station'], 'en-ca': ['Euston railway station'], 'en-gb': ['Euston railway station'], 'tr': ['Euston Tren İstasyonu'], 'uk': ['Юстон'], 'ga': ['Stáisiún Euston']}",,['Q19186'],"{'Q145': ('', '')}",51.5284,-0.1331
+Q214788,London Paddington station,"['Q55488', 'Q55485', 'Q1793804', 'Q20202072']","{'fr': ['gare de Paddington', 'gare de Londres Paddington', 'London Paddington'], 'es': ['Paddington', 'Estacion de Paddington', 'Estación de Paddington'], 'it': ['Stazione di Paddington', 'stazione di Londra Paddington'], 'de': ['Paddington Station', 'Paddington railway station', 'Bahnhof Paddington'], 'nl': ['Station Paddington', 'Paddington station', 'Station London Paddington'], 'pt': ['Estação de Paddington', 'London Paddington', 'Estação de London Paddington', 'Estação Paddington'], 'en': ['Paddington station', 'Paddington Railway station', ""Paddington (Bishop's Road) station"", 'Paddington', 'London Paddington', 'Paddington Bear Station', 'London Paddington station'], 'ro': ['Londra Paddington', 'gara Paddington', 'gara Londra Paddington'], 'cy': ['Gorsaf Paddington Llundain', 'Gorsaf reilffordd Paddington Llundain'], 'tr': ['Paddington Tren İstasyonu', ""Paddington (Bishop's Road) Tren İstasyonu"", 'Londra Paddington Tren İstasyonu'], 'pl': ['Paddington station'], 'uk': ['Паддінгтон'], 'en-ca': ['London Paddington station'], 'en-gb': ['London Paddington station']}",,['Q19186'],"{'Q145': ('', '')}",51.516667,-0.177222
+Q1488404,London Docks,['Q811979'],"{'de': ['London Docks'], 'en': ['London Docks'], 'fr': ['Docks de Londres']}",,['Q19186'],"{'Q145': ('', '')}",51.506,-0.060333
+Q14946379,Diocese of London,['Q18917976'],"{'it': ['diocesi di Londra', 'diocesi anglicana di Londra'], 'es': ['diócesis de Londres', 'diócesis anglicana de Londres'], 'en': ['Diocese of London'], 'en-ca': ['Diocese of London'], 'en-gb': ['Diocese of London'], 'de': ['Diözese London'], 'fr': ['diocèse de Londres'], 'pt': ['Diocese de Londres'], 'pl': ['Diecezja londyńska'], 'cy': ['Esgobaeth Llundain'], 'nl': ['Bisdom Londen']}",,[],"{'Q145': ('', '')}",51.5138,-0.0986
+Q23311,City of London,"['Q515', 'Q1066984', 'Q738570', 'Q180673', 'Q17601336', 'Q21503295', 'Q7897276']","{'en': ['the City', 'Square Mile', 'City and County of the City of London', 'City of London (unparished area)', 'London', 'City of London'], 'fr': ['La City', 'City of London', 'Cité de Londres', 'cité de Londres'], 'de': ['London', 'City of London'], 'it': ['La City', 'City of London', 'City di Londra', 'Città di Londra'], 'en-gb': ['the City', 'City and County of the City of London', 'City of London'], 'cy': ['y Filltir Sgwâr', 'Dinas Llundain'], 'en-ca': ['City of London'], 'es': ['City de Londres'], 'ga': ['Cathair Londan'], 'nl': ['City of London'], 'pl': ['City of London'], 'pt': ['Cidade de Londres'], 'ro': ['City of London'], 'sco': ['Ceety o Lunnon'], 'tr': ['Londra Şehri'], 'uk': ['Лондонське Сіті']}",['City of London'],['Q19186'],"{'Q145': ('', '')}",51.515556,-0.093056
+Q6900329,The Blitz,['Q2380335'],"{'en': ['Blitz', 'London Blitz', 'The Blitz'], 'cy': ['Y Blitz'], 'es': ['Blitz'], 'pt': ['Blitz'], 'tr': ['The Blitz'], 'ro': ['The Blitz'], 'pl': ['Blitz'], 'fr': ['Blitz'], 'de': ['The Blitz'], 'en-ca': ['The Blitz'], 'en-gb': ['The Blitz'], 'ga': ['An Bhleaist'], 'sco': ['The Blitz'], 'nl': ['The Blitz'], 'it': ['The Blitz'], 'uk': ['Бліц']}",,[],"{'Q145': ('', '')}",51.506944,-0.1275
+Q92561,London,"['Q6593035', 'Q14762300', 'Q515', 'Q1549591']","{'en': ['London, ON', 'London, Ontario', 'London'], 'fr': ['London, Ontario', 'London'], 'es': ['London (Ontario)', 'Londres (Ontario)', 'London'], 'en-gb': ['London'], 'de': ['London'], 'it': ['London'], 'nl': ['London'], 'pl': ['London'], 'pt': ['London'], 'uk': ['Лондон'], 'tr': ['London'], 'ga': ['Londain, Ontario'], 'cy': ['Llundain'], 'gd': ['Lunnainn'], 'ro': ['London']}",,[],"{'Q16': ('', '')}",42.9837,-81.2497
+Q171240,London Stock Exchange,['Q11691'],"{'pl': ['Londyńska Giełda Papierów Wartościowych', 'Giełda Papierów Wartościowych w Londynie', 'London Stock Exchange'], 'fr': ['London Stock Exchange', 'bourse de Londres'], 'es': ['London Stock Exchange', 'Bolsa de Valores de Londres', 'Bolsa de Londres'], 'it': ['Borsa di Londra', 'London Stock Exchange'], 'de': ['Londoner Börse', 'London Stock Exchange Group', 'London Stock Exchange'], 'nl': ['Londense beurs', 'London Stock Exchange'], 'pt': ['Bolsa de Londres', 'London stock exchange', 'Bolsa de Valores de Londres'], 'tr': ['Londra Menkul Değerler Borsası', 'London Stock Exchange', 'Londra Borsası'], 'ro': ['Bursa de Valori din Londra', 'Bursa de la Londra', 'Bursa londoneză', 'Bursa din Londra', 'London Stock Exchange'], 'en': ['London Stock Exchange, LSE', 'London Stock Exchange'], 'uk': ['Лондонська фондова біржа'], 'cy': ['Cyfnewidfa Stoc Llundain'], 'sco': ['Lunnon Stock Exchynge'], 'en-ca': ['London Stock Exchange'], 'en-gb': ['London Stock Exchange']}",,['Q19186'],"{'Q145': ('', '')}",51.515065,-0.098972
+Q279459,Victoria Coach Station,['Q494829'],"{'fr': ['Gare routière Victoria', 'Victoria Coach Station'], 'de': ['Victoria Coach Station'], 'en': ['Victoria Coach Station'], 'es': ['Victoria Coach Station'], 'it': ['autostazione di Victoria'], 'pl': ['Victoria Coach Station'], 'nl': ['Victoria Coach Station'], 'en-gb': ['Victoria Coach Station'], 'pt': ['Estação rodoviária Victoria']}",,['Q19186'],"{'Q145': ('', '')}",51.49316,-0.14864
+Q795691,London Waterloo station,"['Q18543139', 'Q55485']","{'es': ['Waterloo station', 'Estacion de Waterloo', 'Estacion Waterloo', 'London Waterloo', 'Estación Waterloo', 'London Waterloo station', 'Estación de Waterloo'], 'nl': ['Station London Waterloo Station', 'Waterloo Station', 'London Waterloo', 'Station London Waterloo'], 'en': ['Waterloo station', 'Waterloo railway station', 'Waterloo train station', 'London Waterloo station'], 'de': ['Waterloo Bridge', 'Waterloo'], 'it': ['stazione di Waterloo', 'stazione di Londra Waterloo'], 'pt': ['Estação Waterloo'], 'uk': ['Ватерлоо'], 'en-ca': ['London Waterloo station'], 'en-gb': ['London Waterloo station'], 'pl': ['Waterloo Station'], 'cy': ['Gorsaf Waterloo Llundain'], 'fr': ['gare de Londres-Waterloo'], 'ga': ['Stáisiún Londain Waterloo'], 'tr': ['Waterloo İstasyonu']}",,['Q67443130'],"{'Q145': ('', '')}",51.5031,-0.1132
+Q734547,North London,['Q7631958'],"{'en': ['London/North', 'North London'], 'nl': ['Londen/North', 'Noord-Londen'], 'fr': ['North London'], 'it': ['North London'], 'pt': ['North London'], 'en-ca': ['North London'], 'en-gb': ['North London'], 'cy': ['Gogledd Lundain'], 'es': ['Norte de Londres'], 'ga': ['Londain Thuaidh']}",,['Q67479626'],"{'Q145': ('', '')}",51.54962,-0.167614
+Q2477346,London,['Q486972'],"{'fr': ['Londres', 'Ronton', 'London'], 'es': ['Londres', 'Ronton', 'London'], 'it': ['Ronton', 'London'], 'en': ['Ronton', 'London'], 'pt': ['Ronton', 'London'], 'pl': ['London'], 'nl': ['London'], 'de': ['London']}",['London'],[],"{'Q710': ('', '')}",1.983333,-157.475
+Q3061911,London,['Q1093829'],"{'en': ['London, Kentucky', 'London, KY', 'London'], 'tr': ['London, Kentucky', 'London'], 'es': ['Londres, Kentucky', 'London'], 'nl': ['London'], 'pl': ['London'], 'pt': ['London'], 'de': ['London'], 'fr': ['London'], 'it': ['London'], 'cy': ['London'], 'uk': ['Лондон'], 'ga': ['London']}",,[],"{'Q30': ('', '')}",37.1275,-84.0842
+Q8577,2012 Summer Olympics,['Q159821'],"{'en': ['London 2012', 'Games of the XXX Olympiad', '2012 Olympics', '2012 London Olympics', 'London Olympics', 'London Olympic Games', '2012 Summer Olympics'], 'it': ['Londra 2012', 'Giochi della XXX Olimpiade'], 'de': ['London2012', 'Olympia 2012', 'London 2012', 'Olympische Spiele 2012', 'Spiele der XXX. Olympiade', 'XXX. Olympische Sommerspiele', 'Olympische Sommerspiele 2012'], 'pt': ['Londres 2012', 'Jogos da XXX Olimpíada', 'Jogos Olímpicos de 2012', 'Jogos Olímpicos de Verão de 2012'], 'es': ['Londres 2012', 'Juegos de la XXX Olimpiada', 'Juegos Olímpicos de Londres 2012'], 'fr': ['Londres 2012', 'Jeux olympiques 2012', 'Jeux de la XXXe olympiade', ""Jeux olympiques d'été de 2012""], 'cy': ['Gemau Olympaidd yr Haf 2012'], 'en-ca': ['2012 Summer Olympics'], 'en-gb': ['2012 Summer Olympics'], 'ga': ['Cluichí Oilimpeacha an tSamhraidh 2012'], 'nl': ['Olympische Zomerspelen 2012'], 'pl': ['Letnie Igrzyska Olimpijskie 2012'], 'ro': ['Jocurile Olimpice de vară din 2012'], 'sco': ['2012 Simmer Olympics'], 'tr': ['2012 Yaz Olimpiyatları'], 'uk': ['Літні Олімпійські ігри 2012']}",,[],"{'Q145': ('', '')}",51.538611,-0.016389
+Q1137312,County of London,"['Q67376938', 'Q19953632', 'Q2560047', 'Q180673', 'Q21272231']","{'it': ['County of London', 'contea di Londra'], 'fr': ['comté de Londres'], 'en': ['County of London'], 'nl': ['Graafschap Londen'], 'pt': ['Condado de Londres'], 'de': ['County of London'], 'es': ['condado de Londres'], 'pl': ['County of London'], 'uk': ['Лондонське графство'], 'cy': ['Sir Llundain'], 'en-gb': ['County of London']}",,['Q19186'],"{'Q145': ('', '')}",51.5155,-0.0922
+Q6670323,"London District, Upper Canada",['Q19953632'],"{'en': ['London District, Upper Canada']}",,[],"{'Q16': ('', '')}",43.4,-81.2
+Q8691,Heathrow Airport,"['Q644371', 'Q94993988', 'Q43229']","{'en': ['Heathrow', 'London Heathrow Airport', 'London Heathrow', ""London's Heathrow Airport"", 'Heathrow Airport'], 'de': ['Heathrow', 'Flughafen London Heathrow'], 'it': ['Aeroporto di Heathrow', 'Heathrow', 'Aeroporto di Londra-Heathrow'], 'fr': ['Londres Heathrow', 'aéroport de Londres Heathrow'], 'nl': ['London Heathrow Airport', 'Londen Heathrow', 'Luchthaven Heathrow', 'Luchthaven Londen Heathrow'], 'cy': ['Maes Awyr Heathrow'], 'es': ['Aeropuerto de Londres-Heathrow'], 'ga': ['Aerfort Londain-Heathrow'], 'pl': ['Port lotniczy Londyn-Heathrow'], 'pt': ['Aeroporto de Londres Heathrow'], 'ro': ['Aeroportul Londra Heathrow'], 'tr': ['Heathrow Havalimanı'], 'uk': ['Хітроу']}",['London Heathrow Airport'],['Q19186'],"{'Q145': ('', '')}",51.4775,-0.461389
+Q1545354,Port of London,"['Q44782', 'Q863915', 'Q15310171']","{'de': ['Londoner Hafen'], 'en': ['Port of London'], 'es': ['Puerto de Londres'], 'fr': ['port de Londres'], 'it': ['porto di Londra'], 'nl': ['haven van Londen'], 'en-ca': ['Port of London'], 'en-gb': ['Port of London'], 'cy': ['Porthladd Llundain']}",['Port of London'],['Q67442940'],"{'Q145': ('', '')}",51.5,0.05
+Q338466,Anglo-Saxon London,,"{'en': ['Anglo-Saxon London'], 'it': ['Londra anglosassone']}",,[],"{'Q145': ('', '')}",51.5125,-0.1225
+Q1988417,Chinatown,"['Q202509', 'Q2755753', 'Q123705']","{'es': ['Chinatown de Londres', 'Barrio Chino de Londres', 'Chinatown'], 'nl': ['London Chinatown', 'Londen Chinatown', 'Chinatown'], 'en': ['London Chinatown', 'Chinatown, London', 'China Town', 'China Town, London', 'Chinatown'], 'fr': ['Chinatown'], 'uk': ['Чайна-таун'], 'it': ['Chinatown'], 'ga': ['Ceantar Síneach'], 'de': ['Chinatown'], 'en-gb': ['Chinatown']}",,['Q19186'],"{'Q145': ('', '')}",51.511111,-0.131389
+Q578794,London Marathon,"['Q40244', 'Q18608583']","{'es': ['Maraton de Londres', 'Maratón de Londres'], 'de': ['London-Marathon'], 'en': ['London Marathon'], 'fr': ['Marathon de Londres'], 'it': ['Maratona di Londra'], 'nl': ['Marathon van Londen'], 'pt': ['Maratona de Londres'], 'tr': ['Londra Maratonu'], 'cy': ['Marathon Llundain'], 'uk': ['Лондонський марафон'], 'pl': ['Maraton w Londynie'], 'en-ca': ['London Marathon'], 'en-gb': ['London Marathon']}",,[],"{'Q145': ('', '')}",51.472778,0.009444
+Q1415441,London Southend Airport,"['Q1248784', 'Q94993988']","{'de': ['Southend Airport', 'London Southend Airport'], 'en': ['Southend', 'London Southend Airport'], 'fr': ['aérodrome de Royaume Uni', 'Southend', 'aéroport de Londres Southend'], 'nl': ['London Southend Airport', 'Luchthaven London Southend'], 'pl': ['Port lotniczy Londyn-Southend'], 'it': ['Aeroporto di Londra-Southend'], 'es': ['Aeropuerto de Londres-Southend'], 'tr': ['Londra Southend Havalimanı'], 'uk': ['Лондон-Саутенд'], 'ro': ['Aeroportul Londra Southend']}",,['Q67442940'],"{'Q145': ('', '')}",51.570278,0.693333
+Q8111,1908 Summer Olympics,['Q159821'],"{'en': ['London 1908', 'Games of the IV Olympiad', '1908 Summer Olympics'], 'it': ['Londra 1908', 'Giochi della IV Olimpiade'], 'fr': ['Londres 1908', ""Jeux olympiques d'été de 1908""], 'es': ['Juegos de Londres 1908', 'Juegos Olímpicos de Londres 1908'], 'nl': ['Zomerspelen 1908', 'Olympische Zomerspelen 1908'], 'de': ['Olympische Sommerspiele 1908'], 'cy': ['Gemau Olympaidd yr Haf 1908'], 'en-ca': ['1908 Summer Olympics'], 'en-gb': ['1908 Summer Olympics'], 'pl': ['Letnie Igrzyska Olimpijskie 1908'], 'pt': ['Jogos Olímpicos de Verão de 1908'], 'ro': ['Jocurile Olimpice de vară din 1908'], 'tr': ['1908 Yaz Olimpiyatları'], 'uk': ['Літні Олімпійські ігри 1908'], 'ga': ['Cluichí Oilimpeacha an tSamhraidh 1908'], 'sco': ['1908 Simmer Olympics']}",,[],"{'Q174193': ('', '')}",51.51362,-0.2274
+Q6669759,London,['Q123705'],"{'en': ['London'], 'nl': ['London'], 'es': ['London']}",,[],"{'Q403': ('', '')}",44.808495,20.463161
+Q985210,London Victoria station,"['Q55488', 'Q55485', 'Q7886778']","{'en': ['Victoria station', 'Victoria railway station', 'Victoria Railway Station The Former London, Chatham And Dover Railway Station Including Train Shed', 'London Victoria station'], 'es': ['estación Victoria', 'London Victoria', 'estacion de Victoria', 'estacion Victoria', 'estación de Victoria'], 'ro': ['gara London Victoria', 'gara Victoria', 'gara Londra Victoria'], 'it': ['stazione di Victoria', 'stazione di Londra Victoria'], 'de': ['London Victoria Station'], 'cy': ['Gorsaf reilffordd Victoria Llundain'], 'fr': ['gare de Londres Victoria'], 'nl': ['Station London Victoria'], 'pl': ['Victoria Station'], 'pt': ['Estação Victoria'], 'en-ca': ['London Victoria station'], 'en-gb': ['London Victoria station'], 'uk': ['Лондон-Вікторія'], 'tr': ['Victoria İstasyonu']}",,['Q19186'],"{'Q145': ('', '')}",51.495005,-0.143577
+Q219867,London King's Cross railway station,"['Q55488', 'Q55485', 'Q22808404']","{'fr': ['gare de King’s Cross', ""gare de Londres-King's Cross""], 'es': [""Estacion de King's Cross St Pancras"", 'Estacion de Kings Cross', 'Estación de Kings Cross St. Pancras', 'King Cross', 'Kings Cross', 'Estacion de Kings Cross St. Pancras', ""Estación de King's Cross"", 'Estacion de Kings Cross St Pancras', ""King's Cross"", ""Estacion de King's Cross St. Pancras"", ""Estación de King's Cross St. Pancras"", 'Estación de Kings Cross'], 'it': [""Stazione di King's Cross"", ""stazione di Londra King's Cross""], 'de': [""King's Cross"", ""Bahnhof King's Cross"", 'King’s Cross', 'King’s Cross Station', ""King's Cross Station"", 'Bahnhof Kings Cross', 'London King’s Cross Station', 'Bahnhof King’s Cross'], 'nl': [""Station King's Cross"", 'Station King’s Cross', ""King's Cross Station"", ""Station London King's Cross""], 'pt': ['Plataforma Nove e Meia', ""Estação King's Cross"", 'Plataforma Nove e Três Quartos', 'King´s Cross', ""King's Cross"", ""London King's Cross"", ""Estação de King's Cross""], 'en': [""King's Cross Railway Station"", ""King's Cross station"", ""London King's Cross railway station""], 'pl': [""King's Cross Station""], 'uk': ['Кінгс-Кросс'], 'cy': [""Gorsaf reilffordd King's Cross Llundain""], 'ro': [""gara King's Cross""], 'tr': [""King's Cross Tren İstasyonu""], 'en-ca': [""London King's Cross railway station""], 'en-gb': [""London King's Cross railway station""]}","[""King's Cross station""]",['Q19186'],"{'Q145': ('', '')}",51.530889,-0.123306
+Q795678,Waterloo International railway station,"['Q55488', 'Q55485']","{'en': ['Waterloo International station', 'Waterloo International railway station'], 'de': ['London Waterloo Station'], 'nl': ['station Waterloo International']}",,['Q67443130'],"{'Q145': ('', '')}",51.502972,-0.114808
+Q7242790,Pride London,"['Q51404', 'Q11483816', 'Q132241']","{'en': ['Pride in London', 'Pride London'], 'tr': ['Pride London'], 'it': ['Pride London'], 'uk': ['Лондонський прайд'], 'es': ['Orgullo de Londres']}",,['Q19186'],"{'Q145': ('', '')}",51.518334,-0.14401
+Q216185,Charing Cross,"['Q4989906', 'Q2755753']","{'pl': ['Charing Cross'], 'fr': ['Charing Cross'], 'es': ['Charing Cross'], 'it': ['Charing Cross'], 'de': ['Charing Cross'], 'en': ['Charing Cross'], 'nl': ['Charing Cross'], 'pt': ['Charing Cross'], 'cy': ['Charing Cross'], 'en-ca': ['Charing Cross'], 'en-gb': ['Charing Cross'], 'ga': ['Charing Cross'], 'uk': ['Чарінг-Кросс'], 'tr': ['Charing Cross'], 'sco': ['Charing Cross']}",,['Q19186'],"{'Q145': ('', '')}",51.5073,-0.12755
+Q2018322,Old Compton Street,['Q79007'],"{'cy': ['Old Compton Street'], 'en': ['Old Compton Street'], 'de': ['Old Compton Street'], 'nl': ['Old Compton Street'], 'en-ca': ['Old Compton Street'], 'en-gb': ['Old Compton Street'], 'fr': ['Old Compton Street'], 'uk': ['Олд Комптон стріт']}",,['Q19186'],"{'Q145': ('', '')}",51.51326,-0.13128
+Q720102,St Pancras railway station,"['Q55488', 'Q2298537', 'Q55485', 'Q1402443']","{'pl': ['St Pancras Station', 'St Pancras International'], 'fr': ['gare de Londres-Saint-Pancras', 'gare de Saint-Pancras', 'St Pancras International'], 'es': ['Estacion de St Pancras', 'Estacion de St. Pancras', 'St Prancas', 'Estación de St Pancras', 'Estación de St. Pancras'], 'it': ['stazione di St Pancras', 'stazione di Londra St. Pancras', 'stazione di St. Pancras', 'stazione di Londra Saint Pancras', 'stazione di Saint Pancras', 'stazione di Londra St Pancras'], 'cy': ['St Pancras', 'Gorsaf reilffordd St Pancras', 'Gorsaf reilffordd St Pancras Llundain'], 'de': ['Bahnhof St. Pancras', 'London St Pancras', 'St Pancras Station', 'Bahnhof St Pancras'], 'en': ['St Pancras station', 'London St Pancras', 'St Pancras International', 'London St Pancras International', 'St Pancras railway station'], 'nl': ['St. Pancras International', 'St Pancras International', 'Station London St. Pancras', 'Station London St Pancras', 'station London St Pancras International'], 'uk': ['Сент-Панкрас'], 'tr': ['St Pancras Uluslararası Tren İstasyonu'], 'ro': ['gara St Pancras'], 'pt': ['Estação St Pancras']}",['St Pancras railway station'],['Q19186'],"{'Q145': ('', '')}",51.53,-0.125278
+Q23306,Greater London,['Q180673'],"{'de': ['Groß-London', 'Großraum London', 'Greater London'], 'pt': ['Região de Londres', 'Grande Londres'], 'en': ['London Region', 'Greater London'], 'en-gb': ['Greater London'], 'en-ca': ['Greater London'], 'cy': ['Llundain Fawr'], 'es': ['Gran Londres'], 'fr': ['Grand Londres'], 'ga': ['Londain Mhór'], 'it': ['Grande Londra'], 'nl': ['Groot-Londen'], 'pl': ['Wielki Londyn'], 'ro': ['Londra Mare'], 'sco': ['Greater Lunnon'], 'uk': ['Великий Лондон'], 'tr': ['Büyük Londra']}",['Greater London'],[],"{'Q145': ('', '')}",51.5,-0.083333
+Q42182,Buckingham Palace,"['Q53536964', 'Q2087181', 'Q570116', 'Q7328910', 'Q16884952']","{'pl': ['Buckingham Palace', 'Pałac Buckingham'], 'es': ['Buckingham Palace', 'Palacio de Buckingham'], 'pt': ['Buckingham House', 'Buckingham Palace', 'Palácio de Buckingham'], 'fr': ['Buckingham Palace', 'palais de Buckingham'], 'it': ['Buckingham House', 'Bukingam palace', 'Palazzo Di Buckingham', 'Buckingham Palace', 'Palazzo di Buckingham'], 'de': ['Buckinghampalast', 'Buckingham Palast', 'Buckingham-Palast', 'Buckingham Palace'], 'cy': ['Buckingham Palace', 'Palas Bycingam', 'Palas Buckingham'], 'en': ['Buckingham House', 'Buck House', 'Buckingham Palace'], 'ro': ['Casa de Buckingham', 'Buck House', 'Buckingham House', 'Palatul Buckingham'], 'ga': ['Pálás Buckingham'], 'nl': ['Buckingham Palace'], 'tr': ['Buckingham Sarayı'], 'uk': ['Букінгемський палац'], 'en-ca': ['Buckingham Palace'], 'en-gb': ['Buckingham Palace'], 'sco': ['Buckingham Palace'], 'gd': ['Lùchairt Buckingham']}",['Buckingham Palace'],['Q19186'],"{'Q145': ('', '')}",51.501,-0.142
+Q1449564,London station (Ontario),['Q55488'],"{'en': ['London, Ontario railway station', 'London station', 'London railway station', 'VIA Rail London station', 'London station (Ontario)'], 'fr': ['gare de London', 'London'], 'it': ['stazione di London'], 'es': ['Estación de London']}",,[],"{'Q16': ('', '')}",42.9813,-81.2467
+Q733210,basketball at the 1948 Summer Olympics,['Q26132862'],"{'fr': ['Basket-ball aux jeux Olympiques de 1948', ""Basket-ball aux jeux Olympiques d'été 1948"", ""Basket-ball aux jeux Olympiques d'ete de 1948"", ""basket-ball aux Jeux olympiques d'été de 1948""], 'ro': ['Baschet la Jocurile Olimpice din 1948', 'Baschet la Jocurile Olimpice de vară din 1948'], 'nl': ['Olympische Zomerspelen 1948/Basketbal', 'basketbal op de Olympische Zomerspelen 1948'], 'pt': ['Basquetebol nos Jogos Olímpicos de Verão de 1948'], 'pl': ['Koszykówka na Letnich Igrzyskach Olimpijskich 1948'], 'en': ['basketball at the 1948 Summer Olympics'], 'es': ['Anexo:Baloncesto en los Juegos Olímpicos de Londres 1948'], 'tr': [""1948 Yaz Olimpiyatları'nda basketbol""], 'it': ['Pallacanestro ai Giochi della XIV Olimpiade'], 'de': ['Olympische Sommerspiele 1948/Basketball'], 'ga': ['cispheil ag Cluichí Oilimpeacha an tSamhraidh 1948']}",,['Q19186'],"{'Q145': ('', '')}",51.576256,-0.097697
+Q14710970,London,['Q17343829'],"{'en': ['London, Texas', 'London, TX', 'London'], 'fr': ['London'], 'it': ['London']}",,[],"{'Q30': ('', '')}",30.6769,-99.5764
+Q2422792,London commuter belt,['Q1907114'],"{'fr': ['London commuter belt', 'Aire métropolitaine de Londres', 'Commuter Belt', 'Aire urbaine de Londres'], 'es': ['London commuter belt', 'Area metropolitana sureste de Inglaterra', 'Area metropolitana de Londres', 'Área metropolitana sureste de Inglaterra', 'Área metropolitana de Londres'], 'en': ['London Metropolitan Region', 'London Metropolitan Area', 'London commuter belt'], 'nl': ['Metropoolregio van Londen', 'London commuter belt'], 'pt': ['Área metropolitana de Londres'], 'sco': ['Lunnon commuter belt'], 'it': ['area metropolitana di Londra']}",,['Q19186'],"{'Q145': ('', '')}",51.5073,-0.1277
+Q1001456,London,"['Q1093829', 'Q62049']","{'tr': ['London, Ohio', 'London'], 'en': ['London, Ohio', 'London, OH', 'London'], 'es': ['London (Ohio)', 'London'], 'pt': ['London'], 'pl': ['London'], 'fr': ['London'], 'de': ['London'], 'nl': ['London'], 'en-ca': ['London, Ohio'], 'en-gb': ['London'], 'it': ['London'], 'uk': ['Лондон'], 'cy': ['London, Ohio'], 'ga': ['London']}",,[],"{'Q30': ('', '')}",39.887466,-83.445041
+Q503516,Laurel County,['Q13410447'],"{'en': ['Laurel County, Kentucky', 'Laurel County, KY', 'Laurel County'], 'cy': ['Laurel County, Kentucky', 'Laurel County'], 'pt': ['Condado de Laurel'], 'pl': ['Hrabstwo Laurel'], 'fr': ['comté de Laurel'], 'es': ['Condado de Laurel'], 'uk': ['Лорел'], 'it': ['contea di Laurel'], 'de': ['Laurel County'], 'nl': ['Laurel County'], 'gd': ['Laurel County'], 'ro': ['Comitatul Laurel, Kentucky'], 'tr': ['Laurel County'], 'ga': ['Contae Laurel']}",['Laurel County'],[],"{'Q30': ('', '')}",37.11067,-84.1178
+Q8982,London City Airport,"['Q644371', 'Q1248784', 'Q94993988']","{'de': ['London City Airport', 'Flughafen London City'], 'fr': ['aérodrome de Royaume Uni', 'aéroport de Londres City'], 'nl': ['London City Airport', 'Luchthaven Londen City'], 'en': ['London City Airport'], 'it': ['aeroporto di Londra-City'], 'es': ['Aeropuerto de la Ciudad de Londres'], 'pl': ['Port lotniczy Londyn-City'], 'pt': ['Aeroporto da Cidade de Londres'], 'ro': ['Aeroportul London City'], 'sco': ['London City Airport'], 'tr': ['Londra Şehir Havalimanı'], 'uk': ['Лондон-Сіті'], 'cy': ['Maes Awyr Dinas Llundain']}",,['Q67442940'],"{'Q145': ('', '')}",51.505278,0.055278
+Q22059065,London,['Q17343829'],"{'en': ['London, Indiana', 'London, IN', 'London']}",,[],"{'Q30': ('', '')}",39.625556,-85.920278
+Q8712,London Luton Airport,"['Q644371', 'Q94993988']","{'en': ['Luton', 'Luton Airport', 'London Luton Airport'], 'fr': ['Londres-Luton', 'aérodrome de Royaume Uni', 'Luton', 'aéroport de Londres Luton'], 'it': ['Londra-Luton', 'Aeroporto di Londra-Luton'], 'uk': ['Лондонський аеропорт Лутон', 'Лутон'], 'de': ['Flughafen Luton', 'London Luton Airport'], 'nl': ['London Luton Airport', 'Luchthaven Londen Luton'], 'en-gb': ['London Luton Airport'], 'es': ['Aeropuerto de Londres-Luton'], 'pl': ['Port lotniczy Londyn-Luton'], 'pt': ['Aeroporto de Londres Luton'], 'tr': ['Londra Luton Havalimanı'], 'ro': ['Aeroportul Luton'], 'cy': ['Maes Awyr Luton']}",,['Q67387552'],"{'Q145': ('', '')}",51.874722,-0.368333
+Q20657974,London,['Q17343829'],"{'en': ['London, Minnesota', 'London, MN', 'London'], 'es': ['Londres, Minnesota', 'London, Minnesota', 'London']}",,[],"{'Q30': ('', '')}",43.526111,-93.062778
+Q565521,Clarence House,"['Q53536964', 'Q1802963']","{'ro': ['Clarence House', 'Casa Clarence'], 'uk': ['Кларенс-хаус', 'Кларенс-гаус'], 'cy': ['Tŷ Clarence', 'Clarence House'], 'pt': ['Clarence House'], 'pl': ['Clarence House'], 'fr': ['Clarence House'], 'en': ['Clarence House'], 'es': ['Clarence House'], 'it': ['Clarence House'], 'de': ['Clarence House'], 'nl': ['Clarence House'], 'tr': ['Clarence House'], 'en-ca': ['Clarence House'], 'en-gb': ['Clarence House'], 'ga': ['Teach Clarence']}",['Clarence House'],['Q19186'],"{'Q145': ('', '')}",51.504,-0.1385
+Q238587,National Portrait Gallery,"['Q207694', 'Q17431399', 'Q3343298']","{'en': ['Great Britain National Portrait Gallery', 'London National Portrait Gallery', 'National Portrait Gallery (London)', 'National Portrait Gallery London', 'NPG London', 'National Portrait Gallery'], 'cy': ['Oriel Bortreadau Genedlaethol', 'Galeri Genedlaethol o Bortreadau', 'yr Oriel Bortreadau Genedlaethol'], 'de': ['National Portrait Gallery'], 'es': ['National Portrait Gallery'], 'fr': ['National Portrait Gallery'], 'it': ['National Portrait Gallery'], 'nl': ['National Portrait Gallery'], 'pl': ['National Portrait Gallery'], 'pt': ['National Portrait Gallery'], 'ro': ['National Portrait Gallery'], 'uk': ['Національна портретна галерея (Лондон)'], 'ga': ['Gailearaí na bPortráidí Náisiúnta'], 'en-gb': ['National Portrait Gallery'], 'tr': ['Ulusal Portre Galerisi']}",['National Portrait Gallery'],['Q19186'],"{'Q145': ('', '')}",51.5094,-0.1281
+Q2716505,Stamford Hill,['Q2755753'],"{'nl': ['Stamford Hill'], 'en': ['Stamford Hill'], 'fr': ['Stamford Hill'], 'en-gb': ['Stamford Hill'], 'de': ['Stamford Hill'], 'ga': ['Stamford Hill'], 'it': ['Stamford Hill']}",['Stamford Hill'],['Q19186'],"{'Q145': ('', '')}",51.5705,-0.0727
+Q927198,Londinium,['Q2202509'],"{'fr': ['Londres romain', 'Londinium'], 'en': ['Roman London', 'Londinium'], 'uk': ['Лондиніум', 'Лондініум', 'Лондиній'], 'pt': ['Londinium'], 'pl': ['Londinium'], 'es': ['Londinium'], 'de': ['Londinium'], 'nl': ['Londinium'], 'it': ['Londinium'], 'en-ca': ['Londinium'], 'en-gb': ['Londinium'], 'ro': ['Londinium'], 'tr': ['Londinium'], 'cy': ['Londinium']}",,['Q19186'],"{'Q145': ('', '')}",51.514217,-0.088455
+Q122744,Maida Vale,['Q2755753'],"{'fr': ['Maida Vale'], 'en': ['Maida Vale'], 'ga': ['Maida Vale'], 'nl': ['Maida Vale'], 'it': ['Maida Vale'], 'sco': ['Maida Vale'], 'es': ['Maida Vale'], 'pt': ['Maida Vale'], 'pl': ['Maida Vale'], 'uk': ['Мейда-Вейл'], 'ro': ['Maida Vale'], 'de': ['Maida Vale'], 'en-ca': ['Maida Vale'], 'en-gb': ['Maida Vale']}",,['Q19186'],"{'Q145': ('1922', ''), 'Q174193': ('1801', '1922')}",51.5274,-0.1899
+Q123738,Hyde Park,['Q22698'],"{'es': ['Hyde Park Londres', 'Hyde Park'], 'it': ['Hyde Park di Londra', 'Hyde Park'], 'en': ['Hyde Park, London', 'Hyde Park'], 'pl': ['Hyde Park'], 'fr': ['Hyde Park'], 'de': ['Hyde Park'], 'ga': ['Hyde Park'], 'nl': ['Hyde Park'], 'pt': ['Hyde Park'], 'tr': ['Hyde Park'], 'ro': ['Hyde Park'], 'uk': ['Гайд-парк'], 'en-ca': ['Hyde Park, London'], 'en-gb': ['Hyde Park'], 'cy': ['Hyde Park']}",['Hyde Park'],['Q19186'],"{'Q145': ('', '')}",51.508611,-0.163611
+Q8703,London Gatwick Airport,"['Q644371', 'Q94993988']","{'en': ['Gatwick', 'Gatwick Airport', 'London Gatwick', 'London Gatwick Airport'], 'de': ['Flughafen London-Gatwick', 'Flughafen Gatwick', 'Flughafen London Gatwick'], 'it': ['Aeroporto di Gatwick', 'Gatwick', 'Londra-Gatwick', 'Aeroporto di Londra-Gatwick'], 'ro': ['Londra Gatwick', 'aeroportul Londra Gatwick', 'Aeroportul Londra Gatwick'], 'fr': ['Londres-Gatwick', 'aéroport de Londres Gatwick'], 'nl': ['London Gatwick Airport', 'Luchthaven Londen Gatwick'], 'es': ['Londres-Gatwick', 'Aeropuerto de Londres-Gatwick'], 'cy': ['Maes Awyr Gatwick'], 'ga': ['Aerfort Londain-Gatwick'], 'pl': ['Port lotniczy Londyn-Gatwick'], 'pt': ['Aeroporto de Londres Gatwick'], 'uk': ['Аеропорт Гатвік'], 'tr': ['Londra Gatwick Havalimanı'], 'gd': ['Port-adhair Gatwick']}",,['Q67443130'],"{'Q145': ('', '')}",51.147222,-0.190278
+Q15179170,Alexandra Palace transmitting station,['Q5367899'],"{'en-gb': ['Alexandra Palace television station', 'Alexandra Palace transmitting station'], 'en': ['Alexandra Palace television station', 'Alexandra Palace transmitting station']}",,['Q19186'],"{'Q145': ('', '')}",51.594444,-0.129167
+Q10818,7 July 2005 London bombings,['Q217327'],"{'en': ['7/7 London Bombings', '7 July London Bombings', 'July 7, 2005 London Bombings', 'July 7 London Bombings', 'Coordinated terrorist attack hits London', '7 July 2005 London bombings'], 'en-gb': ['7/7 London Bombings', '7 July London Bombings', '7 July 2005 London bombings'], 'fr': ['Les transports londoniens touchés par des attentats', 'attentats du 7 juillet 2005 à Londres'], 'pt': ['Londres é vítima de ataque terrorista', 'Atentados de 7 de julho de 2005 em Londres'], 'de': ['Terroranschläge in London', 'Terroranschläge am 7. Juli 2005 in London'], 'it': ['attentato di Londra del 7 luglio 2005', 'attentati di Londra del 7 luglio 2005'], 'cy': ['Ffrwydradau Llundain 7 Gorffennaf 2005'], 'en-ca': ['7 July 2005 London bombings'], 'es': ['atentados del 7 de julio de 2005 en Londres'], 'nl': ['terroristische aanslagen in Londen van 7 juli 2005'], 'pl': ['Zamach w Londynie'], 'ro': ['Atentatele din 7 iulie 2005 de la Londra'], 'uk': ['Вибухи у Лондоні 7 липня 2005'], 'tr': ['7 Temmuz 2005 Londra saldırıları'], 'ga': ['7 Iúil 2005 Buamáil Londan']}",,['Q19186'],"{'Q145': ('', '')}",51.504872,-0.07857
+Q1359589,West End theatre,['Q11635'],"{'es': ['Teatro del West End', 'Teatro de West End', 'Teatros de West End', 'Teatros del West End'], 'en': ['West End', 'West End theatre'], 'pl': ['West End'], 'fr': ['West End theatre'], 'it': ['Teatro del West End'], 'en-ca': ['West End theatre'], 'en-gb': ['West End theatre'], 'de': ['West End Theatre'], 'ro': ['West End'], 'tr': ['West End tiyatrosu']}",,['Q19186'],"{'Q145': ('', '')}",51.511389,-0.128056
+Q649419,Marylebone station,"['Q55488', 'Q55485']","{'en': ['London Marylebone', 'Marylebone station'], 'ro': ['Londra Marylebone', 'gara Londra Marylebone', 'gara Marylebone'], 'it': ['stazione di Marylebone', 'stazione di Londra Marylebone'], 'pt': ['London Marylebone', 'Estação Marylebone'], 'fr': ['gare de Marylebone', 'London Marylebone'], 'cy': ['Gorsaf reilffordd Marylebone Llundain'], 'de': ['Bahnhof Marylebone'], 'nl': ['Station London Marylebone'], 'pl': ['Marylebone'], 'tr': ['Marylebone İstasyonu'], 'es': ['Estación de Marylebone'], 'uk': ['Мерілебон (станція)']}",,['Q19186'],"{'Q145': ('', '')}",51.522222,-0.163056
+Q15242653,London Museum,['Q33506'],"{'en': ['London Museum'], 'it': ['London Museum'], 'de': ['London Museum']}",,['Q19186'],"{'Q145': ('', '')}",51.5052,-0.188
+Q20075,London Underground,"['Q5503', 'Q1268865']","{'en-gb': ['the Underground', 'the Tube', 'London Underground'], 'en': ['the Underground', 'the Tube', 'London Underground Limited', 'London Underground'], 'es': ['London Underground', 'Underground', 'Tube', 'Metro de Londres'], 'pl': ['London Underground', 'the Tube', 'the Underground', 'Metro w Londynie'], 'cy': ['Underground Llundain', 'Rheilffordd Danddaearol Llundain'], 'it': ['the Tube', 'London Underground', 'metropolitana di Londra'], 'nl': ['London Underground', 'metro van Londen'], 'de': ['London Underground'], 'fr': ['métro de Londres'], 'ga': ['London Underground'], 'pt': ['Metropolitano de Londres'], 'ro': ['Metroul din Londra'], 'tr': ['Londra metrosu'], 'uk': ['Лондонський метрополітен'], 'en-ca': ['London Underground']}",,"['Q67443130', 'Q19186', 'Q67285329', 'Q67442940', 'Q67532100']","{'Q145': ('', '')}",51.492778,-0.100833
+Q6669738,London,['Q17343829'],"{'en': ['London, Wisconsin', 'London, WI', 'London'], 'es': ['Londres, Wisconsin', 'London (Wisconsin)'], 'fr': ['London']}",,[],"{'Q30': ('', '')}",43.0478,-89.0128
+Q756819,Strand,['Q79007'],"{'nl': ['The Strand', 'Strand'], 'en': ['the Strand', 'Strand, London', 'Strand'], 'pt': ['Strand'], 'fr': ['The Strand'], 'es': ['Strand'], 'uk': ['Стренд'], 'it': ['Strand'], 'cy': ['Y Strand'], 'de': ['Strand'], 'en-ca': ['Strand'], 'en-gb': ['Strand'], 'ga': ['Strand'], 'tr': ['Strand, Londra'], 'pl': ['Strand (Londyn)']}",['Strand'],['Q19186'],"{'Q145': ('', '')}",51.5114,-0.119
+Q2354215,Central London,['Q82794'],"{'fr': ['Central London'], 'en': ['Central London'], 'it': ['Central London'], 'en-ca': ['Central London'], 'en-gb': ['Central London'], 'nl': ['Centraal Londen'], 'de': ['Central London'], 'es': ['Centro de Londres'], 'cy': ['canol Llundain'], 'pt': ['Centro de Londres'], 'uk': ['Центральний Лондон']}",,['Q67479626'],"{'Q145': ('', '')}",51.5073,0.12755
+Q7443327,Second Great Fire of London,"['Q2380335', 'Q838718']","{'en': ['Second Great Fire of London'], 'uk': ['Друга велика лондонська пожежа']}",,[],"{'Q145': ('', '')}",51.5157,-0.0921
+Q123885,Royal Society,"['Q414147', 'Q2085381', 'Q45400320', 'Q955824', 'Q1966910']","{'pl': ['Towarzystwo Królewskie', 'The Royal Society of London for Improving Natural Knowledge', 'The Royal Society', 'Royal Society'], 'nl': ['Royal Society of London', 'Royal Society of London for the Improvement of Natural Knowledge', 'Royal Society'], 'pt': ['Royal Society of London', 'Real Sociedade de Londres', 'The Royal Society', 'Sociedade Real de Londres', 'Royal Society'], 'tr': ['The Royal Society of London for the Improvement of Natural Knowledge', 'Kraliyet Cemiyeti', 'Royal Society of London for the Improvement of Natural Knowledge', 'Royal Society'], 'uk': ['Британське королівське товариство', 'Лондонського королівського товариства', 'Королівське товариство', 'Королівське наукове товариство', 'Лондонське королівське наукове товариство', 'Лондонське королівське товариство'], 'it': ['Fellow of the Royal Society', 'Royal Society'], 'ro': ['Fellow of the Royal Society', 'Societatea Regală din Londra'], 'en': ['The Royal Society of London for Improving Natural Knowledge', 'Royal Society of London', 'The President, Council, and Fellows of the Royal Society of London for Improving Natural Knowledge', 'The Royal Society, UK', 'Royal Society'], 'ga': ['Cumann Ríoga', 'An Cumann Ríoga'], 'es': ['Royal Society'], 'de': ['Royal Society'], 'cy': ['y Gymdeithas Frenhinol'], 'fr': ['Royal Society'], 'en-gb': ['Royal Society'], 'gd': ['An Comann Rìoghail'], 'kw': ['Kowethas Riel']}",,['Q19186'],"{'Q145': ('', '')}",51.506111,-0.132222
+Q55018,Royal Opera House,"['Q153562', 'Q24354', 'Q3469910']","{'en': ['Covent Garden', 'Royal Italian Opera', 'ROH Covent Garden', 'Covent Garden Opera', 'Royal Opera House, Covent Garden', 'Royal Opera House'], 'de': ['The Royal Opera House', 'Royal Opera House'], 'pl': ['Royal Opera House', 'English National Opera', 'Covent Garden Theatre'], 'nl': ['Royal Opera House', 'Royal Opera House Covent Garden'], 'en-ca': ['Royal Opera House'], 'en-gb': ['Royal Opera House'], 'es': ['Royal Opera House'], 'fr': ['Royal Opera House'], 'it': ['Royal Opera House'], 'pt': ['Royal Opera House'], 'tr': ['Royal Opera House'], 'uk': ['Королівський театр Ковент-Гарден'], 'cy': ['Tŷ Opera Brenhinol'], 'ro': ['Royal Opera House']}",['Royal Opera House'],['Q19186'],"{'Q145': ('', '')}",51.513056,-0.1225
+Q130206,London Bridge,"['Q537127', 'Q3397519', 'Q158438', 'Q1735471', 'Q1223230']","{'en': ['London Bridge'], 'en-ca': ['London Bridge'], 'en-gb': ['London Bridge'], 'de': ['London Bridge'], 'es': ['London Bridge'], 'fr': ['pont de Londres'], 'ga': ['Droichead Londan'], 'it': ['London Bridge'], 'nl': ['London Bridge'], 'pl': ['London Bridge'], 'pt': ['Ponte de Londres'], 'ro': ['Podul Londrei'], 'tr': ['Londra Köprüsü'], 'uk': ['Лондонський міст'], 'sco': ['London Bridge'], 'cy': ['Pont Llundain']}",,"['Q19186', 'Q67443130']","{'Q21': ('', '')}",51.508056,-0.087778
+Q4642035,64 Baker Street,['Q41176'],{'en': ['64 Baker Street']},,['Q19186'],"{'Q145': ('', '')}",51.5191,-0.156
+Q729177,Cleopatra's Needle,"['Q170980', 'Q570116']","{'en': [""Cleopatra's Needle, London"", ""Cleopatra's Needle""], 'fr': ['Aiguille de Cléopâtre'], 'de': ['Nadeln der Kleopatra'], 'es': ['Agujas de Cleopatra'], 'it': ['Ago di Cleopatra'], 'nl': ['Naald van Cleopatra'], 'pt': ['Agulhas de Cleópatra'], 'uk': ['Голка Клеопатри (Лондон)'], 'pl': ['Igła Kleopatry']}","[""Cleopatra's Needle""]",['Q19186'],"{'Q145': ('', '')}",51.508503,-0.120296
+Q1399178,Fazl Mosque,['Q32815'],"{'en': ['The London Mosque', 'Fazl Mosque'], 'de': ['Fazl-Moschee'], 'es': ['Mezquita Fazl'], 'fr': ['mosquée Fazl'], 'en-gb': ['Fazl Mosque']}",,['Q67443130'],"{'Q145': ('', '')}",51.4511,-0.2075
+Q5645763,Hammersmith bus station,['Q494829'],{'en': ['Hammersmith bus station']},,['Q19186'],"{'Q145': ('', '')}",51.4921,-0.224
+Q194209,basketball at the 2012 Summer Olympics,['Q26132862'],"{'es': ['Anexo:Baloncesto en los Juegos Olímpicos de 2012', 'Anexo:Baloncesto en los Juegos Olímpicos de Londres 2012'], 'ro': ['Baschet la Jocurile Olimpice din 2012', 'Baschet la Jocurile Olimpice de vară din 2012'], 'pl': ['Koszykówka na Letnich Igrzyskach Olimpijskich 2012'], 'fr': [""basket-ball aux Jeux olympiques d'été de 2012""], 'it': ['Pallacanestro ai Giochi della XXX Olimpiade'], 'nl': ['basketbal op de Olympische Zomerspelen 2012'], 'pt': ['Basquetebol nos Jogos Olímpicos de Verão de 2012'], 'en': ['basketball at the 2012 Summer Olympics'], 'tr': [""2012 Yaz Olimpiyatları'nda basketbol""], 'uk': ['Баскетбол на літніх Олімпійських іграх 2012'], 'de': ['Olympische Sommerspiele 2012/Basketball'], 'ga': ['cispheil ag Cluichí Oilimpeacha an tSamhraidh 2012']}",,[],"{'Q145': ('', '')}",51.5486,-0.0139
+Q801124,Liverpool Street station,"['Q55485', 'Q55488', 'Q1793804']","{'nl': ['Liverpool Street', 'Liverpool Street station', 'Liverpool Street Station', 'Station Liverpool Street', 'Station London Liverpool Street'], 'de': ['Liverpool Street Station', 'Bahnhof Liverpool Street'], 'it': ['Stazione di Liverpool Street', 'stazione di Londra Liverpool Street'], 'fr': ['gare de Liverpool Street', 'Liverpool Street'], 'cy': ['Gorsaf reilffordd Liverpool Street', 'Gorsaf reilffordd Liverpool Street Llundain'], 'es': ['Estacion de Liverpool Street', 'London Liverpool Street', 'Liverpool Street', 'Estación de Liverpool Street'], 'pl': ['London Liverpool Street', 'Liverpool Street Station'], 'en': ['Liverpool Street railway station', 'Bishopsgate station', 'London Liverpool Street', 'Liverpool Street Overground station', 'Liverpool Street station'], 'en-gb': ['Liverpool Street railway station', 'London Liverpool Street', 'Bishopsgate station', 'Liverpool Street Underground station', 'Liverpool Street tube station', 'Liverpool Street station'], 'ro': ['gara Londra Liverpool Street', 'Londra Liverpool Street', 'gara Liverpool Street'], 'ga': ['Stáisiún Sráid Learpholl'], 'en-ca': ['Liverpool Street station'], 'tr': ['Liverpool Street İstasyonu'], 'uk': ['Ліверпуль-Стріт'], 'pt': ['Estação Liverpool Street']}",,['Q19186'],"{'Q145': ('', '')}",51.5186,-0.0813
+Q7737135,The Goldsmiths' Company Assay Office,,"{'en': [""The Goldsmiths' Company Assay Office""]}",,[],{},51.5157,-0.0959
+Q4834838,BBC Radio London,['Q14350'],"{'en': ['BBC London 94.9', 'Radio London', 'BBC Radio London'], 'en-gb': ['Radio London', 'BBC London 94.9', 'BBC Radio London'], 'pl': ['BBC London 94.9'], 'it': ['BBC London 94.9'], 'fr': ['BBC London 94.9']}",,[],"{'Q145': ('', '')}",51.5185,-0.1431
+Q17509255,Chiswell Street,['Q79007'],"{'en': ['Chiswell Street'], 'nl': ['Chiswell Street'], 'fr': ['Chiswell Street']}",,['Q19186'],"{'Q145': ('', '')}",51.5207,-0.089944
+Q951830,Royal Mint,"['Q464780', 'Q270791']","{'en': ['The Royal Mint', 'The Royal Mint (UK)', 'Royal Mint'], 'cy': ['Bathdy Brenhinol', 'y Bathdy Brenhinol'], 'nl': ['The Royal Mint (UK)', 'Royal Mint'], 'fr': ['Royal Mint'], 'de': ['Royal Mint'], 'it': ['Royal Mint'], 'es': ['Royal Mint'], 'pt': ['Royal Mint (Reino Unido)']}",,[],"{'Q145': ('', '')}",51.555,-3.387
+Q800753,Fenchurch Street railway station,"['Q55488', 'Q55485']","{'de': ['Fenchurch Street Station', 'Bahnhof Fenchurch Street'], 'nl': ['Station Fenchurch Street', 'Station London Fenchurch Street'], 'pt': ['Fenchurch Street (Londres)', 'Estação de Fenchurch Street', 'Fenchurch Street'], 'it': ['Stazione di Fenchurch Street', 'stazione di Londra Fenchurch Street'], 'en': ['London Fenchurch Street', 'Fenchurch Street', 'Fenchurch Street railway station'], 'fr': ['gare de Fenchurch Street'], 'en-ca': ['Fenchurch Street railway station'], 'en-gb': ['Fenchurch Street railway station'], 'es': ['Estación de Fenchurch Street'], 'uk': ['Фенчерч-стрит (станція)']}",,['Q19186'],"{'Q145': ('', '')}",51.511667,-0.078611
+Q6671078,London bid for the 2012 Summer Olympics,['Q938381'],"{'en': ['London bid for the 2012 Summer Olympics'], 'es': ['Candidatura de Londres a los Juegos Olímpicos de 2012']}",,[],"{'Q145': ('', '')}",51.54615,-0.01269
+Q186309,Madame Tussauds,['Q667018'],"{'pl': [""Madame Tussaud's"", 'Muzeum Figur Woskowych Madame Tussaud', 'Muzeum Figur Woskowych Madame Tussaud w Londynie'], 'fr': ['Musée Madame Tussauds', 'Madame Tussaud', 'Musée de Madame Tussauds', 'Madame Tussauds'], 'es': [""Madame Tussaud's Las Vegas"", 'Madame Tussauds Las Vegas', 'Madame Tussauds', 'Museo Madame Tussaud', 'Museo Madame Tussauds'], 'it': ['Madame Tussaud', ""Madame Tussaud's"", 'Madame Tussauds'], 'de': ['Madame Tussaud', ""Madame Tussaud's Wachsfigurenkabinett"", 'Madame Tussaud’s', ""Madame Tussaud's"", 'Madame Tussaud’s Waxwork Museum', 'Grosholtz', 'Philippe Curtius', ""Madame Tussaud's Waxwork Museum"", 'Madame Tussauds Wachsfigurenkabinett', 'Wachsfiguren-Kabinett', 'Tussaud', 'Wachsfigurenausstellung der Madame Tussand,', 'Madame Tussauds'], 'nl': ['Madame Tussaud', 'Madam Tussaud', 'Madame Tussauds'], 'pt': ['Madame Tussaud', 'Museu Madame Tussauds', 'Madame Tussauds'], 'tr': ['Madame Tussaud', 'Madamme Tussauds', 'Tussaud Müzesi', 'Madame Tussauds'], 'ro': [""Madame tussaud's"", 'Madame Tussauds'], 'en': ['Madame Tussauds'], 'uk': ['Музей мадам Тюссо'], 'ga': ['Madame Tussauds'], 'sco': ['Madame Tussauds']}",,['Q19186'],"{'Q145': ('', '')}",51.52279,-0.15517
+Q148349,Lambeth,"['Q2755753', 'Q149621']","{'es': ['Distrito de Lambeth', 'Lambeth'], 'nl': ['Lambeth'], 'en': ['Lambeth'], 'en-gb': ['Lambeth'], 'fr': ['Lambeth'], 'it': ['Lambeth'], 'de': ['Lambeth'], 'pl': ['Lambeth'], 'uk': ['Ламбет'], 'cy': ['Lambeth'], 'pt': ['Lambeth'], 'ga': ['Lambeth']}",,['Q67443130'],"{'Q145': ('', '')}",51.4903,-0.1193
+Q212883,diving at the 2012 Summer Olympics,['Q26132862'],"{'pl': ['Skoki do wody 2012', 'Skoki do wody na Letnich Igrzyskach Olimpijskich 2012'], 'fr': ['Plongeon aux Jeux olympiques de 2012', ""plongeon aux Jeux olympiques d'été de 2012""], 'ro': ['Sărituri în apă la Jocurile Olimpice din 2012', 'Sărituri natație la Jocurile Olimpice din 2012', 'Sărituri în apă la Jocurile Olimpice de vară din 2012'], 'es': ['Anexo:Saltos en los Juegos Olímpicos de Londres 2012'], 'it': ['Tuffi ai Giochi della XXX Olimpiade'], 'en': ['diving at the 2012 Summer Olympics'], 'nl': ['schoonspringen op de Olympische Zomerspelen 2012'], 'pt': ['Saltos ornamentais nos Jogos Olímpicos de Verão de 2012'], 'tr': [""2012 Yaz Olimpiyatları'nda atlama""], 'uk': ['Стрибки у воду на літніх Олімпійських іграх 2012'], 'de': ['Olympische Sommerspiele 2012/Wasserspringen'], 'ga': ['tumadóireacht ag Cluichí Oilimpeacha an tSamhraidh 2012']}",,[],"{'Q145': ('', '')}",51.5402,-0.0106
+Q195436,Tate Britain,"['Q207694', 'Q17431399']","{'pl': ['Tate Gallery', 'Tate Britain'], 'es': ['Tate Gallery', 'Tate Britain'], 'de': ['Tate Gallery of British Art', 'National Gallery of British Art', 'Tate Gallery', 'Tate Britain'], 'fr': ['National Gallery of British Art', 'Tate Gallery', 'Tate Britain'], 'en': ['National Gallery of British Art', 'Tate Gallery', 'Tate Gallery of British Art', 'Tate Britain'], 'tr': ['Tate Britain'], 'it': ['Tate Britain'], 'cy': ['Tate Britain'], 'nl': ['Tate Britain'], 'uk': ['Тейт Британія'], 'en-gb': ['Tate Britain'], 'pt': ['Tate Modern']}",['Tate Britain'],['Q19186'],"{'Q145': ('', '')}",51.490833,-0.127222
+Q5038252,Cardboard City,['Q486972'],"{'en': ['Cardboard City'], 'nl': ['Cardboard City']}",,['Q67443130'],"{'Q145': ('', '')}",51.505,-0.113611
+Q743535,Chelsea,"['Q2755753', 'Q1115575']","{'en-ca': ['Chelsea, London', 'Chelsea'], 'en': ['Chelsea, London', 'Chelsea'], 'cy': ['Chelsea, London', 'Chelsea'], 'de': ['Chelsea, London', 'Chelsea'], 'en-gb': ['Chelsea, London', 'Chelsea'], 'es': ['Chelsea (Londres)', 'Chelsea'], 'fr': ['Chelsea, London', 'Chelsea'], 'ga': ['Chelsea, London', 'Chelsea'], 'gd': ['Chelsea, London', 'Chelsea'], 'it': ['Chelsea, London', 'Chelsea'], 'nl': ['Chelsea, London', 'Chelsea'], 'pl': ['Chelsea, London', 'Chelsea'], 'pt': ['Chelsea, London', 'Chelsea'], 'ro': ['Chelsea, London', 'Chelsea'], 'sco': ['Chelsea, London', 'Chelsea'], 'tr': ['Chelsea'], 'uk': ['Челсі']}",,['Q19186'],"{'Q145': ('1922', ''), 'Q174193': ('1801', '1922'), 'Q161885': ('1707', '1800'), 'Q179876': ('', '1707')}",51.4875,-0.1684
+Q83609,Acton,"['Q3957', 'Q2755753', 'Q1115575']","{'en': ['Acton, London', 'Acton'], 'es': ['Acton (Londres)', 'Acton'], 'pl': ['Acton'], 'ro': ['Acton'], 'ga': ['Acton'], 'nl': ['Acton'], 'cy': ['Acton, Llundain'], 'en-gb': ['Acton'], 'fr': ['Acton'], 'tr': ['Acton, Londra'], 'uk': ['Ектон (Лондон)'], 'it': ['Acton']}",,['Q19186'],"{'Q145': ('', '')}",51.510519,-0.262661
+Q79348,London,['Q1093829'],"{'en': ['London, Arkansas', 'London, AR', 'London'], 'es': ['London, Arkansas', 'Londres, Arkansas', 'London'], 'pt': ['London'], 'pl': ['London'], 'nl': ['London'], 'fr': ['London'], 'it': ['London'], 'uk': ['Лондон'], 'cy': ['London, Arkansas'], 'de': ['London'], 'ga': ['London']}",,[],"{'Q30': ('', '')}",35.3258,-93.2367
+Q193196,University College London,"['Q15407956', 'Q4671277', 'Q38723', 'Q5341295']","{'fr': ['University College of London', 'University College London', 'University College', 'University College de Londres'], 'es': ['University College of London', 'University College London', 'Escuela Universitaria de Londres', 'University College de Londres'], 'it': ['University College of London', 'University College London', 'University College di Londra', 'University College'], 'de': ['University College in London', 'University College London'], 'pt': ['University College', 'University College London'], 'tr': ['University College', 'Londra Üniversitesi Akademisi', 'Londra Üniversitesi Koleji'], 'en': ['University College, London', 'London University', ""London's Global University"", 'University College London'], 'pl': ['University College London'], 'nl': ['University College London'], 'ro': ['University College London'], 'uk': ['Університетський коледж Лондона'], 'sco': ['University College London'], 'cy': ['Coleg Prifysgol Llundain'], 'en-gb': ['University College London'], 'gd': ['Colaiste Oilthigh Lunnainn'], 'ga': ['University College London'], 'kw': ['Kollji Pennskol Loundres']}",,['Q19186'],"{'Q145': ('', '')}",51.524722,-0.133611
+Q4801470,Arts Educational School,"['Q2418495', 'Q2143781']","{'en': ['ArtsEd', 'Arts Educational School'], 'en-ca': ['Arts Educational Schools, London'], 'en-gb': ['Arts Educational School'], 'cy': ['Arts Educational Schools, Llundain'], 'nl': ['Arts Educational School']}",,['Q19186'],"{'Q145': ('', '')}",51.4961,-0.2525
+Q220198,Zoological Society of London,"['Q748019', 'Q45400320', 'Q1966910']","{'es': ['Sociedad Zoologica de Londres', 'Sociedad Zoológica de Londres'], 'fr': ['Zoological Society of London', 'Société zoologique de Londres'], 'en': ['Zoological Society of London'], 'nl': ['Zoological Society of London'], 'pt': ['Sociedade Zoológica de Londres'], 'de': ['Zoological Society of London'], 'pl': ['Zoological Society of London'], 'it': ['Società Zoologica di Londra'], 'gd': ['Comann Ainmh-eòlas Lunnainn'], 'cy': ['Cymdeithas Swoleg Llundain'], 'uk': ['Зоологічне товариство Лондона'], 'tr': ['Londra Zooloji Topluluğu'], 'ga': ['Cumann Zó-eolaíochta Londan']}",,['Q19186'],"{'Q145': ('', '')}",51.5357,-0.1575
+Q124234,St James’s,['Q2755753'],"{'de': [""St James's"", 'St. James’s'], 'en': ['St. James’s', 'St James’s, London', 'St James’s'], 'it': [""St James's"", ""Saint James's"", ""St. James's""], 'fr': [""St. James's""], 'es': [""St James's""], 'ga': [""St James's""], 'nl': [""St James's""], 'cy': [""St James's""]}",,['Q19186'],"{'Q145': ('', '')}",51.5085,-0.133
+Q23298,Kent,['Q180673'],"{'de': ['Zeremonielle Grafschaft Kent', 'Kent'], 'en': ['Kent, England', 'Kent'], 'tr': ['Törensel Kent Kontluğu', 'Kent'], 'uk': ['графство Кент', 'церемоніальне графство Кент', 'Кент'], 'en-gb': ['Kent'], 'en-ca': ['Kent'], 'cy': ['Caint'], 'es': ['Kent'], 'fr': ['Kent'], 'ga': ['Kent'], 'it': ['Kent'], 'kw': ['Kint'], 'nl': ['Kent'], 'pl': ['Kent'], 'pt': ['Kent'], 'ro': ['Kent'], 'sco': ['Kent']}",,['Q67479626'],"{'Q145': ('1927', ''), 'Q179876': ('', '1707')}",51.19,0.73
+Q1431914,Croydon Airport,['Q644371'],"{'de': ['Croydon Airport', 'RAF Croydon', 'Flughafen London-Croydon'], 'fr': ['base aérienne militaire du Royaume-Uni', 'aéroport de Croydon'], 'en': ['Croydon Airport'], 'it': ['Aeroporto di Croydon'], 'nl': ['Croydon Airport'], 'pt': ['Aeroporto de Croydon'], 'ro': ['Aeroportul Croydon'], 'es': ['Aeropuerto de Croydon']}",,['Q67443130'],"{'Q145': ('', '')}",51.356361,-0.116822
+Q835031,"Embassy of Germany, London",['Q3917681'],"{'pl': ['Niemieccy ambasadorzy w Wielkiej Brytanii', 'Niemmieccy ambasadorzy w Wielkiej Brytanii', 'Ambasadorowie Niemiec w Wielkiej Brytanii'], 'de': ['Deutsche Botschaft London'], 'en': ['Embassy of Germany, London'], 'fr': [""ambassade d'Allemagne au Royaume-Uni""], 'uk': ['Посольство Німеччини у Великій Британії'], 'es': ['embajada de Alemania en el Reino Unido']}",,['Q19186'],"{'Q145': ('', '')}",51.49825,-0.15425
+Q1323689,BFI London Film Festival,['Q220505'],"{'fr': ['Festival de Londres', 'British Film Institute Awards', 'London Film Festival', 'Festival du film de Londres'], 'es': ['London Film Festival', 'Festival de Cine de Londres'], 'tr': ['Londra Uluslararası Film Festivali', 'Londra Film Festivali'], 'de': ['The Times bfi London Film Festival', 'BFI London Film Festival', 'BFI Festival', 'London Film Festival'], 'nl': ['London Film Festival', 'The Times BFI London Film Festival', 'Filmfestival van Londen'], 'en': ['London Film Festival', 'BFI Film Festival', 'BFI London Festival', 'BFI Festival', 'BFI London Film Festival'], 'pl': ['BFI London Film Festival', 'London Film Festival', 'BFI Film Festival', 'BFI London Festival', 'BFI Festival', 'Festiwal Filmowy w Londynie'], 'it': ['BFI London Film Festival'], 'en-ca': ['BFI London Film Festival'], 'en-gb': ['BFI London Film Festival'], 'pt': ['Festival de Cinema de Londres'], 'uk': ['Лондонський кінофестиваль']}",['BFI London Film Festival'],[],"{'Q145': ('', '')}",51.506389,-0.115278
+Q7594521,St Mary's Roman Catholic Church,['Q1088552'],"{'en': ['Roman Catholic Church of St Mary (Church of the Redemptionist Fathers)', ""St Mary's Roman Catholic Church, Clapham"", ""St Mary's Roman Catholic Church""]}",,['Q67443130'],"{'Q145': ('', '')}",51.4616,-0.13743
+Q26888,London Borough of Croydon,"['Q211690', 'Q7897276']","{'fr': ['London Borough of Croydon', 'Croydon', 'London Borough de Croydon', 'Borough londonien de Croydon'], 'it': ['borgo londinese di Croydon', 'Croydon'], 'nl': ['London Borough of Croydon', 'Croydon'], 'pt': ['London Borough of Croydon', 'Borough de Croydon', 'London Borough de Croydon', 'Croydon'], 'ro': ['Burgul londonez Croydon', 'Croydon'], 'en': ['Croydon', 'Croydon (unparished area)', 'London Borough of Croydon'], 'pl': ['London Borough of Croydon'], 'es': ['Croydon'], 'de': ['London Borough of Croydon'], 'ga': ['Buirg Londan Croydon'], 'uk': ['Кройдон'], 'cy': ['Bwrdeistref Llundain Croydon'], 'tr': ['Croydon']}",,['Q67443130'],"{'Q145': ('', '')}",51.371111,-0.098889
+Q8709,London Stansted Airport,"['Q644371', 'Q94993988']","{'en': ['Stansted Airport', 'Stansted', 'London Stansted', 'London Stansted Airport'], 'it': ['London Stansted Airport', 'Londra Stansted', 'Aeroporto di Londra Stansted'], 'fr': ['Stansted', 'Londres-Stansted', 'aéroport de Londres Stansted'], 'nl': ['London Stansted Airport', 'Luchthaven Londen Stansted'], 'ro': ['London Stansted Airport', 'Aeroportul Londra Stansted'], 'es': ['Londres-Stansted', 'Aeropuerto de Londres-Stansted'], 'cy': ['Maes Awyr Stansted'], 'de': ['Flughafen London-Stansted'], 'pl': ['Port lotniczy Londyn-Stansted'], 'pt': ['Aeroporto de Londres Stansted'], 'ga': ['Aerfort Londain Stansted'], 'tr': ['Londra Stansted Havalimanı'], 'uk': ['Лондон-Станстед']}",['London Stansted Airport'],['Q67442940'],"{'Q145': ('', '')}",51.885,0.235
+Q1402606,BAPS Shri Swaminarayan Mandir London,"['Q106807864', 'Q842402']","{'en': ['Neasden Temple', 'BAPS Shri Swaminarayan Mandir London'], 'de': ['Neasden-Tempel'], 'nl': ['Neasdentempel'], 'pl': ['BAPS Shri Swaminarayan Mandir w Londynie'], 'it': ['Shri Swaminarayan Mandir'], 'fr': ['Neasden Temple'], 'uk': ['Шрі Свамінараян Мандір']}",,['Q19186'],"{'Q145': ('', '')}",51.5475,-0.261667
+Q278054,"Roman Catholic Diocese of London, Ontario",['Q3146899'],"{'en': ['Diocese of London', 'Roman Catholic Diocese of London, Ontario'], 'es': ['Diócesis de Londres', 'Diócesis de London'], 'de': ['Bistum London (Ontario)'], 'it': ['diocesi di London'], 'pl': ['Diecezja London'], 'fr': ['diocèse de London'], 'nl': ['Bisdom London'], 'pt': ['Diocese de London'], 'ga': ['Deoise Chaitliceach London, Ontario']}",,[],"{'Q16': ('', '')}",42.9876,-81.25
+Q801125,London Bridge station,"['Q55488', 'Q55490', 'Q55485']","{'pt': ['Estação da London Bridge (Metro de Londres)', 'Estação de London Bridge', 'Estação da London Bridge', 'London Bridge (Metropolitano de Londres)', 'London Bridge (Metrô de Londres)', 'London Bridge (Metro de Londres)', 'Estação da London Bridge (Metrô de Londres)', 'Estação London Bridge'], 'de': ['London Bridge Station', 'Bahnhof London Bridge'], 'en': ['London Bridge railway station', 'London Bridge station'], 'cy': ['Gorsaf reilffordd London Bridge', 'Gorsaf London Bridge'], 'es': ['Estacion de London Bridge', 'Estación de London Bridge'], 'fr': ['gare de London Bridge', 'London Bridge'], 'nl': ['Station London Bridge'], 'it': ['stazione di London Bridge'], 'pl': ['London Bridge Station'], 'en-ca': ['London Bridge station'], 'en-gb': ['London Bridge station'], 'tr': ['London Bridge İstasyonu'], 'uk': ['Лондон-брідж (станція)']}",,['Q67443130'],"{'Q145': ('', '')}",51.505,-0.086111
+Q205679,London Borough of Hackney,"['Q211690', 'Q7897276']","{'it': ['London Borough of Hackney', 'Borgo londinese di Hackney', 'Hackney'], 'de': ['Stoke Newington', 'De Beauvoir Town', 'Hackney Wick', 'Upper Clapton', 'Haggerston', 'Hoxton', 'London Borough of Hackney'], 'nl': ['Londen Borough of Hackney', 'London Borough of Hackney', 'Hackney'], 'pt': ['Borough de Hackney', 'London Borough de Hackney', 'Hackney'], 'ro': ['Burgul londonez Hackney', 'Hackney'], 'en': ['Hackney', 'Hackney (unparished area)', 'London Borough of Hackney'], 'fr': ['London Borough of Hackney', 'borough londonien de Hackney'], 'cy': ['Hackney (bwrdeistref)', 'Bwrdeistref Llundain Hackney'], 'pl': ['London Borough of Hackney'], 'es': ['Hackney'], 'ga': ['London Borough of Hackney'], 'uk': ['Гекні'], 'sco': ['London Borough of Hackney'], 'en-ca': ['London Borough of Hackney'], 'en-gb': ['London Borough of Hackney'], 'tr': ['Hackney']}",,['Q19186'],"{'Q145': ('', '')}",51.544722,-0.0575
+Q6669870,London Book Fair,"['Q11483816', 'Q998672', 'Q57305']","{'en': ['London Book Fair'], 'de': ['London Book Fair'], 'es': ['London Book Fair'], 'fr': ['London Book Fair'], 'it': ['London Book Fair'], 'nl': ['London Book Fair'], 'pt': ['London Book Fair'], 'en-gb': ['London Book Fair']}",,['Q19186'],"{'Q145': ('', '')}",51.496,-0.211
+Q1666958,London International Surrealist Exhibition,['Q59861107'],"{'es': ['Exposición Surrealista', 'Exposición Internacional Surrealista', 'Exposición Internacional Surrealista de Londres', 'Exposición Surrealista de Londres', 'Exposición Surrealista Internacional de Londres'], 'de': ['International Surrealist Exhibition'], 'en': ['London International Surrealist Exhibition'], 'fr': ['International Surrealist Exhibition']}",,[],"{'Q145': ('', '')}",51.509722,-0.141111
+Q5011830,CIQM-FM,['Q14350'],{},,[],"{'Q16': ('', '')}",42.9556,-81.3553
+Q1394500,South London,['Q7631958'],"{'pt': ['Sul de Londres', 'South London'], 'en': ['London/South', 'South London'], 'fr': ['Londres-Sud', 'South London'], 'de': ['South London'], 'nl': ['Zuid-Londen'], 'it': ['Sud di Londra'], 'cy': ['South London'], 'es': ['Londres del Sur'], 'sco': ['Sooth Lunnon'], 'uk': ['Південний Лондон'], 'en-gb': ['South London']}",['South London'],['Q67443130'],"{'Q145': ('', '')}",51.45,-0.1
+Q772421,"St George's, University of London","['Q494230', 'Q2467461', 'Q5341295']","{'en': [""St George's Hospital Medical School"", ""St George's University of London"", ""University of London Saint George's"", ""Saint George's Hospital Medical School"", ""St George's, University of London""], 'en-gb': [""St George's Hospital Medical School"", ""University of London Saint George's"", ""Saint George's Hospital Medical School"", ""St George's, University of London""], 'de': ['St George’s, University of London'], 'cy': [""St George's, Prifysgol Llundain""], 'it': [""St. George's Hospital Medical School""], 'uk': ['Коледж Святого Джорджа'], 'fr': [""St George's, University of London""]}",,['Q67443130'],"{'Q145': ('', '')}",51.426944,-0.174722
+Q1749569,Ny-London,['Q2940297'],"{'en': ['London', 'Camp Mansfield', 'Ny-London'], 'de': ['Ny-London'], 'it': ['Ny-London']}",,[],"{'Q20': ('', '')}",78.963333,12.047778
+Q60578265,London,['Q27990982'],"{'en': ['City of London', 'London'], 'fr': ['cité de Londres'], 'ga': ['Londain']}",,[],"{'Q179876': ('', '')}",51.515556,-0.093056
+Q39121,Leeds,"['Q515', 'Q7897276', 'Q1549591', 'Q1187811']","{'es': ['Ciudad de Leeds', 'Cross Gates', 'Leeds'], 'pl': ['Leeds'], 'ga': ['Leeds'], 'nl': ['Leeds'], 'pt': ['Leeds'], 'tr': ['Leeds'], 'uk': ['Лідс'], 'sco': ['Leeds'], 'fr': ['Leeds'], 'it': ['Leeds'], 'de': ['Leeds'], 'en': ['Leeds'], 'ro': ['Leeds'], 'cy': ['Leeds'], 'en-ca': ['Leeds'], 'en-gb': ['Leeds'], 'gd': ['Leeds']}",['Leeds'],['Q163'],"{'Q145': ('', '')}",53.7975,-1.543611
+Q1466941,Leeds railway station,"['Q55488', 'Q7886778']","{'en': ['Leeds City station', 'Leeds station', 'Leeds City railway station', 'Leeds railway station'], 'it': ['stazione di Leeds City', 'stazione di Leeds'], 'de': ['Leeds City Station'], 'cy': ['Gorsaf reilffordd Leeds'], 'fr': ['gare de Leeds'], 'nl': ['station Leeds'], 'pl': ['Leeds City'], 'ro': ['gara Leeds']}",,['Q163'],"{'Q145': ('', '')}",53.794,-1.547
+Q1128631,Leeds United F.C.,['Q476028'],"{'it': ['Leeds United', 'Leeds United AFC', 'Leeds United FC', 'Leeds United Football Club', 'Leeds United A.F.C.', 'Leeds United Association Football Club', 'Leeds United F.C.'], 'ga': ['Leeds United A.F.C', 'Leeds United', 'Leeds United F.C.', 'Leeds United A.F.C.', 'Leeds United Association Football Club'], 'es': ['Leeds United F.C.', 'Leeds United FC', 'Leeds United', 'Leeds United A.F.C.', 'Leeds United Football Club', 'Leeds United A F C', 'Leeds United F C', 'Leeds United A F.C.', 'Leeds United A F C.', 'Leeds United F C.', 'Leeds United Association Football Club'], 'pt': ['Leeds United FC', 'Leeds FC', 'Leeds United', 'Leeds fc', 'Leeds united', 'Leeds united afc', 'Leeds united fc', 'Leeds United AFC', 'Leeds united association football club', 'Leeds United A.F.C.', 'Leeds United F.C.', 'Leeds United Association Football Club'], 'tr': ['Leeds United F.C.', 'Leeds United A.F.C.', 'Leeds United', 'Leeds United FC', 'Leeds United Association Football Club', 'Leeds United Football Club', 'The Whites', 'Leeds United AFC'], 'ro': ['Leeds United', 'Leeds United A.F.C.', 'Leeds United F.C.', 'Leeds united afc', 'Leeds United AFC'], 'pl': ['Leeds United', 'Leeds United F.C.', 'Leeds United Association Football Club', 'The Peacocks', 'The Whites', 'Leeds United A.F.C.'], 'fr': ['Leeds United FC', 'Leeds United A.Football Club', 'Leeds United Football Club', 'Leeds United Association Football Club', 'Leeds United AFC', 'Leeds United'], 'de': ['Leeds United F.C.', 'Leeds United A.F.C.', 'Leeds United LFC', 'Leeds United'], 'nl': ['Leeds United', 'Leeds United FC', 'Leeds United A.F.C.', 'Leeds United Football Club', 'Leeds United AFC'], 'uk': ['Лідс (футбольний клуб)', 'Лідс (ФК)', 'Лідс Юнайтед'], 'en': ['Leeds United Football Club', 'Leeds United FC', 'Leeds United', 'Leeds United Association Football Club', 'Leeds United A.F.C.', 'Leeds United AFC', 'Leeds', 'United', 'The Whites', 'The Peacocks', 'Leeds United F.C.'], 
'sco': ['Leeds Unitit A.F.C.'], 'cy': ['Leeds United A.F.C.']}",['Leeds United F.C.'],[],"{'Q145': ('', '')}",53.777778,-1.572222
+Q774015,Leeds,"['Q21503295', 'Q1002812']","{'en': ['City of Leeds', 'City and Borough of Leeds', 'Leeds'], 'en-gb': ['City of Leeds', 'Leeds'], 'nl': ['City of Leeds'], 'de': ['City of Leeds'], 'fr': ['cité de Leeds'], 'it': ['City of Leeds'], 'pl': ['City of Leeds'], 'tr': ['Leeds Şehri'], 'en-ca': ['City of Leeds'], 'uk': ['Сіті-оф-Лідс'], 'cy': ['Dinas Leeds'], 'ga': ['Leeds']}",,[],"{'Q145': ('', '')}",53.799167,-1.549167
+Q503424,University of Leeds,"['Q62078547', 'Q5341295']","{'pl': ['Uniwersytet Leeds', 'Leeds University', 'University of Leeds'], 'fr': ['Leeds University', 'University of Leeds', 'université de Leeds'], 'es': ['University of Leeds', 'Universidad de Leeds'], 'de': ['Universität Leeds', 'University of Leeds'], 'nl': ['University of Leeds', 'Leeds University', 'Universiteit van Leeds'], 'pt': ['University of leeds', 'Universidade de Leeds'], 'ro': ['Universitatea Leeds', 'Universitatea din Leeds', 'University of Leeds'], 'uk': ['Лідський університет', 'Університет Лідса'], 'en': ['Leeds University', 'University of Leeds'], 'en-gb': ['Leeds University', 'University of Leeds'], 'en-ca': ['Leeds University', 'University of Leeds'], 'tr': ['Leeds Üniversitesi'], 'sco': ['Varsity o Leeds'], 'it': ['Università di Leeds'], 'cy': ['Prifysgol Leeds'], 'gd': ['Oilthigh Leeds'], 'ga': ['Ollscoil Leeds']}",,['Q163'],"{'Q145': ('', '')}",53.807222,-1.551667
+Q1137962,Reading and Leeds Festivals,['Q868557'],"{'pt': ['Reading Festival', 'Reading and leeds festival', 'Reading and leeds festivals', 'Leeds Festival', 'Festivais de Reading e Leeds'], 'fr': ['Reading Festival', 'Leeds Festival', 'Festival de Reading', 'Reading and Leeds Festivals'], 'es': ['Reading Festival', 'Festival de Reading y Leeds', 'Reading and Leeds Festivals', 'Leeds Festival', 'Festival de Leeds', 'Carling Weekend', 'Festival de Reading', 'Festivales de Reading y Leeds'], 'it': ['Reading Festival', 'Reading e Leeds festivals', 'Festival di Leeds', 'Festival di Reading', 'Reading and Leeds Festival', 'Leeds Festival', 'Carling Weekend', 'Reading/Leeds Festival', 'Festival di Reading e Leeds'], 'de': ['Reading Festival', 'Reading and Leeds Festivals'], 'en': ['Reading Festival', 'Leeds Festival', 'Reading Music Festival', 'Reading Music Fest', 'Reading Fest', 'Reading and Leeds Fest', 'Reading and Leeds Festivals'], 'nl': ['Reading en Leeds Festivals'], 'uk': ['Фестивалі Редінг і Лідс']}",,[],"{'Q145': ('', '')}",51.467222,-1.011944
+Q6515934,Leeds City bus station,['Q494829'],{'en': ['Leeds City bus station']},,['Q163'],"{'Q145': ('', '')}",53.7969,-1.53528
+Q4834918,BBC Radio Leeds,['Q14350'],"{'en': ['BBC Radio Leeds'], 'pl': ['BBC Radio Leeds'], 'en-ca': ['BBC Radio Leeds'], 'en-gb': ['BBC Radio Leeds']}",,[],"{'Q145': ('', '')}",53.797389,-1.533833
+Q7721041,The Calls,"['Q79007', 'Q123705']","{'en': ['The Calls'], 'ga': ['The Calls'], 'nl': ['The Calls'], 'fr': ['The Calls']}",,['Q163'],"{'Q145': ('', '')}",53.794,-1.538
+Q482468,Leeds,['Q15127012'],"{'en': ['Leeds, Utah', 'Leeds, UT', 'Leeds'], 'pt': ['Leeds'], 'es': ['Leeds'], 'nl': ['Leeds'], 'de': ['Leeds'], 'it': ['Leeds'], 'pl': ['Leeds'], 'fr': ['Leeds'], 'uk': ['Лідс'], 'cy': ['Leeds, Utah']}",,[],"{'Q30': ('', '')}",37.239444,-113.360833
+Q2460124,Leeds,"['Q532', 'Q1115575']","{'en': ['Leeds, Kent', 'Leeds'], 'cy': ['Leeds, Caint', 'Leeds'], 'nl': ['Leeds'], 'pl': ['Leeds'], 'de': ['Leeds'], 'fr': ['Leeds'], 'it': ['Leeds (Kent)'], 'tr': ['Leeds, Kent'], 'ga': ['Leeds']}",,['Q67479626'],"{'Q145': ('', '')}",51.246311,0.606631
+Q79869,Leeds,['Q1093829'],"{'en': ['Leeds, Alabama', 'Leeds, AL', 'Leeds'], 'es': ['Leeds (Alabama)', 'Leeds'], 'pt': ['Leeds'], 'uk': ['Лідс'], 'it': ['Leeds'], 'de': ['Leeds'], 'nl': ['Leeds'], 'fr': ['Leeds'], 'pl': ['Leeds (Alabama)'], 'tr': ['Leeds'], 'cy': ['Leeds, Alabama'], 'ga': ['Leeds']}",,[],"{'Q30': ('', '')}",33.545592,-86.557388
+Q746876,Leeds Castle,"['Q2087181', 'Q1343246', 'Q23413']","{'de': ['Leeds Castle'], 'en': ['Leeds Castle'], 'es': ['Castillo de Leeds'], 'fr': ['château de Leeds'], 'it': ['castello di Leeds'], 'nl': ['Leeds Castle'], 'pl': ['Zamek Leeds'], 'pt': ['Castelo de Leeds'], 'uk': ['Лідс'], 'ga': ['Caisleán Leeds']}",,['Q67479626'],"{'Q145': ('', '')}",51.2491,0.630411
+Q6515805,Leeds,"['Q751708', 'Q17343829']","{'en': ['Leeds, Massachusetts', 'Leeds, MA', 'Leeds'], 'fr': ['Leeds'], 'tr': ['Leeds, Massachusetts']}",,[],"{'Q30': ('', '')}",42.3514,-72.6994
+Q3461415,Leeds,"['Q498162', 'Q17343829']","{'en': ['Leeds, NY', 'Leeds'], 'es': ['Leeds'], 'it': ['Leeds'], 'de': ['Leeds'], 'fr': ['Leeds'], 'pl': ['Leeds'], 'uk': ['Лідс'], 'ga': ['Leeds']}",,[],"{'Q30': ('', '')}",42.2533,-73.8967
+Q2365261,Leeds,['Q1093829'],"{'en': ['Leeds, North Dakota', 'Leeds, ND', 'Leeds'], 'nl': ['Leeds'], 'es': ['Leeds'], 'it': ['Leeds'], 'pt': ['Leeds'], 'fr': ['Leeds'], 'pl': ['Leeds'], 'tr': ['Leeds'], 'uk': ['Лідс'], 'cy': ['Leeds, Gogledd Dakota'], 'ga': ['Leeds']}",,[],"{'Q30': ('', '')}",48.289444,-99.438889
+Q21061609,Headingley Cricket Ground,"['Q682943', 'Q483110']","{'en': ['Headingley Carnegie Cricket Stadium', 'Emerald Headingley Cricket Ground', 'Headingley Carnegie Cricket Ground', 'Headingley Cricket Ground'], 'fr': ['Headingley Cricket Ground'], 'en-gb': ['Headingley Cricket Ground'], 'it': ['Headingley Cricket Ground']}",,['Q163'],"{'Q145': ('', '')}",53.8177,-1.58198
+Q7746609,The Leeds Studios,['Q811979'],{'en': ['The Leeds Studios']},,['Q163'],"{'Q145': ('', '')}",53.803366,-1.570106
+Q14875251,Leeds County,['Q4204495'],{'en': ['Leeds County']},,[],"{'Q16': ('', '')}",44.5833,-76.0
+Q6515927,Leeds City Region,['Q618123'],{'en': ['Leeds City Region']},,[],"{'Q145': ('', '')}",53.8,-1.549
+Q5177618,County Borough of Leeds,['Q1137272'],{'en': ['County Borough of Leeds']},,[],"{'Q145': ('', '')}",53.799722,-1.549167
+Q8699,Leeds Bradford Airport,"['Q644371', 'Q94993988']","{'en': ['Leeds Bradford International Airport', 'Leeds Bradford Airport'], 'fr': ['aérodrome de Royaume Uni', 'aéroport international de Leeds-Bradford'], 'ro': ['Aeroportul Leeds Bradford', 'Aeroportul Internațional Leeds Bradford'], 'cy': ['Maes Awyr Rhyngwladol Leeds Bradford'], 'de': ['Leeds Bradford International Airport'], 'es': ['Aeropuerto Internacional de Leeds Bradford'], 'it': ['Aeroporto di Leeds-Bradford'], 'nl': ['Leeds Bradford International Airport'], 'pl': ['Port lotniczy Leeds/Bradford'], 'pt': ['Aeroporto Internacional de Leeds Bradford'], 'uk': ['Лідс-Бредфорд']}",,['Q163'],"{'Q145': ('', '')}",53.865833,-1.660556
+Q27985411,"Leeds, Kansas City",['Q123705'],"{'en': ['Leeds, Kansas City'], 'nl': ['Leeds, Kansas City']}",,[],"{'Q30': ('', '')}",39.055838,-94.508565
+Q6515866,Leeds Central railway station,['Q55488'],"{'en': ['Leeds Central railway station'], 'nl': ['station Leeds Central']}",,['Q163'],"{'Q145': ('', '')}",53.7958,-1.5547
+Q871138,Roman Catholic Diocese of Leeds,['Q3146899'],"{'en': ['Diocese of Leeds', 'Roman Catholic Diocese of Leeds'], 'fr': ['diocèse de Leeds'], 'de': ['Bistum Leeds'], 'it': ['diocesi di Leeds'], 'pl': ['Diecezja Leeds'], 'nl': ['Bisdom Leeds'], 'es': ['diócesis de Leeds'], 'ga': ['Deoise Chaitliceach Leeds']}",,[],"{'Q145': ('', '')}",53.7833,-1.53333
+Q4763489,Anglican Diocese of Leeds,['Q18917976'],"{'en': ['Diocese of Leeds', 'Diocese of West Yorkshire and the Dales', 'Anglican Diocese of Leeds'], 'en-gb': ['Diocese of West Yorkshire and the Dales', 'Anglican Diocese of Leeds'], 'it': ['diocesi anglicana di Leeds', 'diocesi di Leeds'], 'pl': ['Diecezja Leeds'], 'es': ['Diócesis anglicana de Leeds'], 'de': ['Anglikanische Diözese Leeds'], 'nl': ['Bisdom Leeds']}",,[],"{'Q145': ('', '')}",53.7998,-1.5305
+Q4871546,Battle of Leeds,['Q178561'],"{'en': ['Battle of Leeds'], 'es': ['Batalla de Leeds'], 'it': ['Battaglia di Leeds']}",,[],"{'Q145': ('', '')}",53.7969,-1.5424
+Q24896243,Elland Road Greyhound Stadium,"['Q483110', 'Q45290083']",{'en': ['Elland Road Greyhound Stadium']},,['Q163'],"{'Q145': ('', '')}",53.775,-1.575
+Q1187032,Headingley Stadium,['Q1076486'],"{'de': ['Headingley-Carnegie-Stadion', 'Headingley Carnegie-Stadion', 'Headingley Carnegie Stadium', 'Headingley-Stadion', 'Headingley Stadium'], 'fr': ['Headingley Stadium', 'Headingley Carnegie Stadium'], 'en': ['Headingley Stadium'], 'nl': ['Headingley Stadium']}",,['Q163'],"{'Q145': ('', '')}",53.817661,-1.581978
+Q489255,Sioux City,"['Q1093829', 'Q62049']","{'en': ['Sioux City, Iowa', 'Sioux City, IA', 'Sioux City'], 'es': ['Sioux City (Iowa)', 'Sioux City'], 'pl': ['Sioux City'], 'gd': ['Sioux City'], 'fr': ['Sioux City'], 'it': ['Sioux City'], 'de': ['Sioux City'], 'kw': ['Sioux City'], 'nl': ['Sioux City'], 'pt': ['Sioux City'], 'cy': ['Sioux City'], 'uk': ['Су-Сіті'], 'sco': ['Sioux City'], 'tr': ['Sioux City'], 'ga': ['Sioux City']}",['Sioux City'],[],"{'Q30': ('', '')}",42.498056,-96.395556
+Q3228965,Leeds Arts University,"['Q383092', 'Q5341295']","{'fr': [""Ecole d'Art de Leeds"", 'Leeds School of Art', 'Leeds college of art and design', 'Leeds College Of Art And Design', ""École d'Art de Leeds"", 'Leeds College of Arts', 'Leeds College of Art and Design'], 'en': ['Jacob Kramer College', 'Leeds College Of Art And Design', 'Leeds College of Art', 'Leeds Arts University'], 'en-gb': ['Jacob Kramer College', 'Leeds College Of Art And Design', 'Leeds College of Art', 'Leeds Arts University'], 'es': ['Leeds College of Art'], 'de': ['Leeds College of Art'], 'nl': ['Leeds Arts University'], 'it': ['Leeds Arts University']}",,['Q163'],"{'Q145': ('', '')}",53.8084,-1.5517
+Q209266,Leeds,['Q15127012'],"{'en': ['Leeds, ME', 'Leeds'], 'es': ['Leeds (Maine)', 'Leeds'], 'fr': ['Leeds'], 'nl': ['Leeds'], 'de': ['Leeds'], 'it': ['Leeds'], 'pl': ['Leeds'], 'uk': ['Лідс'], 'cy': ['Leeds, Maine'], 'tr': ['Leeds, Maine']}",,[],"{'Q30': ('', '')}",44.303333,-70.119167
+Q42448,Sheffield,"['Q515', 'Q1549591', 'Q7897276']","{'es': ['Steel City', 'Sheffield'], 'uk': ['Шефілд', 'Шеффілд'], 'it': ['City of Sheffield', 'Sheffield'], 'en': ['Sheffield, South Yorkshire', 'Sheffield, England', 'Sheffield'], 'pl': ['Sheffield'], 'ga': ['Sheffield'], 'nl': ['Sheffield'], 'pt': ['Sheffield'], 'tr': ['Sheffield'], 'sco': ['Sheffield'], 'fr': ['Sheffield'], 'de': ['Sheffield'], 'ro': ['Sheffield'], 'cy': ['Sheffield'], 'en-ca': ['Sheffield'], 'en-gb': ['Sheffield'], 'gd': ['Sheffield'], 'kw': ['Sheffield']}",['Sheffield'],['Q163'],"{'Q145': ('', '')}",53.380833,-1.470278
+Q7492778,Sheffield Victoria railway station,['Q55488'],"{'en': ['Sheffield Victoria railway station'], 'nl': ['station Sheffield Victoria']}",,['Q163'],"{'Q145': ('', '')}",53.3875,-1.45876
+Q7492565,"Sheffield, Cornwall",['Q532'],"{'cy': ['Sheffield, Cernyw', 'Sheffield'], 'en': ['Sheffield, Cornwall'], 'pl': ['Sheffield'], 'fr': ['Sheffield'], 'nl': ['Sheffield, Cornwall'], 'ga': ['Sheffield, Corn na Breataine']}",,['Q23148'],"{'Q145': ('', '')}",50.0868,-5.555
+Q1862179,Sheffield station,['Q55488'],"{'en': ['Pond Street', 'Sheffield Midland', 'Sheffield', 'Sheffield Station And Attached Bridges And Platform Bridges', 'Sheffield railway station', 'Sheffield station'], 'nl': ['Station Sheffield', 'station Sheffield'], 'de': ['Sheffield station', 'Bahnhof Sheffield'], 'cy': ['Gorsaf reilffordd Sheffield'], 'fr': ['gare de Sheffield'], 'pl': ['Sheffield'], 'it': ['stazione di Sheffield'], 'en-ca': ['Sheffield station'], 'en-gb': ['Sheffield station'], 'ro': ['gara Sheffield']}",,['Q163'],"{'Q145': ('', '')}",53.377778,-1.462222
+Q823917,University of Sheffield,"['Q62078547', 'Q5341295']","{'fr': ['Universite de Sheffield', 'University of Sheffield', 'université de Sheffield'], 'tr': ['University of Sheffield', 'Sheffield Üniversitesi'], 'ro': ['University of Sheffield', 'Universitatea Sheffield'], 'it': ['University of Sheffield', 'Università di Sheffield'], 'de': ['University of Sheffield', 'Universität Sheffield'], 'nl': ['University of Sheffield', 'Universiteit van Sheffield'], 'en': ['Sheffield University', 'The University of Sheffield', 'University of Sheffield'], 'en-gb': ['The University of Sheffield', 'University of Sheffield'], 'es': ['University of Sheffield', 'Universidad de Sheffield'], 'pl': ['University of Sheffield'], 'cy': ['Prifysgol Sheffield'], 'pt': ['Universidade de Sheffield'], 'uk': ['Університет Шеффілда'], 'gd': ['Oilthigh Sheffield'], 'ga': ['Ollscoil Sheffield']}",['University of Sheffield'],['Q163'],"{'Q145': ('', '')}",53.380722,-1.488806
+Q4834926,BBC Radio Sheffield,['Q14350'],"{'en': ['BBC Radio Sheffield'], 'pl': ['BBC Radio Sheffield'], 'en-ca': ['BBC Radio Sheffield'], 'en-gb': ['BBC Radio Sheffield'], 'it': ['BBC Radio Sheffield'], 'fr': ['BBC Radio Sheffield']}",,[],"{'Q145': ('', '')}",53.3759,-1.4668
+Q17643392,Manor Lodge,['Q811979'],"{'en': ['Manor House', 'Manor Lodge'], 'de': ['Sheffield Manor'], 'nl': ['Sheffield Manor'], 'ga': ['Sheffield Manor'], 'es': ['Sheffield Manor Lodge']}",,['Q23436'],"{'Q145': ('', '')}",53.3739,-1.43717
+Q2306176,Sheffield,['Q15127012'],"{'en': ['Sheffield, Massachusetts', 'Sheffield, MA', 'Sheffield'], 'es': ['Sheffield'], 'it': ['Sheffield'], 'nl': ['Sheffield'], 'fr': ['Sheffield'], 'de': ['Sheffield, MA'], 'pl': ['Sheffield'], 'cy': ['Sheffield, Massachusetts'], 'uk': ['Шеффілд'], 'pt': ['Sheffield'], 'tr': ['Sheffield, Massachusetts']}",,[],"{'Q30': ('', '')}",42.110278,-73.355556
+Q897533,Bramall Lane,"['Q1154710', 'Q45290083', 'Q45290083']","{'fr': ['Bramall Lane'], 'en': ['Bramall Lane'], 'pt': ['Bramall Lane'], 'de': ['Bramall Lane'], 'it': ['Bramall Lane'], 'pl': ['Bramall Lane'], 'nl': ['Bramall Lane'], 'es': ['Bramall Lane'], 'uk': ['Бремолл Лейн'], 'tr': ['Bramall Lane'], 'ro': ['Bramall Lane']}",,['Q163'],"{'Q145': ('', '')}",53.370278,-1.470833
+Q7492570,Sheffield,['Q17343829'],"{'en': ['Sheffield, Texas', 'Sheffield, TX', 'Sheffield'], 'de': ['Sheffield'], 'nl': ['Sheffield, Texas']}",,[],"{'Q30': ('', '')}",30.6906,-101.823
+Q1950928,Sheffield,['Q2154459'],"{'en': ['Sheffield, VT', 'Sheffield'], 'de': ['Sheffield'], 'en-ca': ['Sheffield, Vermont'], 'en-gb': ['Sheffield, Vermont'], 'es': ['Sheffield'], 'fr': ['Sheffield'], 'pl': ['Sheffield'], 'it': ['Sheffield'], 'uk': ['Шеффілд'], 'cy': ['Sheffield, Vermont'], 'tr': ['Sheffield, Vermont']}",,[],"{'Q30': ('', '')}",44.642322,-72.127616
+Q2277715,Sheffield,"['Q3957', 'Q98433835']","{'nl': ['Sheffield, Tasmania', 'Sheffield'], 'en': ['Sheffield, Tasmania', 'Sheffield, Tasmania, Australia', 'Sheffield'], 'de': ['Sheffield (Tasmanien)', 'Sheffield'], 'it': ['Sheffield'], 'fr': ['Sheffield'], 'pl': ['Sheffield (Tasmania)']}",,[],"{'Q408': ('', '')}",-41.382222,146.324722
+Q79568,Sheffield,['Q1093829'],"{'en': ['Sheffield, Alabama', 'Sheffield, AL', 'Sheffield'], 'es': ['Sheffield (Alabama)', 'Sheffield'], 'pt': ['Sheffield'], 'fr': ['Sheffield'], 'uk': ['Шеффілд'], 'it': ['Sheffield'], 'de': ['Sheffield, AL'], 'nl': ['Sheffield'], 'pl': ['Sheffield (Alabama)'], 'tr': ['Sheffield'], 'cy': ['Sheffield, Alabama'], 'ga': ['Sheffield']}",['Sheffield'],[],"{'Q30': ('', '')}",34.759721,-87.694592
+Q518864,Sheffield,['Q751708'],"{'en': ['Sheffield, Illinois', 'Sheffield, IL', 'Sheffield'], 'nl': ['Sheffield'], 'es': ['Sheffield'], 'pt': ['Sheffield'], 'de': ['Sheffield, IL'], 'fr': ['Sheffield'], 'it': ['Sheffield'], 'pl': ['Sheffield (Illinois)'], 'uk': ['Шеффілд'], 'cy': ['Sheffield, Illinois']}",,[],"{'Q30': ('', '')}",41.3558,-89.7367
+Q7492591,Sheffield Blitz,,{'en': ['Sheffield Blitz']},,[],{},53.383333,-1.466667
+Q7492775,Sheffield Township,['Q9035798'],"{'en': ['Sheffield Township, Pennsylvania', 'Township of Sheffield', 'Sheffield Township'], 'es': ['Municipio de Sheffield (condado de Warren, Pensilvania)'], 'de': ['Sheffield Township'], 'uk': ['Шеффілд Тауншип'], 'cy': ['Sheffield Township, Pennsylvania']}",,[],"{'Q30': ('', '')}",41.624722,-78.983056
+Q741640,Wheel of Sheffield,['Q202570'],{'en': ['Wheel of Sheffield']},,[],"{'Q145': ('', '')}",53.381,-1.4699
+Q7492686,Sheffield Interchange,['Q494829'],{'en': ['Sheffield Interchange']},,['Q163'],"{'Q145': ('', '')}",53.3812,-1.46451
+Q3577611,Sheffield Lock,['Q105731'],"{'en': ['Sheffield (or Shenfield) Lock', 'Sheffield Lock At Su 648706', 'Sheffield Lock'], 'fr': ['écluse de Sheffield']}",,['Q67284726'],"{'Q145': ('', '')}",51.4307,-1.06927
+Q12956644,Sheffield,"['Q1002812', 'Q21503295']","{'en': ['City of Sheffield', 'Sheffield'], 'pl': ['City of Sheffield'], 'en-gb': ['Sheffield'], 'fr': ['Sheffield'], 'cy': ['Dinas Sheffield'], 'uk': ['Шеффілд'], 'ga': ['Sheffield']}",,[],"{'Q145': ('', '')}",53.41667,-1.5
+Q547824,HMS Sheffield,"['Q2607934', 'Q852190']","{'pl': ['HMS Sheffield'], 'de': ['HMS Sheffield'], 'en': ['HMS Sheffield'], 'es': ['HMS Sheffield'], 'fr': ['HMS Sheffield'], 'it': ['HMS Sheffield'], 'nl': ['HMS Sheffield'], 'pt': ['HMS Sheffield (D80)'], 'ga': ['HMS Sheffield']}",,[],{},-53.066667,-56.933333
+Q7492719,Sheffield Parish,['Q3252927'],"{'en': ['Sheffield Parish, New Brunswick', 'Sheffield Parish'], 'fr': ['paroisse de Sheffield'], 'uk': ['Шеффілд (парафія, Нью-Брансвік)']}",,[],"{'Q16': ('', '')}",45.9955,-66.2224
+Q7492566,Sheffield,['Q3257686'],"{'en': ['Sheffield, New Zealand', 'Sheffield'], 'nl': ['Sheffield, New Zealand'], 'fr': ['Sheffield (Nouvelle-Zélande)']}",,[],"{'Q664': ('', '')}",-43.388889,172.018056
+Q7492567,Sheffield,['Q56885635'],{'en': ['Sheffield']},,[],"{'Q16': ('', '')}",43.324,-80.201
+Q4523493,Sheffield urban area,['Q702492'],"{'en': ['Sheffield urban area'], 'uk': ['Шеффілд (міська агломерація)']}",,['Q163'],"{'Q145': ('', '')}",53.395,-1.455
+Q3028626,Diocese of Sheffield,['Q18917976'],"{'fr': ['diocèse de Sheffield'], 'en': ['Diocese of Sheffield'], 'pl': ['Diecezja Sheffield'], 'de': ['Diözese von Sheffield'], 'nl': ['Bisdom Sheffield']}",,[],"{'Q145': ('', '')}",53.382,-1.47
+Q7492607,Sheffield city centre,['Q738570'],"{'en': ['Sheffield city centre'], 'ga': ['Sheffield Lár na Cathrach']}",,['Q163'],"{'Q145': ('', '')}",53.3814,-1.4746
+Q3365926,Sheffield,['Q486972'],"{'fr': ['Sheffield (nouveau-brunswick)', 'Sheffield (Nouveau-Brunswick)', 'Sheffield'], 'en': ['Sheffield, New Brunswick', 'Sheffield'], 'nl': ['Sheffield, New Brunswick']}",,[],"{'Q16': ('', '')}",45.883,-66.3
+Q7492568,Sheffield,['Q17343829'],"{'en': ['Sheffield, North Carolina', 'Sheffield, NC', 'Sheffield'], 'es': ['Sheffield (Carolina del Norte)'], 'fr': ['Sheffield']}",,[],"{'Q30': ('', '')}",35.966428,-80.680081
+Q108940076,Sheffield,['Q498162'],"{'en': ['Sheffield (CDP), Vermont', 'Sheffield'], 'ga': ['Sheffield']}",,[],"{'Q30': ('', '')}",44.601944,-72.114167
+Q1184547,Sheffield,['Q498162'],"{'en': ['Sheffield, Pennsylvania', 'Sheffield, PA', 'Sheffield'], 'es': ['Sheffield, Pensilvania', 'Sheffield'], 'pt': ['Sheffield'], 'nl': ['Sheffield'], 'de': ['Sheffield'], 'fr': ['Sheffield'], 'it': ['Sheffield'], 'uk': ['Шеффілд'], 'ga': ['Sheffield']}",,[],"{'Q30': ('', '')}",41.7042,-79.0339
+Q1984238,Sheffield,['Q751708'],"{'en': ['Sheffield, Ohio', 'Sheffield Village', 'Sheffield Village, Ohio', 'Sheffield, OH', 'Sheffield'], 'es': ['Sheffield (Ohio)', 'Sheffield'], 'nl': ['Sheffield'], 'pt': ['Sheffield'], 'de': ['Sheffield'], 'fr': ['Sheffield'], 'it': ['Sheffield'], 'pl': ['Sheffield (Ohio)'], 'uk': ['Шеффілд'], 'cy': ['Sheffield, Ohio']}",,[],"{'Q30': ('', '')}",41.4481,-82.0833
diff --git a/tests/sample_files/resources/wikidata/wikidata_to_mentions_normalized.json b/tests/sample_files/resources/wikidata/wikidata_to_mentions_normalized.json
new file mode 100644
index 00000000..29f2ffde
--- /dev/null
+++ b/tests/sample_files/resources/wikidata/wikidata_to_mentions_normalized.json
@@ -0,0 +1,1933 @@
+{
+    "Q123885": {
+        "": 0.0005159958720330237,
+        "Royal Society": 0.785345717234262,
+        "London": 0.001547987616099071,
+        "Royal Society of London": 0.07739938080495355,
+        "Fellow": 0.0005159958720330237,
+        "Foreign Member of the Royal Society": 0.0005159958720330237,
+        "Fellow of the Royal Society": 0.012383900928792569,
+        "Presidency": 0.0005159958720330237,
+        "Royal": 0.0010319917440660474,
+        "FRS": 0.08513931888544891,
+        "Royal Academy of Sciences": 0.0005159958720330237,
+        "The Royal Society of London for Improving Natural Knowledge": 0.0020639834881320948,
+        "FRS(For)": 0.0036119711042311656,
+        "British Royal Society": 0.001547987616099071,
+        "Foreign Member of the Royal Society (ForMemRS)": 0.0010319917440660474,
+        "Royal Society of England": 0.001547987616099071,
+        "F.R.S.": 0.0020639834881320948,
+        "Royal Society in London": 0.0005159958720330237,
+        "Fellowship of the Royal Society": 0.0005159958720330237,
+        "Royal Societies": 0.0010319917440660474,
+        "FRS (For)": 0.0005159958720330237,
+        "The Royal Society": 0.007739938080495355,
+        "ForMemRS": 0.0020639834881320948,
+        "FFRS": 0.0010319917440660474,
+        "Royal Society of Great Britain": 0.0005159958720330237,
+        "British Academy of Science": 0.0005159958720330237,
+        "Royal Society of London for Improving Natural Knowledge": 0.0010319917440660474,
+        "FRS (FOR)": 0.0005159958720330237,
+        "FRSF": 0.0005159958720330237,
+        "the Royal Society": 0.0025799793601651187,
+        "UK Royal Society": 0.0005159958720330237,
+        "its British counterpart": 0.0005159958720330237,
+        "Royal Academy of Science": 0.0005159958720330237,
+        "scientific institution": 0.0005159958720330237,
+        "The Royal Society of London": 0.0005159958720330237,
+        "Society of London for Improving Natural Knowledge": 0.0005159958720330237,
+        "FRSFor": 0.0005159958720330237
+    },
+    "Q734547": {
+        "North": 0.008426966292134831,
+        "north": 0.011235955056179775,
+        "London": 0.0028089887640449437,
+        "North London": 0.7780898876404494,
+        "north London": 0.1797752808988764,
+        "north west London": 0.0028089887640449437,
+        "North West London": 0.011235955056179775,
+        "North London's": 0.0028089887640449437,
+        "Northern London": 0.0028089887640449437
+    },
+    "Q1394500": {
+        "South": 0.007978723404255319,
+        "southeast": 0.007978723404255319,
+        "south": 0.007978723404255319,
+        "south-east": 0.005319148936170213,
+        "London": 0.0026595744680851063,
+        "South London": 0.7819148936170213,
+        "south London": 0.14627659574468085,
+        "South East": 0.007978723404255319,
+        "South East London": 0.013297872340425532,
+        "south east": 0.0026595744680851063,
+        "south west London": 0.0026595744680851063,
+        "South West London": 0.005319148936170213,
+        "south-east London": 0.0026595744680851063,
+        "South London, England": 0.0026595744680851063,
+        "southeast London": 0.0026595744680851063
+    },
+    "Q338466": {
+        "medieval": 0.03333333333333333,
+        "London": 0.13333333333333333,
+        "Saxon": 0.03333333333333333,
+        "Anglo-Saxon settlement": 0.03333333333333333,
+        "Lundenwic": 0.5333333333333333,
+        "Lundenburh": 0.16666666666666666,
+        "Lundenburgh": 0.03333333333333333,
+        "Lundenwick": 0.03333333333333333
+    },
+    "Q927198": {
+        "Roman": 0.01948051948051948,
+        "Augusta": 0.006493506493506494,
+        "Roman times": 0.006493506493506494,
+        "London": 0.01948051948051948,
+        "Londinium": 0.8051948051948052,
+        "settlement by the Romans": 0.006493506493506494,
+        "Lundonia": 0.006493506493506494,
+        "Roman London": 0.05844155844155845,
+        "Londinium's": 0.006493506493506494,
+        "roughly 1600 years old": 0.006493506493506494,
+        "Londonium": 0.006493506493506494,
+        "London citadel": 0.006493506493506494,
+        "governor's palace": 0.012987012987012988,
+        "Londinium (London)": 0.006493506493506494,
+        "\"Londinium\"": 0.006493506493506494,
+        "Roman city of London": 0.006493506493506494,
+        "Londinium Augusta": 0.006493506493506494,
+        "Caer Lundein": 0.006493506493506494
+    },
+    "Q8111": {
+        "Olympic Games": 0.011029411764705881,
+        "London": 0.007352941176470588,
+        "1908": 0.11029411764705882,
+        "1908 Olympics in London": 0.003676470588235294,
+        "1908 Olympics": 0.04044117647058824,
+        "1908 Summer Olympics": 0.6617647058823529,
+        "London Olympic Games": 0.01838235294117647,
+        "London in 1908": 0.003676470588235294,
+        "1908 London Games": 0.007352941176470588,
+        "1908 Games": 0.011029411764705881,
+        "1908 London Olympics": 0.029411764705882353,
+        "1908 Olympic Games": 0.044117647058823525,
+        "London Olympics": 0.007352941176470588,
+        "1908 Summer Olympic Games": 0.007352941176470588,
+        "Olympic Marathon": 0.003676470588235294,
+        "Rome 1908": 0.003676470588235294,
+        "the 1908 Games": 0.003676470588235294,
+        "London 1908": 0.003676470588235294,
+        "London Olympics in 1908": 0.003676470588235294,
+        "London (1908)": 0.003676470588235294,
+        "Olympics in 1908": 0.003676470588235294,
+        "London games in 1908": 0.003676470588235294,
+        "1908 games": 0.003676470588235294,
+        "summer Olympics held in London in July 1908": 0.003676470588235294
+    },
+    "Q8577": {
+        "Olympic Games": 0.00495458298926507,
+        "2008": 0.0008257638315441783,
+        "2012": 0.03550784475639967,
+        "Olympic": 0.007431874483897605,
+        "Summer Olympics": 0.005780346820809248,
+        "London": 0.00495458298926507,
+        "Olympics": 0.0016515276630883566,
+        "games": 0.0008257638315441783,
+        "London Olympics 2012": 0.005780346820809248,
+        "2012 Summer Olympics": 0.4921552436003303,
+        "2012 London Olympics": 0.09578860445912468,
+        "2012 Summer Olympic Games": 0.00495458298926507,
+        "2012 Olympics": 0.061932287365813375,
+        "London 2012 Olympic Games": 0.021469859620148635,
+        "London in 2012": 0.0008257638315441783,
+        "London 2012 Olympics": 0.02972749793559042,
+        "Games": 0.0008257638315441783,
+        "2012 Olympic Games": 0.06028075970272502,
+        "London 2012 Summer Olympics": 0.010734929810074317,
+        "2012 Olympic Park": 0.0008257638315441783,
+        "Olympics Games": 0.0008257638315441783,
+        "London 2012 Summer Olympic Committee": 0.0008257638315441783,
+        "London Summer Olympics": 0.002477291494632535,
+        "London 2012 Olympic and Paralympic Games": 0.004128819157720892,
+        "In 2012": 0.0008257638315441783,
+        "London 2012": 0.03881090008257638,
+        "London Olympic Games": 0.004128819157720892,
+        "2012 London Games": 0.0016515276630883566,
+        "2012 London Summer Olympics": 0.00495458298926507,
+        "July 2012": 0.0008257638315441783,
+        "Summer Olympics in London": 0.0008257638315441783,
+        "2012 Olympic": 0.012386457473162676,
+        "London Olympics in 2012": 0.0033030553261767133,
+        "2012 games": 0.0016515276630883566,
+        "London Olympics": 0.020644095788604457,
+        "XXX Olympiad in London": 0.0008257638315441783,
+        "2012 Games": 0.00495458298926507,
+        "2012 London Olympic Games": 0.00990916597853014,
+        "2012 Paralympic Games": 0.0008257638315441783,
+        "Olympics in London": 0.0016515276630883566,
+        "Summer Olympics 2012": 0.0008257638315441783,
+        "London Olympic Games 2012": 0.0008257638315441783,
+        "London 2012 Olympic": 0.0008257638315441783,
+        "2012 Olympic gold medalist": 0.0008257638315441783,
+        "London, 2012": 0.0008257638315441783,
+        "2012 London Olympics opening ceremony": 0.0008257638315441783,
+        "2012 Summer Olympics logo": 0.0008257638315441783,
+        "XXX Olympic Games": 0.0008257638315441783,
+        "2012 London Olympics and Paralympics": 0.0008257638315441783,
+        "London 2012 Games": 0.002477291494632535,
+        "2012 Summer Olympic": 0.0008257638315441783,
+        "Olympic Games in London": 0.0008257638315441783,
+        "2012 Summer Olympics in London": 0.0008257638315441783,
+        "2012 Olympian": 0.0016515276630883566,
+        "Olympic Games of 2012": 0.0008257638315441783,
+        "the warm-up": 0.0008257638315441783,
+        "2012 in London": 0.0008257638315441783,
+        "London Games of 2012": 0.0008257638315441783,
+        "London for 2012": 0.0008257638315441783,
+        "Summer Olympics of 2012": 0.0008257638315441783,
+        "2012 London": 0.002477291494632535,
+        "upcoming Summer Olympics": 0.0008257638315441783,
+        "2012 Summer Games": 0.002477291494632535,
+        "the previous Summer Olympic Games": 0.0008257638315441783,
+        "the 2012 London Olympics": 0.0008257638315441783,
+        "2012 Olympic games": 0.0008257638315441783,
+        "Olympic Games in 2012": 0.0008257638315441783,
+        "2012 Olympic and Paralympic Games": 0.0008257638315441783,
+        "Olympics in 2012": 0.0008257638315441783,
+        "2012 London Olympic games": 0.0008257638315441783,
+        "Olympics and Paralympics": 0.0008257638315441783,
+        "London Ambassadors for the Olympic and Paralympic Games": 0.0008257638315441783,
+        "London's 2012": 0.0008257638315441783,
+        "London 2012 Olympic Games.": 0.0008257638315441783,
+        "2012 Olympians": 0.0008257638315441783,
+        "Olympic Games 2012": 0.0008257638315441783,
+        "Olympics 2012": 0.0008257638315441783
+    },
+    "Q194209": {
+        "2012": 0.26666666666666666,
+        "London": 0.06666666666666667,
+        "2012 Summer Olympics": 0.26666666666666666,
+        "2012 Olympics": 0.26666666666666666,
+        "2012 London Summer Olympics": 0.06666666666666667,
+        "2012 Summer Olympics in London": 0.06666666666666667
+    },
+    "Q8699": {
+        "L": 0.013888888888888888,
+        "Leeds": 0.013888888888888888,
+        "LBA": 0.013888888888888888,
+        "Leeds Bradford": 0.2638888888888889,
+        "Leeds Bradford Airport": 0.41666666666666663,
+        "Yeadon": 0.013888888888888888,
+        "Leeds Bradford International Airport": 0.15277777777777776,
+        "eeds\u2013Bradford (Airport)": 0.013888888888888888,
+        "Leeds/Bradford": 0.05555555555555555,
+        "Leeds-Bradford": 0.027777777777777776,
+        "Yeadon Aerodrome": 0.013888888888888888
+    },
+    "Q55018": {
+        "Royal Opera House": 0.5739348370927317,
+        "London": 0.0012531328320802004,
+        "Covent Garden": 0.24436090225563908,
+        "Theatre Royal": 0.0037593984962406013,
+        "English National Opera": 0.0012531328320802004,
+        "Royal Opera": 0.005012531328320802,
+        "Covent Garden Theatre": 0.05388471177944862,
+        "Linbury Theatre": 0.002506265664160401,
+        "Royal Italian Opera, Covent Garden": 0.0012531328320802004,
+        "Royal Opera House, Covent Garden": 0.03759398496240601,
+        "Covent Garden Opera House": 0.006265664160401002,
+        "Covent Garden Opera": 0.002506265664160401,
+        "Theatre Royal, Covent Garden": 0.03007518796992481,
+        "Royal Opera House Covent Garden": 0.0037593984962406013,
+        "the Covent Garden theatre": 0.0012531328320802004,
+        "ROH": 0.0012531328320802004,
+        "the Opera": 0.0012531328320802004,
+        "English Opera House": 0.0012531328320802004,
+        "Linbury Studio Theatre": 0.006265664160401002,
+        "Covent Gardens": 0.0012531328320802004,
+        "The Royal Opera House": 0.006265664160401002,
+        "the Covent Garden Theatre": 0.0012531328320802004,
+        "the Theatre Royal, Covent Garden": 0.0012531328320802004,
+        "Covent Garden opera": 0.0012531328320802004,
+        "the Royal Opera Company": 0.0012531328320802004,
+        "Royal Italian Opera": 0.0037593984962406013,
+        "Orchestra of the Royal Opera House": 0.0012531328320802004,
+        "Covent Garden Company": 0.0012531328320802004,
+        "Royal Italian Opera House, Covent Garden": 0.002506265664160401
+    },
+    "Q2354215": {
+        "central": 0.006211180124223603,
+        "Central": 0.008281573498964804,
+        "London": 0.004140786749482402,
+        "city centre": 0.002070393374741201,
+        "central London": 0.4078674948240166,
+        "Central London": 0.5610766045548654,
+        "central area": 0.004140786749482402,
+        "central area of London": 0.002070393374741201,
+        "centre of London": 0.002070393374741201,
+        "Central London's": 0.002070393374741201
+    },
+    "Q1988417": {
+        "Chinatown": 0.4878048780487805,
+        "London": 0.0975609756097561,
+        "London's Chinatown": 0.17073170731707318,
+        "Gerrard Street": 0.024390243902439025,
+        "Chinatown of London": 0.024390243902439025,
+        "China Town": 0.024390243902439025,
+        "Chinatown, London": 0.07317073170731708,
+        "Chinatown in London": 0.024390243902439025,
+        "London Chinatown": 0.07317073170731708
+    },
+    "Q951830": {
+        "mint": 0.016736401673640166,
+        "Royal Mint": 0.8661087866108785,
+        "London": 0.0041841004184100415,
+        "Mint": 0.0041841004184100415,
+        "mints": 0.008368200836820083,
+        "the Royal Mint": 0.0041841004184100415,
+        "London Mint": 0.0041841004184100415,
+        "Royal Mint's": 0.008368200836820083,
+        "royal mint": 0.012552301255230124,
+        "London mint": 0.0041841004184100415,
+        "the Mint": 0.008368200836820083,
+        "Tower Mint": 0.02092050209205021,
+        "Tower mint": 0.0041841004184100415,
+        "British Royal Mint": 0.0041841004184100415,
+        "The Royal Mint Experience": 0.0041841004184100415,
+        "seized the money held in trust at the mint of the Exchequer": 0.0041841004184100415,
+        "Royal mint": 0.008368200836820083,
+        "English Mint": 0.0041841004184100415,
+        "The Royal Mint": 0.008368200836820083
+    },
+    "Q84": {
+        "Augusta": 6.508721687060662e-05,
+        "Bavaria": 6.508721687060662e-05,
+        "London": 0.9822311897943244,
+        "North London": 0.00019526165061181986,
+        "metropolis": 6.508721687060662e-05,
+        "Londinium": 0.00013017443374121324,
+        "London, England": 0.011845873470450404,
+        "Greater London": 0.0003905233012236397,
+        "East London": 6.508721687060662e-05,
+        "West London": 0.0002603488674824265,
+        "London's": 0.0009763082530590992,
+        "Lundenwic": 6.508721687060662e-05,
+        "Lundenburh": 6.508721687060662e-05,
+        "south London": 6.508721687060662e-05,
+        "Romford": 6.508721687060662e-05,
+        "Chelsea, London": 6.508721687060662e-05,
+        "north London": 6.508721687060662e-05,
+        "London Town": 6.508721687060662e-05,
+        "hometown": 6.508721687060662e-05,
+        "London, United Kingdom": 0.0009112210361884927,
+        "the capital city": 6.508721687060662e-05,
+        "South East London": 6.508721687060662e-05,
+        "Lond.": 6.508721687060662e-05,
+        "Lon\"don\"": 6.508721687060662e-05,
+        "the metropolis": 6.508721687060662e-05,
+        "Lunden": 6.508721687060662e-05,
+        "west London": 6.508721687060662e-05,
+        "North West London": 0.00013017443374121324,
+        "South West London": 6.508721687060662e-05,
+        "London, UK": 0.0006508721687060662,
+        "Lundenburgh": 6.508721687060662e-05,
+        "London in January 1963": 6.508721687060662e-05,
+        "the British capital": 6.508721687060662e-05,
+        "Londoner": 6.508721687060662e-05,
+        "London\u2019s": 6.508721687060662e-05,
+        "court of St James's": 6.508721687060662e-05,
+        "Londres": 0.0002603488674824265,
+        "Londra": 0.0002603488674824265,
+        "big city": 6.508721687060662e-05
+    },
+    "Q8703": {
+        "of the same name": 0.0023584905660377358,
+        "London": 0.0047169811320754715,
+        "Gatwick Airport": 0.46462264150943394,
+        "London Gatwick": 0.1179245283018868,
+        "London Gatwick Airport": 0.1509433962264151,
+        "Gatwick": 0.17452830188679244,
+        "London-Gatwick": 0.03773584905660377,
+        "London\u2013Gatwick": 0.0330188679245283,
+        "Gatwick airport": 0.0047169811320754715,
+        "Airport Shuttle people-mover": 0.0023584905660377358,
+        "Gatwick, London": 0.0023584905660377358,
+        "London Gatwick Airport's": 0.0023584905660377358,
+        "Gatwick Airport shuttle system": 0.0023584905660377358
+    },
+    "Q2716505": {
+        "Stamford Hill": 0.9821428571428571,
+        "London": 0.017857142857142856
+    },
+    "Q123738": {
+        "Hyde Park": 0.8356890459363957,
+        "London": 0.0017667844522968198,
+        "Hyde": 0.0017667844522968198,
+        "Hyde Park, London": 0.1413427561837456,
+        "Hyde Park Gate": 0.0017667844522968198,
+        "London's Hyde Park": 0.007067137809187279,
+        "BST Hyde Park festival": 0.0017667844522968198,
+        "British Summer Time: Hyde Park festival": 0.0017667844522968198,
+        "the park": 0.0017667844522968198,
+        "Hyde-Park": 0.0017667844522968198,
+        "Stanhope Lodge": 0.0017667844522968198,
+        "hyde park": 0.0017667844522968198
+    },
+    "Q205679": {
+        "Hoxton": 0.0036900369003690036,
+        "London": 0.0036900369003690036,
+        "Stoke Newington": 0.0036900369003690036,
+        "London Borough of Hackney": 0.39114391143911437,
+        "Hackney": 0.5571955719557196,
+        "Haggerston": 0.0036900369003690036,
+        "Hackney Wick": 0.0036900369003690036,
+        "De Beauvoir Town": 0.0036900369003690036,
+        "wider modern borough": 0.0036900369003690036,
+        "modern borough of Hackney": 0.0036900369003690036,
+        "Upper Clapton": 0.0036900369003690036,
+        "Hackney, East London": 0.01107011070110701,
+        "Borough of Hackney": 0.0036900369003690036,
+        "that of Hackney": 0.0036900369003690036
+    },
+    "Q20075": {
+        "London Underground": 0.885578876100203,
+        "London": 0.004062288422477996,
+        "lines": 0.0006770480704129993,
+        "tube": 0.017603249830737983,
+        "underground": 0.008124576844955992,
+        "Underground": 0.027758970886932972,
+        "London's": 0.0006770480704129993,
+        "Tube": 0.011509817197020988,
+        "the Underground": 0.002708192281651997,
+        "LU": 0.0006770480704129993,
+        "underground railway": 0.0006770480704129993,
+        "until 1863": 0.0006770480704129993,
+        "London underground": 0.002031144211238998,
+        "tube station": 0.0033852403520649964,
+        "subway train": 0.0006770480704129993,
+        "London Underground's": 0.002031144211238998,
+        "tube stations": 0.004062288422477996,
+        "Underground stations": 0.0006770480704129993,
+        "deep-level tube network": 0.0006770480704129993,
+        "Transport for London roundel": 0.0006770480704129993,
+        "Underground roundel": 0.0033852403520649964,
+        "underground shelters": 0.0006770480704129993,
+        "tube trains": 0.0006770480704129993,
+        "London's underground rail network": 0.0006770480704129993,
+        "Tube roundel": 0.0006770480704129993,
+        "London Underground PPP": 0.0006770480704129993,
+        "tube carriages": 0.0006770480704129993,
+        "London Tube": 0.002031144211238998,
+        "the tube railways in London": 0.0006770480704129993,
+        "London's Underground": 0.0013540961408259986,
+        "London's underground railway system": 0.0006770480704129993,
+        "London Tube station": 0.0006770480704129993,
+        "tube roundel": 0.0006770480704129993,
+        "the Tube": 0.004062288422477996,
+        "underground train": 0.0006770480704129993,
+        "London tube railways": 0.0006770480704129993,
+        "tube-train": 0.0006770480704129993,
+        "sitting on the tube": 0.0006770480704129993,
+        "distinctive red-and-blue roundel": 0.0006770480704129993,
+        "system in London": 0.0006770480704129993,
+        "Tube Station": 0.0006770480704129993,
+        "Londons Underground subway system": 0.0006770480704129993,
+        "LUL": 0.0006770480704129993,
+        "London tube": 0.0006770480704129993,
+        "tube train": 0.0006770480704129993
+    },
+    "Q160302": {
+        "Edinburgh": 0.06723198061780739,
+        "University of Edinburgh": 0.789824348879467,
+        "university": 0.0018170805572380374,
+        "Edinburgh University": 0.12235009085402786,
+        "University": 0.0024227740763173833,
+        "Edinburgh.": 0.0006056935190793458,
+        "Edinburg": 0.0006056935190793458,
+        "Moray House College": 0.0012113870381586917,
+        "The University of Edinburgh": 0.0060569351907934586,
+        "College of Edinburgh": 0.0006056935190793458,
+        "University of Edinburgh's": 0.0012113870381586917,
+        "university of Edinburgh": 0.0024227740763173833,
+        "Edinburgh University Savoy Opera Group": 0.0006056935190793458,
+        "University of Edinburg": 0.0006056935190793458,
+        "Easter Bush": 0.0006056935190793458,
+        "Edinburgh University (1760-62)": 0.0006056935190793458,
+        "the University of Edinburgh": 0.0012113870381586917
+    },
+    "Q2422792": {
+        "metropolitan area": 0.012987012987012988,
+        "London": 0.025974025974025976,
+        "commuter town": 0.012987012987012988,
+        "commuting": 0.012987012987012988,
+        "commuter belt": 0.012987012987012988,
+        "greater metropolitan": 0.012987012987012988,
+        "London commuter belt": 0.5844155844155845,
+        "commutes": 0.025974025974025976,
+        "City of London commuters": 0.012987012987012988,
+        "London metropolitan area": 0.025974025974025976,
+        "stockbroker belt": 0.012987012987012988,
+        "Commuter Belt": 0.03896103896103896,
+        "London Commuter Belt": 0.12987012987012989,
+        "London Metropolitan Area": 0.025974025974025976,
+        "housing with gardens forming suburbs to London": 0.012987012987012988,
+        "commuter and retirement town": 0.012987012987012988,
+        "London commuters": 0.012987012987012988,
+        "London metro area": 0.012987012987012988
+    },
+    "Q171240": {
+        "stock exchange": 0.001466275659824047,
+        "LSE": 0.00439882697947214,
+        "London": 0.01906158357771261,
+        "London Stock Exchange": 0.9413489736070382,
+        "stock market": 0.001466275659824047,
+        "Stock Exchange": 0.01466275659824047,
+        "London financial markets": 0.001466275659824047,
+        "London stock exchange": 0.00879765395894428,
+        "stock-exchange": 0.001466275659824047,
+        "London Stock Exchange's": 0.001466275659824047,
+        "Stock Market": 0.001466275659824047,
+        "London exchange": 0.001466275659824047,
+        "stock exchange listing in London": 0.001466275659824047
+    },
+    "Q743535": {
+        "Chelsea": 0.7292035398230088,
+        "London": 0.0017699115044247787,
+        "Chelsea, London": 0.26194690265486725,
+        "the area in London, England": 0.0017699115044247787,
+        "Chelsea, England": 0.0035398230088495575,
+        "Chelsea Old Town Hall": 0.0017699115044247787
+    },
+    "Q193196": {
+        "University College London": 0.7849025974025975,
+        "London": 0.0008116883116883117,
+        "University of London": 0.0016233766233766235,
+        "London University": 0.007305194805194806,
+        "University College": 0.029220779220779224,
+        "school": 0.0008116883116883117,
+        "UCL": 0.05113636363636364,
+        "University College, London": 0.10633116883116883,
+        "University College London's Special Collections": 0.0008116883116883117,
+        "11 constituent faculties": 0.0008116883116883117,
+        "UCL Press": 0.0016233766233766235,
+        "Center for European Studies": 0.0008116883116883117,
+        "Grant Museum of Zoology": 0.0008116883116883117,
+        "University College of London": 0.006493506493506494,
+        "UCL Art Museum": 0.0008116883116883117,
+        "University College London's": 0.0008116883116883117,
+        "London University College": 0.0016233766233766235,
+        "UCL East": 0.0008116883116883117,
+        "UCL Centre for Decision Making Uncertainty": 0.0008116883116883117,
+        "University College London (UCL)": 0.0008116883116883117,
+        "University College, Gower Street": 0.0008116883116883117
+    },
+    "Q795691": {
+        "Waterloo": 0.18468468468468469,
+        "London": 0.0045045045045045045,
+        "Waterloo Bridge": 0.0022522522522522522,
+        "railway station": 0.0022522522522522522,
+        "London Waterloo": 0.4954954954954955,
+        "London Victoria": 0.0022522522522522522,
+        "Waterloo Station": 0.02702702702702703,
+        "Waterloo station": 0.1554054054054054,
+        "London Waterloo station": 0.08783783783783784,
+        "Waterloo main line station": 0.0022522522522522522,
+        "Waterloo National Rail station": 0.0022522522522522522,
+        "London Waterloo railway station": 0.013513513513513514,
+        "Waterloo main-line station": 0.0022522522522522522,
+        "Waterloo railway station": 0.0045045045045045045,
+        "Waterloo Bridge station": 0.009009009009009009,
+        "retail balcony": 0.0022522522522522522,
+        "Waterloo Bridge Station": 0.0022522522522522522
+    },
+    "Q148349": {
+        "Waterloo": 0.01532567049808429,
+        "London": 0.0038314176245210726,
+        "Lambeth": 0.9195402298850575,
+        "Stockwell Infants School": 0.0038314176245210726,
+        "Stangate": 0.007662835249042145,
+        "Lambeth North": 0.0038314176245210726,
+        "South Lambeth": 0.026819923371647507,
+        "Stockwell Primary School": 0.0038314176245210726,
+        "Lambeth Walk": 0.0038314176245210726,
+        "North Lambeth": 0.007662835249042145,
+        "north Lambeth": 0.0038314176245210726
+    },
+    "Q1359589": {
+        "West End": 0.8505678421996413,
+        "London": 0.004781829049611476,
+        "West End of London": 0.002390914524805738,
+        "West End theatre": 0.07053197848176927,
+        "London's West End": 0.01852958756724447,
+        "London West End": 0.0029886431560071725,
+        "theatre district": 0.0005977286312014345,
+        "West End Theatre": 0.003586371787208607,
+        "theatre area": 0.0005977286312014345,
+        "Theatre District": 0.0005977286312014345,
+        "London stage": 0.0041841004184100415,
+        "Theatreland": 0.0053795576808129105,
+        "West End productions": 0.001195457262402869,
+        "the West End": 0.002390914524805738,
+        "West End shows": 0.0017931858936043035,
+        "The West End": 0.0005977286312014345,
+        "West End drama": 0.0005977286312014345,
+        "London's theatrical world": 0.0005977286312014345,
+        "West End theater": 0.001195457262402869,
+        "West End stage": 0.0053795576808129105,
+        "West End theatre district": 0.001195457262402869,
+        "West End theatre company.": 0.0005977286312014345,
+        "West End revival": 0.0029886431560071725,
+        "West End musical": 0.002390914524805738,
+        "West End theatreland": 0.0005977286312014345,
+        "London's theatre district": 0.0005977286312014345,
+        "West End production": 0.002390914524805738,
+        "List of West End theatres": 0.0005977286312014345,
+        "West End musicals": 0.001195457262402869,
+        "West End London": 0.0005977286312014345,
+        "West End's": 0.0005977286312014345,
+        "London productions": 0.0005977286312014345,
+        "West-end stage": 0.0005977286312014345,
+        "West End hit": 0.0005977286312014345,
+        "London theatre": 0.001195457262402869,
+        "London Theatre": 0.0005977286312014345,
+        "West End.": 0.0005977286312014345,
+        "non-commercial West End theatre": 0.0005977286312014345,
+        "commercial theatre": 0.0005977286312014345,
+        "London theatres": 0.0005977286312014345,
+        "west-end": 0.0005977286312014345,
+        "theatres in the West End of London": 0.0005977286312014345,
+        "London stage production": 0.0005977286312014345
+    },
+    "Q42182": {
+        "Buckingham Palace": 0.9712820512820513,
+        "London": 0.0010256410256410256,
+        "Buckingham": 0.0010256410256410256,
+        "Buckingham House": 0.021538461538461538,
+        "Arlington House": 0.0010256410256410256,
+        "the Palace": 0.0010256410256410256,
+        "Goring House": 0.0010256410256410256,
+        "Buckingham house": 0.0010256410256410256,
+        "Royal household": 0.0010256410256410256
+    },
+    "Q1466941": {
+        "York": 0.006060606060606061,
+        "Leeds": 0.8484848484848485,
+        "Leeds City": 0.01818181818181818,
+        "Leeds railway station": 0.07878787878787878,
+        "Leeds Wellington": 0.006060606060606061,
+        "Wellington Station": 0.006060606060606061,
+        "Leeds Station": 0.006060606060606061,
+        "Leeds Wellington station": 0.006060606060606061,
+        "Leeds Wellington Station": 0.006060606060606061,
+        "Leeds station": 0.012121212121212121,
+        "Leeds First": 0.006060606060606061
+    },
+    "Q6900329": {
+        "bombing": 0.003605769230769231,
+        "London": 0.007211538461538462,
+        "East End": 0.001201923076923077,
+        "the Blitz": 0.4338942307692308,
+        "bombed": 0.003605769230769231,
+        "air raid": 0.004807692307692308,
+        "1940": 0.001201923076923077,
+        "London Blitz": 0.09375,
+        "air raids": 0.007211538461538462,
+        "The Blitz": 0.17908653846153846,
+        "Blitz": 0.12740384615384617,
+        "heavily bombed": 0.001201923076923077,
+        "aerial bombardment": 0.003605769230769231,
+        "bombs": 0.001201923076923077,
+        "German bombing": 0.00841346153846154,
+        "German bombings": 0.002403846153846154,
+        "bombing raid": 0.002403846153846154,
+        "German Luftwaffe bombing": 0.001201923076923077,
+        "blitz": 0.010817307692307692,
+        "London blitz": 0.014423076923076924,
+        "the subsequent Blitz": 0.001201923076923077,
+        "World War Two": 0.001201923076923077,
+        "bombing raids": 0.004807692307692308,
+        "nightly air raids": 0.003605769230769231,
+        "bomb damage": 0.001201923076923077,
+        "bombing London": 0.001201923076923077,
+        "bombings": 0.001201923076923077,
+        "blitzed": 0.002403846153846154,
+        "bombing of London": 0.007211538461538462,
+        "widespread destruction of its infrastructure": 0.001201923076923077,
+        "Nazi bombings of London": 0.001201923076923077,
+        "Operation Loge": 0.001201923076923077,
+        "bombing of the city": 0.001201923076923077,
+        "the 1941 Blitz": 0.001201923076923077,
+        "bombed during World War II": 0.001201923076923077,
+        "heavy aerial bombing": 0.001201923076923077,
+        "The aerial bombing of World War II": 0.001201923076923077,
+        "World War II bomb damage": 0.002403846153846154,
+        "German bombing campaign": 0.001201923076923077,
+        "bombing of the City of London": 0.001201923076923077,
+        "Blitz of 1940\u201341": 0.001201923076923077,
+        "wartime London": 0.001201923076923077,
+        "German bombing campaign of 1940\u201341": 0.001201923076923077,
+        "night bombing": 0.001201923076923077,
+        "1941 London Blitz": 0.001201923076923077,
+        "the German bombing campaign": 0.001201923076923077,
+        "the German bombing campaign over England during World War II": 0.001201923076923077,
+        "bomb damage during the Second World War": 0.001201923076923077,
+        "the Blitz of London": 0.001201923076923077,
+        "London to be razed": 0.001201923076923077,
+        "World War II bombing raid": 0.001201923076923077,
+        "bombing raids across London": 0.001201923076923077,
+        "Nazi bombing of British cities": 0.001201923076923077,
+        "1940s Blitz": 0.001201923076923077,
+        "Nazi German air raids over London": 0.001201923076923077,
+        "German \"Blitz\"": 0.001201923076923077,
+        "heavily": 0.001201923076923077,
+        "wartime bomb damage": 0.001201923076923077,
+        "German incendiary bombs": 0.002403846153846154,
+        "firebombing in the Second World War": 0.001201923076923077,
+        "night-time bombing of Britain": 0.001201923076923077,
+        "Bombing of London": 0.001201923076923077,
+        "attacking the docks and warehouses": 0.001201923076923077,
+        "German campaign of bombings": 0.001201923076923077,
+        "German bombs": 0.001201923076923077,
+        "the London Blitz": 0.002403846153846154,
+        "World War II bombing": 0.001201923076923077,
+        "was bomb-struck": 0.001201923076923077,
+        "aerial bombardment of London": 0.001201923076923077,
+        "German bombing of British cities": 0.001201923076923077,
+        "bombing British cities": 0.001201923076923077,
+        "German bomb": 0.001201923076923077,
+        "German bombing raid": 0.001201923076923077,
+        "German Blitz bombing of London": 0.001201923076923077,
+        "bombings on London": 0.001201923076923077,
+        "bombardment of 29th-30th December 1940": 0.001201923076923077,
+        "damaged by a bomb in 1940": 0.001201923076923077,
+        "aerial bombing of London": 0.001201923076923077,
+        "raid over Britain": 0.001201923076923077,
+        "Second World War bomb damage": 0.001201923076923077,
+        "bound for England": 0.001201923076923077,
+        "London bombing": 0.001201923076923077
+    },
+    "Q985210": {
+        "Victoria": 0.21700879765395895,
+        "London": 0.01759530791788856,
+        "London Victoria station": 0.07624633431085044,
+        "London Victoria": 0.44868035190615835,
+        "Victoria Station": 0.11143695014662756,
+        "London Victoria railway station": 0.002932551319648094,
+        "Victoria, London": 0.002932551319648094,
+        "Victoria station": 0.08797653958944282,
+        "Victoria railway station": 0.011730205278592375,
+        "London (Victoria)": 0.002932551319648094,
+        "London Victoria Station": 0.011730205278592375,
+        "Victoria tube station": 0.00879765395894428
+    },
+    "Q279459": {
+        "Victoria": 0.047619047619047616,
+        "London": 0.07936507936507936,
+        "Victoria Coach Station": 0.6825396825396826,
+        "London Victoria": 0.15873015873015872,
+        "London (Victoria)": 0.015873015873015872,
+        "adjacent national coach station": 0.015873015873015872
+    },
+    "Q23311": {
+        "the City": 0.027230590961761298,
+        "London": 0.033603707995365,
+        "the city": 0.0005793742757821553,
+        "city": 0.0034762456546929316,
+        "City": 0.06836616454229433,
+        "City of London": 0.839513325608343,
+        "London, England": 0.0005793742757821553,
+        "London City": 0.0005793742757821553,
+        "The City": 0.005793742757821553,
+        "The City of London": 0.0017381228273464658,
+        "London city fathers": 0.0005793742757821553,
+        "trading heart of the capital": 0.0005793742757821553,
+        "city of London": 0.006373117033603708,
+        "the City of London": 0.002317497103128621,
+        "City Bars": 0.0005793742757821553,
+        "city workers": 0.0011587485515643105,
+        "St Pauls": 0.0005793742757821553,
+        "old city center": 0.0011587485515643105,
+        "Square Mile": 0.0028968713789107765,
+        "London's financial district": 0.0005793742757821553,
+        "The Square Mile": 0.0005793742757821553,
+        "historic City boundaries": 0.0005793742757821553,
+        "London EC3": 0.0005793742757821553
+    },
+    "Q130206": {
+        "bridge": 0.01090909090909091,
+        "London": 0.007272727272727273,
+        "London Bridge": 0.9272727272727272,
+        "Roman bridge": 0.0036363636363636364,
+        "New London Bridge": 0.007272727272727273,
+        "the rebuilding of London Bridge": 0.007272727272727273,
+        "its namesake": 0.0036363636363636364,
+        "Old London Bridge": 0.01090909090909091,
+        "London-bridge": 0.0036363636363636364,
+        "the Rennie London Bridge": 0.0036363636363636364,
+        "new London Bridge": 0.0036363636363636364,
+        "London Bridge of 1209 to 1831": 0.0036363636363636364,
+        "Loddon Bridge": 0.0036363636363636364,
+        "1831 London Bridge": 0.0036363636363636364
+    },
+    "Q5338273": {
+        "Edinburgh University": 0.75,
+        "Edinburgh University Ladies": 0.25
+    },
+    "Q170027": {
+        "London": 0.04149026248941575,
+        "University of London": 0.8882303132938187,
+        "London University": 0.058425063505503805,
+        "Chancellor": 0.000846740050804403,
+        "University College, London": 0.001693480101608806,
+        "Lond.": 0.000846740050804403,
+        "University of London intercollegiate halls of residence": 0.000846740050804403,
+        "the University of London": 0.000846740050804403,
+        "Athlone Press": 0.000846740050804403,
+        "Chancellor of the University of London": 0.000846740050804403,
+        "Universities of London": 0.001693480101608806,
+        "The University of London": 0.000846740050804403,
+        "many constituent colleges": 0.000846740050804403,
+        "London University School": 0.000846740050804403,
+        "'schemes of special relations'": 0.000846740050804403
+    },
+    "Q800751": {
+        "London": 0.042105263157894736,
+        "Euston station": 0.07105263157894737,
+        "London Euston": 0.5842105263157895,
+        "Euston": 0.1631578947368421,
+        "Euston railway station": 0.07105263157894737,
+        "London terminus": 0.002631578947368421,
+        "Euston Station": 0.049999999999999996,
+        "Euston main line station": 0.002631578947368421,
+        "London Euston station": 0.005263157894736842,
+        "(Euston": 0.002631578947368421,
+        "EUS": 0.002631578947368421,
+        "Euston Railway Station": 0.002631578947368421
+    },
+    "Q214788": {
+        "London": 0.02122641509433962,
+        "Paddington": 0.16981132075471697,
+        "London Paddington station": 0.07075471698113207,
+        "Paddington Station": 0.06132075471698113,
+        "London Paddington": 0.5660377358490566,
+        "Paddington station": 0.08962264150943396,
+        "Paddington rail station": 0.0023584905660377358,
+        "Paddington railway station": 0.011792452830188678,
+        "London_Paddington": 0.0023584905660377358,
+        "London Paddington railway station": 0.0023584905660377358,
+        "London (Paddington)": 0.0023584905660377358
+    },
+    "Q1488404": {
+        "London": 0.058823529411764705,
+        "docks": 0.029411764705882353,
+        "London Docks": 0.8235294117647058,
+        "Docks": 0.029411764705882353,
+        "Hermitage entrance": 0.029411764705882353,
+        "London docks": 0.029411764705882353
+    },
+    "Q14946379": {
+        "London": 0.18518518518518517,
+        "Diocese of London": 0.5462962962962963,
+        "diocese of London": 0.12962962962962962,
+        "See of London": 0.046296296296296294,
+        "Bishops of London": 0.009259259259259259,
+        "see of London": 0.07407407407407407,
+        "London Diocesan House": 0.009259259259259259
+    },
+    "Q92561": {
+        "London": 0.3769633507853403,
+        "City of London": 0.003926701570680629,
+        "London, Ontario": 0.6020942408376964,
+        "London's": 0.0013089005235602095,
+        "Fanshawe": 0.0013089005235602095,
+        "London, Canada West": 0.002617801047120419,
+        "London (Ontario)": 0.002617801047120419,
+        "London, Ontario, Canada": 0.0013089005235602095,
+        "Eager Beaver Baseball Association": 0.002617801047120419,
+        "London Police Service": 0.0013089005235602095,
+        "Hubrey": 0.0013089005235602095,
+        "City of London, Ontario": 0.002617801047120419
+    },
+    "Q2477346": {
+        "London": 0.8461538461538463,
+        "Londres": 0.15384615384615385
+    },
+    "Q3061911": {
+        "London": 0.6923076923076923,
+        "London, Kentucky": 0.2564102564102564,
+        "London, KY": 0.05128205128205128
+    },
+    "Q1137312": {
+        "London": 0.058252427184466014,
+        "County of London": 0.9174757281553397,
+        "Inner London": 0.0048543689320388345,
+        "county of London": 0.019417475728155338
+    },
+    "Q6670323": {
+        "London": 0.2222222222222222,
+        "London District": 0.7777777777777777
+    },
+    "Q8691": {
+        "London": 0.026713124274099883,
+        "airport": 0.0011614401858304297,
+        "Heathrow Airport": 0.4796747967479675,
+        "London Heathrow": 0.10569105691056911,
+        "London Heathrow Airport": 0.15098722415795587,
+        "London, England": 0.0011614401858304297,
+        "London Airport": 0.010452961672473868,
+        "Heathrow": 0.16492450638792103,
+        "Heathrow International Airport": 0.0011614401858304297,
+        "London-Heathrow": 0.013937282229965157,
+        "London's Heathrow Airport": 0.0023228803716608595,
+        "London Airport (Heathrow)": 0.0011614401858304297,
+        "Heathrow airport": 0.011614401858304297,
+        "London\u2013Heathrow": 0.023228803716608595,
+        "Heathrow Airport Terminal 5": 0.0011614401858304297,
+        "LHR": 0.0011614401858304297,
+        "London Heathrow Airport (LHR)": 0.0011614401858304297,
+        "Philip Sherwood": 0.0011614401858304297,
+        "London Heathrow airport": 0.0011614401858304297
+    },
+    "Q1545354": {
+        "London": 0.18181818181818182,
+        "docks": 0.010101010101010102,
+        "Port of London": 0.7272727272727273,
+        "tidal part of the Thames": 0.010101010101010102,
+        "Port of London Authority": 0.010101010101010102,
+        "port of London": 0.020202020202020204,
+        "London docks": 0.010101010101010102,
+        "London's historic docks": 0.010101010101010102,
+        "enclosed dock system": 0.010101010101010102,
+        "Control Centre": 0.010101010101010102
+    },
+    "Q578794": {
+        "London": 0.03870967741935484,
+        "Virgin London Marathon": 0.01935483870967742,
+        "London Marathon": 0.9225806451612903,
+        "London Marathon Charitable Trust": 0.0064516129032258064,
+        "the London Marathon": 0.0064516129032258064,
+        "London marathon": 0.0064516129032258064
+    },
+    "Q1415441": {
+        "London": 0.014285714285714285,
+        "Southend": 0.22857142857142856,
+        "London Southend Airport": 0.2857142857142857,
+        "Rochford": 0.02857142857142857,
+        "Southend Airport": 0.22857142857142856,
+        "Southend Municipal Airport": 0.014285714285714285,
+        "Southend (Rochford)": 0.014285714285714285,
+        "London Southend": 0.05714285714285714,
+        "Southend Airport (Rochford)": 0.014285714285714285,
+        "RAF Rochford": 0.05714285714285714,
+        "London South": 0.014285714285714285,
+        "nd": 0.014285714285714285,
+        "London-Southend": 0.014285714285714285,
+        "London-Southend Airport": 0.014285714285714285
+    },
+    "Q6669759": {
+        "London": 1.0
+    },
+    "Q219867": {
+        "London": 0.025423728813559324,
+        "King's Cross": 0.2909604519774011,
+        "King's Cross station": 0.09322033898305085,
+        "Kings Cross": 0.0423728813559322,
+        "London King's Cross": 0.3559322033898305,
+        "King's Cross Station": 0.03389830508474576,
+        "London King's Cross railway station": 0.03954802259887005,
+        "King's Cross, London": 0.002824858757062147,
+        "Kings Cross railway station": 0.002824858757062147,
+        "King's Cross railway station": 0.0423728813559322,
+        "Kings Cross Station": 0.002824858757062147,
+        "London King's Cross station": 0.011299435028248588,
+        "London Kings Cross Railway Station": 0.002824858757062147,
+        "London Kings Cross": 0.031073446327683614,
+        "London Kings Cross station": 0.002824858757062147,
+        "King's Cross mainline station": 0.005649717514124294,
+        "King's Cross York Road": 0.002824858757062147,
+        "King's Cross station, London": 0.002824858757062147,
+        "King's Cross terminal": 0.002824858757062147,
+        "King\u2019s Cross station": 0.005649717514124294
+    },
+    "Q795678": {
+        "London": 0.03333333333333333,
+        "London Waterloo": 0.1,
+        "Waterloo International railway station": 0.2,
+        "Waterloo International": 0.3,
+        "Waterloo Station": 0.03333333333333333,
+        "Waterloo International terminal": 0.03333333333333333,
+        "Waterloo International station": 0.2,
+        "London terminus": 0.03333333333333333,
+        "London Waterloo International": 0.06666666666666667
+    },
+    "Q7242790": {
+        "London": 0.06896551724137931,
+        "Gay Pride": 0.034482758620689655,
+        "Pride London": 0.3793103448275862,
+        "London Pride Parade": 0.034482758620689655,
+        "Pride in London": 0.1724137931034483,
+        "London Pride": 0.13793103448275862,
+        "London Gay Pride parade": 0.034482758620689655,
+        "London Gay Pride Festival": 0.034482758620689655,
+        "Lesbian and Gay Pride": 0.034482758620689655,
+        "anti-transgender protesters at that year's London Pride event": 0.034482758620689655,
+        "London Gay Pride Week": 0.034482758620689655
+    },
+    "Q216185": {
+        "London": 0.002421307506053269,
+        "central London": 0.009685230024213076,
+        "Charing Cross": 0.9685230024213075,
+        "Charing": 0.004842615012106538,
+        "centre of London": 0.007263922518159807,
+        "official centre of London": 0.002421307506053269,
+        "London's centre point": 0.002421307506053269,
+        "Charring Cross": 0.002421307506053269
+    },
+    "Q2018322": {
+        "London": 0.03225806451612903,
+        "gay": 0.03225806451612903,
+        "Old Compton Street": 0.9354838709677419
+    },
+    "Q720102": {
+        "London": 0.015789473684210527,
+        "railway station": 0.002631578947368421,
+        "London St Pancras": 0.3131578947368421,
+        "St Pancras": 0.16578947368421051,
+        "St Pancras railway station": 0.16052631578947368,
+        "St Pancras International": 0.09736842105263158,
+        "London St Pancras International": 0.09736842105263158,
+        "St. Pancras": 0.021052631578947368,
+        "St. Pancras International": 0.007894736842105263,
+        "St Pancras International station": 0.007894736842105263,
+        "London terminus": 0.002631578947368421,
+        "London St Pancras International station": 0.002631578947368421,
+        "St Pancras Station": 0.02631578947368421,
+        "St Pancras station": 0.039473684210526314,
+        "London St. Pancras": 0.021052631578947368,
+        "a new station underneath St Pancras": 0.002631578947368421,
+        "St. Pancras Station": 0.002631578947368421,
+        "Barlow train shed": 0.002631578947368421,
+        "(St Pancras) (STP)": 0.002631578947368421,
+        "London-St Pancras": 0.002631578947368421,
+        "its international railway station": 0.002631578947368421,
+        "London St Pancras station": 0.002631578947368421
+    },
+    "Q23306": {
+        "London": 0.04216216216216216,
+        "Greater London": 0.9448648648648649,
+        "London's": 0.001081081081081081,
+        "London region": 0.005405405405405406,
+        "Greater London's": 0.003243243243243243,
+        "South East London": 0.001081081081081081,
+        "London conurbation": 0.001081081081081081,
+        "Greater London county": 0.001081081081081081
+    },
+    "Q1449564": {
+        "London": 0.7142857142857142,
+        "London station": 0.2857142857142857
+    },
+    "Q733210": {
+        "London": 0.09090909090909091,
+        "1948": 0.18181818181818182,
+        "1948 Summer Olympics": 0.09090909090909091,
+        "Olympic tournament": 0.09090909090909091,
+        "1948 Olympic Games": 0.18181818181818182,
+        "1948 Olympics": 0.09090909090909091,
+        "Olympic basketball": 0.09090909090909091,
+        "at the 1948 Summer Olympics": 0.09090909090909091,
+        "1948 Olympics in London": 0.09090909090909091
+    },
+    "Q14710970": {
+        "London": 0.7272727272727273,
+        "other community of the same name": 0.09090909090909091,
+        "London, Texas": 0.18181818181818182
+    },
+    "Q1001456": {
+        "London": 0.6071428571428571,
+        "London, Ohio": 0.39285714285714285
+    },
+    "Q503516": {
+        "London": 0.07142857142857142,
+        "Laurel": 0.17857142857142855,
+        "Laurel County": 0.5714285714285714,
+        "Laurel Counties": 0.03571428571428571,
+        "Laurel County, Kentucky": 0.14285714285714285
+    },
+    "Q8982": {
+        "London": 0.007352941176470588,
+        "City": 0.007352941176470588,
+        "London City Airport": 0.8014705882352942,
+        "City Airport": 0.007352941176470588,
+        "London City": 0.125,
+        "London-City": 0.029411764705882353,
+        "London City airport": 0.007352941176470588,
+        "London\u2013City": 0.014705882352941176
+    },
+    "Q22059065": {
+        "London": 1.0
+    },
+    "Q8712": {
+        "London": 0.012121212121212121,
+        "Luton": 0.23030303030303031,
+        "Luton Airport": 0.3151515151515151,
+        "London Luton Airport": 0.22424242424242424,
+        "the airport": 0.012121212121212121,
+        "Luton airport": 0.01818181818181818,
+        "local airport": 0.006060606060606061,
+        "London Luton": 0.12121212121212122,
+        "London\u2013Luton": 0.012121212121212121,
+        "Luton International Airport": 0.006060606060606061,
+        "London (Luton)": 0.006060606060606061,
+        "London-Luton": 0.030303030303030304,
+        "London Luton Airport Ltd": 0.006060606060606061
+    },
+    "Q20657974": {
+        "London": 1.0
+    },
+    "Q565521": {
+        "London": 0.012345679012345678,
+        "Clarence House": 0.9753086419753085,
+        "Clarence": 0.012345679012345678
+    },
+    "Q238587": {
+        "London": 0.0026595744680851063,
+        "National Portrait Gallery": 0.8031914893617021,
+        "National Portrait Gallery, London": 0.1622340425531915,
+        "in London": 0.0026595744680851063,
+        "National Portrait Gallery's": 0.0026595744680851063,
+        "National Portrait Gallery in London": 0.005319148936170213,
+        "Portrait Gallery": 0.0026595744680851063,
+        "it Gallery": 0.0026595744680851063,
+        "The National Portrait Gallery": 0.005319148936170213,
+        "National Portrait Gallery (United Kingdom)": 0.0026595744680851063,
+        "London's National Portrait Gallery": 0.0026595744680851063,
+        "National Portrait Gallery (London)": 0.005319148936170213
+    },
+    "Q122744": {
+        "London": 0.007462686567164179,
+        "Maida Vale": 0.8582089552238806,
+        "Maida Vale Studios": 0.007462686567164179,
+        "Little Venice": 0.06716417910447761,
+        "Maida Hill": 0.03731343283582089,
+        "BBC studios": 0.007462686567164179,
+        "Maida Vale studios": 0.007462686567164179,
+        "Maida Vale 4": 0.007462686567164179
+    },
+    "Q15179170": {
+        "London": 0.14285714285714285,
+        "Alexandra Palace transmitter": 0.14285714285714285,
+        "home": 0.14285714285714285,
+        "Alexandra Palace": 0.42857142857142855,
+        "the transmitter at Alexandra Palace": 0.14285714285714285
+    },
+    "Q10818": {
+        "London": 0.008097165991902834,
+        "July 7, 2005 London bombings": 0.012145748987854251,
+        "7 July 2005 London bombings": 0.4534412955465587,
+        "7": 0.004048582995951417,
+        "four coordinated terrorist attacks": 0.004048582995951417,
+        "2005 London bombings": 0.048582995951417005,
+        "series of terrorist attacks": 0.004048582995951417,
+        "terrorist attack": 0.012145748987854251,
+        "bombings": 0.008097165991902834,
+        "London underground bombing": 0.004048582995951417,
+        "bombings on the underground and bus systems": 0.004048582995951417,
+        "7 July Review Committee": 0.004048582995951417,
+        "a series of four suicide bombings": 0.004048582995951417,
+        "London bombings": 0.05668016194331984,
+        "7/7": 0.020242914979757085,
+        "terrorist bombings in London": 0.004048582995951417,
+        "7 July 2005 London Bombings": 0.004048582995951417,
+        "bombs exploded": 0.004048582995951417,
+        "day's London bombings": 0.004048582995951417,
+        "7 July 2005 bombing": 0.004048582995951417,
+        "7 July 2005 terrorist attacks": 0.004048582995951417,
+        "\"7/7\" bombings": 0.004048582995951417,
+        "terrorist bombings": 0.004048582995951417,
+        "July 2005": 0.008097165991902834,
+        "July\u00a07, 2005": 0.004048582995951417,
+        "7/7 London bombings": 0.012145748987854251,
+        "terrorist attacks on London Transport": 0.004048582995951417,
+        "7/7 bombings, London": 0.004048582995951417,
+        "London bombings, 2005": 0.004048582995951417,
+        "'7/7'": 0.004048582995951417,
+        "7/7 terror attackers": 0.004048582995951417,
+        "7 July bombings": 0.004048582995951417,
+        "London bombings in July 2005": 0.004048582995951417,
+        "7/7 terrorist attacks": 0.008097165991902834,
+        "London in 2005": 0.004048582995951417,
+        "London Bombings": 0.004048582995951417,
+        "7 July terrorist attacks": 0.004048582995951417,
+        "bus-bombing of 7 July 2005": 0.004048582995951417,
+        "2005 London terrorist bombings": 0.004048582995951417,
+        "7 July London bombings": 0.020242914979757085,
+        "7 July": 0.016194331983805668,
+        "2005 London Bombings": 0.004048582995951417,
+        "London bombings of 7 July 2005": 0.008097165991902834,
+        "7 July 2005 bombings": 0.012145748987854251,
+        "videotaped statements of the 7 July 2005 London bombers": 0.004048582995951417,
+        "two terrorist bombings": 0.008097165991902834,
+        "7 July 2005": 0.024291497975708502,
+        "7 July 2005 bombers": 0.004048582995951417,
+        "London bombings on 7 July 2005": 0.004048582995951417,
+        "terrorist bombings in London on July 7, 2005": 0.004048582995951417,
+        "July London bombings": 0.004048582995951417,
+        "7/7 bombings": 0.012145748987854251,
+        "July 7 bombings": 0.008097165991902834,
+        "7 July terror attacks": 0.004048582995951417,
+        "July 2005 London bombings": 0.004048582995951417,
+        "7 July 2005 London bombers": 0.004048582995951417,
+        "July 7, 2005 bombings": 0.004048582995951417,
+        "London Underground bombings of July 7, 2005": 0.004048582995951417,
+        "July 7 London bombings": 0.008097165991902834,
+        "2005 London Tube bombings": 0.004048582995951417,
+        "suicide bombers of 7/7": 0.004048582995951417,
+        "three bombs exploded": 0.004048582995951417,
+        "terrorist attacks in London": 0.008097165991902834,
+        "terrorist bombings in London on 7 July 2005": 0.004048582995951417,
+        "2005 London Underground bombings": 0.004048582995951417,
+        "2005 terrorist bombings in London": 0.004048582995951417,
+        "July 7th London bombings": 0.004048582995951417,
+        "London bombings in 2005": 0.004048582995951417,
+        "7 July 2005 London bombing": 0.004048582995951417,
+        "bomb attacks in London": 0.004048582995951417,
+        "London bombings of July 7, 2005": 0.004048582995951417,
+        "bombings on London's transport system": 0.004048582995951417,
+        "7 July 2005 attacks on London": 0.004048582995951417,
+        "7 July 2005 bombings in London": 0.004048582995951417,
+        "London 7/7": 0.004048582995951417,
+        "7/7 terrorist attacks in London": 0.004048582995951417,
+        "7/7 London": 0.004048582995951417
+    },
+    "Q649419": {
+        "London": 0.013157894736842105,
+        "Marylebone": 0.256578947368421,
+        "London Marylebone": 0.5789473684210527,
+        "Marylebone station": 0.11184210526315788,
+        "Marylebone Station": 0.02631578947368421,
+        "Marylebone Railway Station": 0.006578947368421052,
+        "London (Marylebone)": 0.006578947368421052
+    },
+    "Q15242653": {
+        "London": 0.06666666666666667,
+        "London Museum": 0.9333333333333333
+    },
+    "Q6669738": {
+        "London": 1.0
+    },
+    "Q756819": {
+        "London": 0.003125,
+        "the Strand": 0.134375,
+        "Strand": 0.596875,
+        "The Strand": 0.15625,
+        "Strand, London": 0.08125,
+        "the street": 0.003125,
+        "the Strand in London": 0.003125,
+        "The Strand, London": 0.0125,
+        "the street in London": 0.003125,
+        "142 Strand, London": 0.003125,
+        "West Strand, London": 0.003125
+    },
+    "Q7443327": {
+        "London": 0.0625,
+        "Second Great Fire of London": 0.8125,
+        "blitz on London": 0.0625,
+        "one of the greatest fires in London's history": 0.0625
+    },
+    "Q4642035": {
+        "London": 0.16666666666666666,
+        "Baker Street": 0.16666666666666666,
+        "64 Baker Street": 0.6666666666666666
+    },
+    "Q729177": {
+        "London": 0.11764705882352941,
+        "Cleopatra's Needle": 0.8823529411764706
+    },
+    "Q1399178": {
+        "London": 0.125,
+        "Fazl Mosque": 0.75,
+        "mosque in London": 0.125
+    },
+    "Q5645763": {
+        "London": 0.14285714285714285,
+        "Hammersmith": 0.14285714285714285,
+        "Hammersmith bus station": 0.7142857142857142
+    },
+    "Q801124": {
+        "London": 0.015463917525773196,
+        "London Liverpool Street": 0.538659793814433,
+        "Liverpool Street": 0.21649484536082475,
+        "Liverpool Street station": 0.17010309278350516,
+        "Liverpool Street Station": 0.030927835051546393,
+        "Bishopsgate station": 0.005154639175257732,
+        "Liverpool Street railway station": 0.010309278350515464,
+        "Liverpool Street Railway station": 0.002577319587628866,
+        "London Liverpool Street station": 0.007731958762886598,
+        "London Liverpool Street Station": 0.002577319587628866
+    },
+    "Q7737135": {
+        "London": 0.16666666666666666,
+        "assay office": 0.16666666666666666,
+        "the Goldsmiths' Company Assay Office": 0.16666666666666666,
+        "The Goldsmiths' Company Assay Office": 0.5
+    },
+    "Q4834838": {
+        "London": 0.006211180124223602,
+        "Radio London": 0.037267080745341616,
+        "GLR": 0.05590062111801242,
+        "Greater London Radio": 0.043478260869565216,
+        "BBC London 94.9": 0.18012422360248445,
+        "BBC Radio London": 0.5714285714285714,
+        "BBC Greater London Radio": 0.006211180124223602,
+        "BBC London Live": 0.012422360248447204,
+        "BBC GLR": 0.08074534161490683,
+        "BBC Radio London 94.9": 0.006211180124223602
+    },
+    "Q17509255": {
+        "London": 0.09090909090909091,
+        "Chiswell Street": 0.9090909090909092
+    },
+    "Q800753": {
+        "London": 0.01020408163265306,
+        "Fenchurch Street": 0.4183673469387755,
+        "Fenchurch Street railway station": 0.1530612244897959,
+        "Fenchurch Street station": 0.0510204081632653,
+        "London Fenchurch Street": 0.32653061224489793,
+        "Fenchurch Street railway line": 0.01020408163265306,
+        "Fenchurch Street Station": 0.030612244897959183
+    },
+    "Q6671078": {
+        "London": 0.12,
+        "London's successful bid": 0.12,
+        "2012 Olympics and Paralympics": 0.04,
+        "London 2012": 0.04,
+        "London bid committee": 0.04,
+        "London bid for the 2012 Summer Olympics": 0.12,
+        "winning bid for the 2012 Olympic Games": 0.04,
+        "2012 Olympics bid": 0.04,
+        "London to host the 2012 Olympics": 0.04,
+        "London 2012 Olympic bid": 0.16,
+        "London 2012 Olympic Bid": 0.04,
+        "successful bid": 0.04,
+        "2012 Olympic bid": 0.04,
+        "London's 2012 Olympic bid": 0.04,
+        "London won the bid for the 2012 Summer Olympics": 0.04,
+        "London's Olympic and Paralympic bid": 0.04
+    },
+    "Q186309": {
+        "London": 0.005813953488372093,
+        "Orlando": 0.005813953488372093,
+        "Madame Tussauds": 0.8546511627906976,
+        "Madame Tussaud's": 0.05232558139534883,
+        "Madame Tussauds Sydney": 0.005813953488372093,
+        "Madame Tussauds Wax Museums": 0.005813953488372093,
+        "Madame Tussaud Wax Museum": 0.005813953488372093,
+        "an exhibition": 0.005813953488372093,
+        "Chamber of Horrors": 0.011627906976744186,
+        "Madame Tussaud's Wax Museum": 0.005813953488372093,
+        "Madame Tussauds Wax Works": 0.005813953488372093,
+        "Madame Tussauds Wax Museum": 0.005813953488372093,
+        "Madame Tussaud's wax museum": 0.005813953488372093,
+        "Madame Tussauds London": 0.011627906976744186,
+        "Tussaud": 0.011627906976744186
+    },
+    "Q212883": {
+        "London": 0.3333333333333333,
+        "diving": 0.6666666666666666
+    },
+    "Q195436": {
+        "London": 0.004366812227074236,
+        "Tate Modern": 0.004366812227074236,
+        "Tate Gallery": 0.08733624454148471,
+        "Tate": 0.021834061135371178,
+        "Tate Britain": 0.8253275109170305,
+        "The Tate": 0.004366812227074236,
+        "Tate Gallery, London": 0.004366812227074236,
+        "Clore Gallery": 0.008733624454148471,
+        "National Gallery of British Art": 0.013100436681222707,
+        "Millbank Gallery": 0.004366812227074236,
+        "Tate Library": 0.004366812227074236,
+        "Tate Britain, London": 0.004366812227074236,
+        "Tate Britain art gallery": 0.004366812227074236,
+        "Tate collections": 0.004366812227074236,
+        "Tate Archive": 0.004366812227074236
+    },
+    "Q5038252": {
+        "London": 1.0
+    },
+    "Q83609": {
+        "London": 0.008,
+        "Acton": 0.856,
+        "Acton, West London": 0.016,
+        "Acton, London": 0.10400000000000001,
+        "East Acton": 0.008,
+        "Acton hospital": 0.008
+    },
+    "Q79348": {
+        "London": 1.0
+    },
+    "Q4801470": {
+        "London": 0.024390243902439025,
+        "Arts Educational Schools, London": 0.12195121951219512,
+        "ArtsEd": 0.0975609756097561,
+        "Arts Educational Schools": 0.1951219512195122,
+        "The Arts Educational Schools": 0.04878048780487805,
+        "Arts Educational School": 0.3902439024390244,
+        "the Arts Educational School": 0.024390243902439025,
+        "Arts Educational School in London": 0.024390243902439025,
+        "Arts Educational Schools London": 0.04878048780487805,
+        "Arts Educational Trust School": 0.024390243902439025
+    },
+    "Q220198": {
+        "London": 0.0055248618784530384,
+        "Zoological Society of London": 0.7458563535911602,
+        "London Society": 0.0055248618784530384,
+        "Zoological Society": 0.08839779005524862,
+        "Zoological Gardens": 0.0055248618784530384,
+        "Zoological": 0.027624309392265192,
+        "London Zoological Society": 0.03867403314917127,
+        "Regent's Park Zoological Gardens in London": 0.0055248618784530384,
+        "FZS": 0.055248618784530384,
+        "Fellow of the Zoological Society": 0.0055248618784530384,
+        "Zoological Society of London (ZSL)": 0.0055248618784530384,
+        "Zoological societies": 0.0055248618784530384,
+        "F.Z.S.": 0.0055248618784530384
+    },
+    "Q124234": {
+        "London": 0.009615384615384616,
+        "St James's": 0.701923076923077,
+        "St. James's": 0.19230769230769232,
+        "St James": 0.009615384615384616,
+        "Parish of St James": 0.009615384615384616,
+        "St James's Parish": 0.009615384615384616,
+        "Regent Street St. James (Lower Regent Street)": 0.009615384615384616,
+        "St James, London": 0.009615384615384616,
+        "St James Place": 0.009615384615384616,
+        "Regent Street St James (Lower Regent Street)": 0.009615384615384616,
+        "St James's, London": 0.009615384615384616,
+        "St. James's, London": 0.009615384615384616,
+        "St.\u00a0James's": 0.009615384615384616
+    },
+    "Q23298": {
+        "London": 0.0004050222762251924,
+        "English county": 0.0004050222762251924,
+        "Kent": 0.9732685297691374,
+        "County": 0.0004050222762251924,
+        "Kent County": 0.0004050222762251924,
+        "Kent, England": 0.005265289590927502,
+        "modern county": 0.0004050222762251924,
+        "the County of Kent": 0.0004050222762251924,
+        "County of Kent": 0.014985824220332119,
+        "Garden of England": 0.0004050222762251924,
+        "West Kent": 0.0004050222762251924,
+        "county of Kent": 0.0008100445524503848,
+        "Kent County, England": 0.0008100445524503848,
+        "County Kent": 0.0004050222762251924,
+        "Kent (England)": 0.0004050222762251924,
+        "County Of Kent": 0.0004050222762251924,
+        "Kent, England.": 0.0004050222762251924
+    },
+    "Q1431914": {
+        "London": 0.011904761904761904,
+        "Croydon": 0.16666666666666666,
+        "Croydon Airport": 0.6785714285714285,
+        "Airport House": 0.011904761904761904,
+        "Croydon airfield": 0.011904761904761904,
+        "Croydon Airfield": 0.011904761904761904,
+        "Croydon airport": 0.023809523809523808,
+        "original London Airport at Croydon": 0.011904761904761904,
+        "Croydon Aerodrome": 0.03571428571428571,
+        "London's international airport": 0.011904761904761904,
+        "RAF Croydon": 0.023809523809523808
+    },
+    "Q835031": {
+        "London": 0.0625,
+        "German Embassy": 0.0625,
+        "German Ambassador": 0.1875,
+        "German Embassy in London": 0.125,
+        "German embassy in London": 0.0625,
+        "German ambassador to the United Kingdom": 0.0625,
+        "Embassy of Germany": 0.0625,
+        "German Ambassador to the United Kingdom": 0.0625,
+        "London embassy": 0.0625,
+        "Embassy of the Federal Republic of Germany": 0.0625,
+        "German ambassador to Britain": 0.0625,
+        "Embassy of Germany to the U.K.": 0.0625,
+        "German ambassador": 0.0625
+    },
+    "Q1323689": {
+        "London": 0.016666666666666666,
+        "London International Film Festival": 0.041666666666666664,
+        "London Film Festival": 0.6666666666666666,
+        "BFI London Film Festival": 0.24166666666666667,
+        "British Film Institute Awards": 0.008333333333333333,
+        "London Film Fest": 0.008333333333333333,
+        "The Times BFI London Film Festival": 0.016666666666666666
+    },
+    "Q7594521": {
+        "London": 0.16666666666666666,
+        "St Mary's Roman Catholic Church": 0.3333333333333333,
+        "Our Immaculate Lady of Victories": 0.3333333333333333,
+        "Clapham parish": 0.16666666666666666
+    },
+    "Q26888": {
+        "London": 0.006756756756756757,
+        "Croydon": 0.2702702702702703,
+        "London Borough of Croydon": 0.6486486486486487,
+        "Croydon Council": 0.0472972972972973,
+        "London Borough": 0.006756756756756757,
+        "borough of Croydon": 0.006756756756756757,
+        "London Boroughs of Croydon": 0.006756756756756757,
+        "Borough of Croydon": 0.006756756756756757
+    },
+    "Q8709": {
+        "London": 0.007751937984496124,
+        "Stansted Airport": 0.3178294573643411,
+        "London Stansted": 0.13953488372093023,
+        "London Stansted Airport": 0.25193798449612403,
+        "Stansted": 0.2131782945736434,
+        "London-Stansted": 0.031007751937984496,
+        "proposed expansion": 0.003875968992248062,
+        "London\u2013Stansted": 0.023255813953488372,
+        "London's third airport": 0.003875968992248062,
+        "London Stansted Airport's": 0.003875968992248062,
+        "London-Stansted Airport": 0.003875968992248062
+    },
+    "Q1402606": {
+        "London": 0.0625,
+        "Neasden Temple": 0.375,
+        "BAPS Shri Swaminarayan Mandir London": 0.25,
+        "Swaminarayan Temple in Neasden": 0.0625,
+        "Neasden temple": 0.0625,
+        "Shri Swaminarayan Mandir": 0.0625,
+        "BAPS mandir in Neasden": 0.0625,
+        "Hindu Temple in Neasden": 0.0625
+    },
+    "Q278054": {
+        "London": 0.14285714285714285,
+        "Diocese of London": 0.2857142857142857,
+        "Roman Catholic Diocese of London": 0.14285714285714285,
+        "Bishop of London, Ontario": 0.42857142857142855
+    },
+    "Q801125": {
+        "London": 0.005291005291005291,
+        "London Bridge": 0.7777777777777777,
+        "London Bridge Station": 0.05291005291005291,
+        "London Bridge station": 0.12169312169312169,
+        "London Bridge railway station": 0.037037037037037035,
+        "redevelopment of London Bridge": 0.005291005291005291
+    },
+    "Q6669870": {
+        "London": 0.05555555555555555,
+        "London Book Fair": 0.8888888888888888,
+        "exclusion of dissident Chinese writers from the London Book Fair 2012": 0.05555555555555555
+    },
+    "Q1666958": {
+        "London": 0.03571428571428571,
+        "London International Surrealist Exhibition": 0.5357142857142857,
+        "International Surrealist Exhibition": 0.3214285714285714,
+        "1936 Surrealist Exhibition in London": 0.03571428571428571,
+        "International Exhibition of Surrealism": 0.03571428571428571,
+        "International Surrealist Exhibition in London": 0.03571428571428571
+    },
+    "Q5011830": {
+        "London": 0.25,
+        "CIQM-FM": 0.75
+    },
+    "Q772421": {
+        "London": 0.025,
+        "St George's": 0.05,
+        "St. George's University": 0.025,
+        "St George's Hospital": 0.025,
+        "St. George's Medical School": 0.025,
+        "St George's, University of London": 0.375,
+        "St George's Hospital Medical School": 0.275,
+        "St. George's Hospital": 0.025,
+        "St George's Medical School": 0.025,
+        "Grosvenor School of Medicine": 0.025,
+        "St George's (University of London)": 0.025,
+        "St George's Hospita": 0.025,
+        "St. George's Hospital Medical School": 0.05,
+        "St. George's University of London": 0.025
+    },
+    "Q1749569": {
+        "London": 0.16666666666666666,
+        "Ny-London": 0.8333333333333333
+    },
+    "Q60578265": {
+        "London": 0.2857142857142857,
+        "City": 0.14285714285714285,
+        "City of London": 0.5714285714285714
+    },
+    "Q897533": {
+        "Bramall Lane": 0.9253731343283582,
+        "Sheffield": 0.03731343283582089,
+        "John Street Stand": 0.007462686567164179,
+        "corner infill stand": 0.007462686567164179,
+        "Bramall Lane Stadium": 0.007462686567164179,
+        "Brammall Lane": 0.014925373134328358
+    },
+    "Q21061609": {
+        "Headingley": 0.8634146341463415,
+        "Leeds": 0.058536585365853655,
+        "cricket ground": 0.00975609756097561,
+        "Headingley, Leeds": 0.004878048780487805,
+        "Headingley Cricket Ground": 0.058536585365853655,
+        "home county ground": 0.004878048780487805
+    },
+    "Q1187032": {
+        "Headingley": 0.6170212765957447,
+        "Leeds": 0.06382978723404255,
+        "Headingley Stadium": 0.2978723404255319,
+        "Emerald Headingley Stadium": 0.02127659574468085
+    },
+    "Q39121": {
+        "Leeds": 0.9868680236375573,
+        "Leeds city centre": 0.0006565988181221273,
+        "Cross Gates": 0.0006565988181221273,
+        "Leeds, England": 0.004596191726854892,
+        "city of Leeds": 0.0013131976362442547,
+        "Lord Mayor of Leeds": 0.0013131976362442547,
+        "Leeds, Yorkshire": 0.0026263952724885093,
+        "Leeds, West Yorkshire": 0.0006565988181221273,
+        "Leeds, United Kingdom": 0.0006565988181221273,
+        "Leeds, UK": 0.0006565988181221273
+    },
+    "Q1128631": {
+        "Leeds": 0.020114942528735632,
+        "Leeds United": 0.9310344827586207,
+        "Leeds United A.F.C.": 0.012452107279693486,
+        "Leeds United F.C.": 0.01532567049808429,
+        "Leeds United Football Club": 0.008620689655172414,
+        "Leeds United FC": 0.008620689655172414,
+        "Leeds United's": 0.0028735632183908046,
+        "Leeds United's greatest team": 0.0009578544061302681
+    },
+    "Q774015": {
+        "Leeds": 0.20930232558139533,
+        "City of Leeds": 0.7209302325581395,
+        "Leeds City Council": 0.011627906976744186,
+        "Leeds City": 0.011627906976744186,
+        "City of Leeds Metropolitan District": 0.023255813953488372,
+        "City of Leeds Borough": 0.011627906976744186,
+        "Greater Leeds": 0.011627906976744186
+    },
+    "Q503424": {
+        "Leeds": 0.06382978723404255,
+        "University": 0.00425531914893617,
+        "University of Leeds": 0.7468085106382979,
+        "Institute of Education": 0.002127659574468085,
+        "Leeds University": 0.1595744680851064,
+        "Yorkshire College": 0.006382978723404256,
+        "Leeds University Business School": 0.002127659574468085,
+        "Leeds School of Architecture": 0.002127659574468085,
+        "Yorkshire College, Leeds": 0.00425531914893617,
+        "The University of Leeds": 0.002127659574468085,
+        "University campus": 0.002127659574468085,
+        "Yorkshire College of Science": 0.002127659574468085,
+        "Leeds Institute of Education": 0.002127659574468085
+    },
+    "Q1137962": {
+        "Leeds": 0.004310344827586207,
+        "Reading": 0.05818965517241379,
+        "Leeds Festival": 0.036637931034482756,
+        "Reading Festival": 0.41594827586206895,
+        "Reading and Leeds Festivals": 0.30387931034482757,
+        "Reading and Leeds Festival": 0.05603448275862069,
+        "Windsor Jazz and Blues Festival": 0.0021551724137931034,
+        "Reading and Leeds": 0.01939655172413793,
+        "Reading & Leeds": 0.0021551724137931034,
+        "Carling Weekend": 0.023706896551724137,
+        "Reading Rock Festival": 0.00646551724137931,
+        "Reading and Leeds festival": 0.01293103448275862,
+        "2010 Reading and Leeds Festivals": 0.0021551724137931034,
+        "Reading Music Festival": 0.010775862068965518,
+        "Reading festival": 0.004310344827586207,
+        "Carling Festival": 0.00646551724137931,
+        "Reading and Leeds festivals": 0.01939655172413793,
+        "The Reading and Leeds Festivals": 0.0021551724137931034,
+        "Reading and Leeds music festivals": 0.0021551724137931034,
+        "Reading/Leeds Festivals": 0.0021551724137931034,
+        "The Carling Weekend": 0.004310344827586207,
+        "Reading/Leeds": 0.0021551724137931034,
+        "Reading And Leeds": 0.0021551724137931034
+    },
+    "Q6515934": {
+        "Leeds": 0.375,
+        "Leeds City bus station": 0.625
+    },
+    "Q4834918": {
+        "Leeds": 0.06060606060606061,
+        "BBC Radio Leeds": 0.8484848484848485,
+        "BBC Leeds": 0.030303030303030304,
+        "Radio Leeds": 0.06060606060606061
+    },
+    "Q7721041": {
+        "Leeds": 1.0
+    },
+    "Q482468": {
+        "Leeds": 1.0
+    },
+    "Q2460124": {
+        "Leeds": 0.6666666666666666,
+        "Leeds, Kent": 0.3333333333333333
+    },
+    "Q79869": {
+        "Leeds": 0.5833333333333333,
+        "Leeds, Alabama": 0.41666666666666663
+    },
+    "Q746876": {
+        "Leeds": 0.08333333333333333,
+        "Leeds Castle": 0.9166666666666666
+    },
+    "Q6515805": {
+        "Leeds": 0.5,
+        "neighborhood of Leeds": 0.125,
+        "Leeds, Massachusetts": 0.375
+    },
+    "Q3461415": {
+        "Leeds": 1.0
+    },
+    "Q2365261": {
+        "Leeds": 1.0
+    },
+    "Q7746609": {
+        "Leeds": 0.05263157894736842,
+        "studios": 0.10526315789473684,
+        "The Leeds Studios": 0.47368421052631576,
+        "the Leeds Studios": 0.05263157894736842,
+        "Leeds Studios": 0.2631578947368421,
+        "Kirkstall Road studios": 0.05263157894736842
+    },
+    "Q14875251": {
+        "Leeds": 0.2,
+        "Leeds County": 0.7000000000000001,
+        "Leeds County, Ontario": 0.1
+    },
+    "Q6515927": {
+        "Leeds": 0.1111111111111111,
+        "Leeds City Region": 0.7777777777777777,
+        "Leeds City Region Partnership": 0.1111111111111111
+    },
+    "Q5177618": {
+        "Leeds": 0.14285714285714285,
+        "Leeds Corporation": 0.14285714285714285,
+        "County Borough of Leeds": 0.7142857142857142
+    },
+    "Q27985411": {
+        "Leeds": 1.0
+    },
+    "Q6515866": {
+        "Leeds": 0.3333333333333333,
+        "Leeds Central": 0.6666666666666666
+    },
+    "Q871138": {
+        "Leeds": 0.15384615384615385,
+        "Diocese of Leeds": 0.38461538461538464,
+        "Roman Catholic Diocese of Leeds": 0.46153846153846156
+    },
+    "Q4763489": {
+        "Leeds": 0.029411764705882353,
+        "Diocese of Leeds": 0.7647058823529411,
+        "Anglican Diocese of Leeds": 0.11764705882352941,
+        "of Leeds": 0.058823529411764705,
+        "Leeds diocese": 0.029411764705882353
+    },
+    "Q4871546": {
+        "Leeds": 1.0
+    },
+    "Q24896243": {
+        "Leeds": 0.25,
+        "Elland Road Greyhound Stadium": 0.75
+    },
+    "Q489255": {
+        "Leeds": 0.007462686567164179,
+        "Sioux City": 0.4888059701492537,
+        "Sioux City, Iowa": 0.5,
+        "larger neighbor": 0.0037313432835820895
+    },
+    "Q3228965": {
+        "Leeds": 0.022727272727272728,
+        "Leeds Arts University": 0.20454545454545456,
+        "Leeds School of Art": 0.06818181818181818,
+        "Leeds College of Art": 0.4772727272727273,
+        "Jacob Kramer College": 0.11363636363636365,
+        "Leeds College of Art (Leeds Arts University)": 0.022727272727272728,
+        "Leeds Arts": 0.022727272727272728,
+        "Leeds College of Art and Design": 0.045454545454545456,
+        "Jacob Kramer College of Art": 0.022727272727272728
+    },
+    "Q209266": {
+        "Leeds": 0.6666666666666666,
+        "Leeds Junction": 0.08333333333333333,
+        "Leeds, Maine": 0.25
+    },
+    "Q42448": {
+        "Sheffield": 0.9401459854014598,
+        "Sheffield sound": 0.00072992700729927,
+        "City of Sheffield": 0.03941605839416058,
+        "home town": 0.00072992700729927,
+        "Sheffield, England": 0.012408759124087591,
+        "Sheffield, South Yorkshire": 0.00145985401459854,
+        "Sheffield, Yorkshire, England": 0.00072992700729927,
+        "City and County Borough of Sheffield": 0.00072992700729927,
+        "Sheffield, West Riding of Yorkshire": 0.00145985401459854,
+        "Sheffield, Yorkshire": 0.00072992700729927,
+        "Sheffield District": 0.00072992700729927,
+        "Sheffield's": 0.00072992700729927
+    },
+    "Q7492778": {
+        "Sheffield": 0.3448275862068966,
+        "Sheffield Victoria railway station": 0.13793103448275862,
+        "Sheffield Victoria": 0.41379310344827586,
+        "Sheffield Victoria station": 0.06896551724137931,
+        "Sheffield Victoria Station": 0.034482758620689655
+    },
+    "Q7492565": {
+        "Sheffield": 1.0
+    },
+    "Q1862179": {
+        "Sheffield": 0.7341772151898734,
+        "station": 0.006329113924050633,
+        "Pond Street": 0.006329113924050633,
+        "Sheffield station": 0.09493670886075949,
+        "Sheffield Station": 0.0189873417721519,
+        "Sheffield Railway Station": 0.006329113924050633,
+        "Sheffield Midland station": 0.0189873417721519,
+        "Sheffield railway station": 0.06962025316455696,
+        "Sheffield Midland Station": 0.012658227848101266,
+        "Sheffield Midland": 0.02531645569620253,
+        "Sheffield (Midland)": 0.006329113924050633
+    },
+    "Q823917": {
+        "Sheffield": 0.0851063829787234,
+        "University of Sheffield": 0.7234042553191489,
+        "The University of Sheffield": 0.014184397163120567,
+        "Sheffield University": 0.14893617021276595,
+        "the University of Sheffield": 0.0035460992907801418,
+        "universities of Sheffield": 0.0035460992907801418,
+        "Sheffield University's": 0.0035460992907801418,
+        "Tapton Hall of Residence": 0.0035460992907801418,
+        "University College of Sheffield": 0.0035460992907801418,
+        "Firth College": 0.0070921985815602835,
+        "Sheffield University.": 0.0035460992907801418
+    },
+    "Q4834926": {
+        "Sheffield": 0.08571428571428572,
+        "BBC Radio Sheffield": 0.8285714285714285,
+        "Radio Sheffield": 0.08571428571428572
+    },
+    "Q17643392": {
+        "Sheffield": 0.05263157894736842,
+        "Manor House": 0.05263157894736842,
+        "Sheffield Manor Lodge": 0.15789473684210525,
+        "Sheffield Manor": 0.5789473684210527,
+        "Manor Lodge": 0.15789473684210525
+    },
+    "Q2306176": {
+        "Sheffield": 0.5740740740740741,
+        "Sheffield, Massachusetts": 0.35185185185185186,
+        "Ashley Falls": 0.037037037037037035,
+        "Ashley Falls, Massachusetts": 0.037037037037037035
+    },
+    "Q7492570": {
+        "Sheffield": 1.0
+    },
+    "Q1950928": {
+        "Sheffield": 0.6470588235294118,
+        "Sheffield, Vermont": 0.29411764705882354,
+        "Sheffield Heights": 0.058823529411764705
+    },
+    "Q2277715": {
+        "Sheffield": 0.7857142857142857,
+        "Sheffield, Tasmania": 0.21428571428571427
+    },
+    "Q79568": {
+        "Sheffield": 0.48484848484848486,
+        "Sheffield, Alabama": 0.5151515151515151
+    },
+    "Q518864": {
+        "Sheffield": 0.7272727272727273,
+        "Sheffield, Illinois": 0.2727272727272727
+    },
+    "Q7492591": {
+        "Sheffield": 0.30000000000000004,
+        "Sheffield Blitz": 0.7000000000000001
+    },
+    "Q7492775": {
+        "Sheffield": 0.2,
+        "Sheffield Township": 0.8
+    },
+    "Q741640": {
+        "Sheffield": 0.25,
+        "Wheel of Sheffield": 0.75
+    },
+    "Q7492686": {
+        "Sheffield": 0.125,
+        "Sheffield Interchange": 0.75,
+        "the bus interchange": 0.125
+    },
+    "Q3577611": {
+        "Sheffield": 0.1,
+        "Sheffield Lock": 0.5,
+        "Sheffield Bottom": 0.4
+    },
+    "Q12956644": {
+        "Sheffield": 0.34375,
+        "City of Sheffield": 0.59375,
+        "district borough": 0.03125,
+        "Sheffield Council": 0.03125
+    },
+    "Q547824": {
+        "Sheffield": 0.047619047619047616,
+        "HMS \"Sheffield\"": 0.38095238095238093,
+        "\"Sheffield\"": 0.047619047619047616,
+        "HMS Sheffield": 0.5238095238095237
+    },
+    "Q7492719": {
+        "Sheffield": 1.0
+    },
+    "Q7492566": {
+        "Sheffield": 1.0
+    },
+    "Q7492567": {
+        "Sheffield": 1.0
+    },
+    "Q4523493": {
+        "Sheffield": 0.125,
+        "Sheffield Built-up Area": 0.125,
+        "Sheffield built-up area": 0.25,
+        "Sheffield urban area": 0.5
+    },
+    "Q3028626": {
+        "Sheffield": 0.08333333333333333,
+        "Diocese of Sheffield": 0.75,
+        "diocese of Sheffield": 0.16666666666666666
+    },
+    "Q7492607": {
+        "Sheffield": 0.030303030303030304,
+        "city centre": 0.09090909090909091,
+        "quarters": 0.030303030303030304,
+        "City Centre": 0.06060606060606061,
+        "Sheffield City Centre": 0.42424242424242425,
+        "Sheffield city centre": 0.33333333333333337,
+        "Sheffield's city centre": 0.030303030303030304
+    },
+    "Q3365926": {
+        "Sheffield": 0.5714285714285714,
+        "Sheffield, New Brunswick": 0.42857142857142855
+    },
+    "Q7492568": {
+        "Sheffield": 1.0
+    },
+    "Q108940076": {
+        "Sheffield": 0.75,
+        "village of Sheffield": 0.25
+    },
+    "Q1184547": {
+        "Sheffield": 0.8181818181818182,
+        "Sheffield, PA": 0.18181818181818182
+    },
+    "Q1984238": {
+        "Sheffield": 0.6153846153846154,
+        "Sheffield, Ohio": 0.38461538461538464
+    }
+}
\ No newline at end of file
diff --git a/tests/test_data_processing.py b/tests/test_data_processing.py
index 238edb66..6b5570ad 100644
--- a/tests/test_data_processing.py
+++ b/tests/test_data_processing.py
@@ -6,11 +6,13 @@
 from pathlib import Path
 
 import pandas as pd
+import pytest
 
-large_resources = "/resources/"  # path to large resources
-small_resources = "resources/"  # path to small resources
-processed_path_lwm = "experiments/outputs/data/lwm/"  # path to processed LwM data
-processed_path_hipe = "experiments/outputs/data/hipe/"  # path to processed LwM data
+current_dir = Path(__file__).parent.resolve()
+
+small_resources = os.path.join(current_dir,"sample_files/resources/")  # path to small resources
+processed_path_lwm = os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/")  # path to processed LwM data
+processed_path_hipe = os.path.join(current_dir,"sample_files/experiments/outputs/data/hipe/")  # path to processed HIPE data
 
 
 def test_publication_metadata_exists():
@@ -53,8 +55,8 @@ def test_original_lwm_data():
     train_metadata = pd.read_csv(path_train_metadata, sep="\t")
     test_metadata = pd.read_csv(path_test_metadata, sep="\t")
     # Assert the size of the metadata files:
-    assert train_metadata.shape[0] == 343
-    assert test_metadata.shape[0] == 112
+    assert train_metadata.shape[0] == 1
+    assert test_metadata.shape[0] == 1    
     assert train_metadata.shape[1] == 10
     assert test_metadata.shape[1] == 10
     # Items in metadata match number of files in directory, for test:
@@ -98,8 +100,8 @@ def test_lwm_ner_conversion_fine():
         dtype={"id": str},
     )
     # Assert size of the train and dev sets:
-    assert df_ner_train.shape == (5216, 3)
-    assert df_ner_dev.shape == (1304, 3)
+    assert df_ner_train.shape == (141, 3)
+    assert df_ner_dev.shape == (41, 3)
     # Assert number of sentences in train and dev (length of list and set should be the same):
     assert (
         len(list(df_ner_train["id"]) + list(df_ner_dev["id"]))
@@ -107,45 +109,11 @@ def test_lwm_ner_conversion_fine():
         == df_ner_train.shape[0] + df_ner_dev.shape[0]
     )
     # Assert ID is read as string:
-    assert type(df_ner_train["id"].iloc[0]) == str
+    assert isinstance(df_ner_train["id"].iloc[0],str)
     # Assert number of unique articles:
     train_articles = [x.split("_")[0] for x in list(df_ner_train["id"])]
     dev_articles = [x.split("_")[0] for x in list(df_ner_dev["id"])]
-    assert len(set(train_articles + dev_articles)) == 343
-
-
-def test_lwm_ner_conversion_coarse():
-    """
-    Test process_lwm_for_ner is not missing articles.
-    """
-    df_ner_train = pd.read_json(
-        os.path.join(f"{processed_path_lwm}", "ner_coarse_train.json"),
-        orient="records",
-        lines=True,
-        dtype={"id": str},
-    )
-    df_ner_dev = pd.read_json(
-        os.path.join(f"{processed_path_lwm}", "ner_coarse_dev.json"),
-        orient="records",
-        lines=True,
-        dtype={"id": str},
-    )
-    # Assert size of the train and dev sets:
-    assert df_ner_train.shape == (5216, 3)
-    assert df_ner_dev.shape == (1304, 3)
-    # Assert number of sentences in train and dev (length of list and set should be the same):
-    assert (
-        len(list(df_ner_train["id"]) + list(df_ner_dev["id"]))
-        == len(set(list(df_ner_train["id"]) + list(df_ner_dev["id"])))
-        == df_ner_train.shape[0] + df_ner_dev.shape[0]
-    )
-    # Assert ID is read as string:
-    assert type(df_ner_train["id"].iloc[0]) == str
-    # Assert number of unique articles:
-    train_articles = [x.split("_")[0] for x in list(df_ner_train["id"])]
-    dev_articles = [x.split("_")[0] for x in list(df_ner_dev["id"])]
-    assert len(set(train_articles + dev_articles)) == 343
-
+    assert len(set(train_articles + dev_articles)) == 11
 
 def test_lwm_linking_conversion():
     """
@@ -156,26 +124,26 @@ def test_lwm_linking_conversion():
         sep="\t",
     )
     # Assert size of the dataset (i.e. number of articles):
-    assert df_linking.shape[0] == 455
+    assert df_linking.shape[0] == 14
     # Assert if place has been filled correctly:
     for x in df_linking.place:
-        assert type(x) == str
+        assert isinstance(x,str)
         assert x != ""
     # Assert if place QID has been filled correctly:
     for x in df_linking.place_wqid:
-        assert type(x) == str
+        assert isinstance(x,str)
         assert x != ""
     for x in df_linking.annotations:
         x = literal_eval(x)
         for ann in x:
             assert ann["wkdt_qid"] == "NIL" or ann["wkdt_qid"].startswith("Q")
-    assert df_linking[df_linking["originalsplit"] == "train"].shape[0] == 229
-    assert df_linking[df_linking["originalsplit"] == "dev"].shape[0] == 114
-    assert df_linking[df_linking["originalsplit"] == "test"].shape[0] == 112
-    assert df_linking[df_linking["withouttest"] == "train"].shape[0] == 153
-    assert df_linking[df_linking["withouttest"] == "dev"].shape[0] == 76
-    assert df_linking[df_linking["withouttest"] == "test"].shape[0] == 114
-    assert df_linking[df_linking["withouttest"] == "left_out"].shape[0] == 112
+    assert df_linking[df_linking["originalsplit"] == "train"].shape[0] == 10
+    assert df_linking[df_linking["originalsplit"] == "dev"].shape[0] == 2
+    assert df_linking[df_linking["originalsplit"] == "test"].shape[0] == 2
+    assert df_linking[df_linking["withouttest"] == "train"].shape[0] == 8
+    assert df_linking[df_linking["withouttest"] == "dev"].shape[0] == 2
+    assert df_linking[df_linking["withouttest"] == "test"].shape[0] == 2
+    assert df_linking[df_linking["withouttest"] == "left_out"].shape[0] == 2
     test_withouttest = set(
         list(df_linking[df_linking["withouttest"] == "test"].article_id)
     )
@@ -185,7 +153,7 @@ def test_lwm_linking_conversion():
     # Test articles of the original split and without test should not overlap:
     assert not (test_withouttest & test_originalsplit)
 
-
+@pytest.mark.skip(reason="Requires HIPE data")
 def test_hipe_linking_conversion():
     """
     Test process_hipe_for_linking is not missing articles.
@@ -211,11 +179,11 @@ def test_hipe_linking_conversion():
     assert not (test_withouttest & test_originalsplit)
     # Assert if place has been filled correctly:
     for x in df_linking.place:
-        assert type(x) == str
+        assert isinstance(x,str)
         assert x != ""
     # Assert if place QID has been filled correctly:
     for x in df_linking.place_wqid:
-        assert type(x) == str
+        assert isinstance(x,str)
         assert x != ""
     # Do HIPE stats match https://github.com/hipe-eval/HIPE-2022-data/blob/main/notebooks/hipe2022-datasets-stats.ipynb
     number_locs = 0
diff --git a/tests/test_deezy.py b/tests/test_deezy.py
new file mode 100644
index 00000000..a72fa630
--- /dev/null
+++ b/tests/test_deezy.py
@@ -0,0 +1,44 @@
+import os
+from pathlib import Path
+
+import pytest
+from DeezyMatch import candidate_ranker
+
+current_dir = Path(__file__).parent.resolve()
+
+@pytest.mark.deezy(reason="Needs deezy model")  # custom "deezy" marker; CI runs `pytest -m "not deezy"`, which deselects this test
+def test_deezy_match_deezy_candidate_ranker(tmp_path):  # calls DeezyMatch's candidate_ranker on the sample resources; tmp_path is pytest's temporary-directory fixture
+    deezy_parameters = {
+        # Paths and filenames of DeezyMatch models and data:
+        "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
+        "dm_cands": "wkdtalts",
+        "dm_model": "w2v_ocr",
+        "dm_output": "deezymatch_on_the_fly",
+        # Ranking measures:
+        "ranking_metric": "faiss",
+        "selection_threshold": 50,
+        "num_candidates": 1,
+        "verbose": False,
+        # DeezyMatch training:
+        "overwrite_training": False,  # not read anywhere in this test
+        "do_test": False,  # not read anywhere in this test
+    }
+
+    dm_path = deezy_parameters["dm_path"]
+    dm_cands = deezy_parameters["dm_cands"]
+    dm_model = deezy_parameters["dm_model"]
+    dm_output = deezy_parameters["dm_output"]
+
+    query = ["-", "ST G", "• - , i", "- P", "• FERRIS"]  # five short, punctuation-heavy query strings
+
+    candidates = candidate_ranker(
+        candidate_scenario=os.path.join(dm_path, "combined", dm_cands + "_" + dm_model),  # i.e. <dm_path>/combined/wkdtalts_w2v_ocr
+        query=query,
+        ranking_metric=deezy_parameters["ranking_metric"],
+        selection_threshold=deezy_parameters["selection_threshold"],
+        num_candidates=deezy_parameters["num_candidates"],
+        search_size=deezy_parameters["num_candidates"],  # NOTE(review): reuses the num_candidates value (1) as search_size — confirm intended
+        verbose=deezy_parameters["verbose"],
+        output_path=os.path.join(tmp_path,dm_output),  # output path is placed under tmp_path
+    )
+    assert len(candidates) == len(query)  # result must have as many entries as there are query strings
diff --git a/tests/test_disambiguation.py b/tests/test_disambiguation.py
index 1a206f2f..f9ab0372 100644
--- a/tests/test_disambiguation.py
+++ b/tests/test_disambiguation.py
@@ -3,22 +3,24 @@
 import sys
 from pathlib import Path
 
+import pytest
 import pandas as pd
+import pytest
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import linking, pipeline, ranking, recogniser
-from utils import rel_utils
-from utils.REL import entity_disambiguation
+from t_res.geoparser import linking, pipeline, ranking, recogniser
+from t_res.utils import rel_utils
+from t_res.utils.REL import entity_disambiguation
 
+current_dir = Path(__file__).parent.resolve()
 
+@pytest.mark.skip(reason="Needs large db file")
 def test_embeddings():
     """
     Test embeddings are loaded correctly.
     """
     # Test 1: Check glove embeddings
     mentions = ["in", "apple"]
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
         cursor = conn.cursor()
         embs = rel_utils.get_db_emb(cursor, mentions, "snd")
         assert len(mentions) == len(embs)
@@ -43,29 +45,18 @@ def test_embeddings():
         embs = rel_utils.get_db_emb(cursor, mentions, "entity")
         assert embs == [None]
 
-
-def test_prepare_initial_data():
-    df = pd.read_csv(
-        "experiments/outputs/data/lwm/linking_df_split.tsv", sep="\t"
-    ).iloc[:1]
-    parsed_doc = rel_utils.prepare_initial_data(df)
-    assert parsed_doc["4939308_1"][0]["mention"] == "STALYBRIDGE"
-    assert parsed_doc["4939308_1"][0]["gold"][0] == "Q1398653"
-    assert parsed_doc["4939308_6"][1]["mention"] == "Market-street"
-    assert parsed_doc["4939308_6"][1]["gold"] == "NIL"
-
-
-def test_train():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_train(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
+        model="ner_test",  # NER model name prefix (will have suffixes appended)
         pipe=None,  # We'll store the NER pipeline here
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        model_path=str(tmp_path),  # Path where the NER model is or will be stored
         training_args={
             "batch_size": 8,
-            "num_train_epochs": 10,
+            "num_train_epochs": 1,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
@@ -76,7 +67,7 @@ def test_train():
 
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -85,13 +76,13 @@ def test_train():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": str(tmp_path),
+            "w2v_ocr_model": "w2v_1800_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -105,16 +96,16 @@ def test_train():
             "do_test": False,
         },
     )
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
-        cursor = conn.cursor()
 
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
+        cursor = conn.cursor()
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "resources/models/disambiguation/",
-                "data_path": "experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm"),
                 "training_split": "originalsplit",
                 "db_embeddings": cursor,
                 "with_publication": False,
@@ -154,18 +145,18 @@ def test_train():
     # assert expected performance on test set
     assert mylinker.rel_params["ed_model"].best_performance["f1"] == 0.6288416075650118
 
-
-def test_load_eval_model():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_load_eval_model(tmp_path):
     myner = recogniser.Recogniser(
         model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
         pipe=None,  # We'll store the NER pipeline here
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        model_path=str(tmp_path),  # Path where the NER model is or will be stored
         training_args={
             "batch_size": 8,
-            "num_train_epochs": 10,
+            "num_train_epochs": 1,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
@@ -176,7 +167,7 @@ def test_load_eval_model():
 
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -185,13 +176,13 @@ def test_load_eval_model():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
+            "w2v_ocr_path": str(tmp_path),
             "w2v_ocr_model": "w2v_*_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -206,18 +197,16 @@ def test_load_eval_model():
         },
     )
 
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
         cursor = conn.cursor()
-
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "resources/models/disambiguation/",
-                "data_path": "experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm"),
                 "training_split": "originalsplit",
-                "topn_candidates": 10,
                 "db_embeddings": cursor,
                 "with_publication": False,
                 "without_microtoponyms": False,
@@ -253,19 +242,19 @@ def test_load_eval_model():
         == entity_disambiguation.EntityDisambiguation
     )
 
-
-def test_predict():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_predict(tmp_path):
     myner = recogniser.Recogniser(
         model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
         pipe=None,  # We'll store the NER pipeline here
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        model_path=str(tmp_path),  # Path where the NER model is or will be stored
         training_args={
             "learning_rate": 5e-5,
             "batch_size": 16,
-            "num_train_epochs": 4,
+            "num_train_epochs": 1,
             "weight_decay": 0.01,
         },
         overwrite_training=False,  # Set to True if you want to overwrite model if existing
@@ -275,7 +264,7 @@ def test_predict():
 
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -284,13 +273,13 @@ def test_predict():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": str(tmp_path),
+            "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -304,23 +293,21 @@ def test_predict():
             "do_test": False,
         },
     )
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
-        cursor = conn.cursor()
 
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
+        cursor = conn.cursor()
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "resources/models/disambiguation/",
-                "data_path": "experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm"),
                 "training_split": "originalsplit",
                 "db_embeddings": cursor,
                 "with_publication": True,
                 "without_microtoponyms": True,
                 "do_test": False,
-                "default_publname": "United Kingdom",
-                "default_publwqid": "Q145",
             },
             overwrite_training=False,
         )
@@ -332,7 +319,7 @@ def test_predict():
         place="London",
         place_wqid="Q84",
     )
-    assert type(predictions) == list
+    assert isinstance(predictions,list)
 
     assert predictions[1]["prediction"] in predictions[1]["cross_cand_score"]
 
diff --git a/tests/test_experiments.py b/tests/test_experiments.py
index 825e29db..87fc6fd2 100644
--- a/tests/test_experiments.py
+++ b/tests/test_experiments.py
@@ -1,23 +1,25 @@
 import os
 import sys
 from ast import literal_eval
+from pathlib import Path
 
 import pandas as pd
 import pytest
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
+# Add the parent directory ("../") to sys.path so the experiments package can be imported
+current_dir = Path(__file__).parent.resolve()
+sys.path.insert(0, os.path.join(current_dir,"../"))
 from experiments import experiment
-from geoparser import linking, ranking, recogniser
 
+from t_res.geoparser import linking, ranking, recogniser
 
-def test_wrong_dataset_path():
+def test_experiments_wrong_dataset_path(tmp_path):
     with pytest.raises(SystemExit) as cm:
         experiment.Experiment(
             dataset="lwm",
             data_path="wrong_path/",
             dataset_df=pd.DataFrame(),
-            results_path="experiments/outputs/results/",
+            results_path=str(tmp_path),
             myner="test",
             myranker="test",
             mylinker="test",
@@ -30,8 +32,8 @@ def test_wrong_dataset_path():
     )
 
 
-def test_load_data():
-    data = pd.read_csv("experiments/outputs/data/lwm/linking_df_split.tsv", sep="\t")
+def test_load_data(tmp_path):
+    data = pd.read_csv(os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/linking_df_split.tsv"), sep="\t")
     ids = set()
 
     for idx, row in data.iterrows():
@@ -41,29 +43,34 @@ def test_load_data():
             ids.add(str(article_id) + "_" + str(sent["sentence_pos"]))
 
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
-        pipe=None,  # We'll store the NER pipeline here
-        base_model="khosseini/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
-        training_args=dict(),
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
+        base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
+        model_path=str(tmp_path),  # Path where the NER model will be stored
+        training_args={
+            "batch_size": 8,
+            "num_train_epochs": 1,
+            "learning_rate": 0.00005,
+            "weight_decay": 0.0,
+        },
         overwrite_training=False,  # Set to True if you want to overwrite model if existing
         do_test=False,  # Set to True if you want to train on test mode
-        load_from_hub=False,
+        load_from_hub=False,  
     )
 
     # Instantiate the ranker:
     myranker = ranking.Ranker(
         method="perfectmatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     # --------------------------------------
     # Instantiate the linker:
     mylinker = linking.Linker(
         method="mostpopular",
-        resources_path="resources/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     myner.train()
@@ -78,9 +85,9 @@ def test_load_data():
     # Instantiate the experiment:
     exp = experiment.Experiment(
         dataset="lwm",
-        data_path="experiments/outputs/data/",
+        data_path=os.path.join(current_dir,"sample_files/experiments/outputs/data/"),
         dataset_df=pd.DataFrame(),
-        results_path="experiments/outputs/results/",
+        results_path=str(tmp_path),
         myner=myner,
         myranker=myranker,
         mylinker=mylinker,
@@ -121,18 +128,18 @@ def test_load_data():
         assert len(not_empty_dMentionsPred) == len(not_empty_dCandidates)
 
 
-def test_wrong_ranker_method():
+def test_wrong_ranker_method(tmp_path):
     ranker = ranking.Ranker(
         # wrong naming: it should be perfectmatch
         method="perfect_match",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     exp = experiment.Experiment(
         dataset="lwm",
-        data_path="experiments/outputs/data/",
+        data_path=os.path.join(current_dir,"sample_files/experiments/outputs/data/"),
         dataset_df=pd.DataFrame(),
-        results_path="experiments/outputs/results/",
+        results_path=str(tmp_path),
         myner="test",
         myranker=ranker,
         mylinker="test",
@@ -142,31 +149,36 @@ def test_wrong_ranker_method():
     assert cm.value.code == 0
 
 
-def test_apply():
+def test_apply(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
-        pipe=None,  # We'll store the NER pipeline here
-        base_model="khosseini/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
-        training_args=dict(),
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
+        base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
+        model_path=str(tmp_path),  # Path where the NER model will be stored
+        training_args={
+            "batch_size": 8,
+            "num_train_epochs": 1,
+            "learning_rate": 0.00005,
+            "weight_decay": 0.0,
+        },
         overwrite_training=False,  # Set to True if you want to overwrite model if existing
         do_test=False,  # Set to True if you want to train on test mode
-        load_from_hub=False,
+        load_from_hub=False,  
     )
 
     # Instantiate the ranker:
     myranker = ranking.Ranker(
         method="perfectmatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     # --------------------------------------
     # Instantiate the linker:
     mylinker = linking.Linker(
         method="mostpopular",
-        resources_path="resources/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     myner.train()
@@ -181,9 +193,9 @@ def test_apply():
     # Instantiate the experiment:
     exp = experiment.Experiment(
         dataset="lwm",
-        data_path="experiments/outputs/data/",
+        data_path=os.path.join(current_dir,"sample_files/experiments/outputs/data/"),
         dataset_df=pd.DataFrame(),
-        results_path="experiments/outputs/results/",
+        results_path=str(tmp_path),
         myner=myner,
         myranker=myranker,
         mylinker=mylinker,
diff --git a/tests/test_linking.py b/tests/test_linking.py
index c233ecc8..30587416 100644
--- a/tests/test_linking.py
+++ b/tests/test_linking.py
@@ -1,33 +1,19 @@
 import os
 import sqlite3
 import sys
+from pathlib import Path
 
 import numpy as np
+import pytest
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import linking
+from t_res.geoparser import linking
 
+current_dir = Path(__file__).parent.resolve()
 
-def test_initialise_method():
-    """
-    Test initialisation works fine
-    """
+def test_linking_most_popular():
     mylinker = linking.Linker(
         method="mostpopular",
-        resources_path="resources/",
-        linking_resources=dict(),
-        rel_params=dict(),
-        overwrite_training=False,
-    )
-
-    assert type(mylinker.__str__()) == str
-
-
-def test_most_popular():
-    mylinker = linking.Linker(
-        method="mostpopular",
-        resources_path="resources/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         linking_resources=dict(),
         rel_params=dict(),
         overwrite_training=False,
@@ -39,8 +25,8 @@ def test_most_popular():
     }
     keep_most_popular, final_score, candidates = mylinker.most_popular(dict_mention)
     assert keep_most_popular == "Q84"
-    assert final_score == 0.9895689976719958
-    assert candidates == {"Q84": 0.9895689976719958, "Q92561": 0.01043100232800422}
+    assert final_score == 0.9812731647051174
+    assert candidates == {"Q84": 0.9812731647051174, "Q92561": 0.018726835294882633}
 
     dict_mention = {"candidates": {}}
     keep_most_popular, final_score, candidates = mylinker.most_popular(dict_mention)
@@ -52,7 +38,7 @@ def test_most_popular():
 def test_by_distance():
     mylinker = linking.Linker(
         method="bydistance",
-        resources_path="resources/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         linking_resources=dict(),
         rel_params=dict(),
         overwrite_training=False,
@@ -60,6 +46,7 @@ def test_by_distance():
 
     mylinker.load_resources()
 
+    # Test that it finds London, UK
     dict_mention = {
         "candidates": {
             "London": {"Candidates": {"Q84": 0.9, "Q92561": 0.1}, "Score": 0.397048}
@@ -71,20 +58,22 @@ def test_by_distance():
     assert final_score == 0.824
     assert "Q84" in resulting_cands
 
+    # Test that it finds London, CA (Q92561)
     dict_mention = {
         "candidates": {
             "London": {"Candidates": {"Q84": 0.9, "Q92561": 0.1}, "Score": 0.397048}
         },
-        "place_wqid": "Q172",
+        "place_wqid": "Q92561",
     }
     pred, final_score, resulting_cands = mylinker.by_distance(dict_mention)
     assert pred == "Q92561"
-    assert final_score == 0.54
+    assert final_score == 0.624
     assert "Q84" in resulting_cands
 
+    # Check that it returns NIL when there are no candidates
     dict_mention = {
         "candidates": {"London": {"Candidates": {}, "Score": 0.397048}},
-        "place_wqid": "Q172",
+        "place_wqid": "Q2365261",
     }
     pred, final_score, resulting_cands = mylinker.by_distance(dict_mention)
     assert pred == "NIL"
diff --git a/tests/test_ner.py b/tests/test_ner.py
index b8a9718c..746e636a 100644
--- a/tests/test_ner.py
+++ b/tests/test_ner.py
@@ -1,190 +1,129 @@
 import os
-import shutil
-import sys
+from pathlib import Path
+import pytest
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-import transformers
+from transformers.pipelines.token_classification import TokenClassificationPipeline
 
-from geoparser import recogniser
-from utils import ner
+from t_res.geoparser import recogniser
+from t_res.utils import ner
 
+current_dir = Path(__file__).parent.resolve()
 
-def test_training():
-    """
-    Test that running train() generates a model folder
-    """
-
-    test_folder_path = "resources/models/blb_lwm-ner-coarse_test.model"
-
-    if os.path.isdir(test_folder_path):
-        shutil.rmtree(test_folder_path)
-
+def test_ner_local_train(tmp_path):
+    model_path = os.path.join(tmp_path,"ner_test.model")
+    
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-coarse",  # NER model name prefix (will have suffixes appended)
-        base_model="Livingwithmachines/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_coarse_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_coarse_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model="ner_test",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        base_model="Livingwithmachines/bert_1760_1900", 
+        model_path=f"{tmp_path}/",
         training_args={
             "batch_size": 8,
             "num_train_epochs": 10,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
-        overwrite_training=True,  # Set to True if you want to overwrite model if existing
-        do_test=True,  # Set to True if you want to train on test mode
+        overwrite_training=False,
+        do_test=False,
         load_from_hub=False,
     )
-    assert os.path.isdir(test_folder_path) == False
+    assert os.path.exists(model_path) is False
     myner.train()
-    assert os.path.isdir(test_folder_path) == True
-
-
-def test_create_pipeline():
-    """
-    Test that create_pipeline returns a model folder path that exists and an Pipeline object
-    """
-    myner = recogniser.Recogniser(
-        model="blb_lwm-ner-coarse",  # NER model name prefix (will have suffixes appended)
-        base_model="Livingwithmachines/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
-        training_args={
-            "batch_size": 8,
-            "num_train_epochs": 10,
-            "learning_rate": 0.00005,
-            "weight_decay": 0.0,
-        },
-        overwrite_training=False,  # Set to True if you want to overwrite model if existing
-        do_test=True,  # Set to True if you want to train on test mode
-        load_from_hub=False,
-    )
-    pipe = myner.create_pipeline()
-    assert (
-        type(pipe)
-        == transformers.pipelines.token_classification.TokenClassificationPipeline
-    )
-
+    print(model_path)
+    print(os.listdir(tmp_path))
+    assert os.path.exists(model_path) is True
 
+@pytest.mark.skip(reason="Needs large model file")
 def test_ner_predict():
+    model_path = os.path.join(current_dir,"sample_files/resources/models/ner_test.model")
+    assert os.path.isdir(model_path) is True
+
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
-        base_model="Livingwithmachines/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model="ner_test",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        base_model="Livingwithmachines/bert_1760_1900", 
+        model_path=os.path.join(current_dir,"sample_files/resources/models/"),
         training_args={
             "batch_size": 8,
             "num_train_epochs": 10,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
-        overwrite_training=False,  # Set to True if you want to overwrite model if existing
-        do_test=False,  # Set to True if you want to train on test mode
-        load_from_hub=False,
+        overwrite_training=False,
+        do_test=False,
+        load_from_hub=False, # Whether the final model should be loaded from the HuggingFace hub
     )
     myner.pipe = myner.create_pipeline()
+    assert isinstance(myner.pipe, TokenClassificationPipeline)
 
-    preds = myner.ner_predict(
-        "I grew up in Bologna, a city near Florence, but way more interesting."
-    )
-    assert type(preds) == list
-    assert (type(preds[0])) == dict
-    assert len(preds) == 16
-    assert preds[4]["entity"] == "B-LOC"
-    assert preds[4]["score"] == 0.9994915723800659
+    sentence = "A remarkable case of rattening has just occurred in the building trade at Sheffield."
+    predictions = myner.ner_predict(sentence)
+    assert isinstance(predictions, list)
+    assert len(predictions) == 15
+    assert predictions[13] == {'entity': 'B-LOC', 'score': 0.7941257357597351, 'word': 'Sheffield', 'start': 74, 'end': 83}
 
     # Test that ner_predict() can handle hyphens
-    preds = myner.ner_predict("- I grew up in Plymouth—Kingston.")
-    assert preds[0]["word"] == "-"
-    assert preds[6]["word"] == ","
+    sentence = "- I grew up in Plymouth—Kingston."
+    predictions = myner.ner_predict(sentence)
+    assert predictions[0]["word"] == "-"
+    assert predictions[6]["word"] == ","
 
 
-def test_ner_load_from_hub():
+def test_ner_from_hub():
     myner = recogniser.Recogniser(
         model="Livingwithmachines/toponym-19thC-en",
         load_from_hub=True,
     )
-    pipe = myner.create_pipeline()
-    assert (
-        type(pipe)
-        == transformers.pipelines.token_classification.TokenClassificationPipeline
-    )
+    myner.train()
+    myner.pipe = myner.create_pipeline()
+    assert isinstance(myner.pipe, TokenClassificationPipeline)
+    
+    sentence = "A remarkable case of rattening has just occurred in the building trade at Sheffield."
+    predictions = myner.ner_predict(sentence)
+    assert isinstance(predictions, list)
+    assert len(predictions) == 15
+    assert predictions[13] == {'entity': 'B-LOC', 'score': 0.9996446371078491, 'word': 'Sheffield', 'start': 74, 'end': 83}
 
 
 def test_aggregate_mentions():
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # NER model name prefix (will have suffixes appended)
-        base_model="Livingwithmachines/bert_1760_1900",  # Base model to fine-tune (from huggingface)
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
-        training_args={
-            "batch_size": 8,
-            "num_train_epochs": 10,
-            "learning_rate": 0.00005,
-            "weight_decay": 0.0,
-        },
-        overwrite_training=False,  # Set to True if you want to overwrite model if existing
-        do_test=False,  # Set to True if you want to train on test mode
-        load_from_hub=False,
+        model="Livingwithmachines/toponym-19thC-en",
+        load_from_hub=True,
     )
     myner.pipe = myner.create_pipeline()
-
+    
     sentence = "I grew up in Bologna, a city near Florence, but way more interesting."
     predictions = myner.ner_predict(sentence)
     # Process predictions:
     procpreds = [
-        [x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
+        [x["word"], x["entity"], "O", x["start"], x["end"]]
         for x in predictions
     ]
     # Aggregate mentions:
     mentions = ner.aggregate_mentions(procpreds, "pred")
-    assert mentions[0]["mention"] == "Bologna"
+    assert len(mentions) == 2
     assert mentions[1]["mention"] == "Florence"
+    assert mentions[0] == {'mention': 'Bologna', 'start_offset': 4, 'end_offset': 4, 'start_char': 13, 'end_char': 20, 'ner_score': 20.0, 'ner_label': 'LOC', 'entity_link': 'O'}
     assert mentions[0]["end_char"] - mentions[0]["start_char"] == len(
         mentions[0]["mention"]
     )
-    assert mentions[1]["end_char"] - mentions[1]["start_char"] == len(
-        mentions[1]["mention"]
-    )
     assert mentions[0]["mention"] in sentence
-    assert mentions[1]["mention"] in sentence
-
-    sentence = "I grew up in New York City, a city in the United States."
-    predictions = myner.ner_predict(sentence)
-    # Process predictions:
-    procpreds = [
-        [x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
-        for x in predictions
-    ]
-    # Aggregate mentions:
-    mentions = ner.aggregate_mentions(procpreds, "pred")
-    assert mentions[0]["mention"] == "New York City"
-    assert mentions[1]["mention"] == "United States"
-    assert mentions[0]["end_char"] - mentions[0]["start_char"] == len(
-        mentions[0]["mention"]
-    )
-    assert mentions[1]["end_char"] - mentions[1]["start_char"] == len(
-        mentions[1]["mention"]
-    )
-    assert mentions[0]["mention"] in sentence
-    assert mentions[1]["mention"] in sentence
 
     sentence = "ARMITAGE, DEM’TIST, may be consulted dally, from 9 a.m., till 8 p.m., at his residence, 95, STAMFORP-9TKEET, Ashton-cnder-Ltne."
     predictions = myner.ner_predict(sentence)
     # Process predictions:
     procpreds = [
-        [x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
+        [x["word"], x["entity"], "O", x["start"], x["end"]]
         for x in predictions
     ]
     # Aggregate mentions:
     mentions = ner.aggregate_mentions(procpreds, "pred")
-    assert mentions[-1]["mention"] == "Ashton-cnder-Ltne"
-    for i in range(len(mentions)):
-        assert mentions[i]["end_char"] - mentions[i]["start_char"] == len(
-            mentions[i]["mention"]
+    assert len(mentions) == 2
+    assert mentions[1]["mention"] == "Ashton-cnder-Ltne"
+    assert mentions[0] == {'mention': 'STAMFORP-9TKEET', 'start_offset': 31, 'end_offset': 33, 'start_char': 92, 'end_char': 107, 'ner_score': 102.667, 'ner_label': 'STREET', 'entity_link': 'O'}
+    assert mentions[0]["end_char"] - mentions[0]["start_char"] == len(
+            mentions[0]["mention"]
         )
-        assert mentions[i]["mention"] in sentence
+    assert mentions[0]["mention"] in sentence
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 19643665..274d7267 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -1,23 +1,57 @@
 import os
 import sqlite3
-import sys
 from pathlib import Path
 
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-from geoparser import linking, pipeline, ranking, recogniser
+import pytest
 
+from t_res.geoparser import linking, pipeline, ranking, recogniser
 
-def test_deezy_mostpopular():
+current_dir = Path(__file__).parent.resolve()
+
+def test_pipeline_basic():
+    geoparser = pipeline.Pipeline(
+        resources_path=os.path.join(current_dir,"sample_files/resources")
+    )
+
+    sentence = "A remarkable case of rattening has just occurred in the building trade at Sheffield."
+    resolved = geoparser.run_text(sentence)
+    assert len(resolved)==1
+    assert resolved[0]["mention"]=="Sheffield"
+    assert resolved[0]["ner_score"]==1.0
+    assert resolved[0]["prediction"]=="Q42448"
+
+def test_pipeline_modular():
+    myranker = ranking.Ranker(
+        method="perfectmatch",
+        resources_path=os.path.join(current_dir,"sample_files/resources"),
+    )
+    
+    mylinker = linking.Linker(
+        method="mostpopular",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
+    )
+
+    geoparser = pipeline.Pipeline(myranker=myranker, mylinker=mylinker)
+    
+    sentence = "A remarkable case of rattening has just occurred in the building trade at Sheffield."
+    resolved = geoparser.run_text(sentence)
+    assert len(resolved)==1
+    assert resolved[0]["mention"]=="Sheffield"
+    assert resolved[0]["ner_score"]==1.0
+    assert resolved[0]["prediction"]=="Q42448"
+
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_deezy_mostpopular(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "batch_size": 8,
-            "num_train_epochs": 10,
+            "num_train_epochs": 1,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
@@ -28,7 +62,7 @@ def test_deezy_mostpopular():
 
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -37,13 +71,13 @@ def test_deezy_mostpopular():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": os.path.join(tmp_path,"resources/models/"),
+            "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -60,14 +94,16 @@ def test_deezy_mostpopular():
 
     mylinker = linking.Linker(
         method="mostpopular",
-        resources_path="resources/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     geoparser = pipeline.Pipeline(myner=myner, myranker=myranker, mylinker=mylinker)
+    assert len(geoparser.myranker.mentions_to_wikidata.keys())>0
 
     resolved = geoparser.run_text(
-        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Lancaster. Not in Nottingham though. Not in Ashton either, nor in Salop!",
+        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Leeds. Not in London though.",
     )
+    assert len(resolved) == 3
     assert resolved[0]["mention"] == "Shefiield"
     assert resolved[0]["prior_cand_score"] == dict()
     assert resolved[0]["cross_cand_score"]["Q42448"] == 0.903
@@ -84,20 +120,19 @@ def test_deezy_mostpopular():
 
     # asserting behaviour with • character
     resolved = geoparser.run_text(
-        " • - ST G pOllO-P• FERRIS - • - , i ",
+        " • - S G pOllO-P• FERRIS - • - , i ",
     )
-
     assert resolved == []
 
-
-def test_deezy_rel_wpubl_wmtops():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_deezy_rel_wpubl_wmtops(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "batch_size": 8,
             "num_train_epochs": 10,
@@ -113,7 +148,7 @@ def test_deezy_rel_wpubl_wmtops():
     # Instantiate the ranker:
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -122,13 +157,13 @@ def test_deezy_rel_wpubl_wmtops():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": str(tmp_path),
+            "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -143,15 +178,15 @@ def test_deezy_rel_wpubl_wmtops():
         },
     )
 
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
         cursor = conn.cursor()
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "resources/models/disambiguation/",
-                "data_path": "experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/"),
                 "training_split": "originalsplit",
                 "db_embeddings": cursor,
                 "with_publication": True,
@@ -166,11 +201,12 @@ def test_deezy_rel_wpubl_wmtops():
     geoparser = pipeline.Pipeline(myner=myner, myranker=myranker, mylinker=mylinker)
 
     resolved = geoparser.run_text(
-        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Lancaster. Not in Nottingham though. Not in Ashton either, nor in Salop!",
+        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Leeds. Not in London though.",
         place="Sheffield",
         place_wqid="Q42448",
     )
 
+    assert len(resolved) == 3
     assert resolved[0]["mention"] == "Shefiield"
     assert resolved[0]["prior_cand_score"]["Q42448"] == 0.891
     assert resolved[0]["cross_cand_score"]["Q42448"] == 0.576
@@ -178,18 +214,18 @@ def test_deezy_rel_wpubl_wmtops():
     assert resolved[0]["ed_score"] == 0.039
     assert resolved[0]["ner_score"] == 1.0
 
-
-def test_perfect_rel_wpubl_wmtops():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_perfect_rel_wpubl_wmtops(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "batch_size": 8,
-            "num_train_epochs": 10,
+            "num_train_epochs": 1,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
@@ -202,7 +238,7 @@ def test_perfect_rel_wpubl_wmtops():
     # Instantiate the ranker:
     myranker = ranking.Ranker(
         method="perfectmatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -211,13 +247,13 @@ def test_perfect_rel_wpubl_wmtops():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": str(tmp_path),
+            "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -232,20 +268,20 @@ def test_perfect_rel_wpubl_wmtops():
         },
     )
 
-    with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
         cursor = conn.cursor()
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "resources/models/disambiguation/",
-                "data_path": "experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/"),
                 "training_split": "originalsplit",
                 "db_embeddings": cursor,
                 "with_publication": True,
                 "without_microtoponyms": True,
-                "do_test": True,
+                "do_test": False,
                 "default_publname": "United Kingdom",
                 "default_publwqid": "Q145",
             },
@@ -255,7 +291,7 @@ def test_perfect_rel_wpubl_wmtops():
     geoparser = pipeline.Pipeline(myner=myner, myranker=myranker, mylinker=mylinker)
 
     resolved = geoparser.run_text(
-        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Lancaster. Not in Nottingham though. Not in Ashton either, nor in Salop!",
+        "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Leeds. Not in London though.",
         place="Sheffield",
         place_wqid="Q42448",
     )
@@ -267,18 +303,18 @@ def test_perfect_rel_wpubl_wmtops():
     assert resolved[0]["ed_score"] == 0.0
     assert resolved[0]["ner_score"] == 1.0
 
-
-def test_modular_deezy_rel():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_modular_deezy_rel(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "batch_size": 8,
-            "num_train_epochs": 10,
+            "num_train_epochs": 1,
             "learning_rate": 0.00005,
             "weight_decay": 0.0,
         },
@@ -287,22 +323,26 @@ def test_modular_deezy_rel():
         load_from_hub=False,  # Bool: True if model is in HuggingFace hub
     )
 
+    # --------------------------------------
+    # Instantiate the ranker:
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="./resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
+        mentions_to_wikidata=dict(),
+        wikidata_to_mentions=dict(),
         strvar_parameters={
             # Parameters to create the string pair dataset:
             "ocr_threshold": 60,
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": str(Path("./resources/models/w2v/").resolve()),
-            "w2v_ocr_model": "w2v_*_news",
+            "w2v_ocr_path": str(tmp_path),
+            "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": str(Path("./resources/deezymatch/").resolve()),
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -317,15 +357,15 @@ def test_modular_deezy_rel():
         },
     )
 
-    with sqlite3.connect("./resources/rel_db/embeddings_database.db") as conn:
+    with sqlite3.connect(os.path.join(current_dir,"sample_files/resources/rel_db/embeddings_database.db")) as conn:
         cursor = conn.cursor()
         mylinker = linking.Linker(
             method="reldisamb",
-            resources_path="./resources/",
+            resources_path=os.path.join(current_dir,"sample_files/resources/"),
             linking_resources=dict(),
             rel_params={
-                "model_path": "./resources/models/disambiguation/",
-                "data_path": "./experiments/outputs/data/lwm/",
+                "model_path": os.path.join(current_dir,"sample_files/resources/models/disambiguation/"),
+                "data_path": os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/"),
                 "training_split": "apply",
                 "db_embeddings": cursor,
                 "with_publication": True,
diff --git a/tests/test_process_data.py b/tests/test_process_data.py
index 671c85ca..9d5fb683 100644
--- a/tests/test_process_data.py
+++ b/tests/test_process_data.py
@@ -1,15 +1,14 @@
 import os
 import sys
+from pathlib import Path
 
 import pandas as pd
 import pytest
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
-
-from geoparser import recogniser
-from utils import process_data
+from t_res.geoparser import recogniser
+from t_res.utils import process_data
 
+current_dir = Path(__file__).parent.resolve()
 
 def test_eval_with_exception():
     # test normal behaviour
@@ -18,10 +17,8 @@ def test_eval_with_exception():
     list_of_dict = process_data.eval_with_exception(str_list_of_dict)
 
     assert list_of_dict != str_list_of_dict
-
-    assert type(list_of_dict) == list
-
-    assert type(list_of_dict[0]) == dict
+    assert isinstance(list_of_dict,list)
+    assert isinstance(list_of_dict[0],dict)
 
     # test that it returns "" if the input is None
 
@@ -32,17 +29,14 @@ def test_eval_with_exception():
 
     # test that it raises an error if the syntax is wrong
     str_list_of_dict = "[{'key_1': 1, 'key_2': 2}"
-    check = False
+
     with pytest.raises(SyntaxError) as cm:
-        check = True
         process_data.eval_with_exception(str_list_of_dict)
 
-    assert check == True
-
 
 def test_prepare_sents():
     dataset_df = pd.read_csv(
-        "experiments/outputs/data/lwm/linking_df_split.tsv",
+        os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/linking_df_split.tsv"),
         sep="\t",
     )
 
@@ -53,7 +47,7 @@ def test_prepare_sents():
 
     dAnnotated, dSentences, dMetadata = process_data.prepare_sents(dataset_df)
 
-    assert dAnnotated["4428937_4"][(26, 41)] == ("LOC", "Bt. Jamess Park", "Q216914")
+    assert dAnnotated["3580760_2"][(0, 6)] == ('LOC', 'LONDON', 'Q84')
 
     test_data = process_data.eval_with_exception(dataset_df["annotations"][0])
     test_data[0]["wkdt_qid"] = "*"
@@ -62,7 +56,7 @@ def test_prepare_sents():
 
     dAnnotated, dSentences, dMetadata = process_data.prepare_sents(dataset_df)
 
-    assert dAnnotated["4428937_4"][(26, 41)] == ("LOC", "Bt. Jamess Park", "Q216914")
+    assert dAnnotated["3580760_2"][(0, 6)] == ('LOC', 'LONDON', 'Q84')
 
     assert len(dAnnotated) == len(dSentences) == len(dMetadata)
 
@@ -70,35 +64,37 @@ def test_prepare_sents():
     assert len([x for x, y in dMetadata.items() if len(y) == 0]) == 0
 
 
-def test_align_gold():
+def test_align_gold(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "learning_rate": 5e-5,
             "batch_size": 16,
-            "num_train_epochs": 4,
+            "num_train_epochs": 1,
             "weight_decay": 0.01,
         },
         overwrite_training=False,  # Set to True if you want to overwrite model if existing
         do_test=False,  # Set to True if you want to train on test mode
+        load_from_hub=False,  # Bool: True if model is in HuggingFace hub
     )
 
+    myner.train()
     myner.pipe = myner.create_pipeline()
 
     dataset_df = pd.read_csv(
-        "experiments/outputs/data/lwm/linking_df_split.tsv",
+        os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/linking_df_split.tsv"),
         sep="\t",
     )
 
     dAnnotated, dSentences, dMetadata = process_data.prepare_sents(dataset_df)
     empty_list = []
     for sent_id in dSentences.keys():
-        if "4935585_1" == sent_id:
+        if "3580760_2" == sent_id:
             sent = dSentences[sent_id]
             annotations = dAnnotated[sent_id]
             predictions = myner.ner_predict(sent)
@@ -125,27 +121,30 @@ def test_align_gold():
     assert len(empty_list) == 0
 
 
-def test_ner_and_process():
+def test_ner_and_process(tmp_path):
     myner = recogniser.Recogniser(
-        model="blb_lwm-ner-fine",  # We'll store the NER model here
-        pipe=None,  # We'll store the NER pipeline here
+        model="blb_lwm-ner-fine",
+        train_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_train.json"),
+        test_dataset=os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/ner_fine_dev.json"),
+        pipe=None,
         base_model="khosseini/bert_1760_1900",  # Base model to fine-tune
-        train_dataset="experiments/outputs/data/lwm/ner_fine_train.json",  # Training set (part of overall training set)
-        test_dataset="experiments/outputs/data/lwm/ner_fine_dev.json",  # Test set (part of overall training set)
-        model_path="resources/models/",  # Path where the NER model is or will be stored
+        model_path=str(tmp_path),  # Path where the NER model will be stored
         training_args={
             "learning_rate": 5e-5,
             "batch_size": 16,
-            "num_train_epochs": 4,
+            "num_train_epochs": 1,
             "weight_decay": 0.01,
         },
         overwrite_training=False,  # Set to True if you want to overwrite model if existing
         do_test=False,  # Set to True if you want to train on test mode
+        load_from_hub=False,  # Bool: True if model is in HuggingFace hub
     )
+
+    myner.train()
     myner.pipe = myner.create_pipeline()
 
     dataset_df = pd.read_csv(
-        "experiments/outputs/data/lwm/linking_df_split.tsv",
+        os.path.join(current_dir,"sample_files/experiments/outputs/data/lwm/linking_df_split.tsv"),
         sep="\t",
     )
 
diff --git a/tests/test_ranking.py b/tests/test_ranking.py
index 5f3dae5e..9becf11c 100644
--- a/tests/test_ranking.py
+++ b/tests/test_ranking.py
@@ -1,39 +1,23 @@
-import json
 import os
-import sys
 from pathlib import Path
 
 import pytest
-from DeezyMatch import candidate_ranker
 
-# Add "../" to path to import utils
-sys.path.insert(0, os.path.abspath(os.path.pardir))
+from t_res.geoparser import ranking
 
-from geoparser import ranking
+current_dir = Path(__file__).parent.resolve()
 
-
-def test_initialise_method():
-    """
-    Test initialisation works fine
-    """
-    myranker = ranking.Ranker(
-        method="perfectmatch",
-        resources_path="resources/wikidata/",
-        mentions_to_wikidata=dict(),
-        wikidata_to_mentions=dict(),
-    )
-    assert type(myranker.__str__()) == str
-
-
-def test_perfect_match():
+def test_ranking_perfect_match():
     """
     Test that perfect_match returns only perfect matching cases
     """
     myranker = ranking.Ranker(
         method="perfectmatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
+    
     myranker.mentions_to_wikidata = myranker.load_resources()
+    myranker.already_collected_cands = {}
     candidates, already_collected_cands = myranker.perfect_match(["London"])
     assert candidates["London"]["London"] == 1.0
 
@@ -44,107 +28,92 @@ def test_perfect_match():
     assert candidates["Paperopoli"] == {}
 
 
-def test_damlev():
+def test_ranking_damlev():
     """
     Test that damlev returns correctly
     """
     myranker = ranking.Ranker(
         method="partialmatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
+
     score = myranker.damlev_dist("Lvndon", {"mentions": "London"})
     assert score == 0.8333333283662796
 
+    score = myranker.damlev_dist("uityity", {"mentions": "asdasd"})
+    assert score == 0.0
+
     with pytest.raises(TypeError):
-        found = True
         myranker.damlev_dist("Lvndon", "London")
-    assert found == True
 
-    assert 0.0 == myranker.damlev_dist("uityity", {"mentions": "asdasd"})
 
-
-def test_check_if_contained():
+def test_ranking_check_if_contained():
     """
     Test that check_if_contained returns score only when there is an overlap
     """
 
     myranker = ranking.Ranker(
         method="partialmatch",
-        resources_path="resources/wikidata/",
-        mentions_to_wikidata=dict(),
-        wikidata_to_mentions=dict(),
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
+    
     score_a = myranker.check_if_contained("New York", {"mentions": "New York City"})
     score_b = myranker.check_if_contained("New York City", {"mentions": "New York"})
-
     assert score_a == score_b == 0.6153846153846154
 
     with pytest.raises(TypeError):
-        found = True
         myranker.check_if_contained("Lvndon", "London")
-    assert found == True
 
-    assert None == myranker.check_if_contained("London", {"mentions": "New York"})
+    score = myranker.check_if_contained("London", {"mentions": "New York"})
+    assert score is None
 
 
-def test_partial_match():
+def test_ranking_partial_match():
     """
     Test that partial match either returns results or {}
     """
 
     myranker = ranking.Ranker(
         method="partialmatch",
-        resources_path="resources/wikidata/",
-        mentions_to_wikidata=dict(),
-        wikidata_to_mentions=dict(),
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
     )
 
     myranker.mentions_to_wikidata = myranker.load_resources()
 
     # Test that perfect_match acts before partial match
     myranker.mentions_to_wikidata = {"London": "Q84"}
-    candidates, already_collected_cands = myranker.partial_match(
-        ["London"], damlev=False
-    )
+    myranker.already_collected_cands = {}
+    candidates, already_collected_cands = myranker.partial_match(["London"], damlev=False)
     assert candidates["London"]["London"] == 1.0
 
     # Test that damlev works
     myranker.already_collected_cands = {}
 
-    candidates, already_collected_cands = myranker.partial_match(
-        ["Lvndvn"], damlev=True
-    )
+    candidates, already_collected_cands = myranker.partial_match(["Lvndvn"], damlev=True)
     assert candidates["Lvndvn"]["London"] == 0.6666666567325592
 
     # Test that overlap works properly
     myranker.mentions_to_wikidata = {"New York City": "Q60"}
     myranker.already_collected_cands = {}
 
-    candidates, already_collected_cands = myranker.partial_match(
-        ["New York"], damlev=False
-    )
+    candidates, already_collected_cands = myranker.partial_match(["New York"], damlev=False)
     assert candidates["New York"]["New York City"] == 0.6153846153846154
 
-    myranker.mentions_to_wikidata = {"New York City": "Q60"}
     myranker.already_collected_cands = {}
 
-    candidates, already_collected_cands = myranker.partial_match(
-        ["Lvndvn"], damlev=False
-    )
+    candidates, already_collected_cands = myranker.partial_match(["Lvndvn"], damlev=False)
     assert candidates["Lvndvn"] == {}
 
     myranker.already_collected_cands = {}
 
-    candidates, already_collected_cands = myranker.partial_match(
-        ["asdasd"], damlev=True
-    )
+    candidates, already_collected_cands = myranker.partial_match(["asdasd"], damlev=True)
     assert candidates["asdasd"] == {"New York City": 0.0}
 
-
-def test_deezy_on_the_fly():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_ranking_deezy_on_the_fly(tmp_path):
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -153,13 +122,13 @@ def test_deezy_on_the_fly():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": "resources/models/",
+            "w2v_ocr_path": str(tmp_path),
             "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": "resources/deezymatch/",
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -171,7 +140,7 @@ def test_deezy_on_the_fly():
             "verbose": False,
             # DeezyMatch training:
             "overwrite_training": False,
-            "do_test": True,
+            "do_test": False,
         },
     )
 
@@ -182,20 +151,14 @@ def test_deezy_on_the_fly():
 
     # Test that deezy works
     myranker.already_collected_cands = {}
+    candidates, already_collected_cands = myranker.deezy_on_the_fly(["Ashton-cnderLyne"])
+    assert (0.0 < candidates["Ashton-cnderLyne"]["Ashton-under-Lyne"] < 1.0)
 
-    candidates, already_collected_cands = myranker.deezy_on_the_fly(
-        ["Ashton-cnderLyne"]
-    )
-    assert (
-        candidates["Ashton-cnderLyne"]["Ashton-under-Lyne"] > 0.0
-        and candidates["Ashton-cnderLyne"]["Ashton-under-Lyne"] < 1.0
-    )
-
-
-def test_find_candidates():
+@pytest.mark.deezy(reason="Needs deezy model")
+def test_ranking_find_candidates(tmp_path):
     myranker = ranking.Ranker(
         method="deezymatch",
-        resources_path="resources/wikidata/",
+        resources_path=os.path.join(current_dir,"sample_files/resources/"),
         mentions_to_wikidata=dict(),
         wikidata_to_mentions=dict(),
         strvar_parameters={
@@ -204,13 +167,13 @@ def test_find_candidates():
             "top_threshold": 85,
             "min_len": 5,
             "max_len": 15,
-            "w2v_ocr_path": "resources/models/",
+            "w2v_ocr_path": str(tmp_path),
             "w2v_ocr_model": "w2v_1800s_news",
             "overwrite_dataset": False,
         },
         deezy_parameters={
             # Paths and filenames of DeezyMatch models and data:
-            "dm_path": "resources/deezymatch/",
+            "dm_path": os.path.join(current_dir,"sample_files/resources/deezymatch/"),
             "dm_cands": "wkdtalts",
             "dm_model": "w2v_ocr",
             "dm_output": "deezymatch_on_the_fly",
@@ -228,22 +191,14 @@ def test_find_candidates():
 
     # Test that perfect_match acts before deezy
     myranker.mentions_to_wikidata = myranker.load_resources()
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "London"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "London"}])
     assert candidates["London"]["London"]["Score"] == 1.0
     assert "Q84" in candidates["London"]["London"]["Candidates"]
 
     # Test that deezy works
     myranker.already_collected_cands = {}
-
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheftield"}]
-    )
-    assert (
-        candidates["Sheftield"]["Sheffield"]["Score"] > 0.0
-        and candidates["Sheftield"]["Sheffield"]["Score"] < 1.0
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheftield"}])
+    assert (0.0 < candidates["Sheftield"]["Sheffield"]["Score"] < 1.0)
     assert "Q42448" in candidates["Sheftield"]["Sheffield"]["Candidates"]
 
     # Test that Perfect Match works
@@ -251,17 +206,12 @@ def test_find_candidates():
 
     # Test that perfect_match acts before deezy
     myranker.mentions_to_wikidata = myranker.load_resources()
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheffield"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheffield"}])
     assert candidates["Sheffield"]["Sheffield"]["Score"] == 1.0
     assert "Q42448" in candidates["Sheffield"]["Sheffield"]["Candidates"]
 
     myranker.already_collected_cands = {}
-
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheftield"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheftield"}])
     assert candidates["Sheftield"] == {}
 
     # Test that check if contained works
@@ -270,17 +220,13 @@ def test_find_candidates():
     # Test that perfect_match acts before partialmatch
     myranker.mentions_to_wikidata = myranker.load_resources()
 
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheffield"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheffield"}])
     assert candidates["Sheffield"]["Sheffield"]["Score"] == 1.0
     assert "Q42448" in candidates["Sheffield"]["Sheffield"]["Candidates"]
 
     myranker.already_collected_cands = {}
 
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheftield"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheftield"}])
     assert "Sheffield" not in candidates["Sheftield"]
 
     # Test that levenshtein works
@@ -289,62 +235,11 @@ def test_find_candidates():
     # Test that perfect_match acts before partialmatch
     myranker.mentions_to_wikidata = myranker.load_resources()
 
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheffield"}]
-    )
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheffield"}])
     assert candidates["Sheffield"]["Sheffield"]["Score"] == 1.0
     assert "Q42448" in candidates["Sheffield"]["Sheffield"]["Candidates"]
 
     myranker.already_collected_cands = {}
-
-    candidates, already_collected_cands = myranker.find_candidates(
-        [{"mention": "Sheftield"}]
-    )
-    assert (
-        candidates["Sheftield"]["Sheffield"]["Score"] > 0.0
-        and candidates["Sheftield"]["Sheffield"]["Score"] < 1.0
-    )
-    assert "Q42448" in candidates["Sheftield"]["Sheffield"]["Candidates"]
-
-
-def test_deezy_candidate_ranker():
-    deezy_parameters = {
-        # Paths and filenames of DeezyMatch models and data:
-        "dm_path": str(Path("resources/deezymatch/").resolve()),
-        "dm_cands": "wkdtalts",
-        "dm_model": "w2v_ocr",
-        "dm_output": "deezymatch_on_the_fly",
-        # Ranking measures:
-        "ranking_metric": "faiss",
-        "selection_threshold": 50,
-        "num_candidates": 1,
-        "verbose": False,
-        # DeezyMatch training:
-        "overwrite_training": False,
-        "do_test": False,
-    }
-
-    dm_path = deezy_parameters["dm_path"]
-    dm_cands = deezy_parameters["dm_cands"]
-    dm_model = deezy_parameters["dm_model"]
-    dm_output = deezy_parameters["dm_output"]
-
-    query = ["-", "ST G", "• - , i", "- P", "• FERRIS"]
-
-    candidates = candidate_ranker(
-        candidate_scenario=os.path.join(dm_path, "combined", dm_cands + "_" + dm_model),
-        query=query,
-        ranking_metric=deezy_parameters["ranking_metric"],
-        selection_threshold=deezy_parameters["selection_threshold"],
-        num_candidates=deezy_parameters["num_candidates"],
-        search_size=deezy_parameters["num_candidates"],
-        verbose=deezy_parameters["verbose"],
-        output_path=os.path.join(dm_path, "ranking", dm_output),
-        pretrained_model_path=os.path.join(
-            f"{dm_path}", "models", f"{dm_model}", f"{dm_model}" + ".model"
-        ),
-        pretrained_vocab_path=os.path.join(
-            f"{dm_path}", "models", f"{dm_model}", f"{dm_model}" + ".vocab"
-        ),
-    )
-    assert len(candidates) == len(query)
+    candidates, already_collected_cands = myranker.find_candidates([{"mention": "Sheftield"}])
+    assert (0.0 < candidates["Sheftield"]["Sheffield"]["Score"] < 1.0)
+    assert "Q42448" in candidates["Sheftield"]["Sheffield"]["Candidates"]
\ No newline at end of file
diff --git a/tests/test_wiki_functions.py b/tests/test_wiki_functions.py
index 74948c40..efc96510 100644
--- a/tests/test_wiki_functions.py
+++ b/tests/test_wiki_functions.py
@@ -1,6 +1,8 @@
 import urllib
 
-from utils import process_wikipedia
+import pytest
+
+from t_res.utils import process_wikipedia
 
 
 def test_make_links_consistent():
@@ -15,7 +17,7 @@ def test_make_links_consistent():
     assert (process_wikipedia.make_wikilinks_consistent(string_a) == string_a) is False
     assert process_wikipedia.make_wikilinks_consistent(string_c) == "new%20york"
 
-
+@pytest.mark.skip(reason="Needs large db file")
 def test_wikidata2wikipedia():
     db = "resources/wikipedia/index_enwiki-latest.db"
     assert process_wikipedia.title_to_id("BOLOGNA", lower=True, path_to_db=db) == None