diff --git a/geoparser/ranking.py b/geoparser/ranking.py index 53820b91..63bdd3b6 100644 --- a/geoparser/ranking.py +++ b/geoparser/ranking.py @@ -275,13 +275,11 @@ def train(self) -> None: if self.deezy_parameters["do_test"] == True: self.deezy_parameters["dm_model"] += "_test" self.deezy_parameters["dm_cands"] += "_test" - deezy_processing.create_training_set( - self.deezy_parameters, self.strvar_parameters, self.wikidata_to_mentions - ) - deezy_processing.train_deezy_model(self.deezy_parameters) + deezy_processing.train_deezy_model(self.deezy_parameters, self.strvar_parameters, self.wikidata_to_mentions) deezy_processing.generate_candidates( self.deezy_parameters, self.mentions_to_wikidata ) + # This dictionary is not used anymore: self.wikidata_to_mentions = dict() diff --git a/utils/deezy_processing.py b/utils/deezy_processing.py index fbf481de..0da3d10a 100644 --- a/utils/deezy_processing.py +++ b/utils/deezy_processing.py @@ -253,7 +253,7 @@ def create_training_set( fw.write(pm) -def train_deezy_model(deezy_parameters: dict) -> None: +def train_deezy_model(deezy_parameters: dict, strvar_parameters: dict, wikidata_to_mentions: dict) -> None: """ Train a DeezyMatch model using the provided ``myranker`` parameters and input files. @@ -300,6 +300,9 @@ def train_deezy_model(deezy_parameters: dict) -> None: ) ).exists() ): + # Create the training set (if it already exists, it will be skipped): + create_training_set(deezy_parameters, strvar_parameters, wikidata_to_mentions) + # Training a DeezyMatch model dm_train( input_file_path=input_file_path,