From 0083920e84483ab18b4b2f9ecf736e57d75e6944 Mon Sep 17 00:00:00 2001
From: p-goulart
Date: Fri, 2 Feb 2024 14:15:16 +0100
Subject: [PATCH] Sundry fixes

- custom install version env var must be formed from language;
- frequency info added to POS dict compilation.
---
Review notes (everything between the `---` line above and the first
`diff --git` line is not part of the commit message):

- Layout: this patch was restored from a copy whose whitespace had been
  collapsed. Added/removed lines and hunk line counts are unchanged, but the
  indentation of every line was reconstructed from Python syntax. If a hunk
  fails to apply on context, retry with `--ignore-whitespace` and compare
  against the target file.
- lib/utils.py: the untouched `else` branch of install_dictionaries() reads
  env['PT_DICT_VERSION'] immediately after `env: dict = {}`, so it raises
  KeyError whenever custom_version is None. It needs a follow-up fix.
- scripts/build_tagger_dicts.py: main() now always passes a tuple, so
  `custom_version is not None` is always true for this caller and the `else`
  branch above is never taken from here. NOTE(review): if
  CUSTOM_INSTALL_VERSION can be None (its definition is not part of this
  patch -- TODO confirm), the environment variable is set to None instead of
  falling back to the environment-defined version.

 lib/languagetool_utils.py     | 1 +
 lib/utils.py                  | 6 +++---
 lib/variant.py                | 6 +++++-
 scripts/build_tagger_dicts.py | 4 +++-
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/lib/languagetool_utils.py b/lib/languagetool_utils.py
index 1462aac..93aa088 100644
--- a/lib/languagetool_utils.py
+++ b/lib/languagetool_utils.py
@@ -96,6 +96,7 @@ def build_pos_binary(self) -> None:
             f"org.languagetool.tools.POSDictionaryBuilder "
             f"-i {gd.DIRS.RESULT_POS_DICT_FILEPATH} "
             f"-info {self.variant.pos_info_java_input_path()} "
+            f"-freq {self.variant.freq()} "
             f"-o {self.variant.pos_dict_java_output_path()}"
         )
         ShellCommand(cmd_build).run()
diff --git a/lib/utils.py b/lib/utils.py
index 8223427..6bfca80 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -24,13 +24,13 @@ def compile_lt():
     ShellCommand("mvn clean install -DskipTests", cwd=gd.DIRS.LT_DIR).run()
 
 
-def install_dictionaries(custom_version: Optional[str]):
+def install_dictionaries(custom_version: Optional[tuple[str, str]]):
     """Install our dictionaries to the local ~/.m2."""
     LOGGER.info("Installing dictionaries...")
     env: dict = {}
     if custom_version is not None:
-        LOGGER.info(f"Installing custom version \"{custom_version}\"")
-        env['PT_DICT_VERSION'] = custom_version
+        LOGGER.info(f"Installing custom version \"{custom_version[1]}\"")
+        env[custom_version[0]] = custom_version[1]
     else:
         LOGGER.info(f"Installing environment-defined version \"{env['PT_DICT_VERSION']}\"")
     ShellCommand("mvn clean install", env=env, cwd=gd.DIRS.JAVA_RESULTS_DIR).run()
diff --git a/lib/variant.py b/lib/variant.py
index cdb6e87..17d6f93 100644
--- a/lib/variant.py
+++ b/lib/variant.py
@@ -57,7 +57,11 @@ def compounds(self) -> str:
         return path.join(gd.DIRS.COMPOUNDS_DIR, f"{self.underscored}.dic")
 
     def freq(self) -> str:
-        return path.join(gd.DIRS.SPELLING_DICT_DIR, f"{self.lang}_{self.country}_wordlist.xml")
+        if self.country:
+            filename = f"{self.lang}_{self.country}_wordlist.xml"
+        else:
+            filename = f"{self.lang}_wordlist.xml"
+        return path.join(gd.DIRS.SPELLING_DICT_DIR, filename)
 
     def java_output_dir(self) -> str:
         return path.join(gd.DIRS.JAVA_RESULTS_DIR, "src/main/resources/org/languagetool/resource", self.lang)
diff --git a/scripts/build_tagger_dicts.py b/scripts/build_tagger_dicts.py
index 631407c..5ad2fc9 100644
--- a/scripts/build_tagger_dicts.py
+++ b/scripts/build_tagger_dicts.py
@@ -69,7 +69,9 @@ def main():
     lt.build_pos_binary()
     lt.build_synth_binary()
     if FORCE_INSTALL:
-        install_dictionaries(custom_version=CUSTOM_INSTALL_VERSION)
+        custom_install_env_var_name = LANGUAGE.lang.upper() + "_DICT_VERSION"
+        custom_version: tuple[str, str] = (custom_install_env_var_name, CUSTOM_INSTALL_VERSION)
+        install_dictionaries(custom_version)
     if LOGGER.level == 10:  # DEBUG
         lt.dump_dictionary()
     end_time = datetime.now()