diff --git a/legal_tools/management/commands/publish.py b/legal_tools/management/commands/publish.py index 3c06388c..4163f9d6 100644 --- a/legal_tools/management/commands/publish.py +++ b/legal_tools/management/commands/publish.py @@ -3,6 +3,7 @@ import os import socket from argparse import SUPPRESS, ArgumentParser +from copy import copy from multiprocessing import Pool from pathlib import Path from pprint import pprint @@ -28,7 +29,9 @@ save_bytes_to_file, save_redirect, save_url_as_static_file, + update_title, ) +from legal_tools.views import render_redirect ALL_TRANSLATION_BRANCHES = "###all###" LOG = logging.getLogger(__name__) @@ -114,7 +117,14 @@ def save_legal_code(output_dir, legal_code, opt_apache_only): for symlink in symlinks: wrap_relative_symlink(output_dir, relpath, symlink) for redirect_data in redirects_data: - save_redirect(output_dir, redirect_data) + redirect_content = render_redirect( + title=redirect_data["title"], + destination=redirect_data["destination"], + language_code=redirect_data["language_code"], + ) + save_redirect( + output_dir, redirect_data["redirect_file"], redirect_content + ) return legal_code.get_redirect_pairs() @@ -231,6 +241,18 @@ def add_arguments(self, parser: ArgumentParser): dest="apache_only", ) + def check_titles(self): + LOG.info("Checking legal code titles") + log_level = copy(LOG.level) + LOG.setLevel(LOG_LEVELS[0]) + results = update_title(options={"dryrun": True}) + LOG.setLevel(log_level) + if results["records_requiring_update"] > 0: + raise CommandError( + "Legal code titles require an update. See the `update_title`" + " command." 
+ ) + def purge_output_dir(self): if self.options["apache_only"] or self.options["rdf_only"]: return @@ -639,6 +661,7 @@ def distill_metadata_yaml(self): ) def distill_and_copy(self): + self.check_titles() self.purge_output_dir() self.call_collectstatic() self.write_robots_txt() diff --git a/legal_tools/management/commands/update_title.py b/legal_tools/management/commands/update_title.py new file mode 100644 index 00000000..f8492e39 --- /dev/null +++ b/legal_tools/management/commands/update_title.py @@ -0,0 +1,44 @@ +# Standard library +import logging +from argparse import ArgumentParser + +# Third-party +from django.core.management import BaseCommand + +# First-party/Local +from legal_tools.utils import init_utils_logger, update_title + +LOG = logging.getLogger(__name__) +LOG_LEVELS = { + 0: logging.ERROR, + 1: logging.WARNING, + 2: logging.INFO, + 3: logging.DEBUG, +} + + +class Command(BaseCommand): + """ + Update the title property of all legal tools by normalizing legacy titles + and normalizing translated titles for current legal tools (Licenses 4.0 and + CC0 1.0). 
+ """ + + def add_arguments(self, parser: ArgumentParser): + # Python defaults to lowercase starting character for the first + # character of help text, but Django appears to use uppercase and so + # shall we + parser.description = self.__doc__ + parser._optionals.title = "Django optional arguments" + parser.add_argument( + "-n", + "--dryrun", + action="store_true", + help="dry run: do not make any changes", + ) + + def handle(self, **options): + self.options = options + LOG.setLevel(LOG_LEVELS[int(options["verbosity"])]) + init_utils_logger(LOG) + update_title(options) diff --git a/legal_tools/tests/test_utils.py b/legal_tools/tests/test_utils.py index 2638a079..2d84dc83 100644 --- a/legal_tools/tests/test_utils.py +++ b/legal_tools/tests/test_utils.py @@ -149,27 +149,12 @@ def test_relative_symlink(self): def test_save_redirect(self): output_dir = "/OUTPUT_DIR" - redirect_data = { - "destination": "DESTINATION", - "language_code": "LANGUAGE_CODE", - "redirect_file": ("FILE_PATH"), - "title": "TITLE", - } - - with mock.patch( - "legal_tools.utils.render_redirect", - return_value="STRING", - ) as mock_render: - with mock.patch( - "legal_tools.utils.save_bytes_to_file" - ) as mock_save: - utils.save_redirect(output_dir, redirect_data) - - mock_render.assert_called_with( - title="TITLE", - destination="DESTINATION", - language_code="LANGUAGE_CODE", - ) + redirect_file = "FILE_PATH" + redirect_content = "STRING" + + with mock.patch("legal_tools.utils.save_bytes_to_file") as mock_save: + utils.save_redirect(output_dir, redirect_file, redirect_content) + mock_save.assert_called_with("STRING", "/OUTPUT_DIR/FILE_PATH") @@ -567,3 +552,79 @@ def validate_udpate_source(): # Subsequent run to test with wrong data and verify behavior of # repeated runs validate_udpate_source() + + +class TitleTest(TestCase): + def setup(self): + for version in ("1.0", "4.0"): + ToolFactory(category="licenses", unit="by", version=version) + for tool in Tool.objects.all(): + 
LegalCodeFactory(tool=tool, language_code="fr") + title_en = utils.get_tool_title_en( + tool.unit, + tool.version, + tool.category, + tool.jurisdiction_code, + ) + LegalCodeFactory(tool=tool, title=title_en, language_code="en") + LegalCodeFactory(tool=tool, title=title_en, language_code="nl") + if tool.version == "1.0": + LegalCodeFactory( + tool=tool, + title="Namensnennung 1.0 Generic", + language_code="de", + ) + elif tool.version == "4.0": + LegalCodeFactory( + tool=tool, + title="Namensnennung 4.0 International", + language_code="de", + ) + + def test_get_tool_title(self): + self.setup() + unit = "by" + category = "licenses" + jurisdiction = "" + titles = {} + + with self.assertNumQueries(6): + for version in ("1.0", "4.0"): + for language_code in ("de", "en", "fr", "nl"): + title = utils.get_tool_title( + unit=unit, + version=version, + category=category, + jurisdiction=jurisdiction, + language_code=language_code, + ) + titles[f"{version}{language_code}"] = title + + self.assertEqual("Namensnennung 1.0 Generic", titles["1.0de"]) + self.assertEqual("Attribution 1.0 Generic", titles["1.0en"]) + self.assertEqual("Attribution 1.0 Générique", titles["1.0fr"]) + self.assertEqual("Naamsvermelding 1.0 Unported", titles["1.0nl"]) + self.assertEqual("Namensnennung 4.0 International", titles["4.0de"]) + self.assertEqual("Attribution 4.0 International", titles["4.0en"]) + self.assertEqual("Attribution 4.0 International", titles["4.0fr"]) + self.assertEqual("Naamsvermelding 4.0 Internationaal", titles["4.0nl"]) + + def test_update_titles_dryrun(self): + self.setup() + + with self.assertNumQueries(9): + results = utils.update_title({"dryrun": True}) + + self.assertEqual( + {"records_updated": 0, "records_requiring_update": 4}, results + ) + + def test_update_titles_with_updates(self): + self.setup() + + with self.assertNumQueries(13): + results = utils.update_title({"dryrun": False}) + + self.assertEqual( + {"records_updated": 4, "records_requiring_update": 0}, results + ) 
diff --git a/legal_tools/utils.py b/legal_tools/utils.py index 749205b9..2053a370 100644 --- a/legal_tools/utils.py +++ b/legal_tools/utils.py @@ -5,16 +5,22 @@ # Third-party from bs4 import NavigableString +from colorlog.escape_codes import escape_codes from django.conf import settings +from django.core.cache import cache from django.urls import get_resolver +from django.utils import translation # First-party/Local import legal_tools.models +from i18n import UNIT_NAMES from i18n.utils import ( + active_translation, get_default_language_for_jurisdiction_naive, + get_jurisdiction_name, + get_translation_object, map_legacy_to_django_language_code, ) -from legal_tools.views import render_redirect LOG = logging.getLogger(__name__) @@ -83,18 +89,12 @@ def relative_symlink(src1, src2, dst): os.close(dir_fd) -def save_redirect(output_dir, redirect_data): - relpath = redirect_data["redirect_file"] - content = render_redirect( - title=redirect_data["title"], - destination=redirect_data["destination"], - language_code=redirect_data["language_code"], - ) - path, filename = os.path.split(relpath) +def save_redirect(output_dir, redirect_file, redirect_content): + path, filename = os.path.split(redirect_file) padding = " " * (len(os.path.dirname(path)) + 8) LOG.debug(f"{padding}*{filename}") - output_filename = os.path.join(output_dir, relpath) - save_bytes_to_file(content, output_filename) + output_filename = os.path.join(output_dir, redirect_file) + save_bytes_to_file(redirect_content, output_filename) def parse_legal_code_filename(filename): @@ -288,6 +288,80 @@ def clean_string(s): return s +def get_tool_title(unit, version, category, jurisdiction, language_code): + """ + Determine tool title: + 1. If English, use English + 2. Attempt to pull translated title from DB + 3. 
Translate title using Deeds & UX translation domain + """ + prefix = f"{unit}-{version}-{jurisdiction}-{language_code}-" + tool_title = cache.get(f"{prefix}title", "") + if tool_title: + return tool_title + + # English is easy given it is the default + tool_title_en = get_tool_title_en(unit, version, category, jurisdiction) + if language_code == "en": + tool_title = tool_title_en # already applied clean_string() + cache.add(f"{prefix}title", tool_title) + return tool_title + + # Use the legal code title, if it exists + try: + legal_code = legal_tools.models.LegalCode.objects.get( + tool__category=category, + tool__version=version, + tool__unit=unit, + tool__jurisdiction_code=jurisdiction, + language_code=language_code, + ) + except legal_tools.models.LegalCode.DoesNotExist: + legal_code = False + if legal_code: + tool_title_db = clean_string(legal_code.title) + if tool_title_db and tool_title_db != tool_title_en: + tool_title = tool_title_db + cache.add(f"{prefix}title", tool_title) + return tool_title + + # Translate title using Deeds & UX translation domain + with translation.override(language_code): + tool_name = UNIT_NAMES.get(unit, "UNIMPLEMENTED") + jurisdiction_name = get_jurisdiction_name( + category, unit, version, jurisdiction + ) + tool_title = clean_string(f"{tool_name} {version} {jurisdiction_name}") + + cache.add(f"{prefix}title", tool_title) + return tool_title + + +def get_tool_title_en(unit, version, category, jurisdiction): + prefix = f"{unit}-{version}-{jurisdiction}-en-" + tool_title_en = cache.get(f"{prefix}title", "") + if tool_title_en: + return tool_title_en + + # Retrieve title parts untranslated (English) + with translation.override(None): + tool_name = str(UNIT_NAMES.get(unit, "UNIMPLEMENTED")) + jurisdiction_name = str( + get_jurisdiction_name(category, unit, version, jurisdiction) + ) + # Licenses before 4.0 use "NoDerivs" instead of "NoDerivatives" + if version not in ("1.0", "2.0", "2.1", "2.5", "3.0"): + tool_name = 
tool_name.replace("NoDerivs", "NoDerivatives") + tool_title_en = f"{tool_name} {version} {jurisdiction_name}" + tool_title_en = tool_title_en.replace( + " Intergovernmental Organization", " IGO" + ) + tool_title_en = clean_string(tool_title_en) + + cache.add(f"{prefix}title", tool_title_en) + return tool_title_en + + def update_is_replaced_by(): """ Update the is_replaced_by property of all licenses by doing simple unit @@ -383,3 +457,108 @@ def update_source(): LOG.info(f"Remove {tool.resource_name} source: '{tool.source}'") tool.source = None tool.save() + + +def update_title(options): + """ + Update the title property of all legal tools by normalizing legacy titles + and normalizing translated titles for current legal tools (Licenses 4.0 and + CC0 1.0). + """ + bold = escape_codes["bold"] + green = escape_codes["green"] + red = escape_codes["red"] + reset = escape_codes["reset"] + pad = " " * 14 + + results = {"records_updated": 0, "records_requiring_update": 0} + if options["dryrun"]: + message = "requires update (dryrun)" + else: + message = "changed" + + LOG.info("Updating legal code object titles in database") + legal_code_objects = legal_tools.models.LegalCode.objects.all() + for legal_code in legal_code_objects: + tool = legal_code.tool + category = tool.category + version = tool.version + unit = tool.unit + jurisdiction = tool.jurisdiction_code + language_code = legal_code.language_code + language_name = translation.get_language_info(language_code)["name"] + full_identifier = f"{bold}{tool.identifier()} {language_name}{reset}" + old_title = legal_code.title + new_title = None + + # English is easy given it is the default + tool_title_en = get_tool_title_en( + unit, version, category, jurisdiction + ) + if language_code == "en": + new_title = tool_title_en # already applied clean_string() + else: + if ( + category == "licenses" + and version in ("1.0", "2.0", "2.1", "2.5", "3.0") + ) and unit != "zero": + # Query database for title extracted from legacy 
HTML and clean + # it + new_title_db = clean_string(old_title) + if new_title_db and new_title_db != tool_title_en: + new_title = new_title_db + else: + # Translate title using legal code translation domain for legal + # code that is in Transifex (ex. CC0, Licenses 4.0) + slug = f"{unit}_{version}".replace(".", "") + language_default = get_default_language_for_jurisdiction_naive( + jurisdiction + ) + current_translation = get_translation_object( + slug, language_code, language_default + ) + tool_title_lc = "" + with active_translation(current_translation): + tool_title_lc = clean_string( + translation.gettext(tool_title_en) + ) + # Only use legal code translation domain version if translation + # was successful (does not match English). There are deed + # translations in languages for which we do not yet have legal + # code translations. + if tool_title_lc != tool_title_en: + new_title = tool_title_lc + if not new_title: + # Translate title using Deeds & UX translation domain + with translation.override(language_code): + tool_name = UNIT_NAMES.get(unit, "UNIMPLEMENTED") + jurisdiction_name = get_jurisdiction_name( + category, unit, version, jurisdiction + ) + new_title = clean_string( + f"{tool_name} {version} {jurisdiction_name}" + ) + + if old_title == new_title: + LOG.debug(f'{full_identifier} title unchanged: "{old_title}"') + else: + if options["dryrun"]: + results["records_requiring_update"] += 1 + else: + legal_code.title = new_title + legal_code.save() + results["records_updated"] += 1 + LOG.info( + f"{full_identifier} title {message}:" + f'\n{pad}{red}- "{reset}{old_title}{red}"{reset}' + f'\n{pad}{green}+ "{reset}{new_title}{green}"{reset}' + ) + + if options["dryrun"]: + count = results["records_requiring_update"] + LOG.info(f"legal code object titles requiring an update: {count}") + else: + count = results["records_updated"] + LOG.info(f"legal code object titles updated: {count}") + + return results diff --git a/legal_tools/views.py 
b/legal_tools/views.py index 2ce57bc5..0c9e8a03 100644 --- a/legal_tools/views.py +++ b/legal_tools/views.py @@ -17,13 +17,11 @@ from django.utils import translation # First-party/Local -from i18n import UNIT_NAMES from i18n.utils import ( active_translation, get_default_language_for_jurisdiction_deed, get_default_language_for_jurisdiction_naive, get_jurisdiction_name, - get_translation_object, load_deeds_ux_translations, map_django_to_transifex_language_code, ) @@ -38,6 +36,7 @@ generate_legal_code_rdf, order_rdf_xml, ) +from legal_tools.utils import get_tool_title NUM_COMMITS = 3 PLAIN_TEXT_TOOL_IDENTIFIERS = [ @@ -85,76 +84,6 @@ def get_category_and_category_title(category=None, tool=None): return category, category_title -def get_tool_title_en(unit, version, category, jurisdiction): - prefix = f"{unit}-{version}-{jurisdiction}-en-" - tool_title_en = cache.get(f"{prefix}title", "") - if tool_title_en: - return tool_title_en - - # Retrieve title parts untranslated (English) - with translation.override(None): - tool_name = str(UNIT_NAMES.get(unit, "UNIMPLEMENTED")) - jurisdiction_name = str( - get_jurisdiction_name(category, unit, version, jurisdiction) - ) - # Licenses before 4.0 use "NoDerivs" instead of "NoDerivatives" - if version not in ("1.0", "2.0", "2.1", "2.5", "3.0"): - tool_name = tool_name.replace("NoDerivs", "NoDerivatives") - tool_title_en = f"{tool_name} {version} {jurisdiction_name}".strip() - - cache.add(f"{prefix}title", tool_title_en) - return tool_title_en - - -def get_tool_title(unit, version, category, jurisdiction, language_code): - prefix = f"{unit}-{version}-{jurisdiction}-{language_code}-" - tool_title = cache.get(f"{prefix}title", "") - if tool_title: - return tool_title - - # English is easy given it is the default - tool_title_en = get_tool_title_en(unit, version, category, jurisdiction) - if language_code == "en": - tool_title = tool_title_en - cache.add(f"{prefix}title", tool_title) - return tool_title - - # Translate title using 
legal code translation domain for legal code that - # is in Transifex (ex. CC0, Licenses 4.0) - if ( - category == "licenses" - and version not in ("1.0", "2.0", "2.1", "2.5", "3.0") - ) or unit == "zero": - slug = f"{unit}_{version}".replace(".", "") - language_default = get_default_language_for_jurisdiction_naive( - jurisdiction - ) - current_translation = get_translation_object( - slug, language_code, language_default - ) - tool_title_lc = "" - with active_translation(current_translation): - tool_title_lc = translation.gettext(tool_title_en) - # Only use legal code translation domain version if translation - # was successful (does not match English). There are deed translations - # in languages for which we do not yet have legal code translations. - if tool_title_lc != tool_title_en: - tool_title = tool_title_lc - cache.add(f"{prefix}title", tool_title) - return tool_title - - # Translate title using Deeds & UX translation domain - with translation.override(language_code): - tool_name = UNIT_NAMES.get(unit, "UNIMPLEMENTED") - jurisdiction_name = get_jurisdiction_name( - category, unit, version, jurisdiction - ) - tool_title = f"{tool_name} {version} {jurisdiction_name}" - - cache.add(f"{prefix}title", tool_title) - return tool_title - - def get_languages_and_links_for_deeds_ux(request_path, selected_language_code): languages_and_links = []