diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000000..e986ad46646 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,130 @@ +# Run pull request tests and validation for openedx-translations + +name: CI + +on: + - pull_request + +jobs: + # Run unit and integration tests for Python + tests: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + statuses: write + steps: + - name: clone openedx/openedx-translations + uses: actions/checkout@v3 + + - name: Install gettext + run: sudo apt install -y gettext + + - name: setup python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Run python tests + run: | + make test_requirements + make test + + - name: Allow editing translation files for bot pull requests + env: + # secrets can't be used in job conditionals, so we set them to env here + TRANSIFEX_APP_ACTOR_NAME: "${{ secrets.TRANSIFEX_APP_ACTOR_NAME }}" + TRANSIFEX_APP_ACTOR_ID: "${{ secrets.TRANSIFEX_APP_ACTOR_ID }}" + if: "${{ github.actor == env.TRANSIFEX_APP_ACTOR_NAME && github.actor_id == env.TRANSIFEX_APP_ACTOR_ID }}" + run: | + echo "VALIDATION_OPTIONS=--mark-fuzzy" >> "$GITHUB_ENV" + + - name: Validate translation files + id: validate_translation_files + run: | + has_errors=0 + python scripts/validate_translation_files.py $VALIDATION_OPTIONS 2>error_log.txt || has_errors=1 + + cat error_log.txt # Print the errors to the console for debugging + + # Save the validation errors to an output variable + { + echo 'ERROR_LOG<> "$GITHUB_OUTPUT" + + exit $has_errors + + - name: Commit fixes to git + id: commit_fixes + if: ${{ github.event.repository.full_name == github.event.pull_request.head.repo.full_name }} + run: | + # Commit if there are changes to translation files + if ! git diff --no-ext-diff --quiet --exit-code; then + # Set the git user to the bot user to enable commit + git config --global user.email "translations-bot@openedx.org" + git config --global user.name "edx-transifex-bot" + + # Switch from the merge commit to the pull request branch to enable push + git checkout "${{ github.head_ref }}" + + git add translations/ + git commit --message "fix: mark invalid entries as fuzzy" translations/ + git push + + echo "FUZZY_FIX_COMMIT_SHA=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + else + echo "No changes to commit" + fi + + - name: Allow merging the fuzzy fix commit + uses: actions/github-script@v6 + if: ${{ steps.commit_fixes.outputs.FUZZY_FIX_COMMIT_SHA }} + with: + script: | + await github.rest.repos.createCommitStatus({ + context: 'tests', + description: 'ci.yml: Forced success status.', + owner: context.repo.owner, + repo: context.repo.repo, + sha: '${{ steps.commit_fixes.outputs.FUZZY_FIX_COMMIT_SHA }}', + state: 'success', + }) + + - name: Post translation validation results as a pull request comment + # Due to GitHub Actions security reasons posting a comment isn't possible on fork pull requests. + # This shouldn't be an issue, because bots writes directly to this repository. + if: ${{ always() && github.event.repository.full_name == github.event.pull_request.head.repo.full_name }} + uses: mshick/add-pr-comment@7c0890544fb33b0bdd2e59467fbacb62e028a096 + with: + message: | + :white_check_mark: All translation files are valid. + + ``` + ${{ steps.validate_translation_files.outputs.ERROR_LOG || 'No errors were reported.' }} + ``` + + This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow. + + message-failure: | + :warning: There are errors in the translation files: + + ``` + ${{ steps.validate_translation_files.outputs.ERROR_LOG || 'No errors were reported.' }} + ``` + + This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow. + + - name: Auto-merge pull request + env: + # secrets can't be used in job conditionals, so we set them to env here + TRANSIFEX_APP_ACTOR_NAME: "${{ secrets.TRANSIFEX_APP_ACTOR_NAME }}" + TRANSIFEX_APP_ACTOR_ID: "${{ secrets.TRANSIFEX_APP_ACTOR_ID }}" + # This token requires Write access to the openedx-translations repo + GITHUB_TOKEN: ${{ secrets.EDX_TRANSIFEX_BOT_GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.number }} + if: "${{ github.actor == env.TRANSIFEX_APP_ACTOR_NAME && github.actor_id == env.TRANSIFEX_APP_ACTOR_ID }}" + run: | + # Add the pull request to the merge queue with --rebase commit strategy + gh pr merge ${{ github.head_ref }} --rebase --auto diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml deleted file mode 100644 index 16e2d443f4c..00000000000 --- a/.github/workflows/python-tests.yml +++ /dev/null @@ -1,31 +0,0 @@ -# run make test for python - -name: Python Tests - -on: - - pull_request - -jobs: - python-translations: - runs-on: ubuntu-latest - - steps: - # Clones the openedx-translations repo - - name: clone openedx/openedx-translations - uses: actions/checkout@v3 - - - name: Install gettext - run: | - sudo apt install -y gettext - - # Sets up Python - - name: setup python - uses: actions/setup-python@v4 - with: - python-version: '3.8' - - # Run the script - - name: run python tests - run: | - make test_requirements - make test diff --git a/.github/workflows/validate-translation-files.yml b/.github/workflows/validate-translation-files.yml deleted file mode 100644 index 79eaa78ab42..00000000000 --- a/.github/workflows/validate-translation-files.yml +++ /dev/null @@ -1,56 +0,0 @@ -# Validate the po files to ensure translation files are compilable. - -name: Validate translation PO files - -on: - pull_request: - -jobs: - validate-po-files: - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write - steps: - # Clones the openedx-translations repo - - name: clone openedx/openedx-translations - uses: actions/checkout@v3 - - - name: Install gettext - run: | - sudo apt install -y gettext - - - name: Validate translation files - id: validate_translation_files - run: | - has_validation_errors=0 - python scripts/validate_translation_files.py 2>validation_errors.txt || has_validation_errors=1 - cat validation_errors.txt - - { - echo 'VALIDATION_ERRORS<> "$GITHUB_OUTPUT" - - exit $has_validation_errors - - - name: Post translation validation results as a comment - # Due to GitHub Actions security reasons posting a comment isn't possible on fork pull requests. - # This shouldn't be an issue, because bots writes directly to this repository. - if: ${{ always() && !github.event.pull_request.head.repo.fork }} - uses: mshick/add-pr-comment@7c0890544fb33b0bdd2e59467fbacb62e028a096 - with: - message: | - :white_check_mark: All translation files are valid. - - This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow. - - message-failure: | - :warning: There are errors in the translation files: - - ``` - ${{ steps.validate_translation_files.outputs.VALIDATION_ERRORS }} - ``` - - This comment has been posted by the `validate-translation-files.yml` GitHub Actions workflow. diff --git a/Makefile b/Makefile index 9cb4142edd2..4373b93c99d 100644 --- a/Makefile +++ b/Makefile @@ -37,9 +37,14 @@ test_requirements: ## Installs test.txt requirements test: ## Run scripts tests pytest -v -s --cov=. --cov-report=term-missing --cov-report=html scripts/tests -validate_translation_files: ## Run basic validation to ensure files are compilable +validate_translation_files: ## Run basic validation to ensure translation files are valid python scripts/validate_translation_files.py + +validate_translation_files_mark_fuzzy: ## Run basic validation and mark invalid entries as fuzzy + python scripts/validate_translation_files.py --mark-fuzzy + + sync_translations: ## Syncs from the old projects to the new openedx-translations project python scripts/sync_translations.py $(SYNC_ARGS) diff --git a/docs/decisions/0002-mark-fuzzy-entries.rst b/docs/decisions/0002-mark-fuzzy-entries.rst new file mode 100644 index 00000000000..b9853b0380a --- /dev/null +++ b/docs/decisions/0002-mark-fuzzy-entries.rst @@ -0,0 +1,96 @@ +Mark invalid entries in po files as fuzzy +######################################### + +Context +******* +As of the `OEP-58`_, the Transifex GitHub App is used to sync Translations. +These translations are validated by `LexiQA`_, built-in Transifex quality +checks, and human reviewers. During this synchronization process, the +`Transifex GitHub App`_ creates pull requests to this repository. The +translations in the pull requests are then validated by ``msgfmt`` in CI. + +Problem +******* +There are times when the translations being synchronized fail ``msgfmt`` +validation. This prevents the pull requests from being merged. + + +Design Decision +*************** + +A GitHub Actions workflow will be implemented to mark invalid entries in +synchronized ``.po`` files as ``fuzzy``. This will update pull requests +created by the `Transifex GitHub App`_. + +To ensure a safe and reliable workflow, the following workflows will be +combined into one single workflow with multiple jobs: + +#. Run ``python-tests.yml`` to validate the python code +#. Then, run ``validate-translation-files.yml`` which performs the following: + + #. Validate the po-files using ``msgfmt`` + #. Notify the translators about the invalid entries via the preferred + communication channel (Slack, Transifex, GitHub) + #. Edit the po files to mark invalid entries as ``fuzzy``, so it's + excluded from ``.mo`` files + #. Revalidate the files + +#. Commit the updated entries and push to the PR branch +#. Automatically merge the PR + + +Informing the Translators +************************* +Translators can be notified about invalid translations via Slack, Transifex +or GitHub issues depending on the community's preference. + +Pros and Cons +************* + +**Pros** + +* New valid strings would make it into the ``.mo`` files +* There's no need for manual intervention, therefore it's fast and won't + create a backlog of pull requests. +* Rejected strings are easily identifiable by looking in the code, so it's + easy to fix them. +* Translators can be notified about invalid translations via Slack, Transifex, + GitHub depending on the community's preference. + + +**Cons** + +* The workflow script runs and edits the pull request, which can be + confusing for the reviewers. +* Previously valid entries are going to be overwritten with new ``fuzzy`` + strings which will make those entries to be shown in source language + (English). + +Rejected Alternative: Keep pull requests open +********************************************* + +- **Create a Transifex issue only:** It would be possible to identify the + faulty entries and create a Transifex issue via API to the faulty entries. + However, this would require attention from the translators + team, which can take a rather long time therefore creating a backlog of + invalid entries. Additionally, Transifex issues are not understood or used + by most of the Open edX community. + +- **Post errors on Slack only:** Post the errors on Slack and ask the + translators + to fix them. Same as the previous option, not all translators are on Slack. + Additionally, this option would have a slow feedback loop causing the pull + requests backlog to build-up. + +- **Mark faulty entries as unreviewed only:** Use the Transifex API to mark + the the invalid entries as unreviewed, then pull only + reviewed entries into this repository. + This option would require an extensive use the of the Transifex API, + and pull again which can be complex to implement. Additionally, this option + would have a slow feedback loop causing the pull requests backlog to + build-up. + + +.. _OEP-58: https://open-edx-proposals.readthedocs.io/en/latest/architectural-decisions/oep-0058-arch-translations-management.html +.. _LexiQA: https://help.transifex.com/en/articles/6219179-lexiqa +.. _Transifex GitHub App: https://github.com/apps/transifex-integration diff --git a/scripts/tests/mock_translations_dir/demo-xblock/conf/locale/hi/LC_MESSAGES/django.po b/scripts/tests/mock_translations_dir/demo-xblock/conf/locale/hi/LC_MESSAGES/django.po index 87b04909901..651ba802a34 100644 --- a/scripts/tests/mock_translations_dir/demo-xblock/conf/locale/hi/LC_MESSAGES/django.po +++ b/scripts/tests/mock_translations_dir/demo-xblock/conf/locale/hi/LC_MESSAGES/django.po @@ -13,6 +13,51 @@ msgstr "" "Plural-Forms: nplurals=2; plural=(n != 1);\n" "Generated-By: Babel 2.8.0\n" + +#: default_data.py:5 +msgid "" +"An isosceles triangle with three layers of similar height. It is shown " +"upright, so the widest layer is located at the bottom, and the narrowest " +"layer is located at the top." +msgstr "" + +#, python-brace-format +msgid "{action} invalid actions." +msgstr "{something_else} an invalid action." + +#: default_data.py:20 +msgid "" +"Use this zone to associate an item with the bottom layer of the triangle." +msgstr "" + +#: default_data.py:22 +msgid "Correct! This one belongs to The Top Zone." +msgstr "" + #, python-brace-format -msgid "{action} is not a valid action." -msgstr "{कार्रवाई} एक वैध कार्रवाई नहीं है।" # Invalid brace-format with encoding errors +msgid "" +"{action2} another invalid action." +msgstr "{कार्रवाई2} एक वैध कार्रवाई नहीं है।" + +#: default_data.py:23 +msgid "Correct! This one belongs to The Middle Zone." +msgstr "" + +#: default_data.py:24 +msgid "Correct! This one belongs to The Bottom Zone." +msgstr "" + +#: default_data.py:25 +msgid "No, this item does not belong here. Try again." +msgstr "नहीं, यह आइटम यहाँ नहीं है। पुनः प्रयास करें।" + +#: default_data.py:26 +msgid "You silly, there are no zones for this one." +msgstr "आप मूर्खतापूर्ण हैं, इसके लिए कोई क्षेत्र नहीं है।" + +#, python-brace-format +msgid "{action3} one more time!" +msgstr "" +"{कार्रवाई3} एक वैध कार्रवाई नहीं है।" +"" + diff --git a/scripts/tests/test_validate_translation_files.py b/scripts/tests/test_validate_translation_files.py index 821701d5231..d425fedb1f2 100644 --- a/scripts/tests/test_validate_translation_files.py +++ b/scripts/tests/test_validate_translation_files.py @@ -3,10 +3,12 @@ """ import os.path +import re +import shutil from ..validate_translation_files import ( get_translation_files, - main, + validate_translation_files, ) SCRIPT_DIR = os.path.dirname(__file__) @@ -35,7 +37,7 @@ def test_main_on_invalid_files(capsys): Integration test for the `main` function on some invalid files. """ mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir') - exit_code = main(mock_translations_dir) + exit_code = validate_translation_files(mock_translations_dir) out, err = capsys.readouterr() assert 'VALID:' in out, 'Valid files should be printed in stdout' @@ -44,8 +46,8 @@ def test_main_on_invalid_files(capsys): assert 'hi/LC_MESSAGES/django.po' not in out, 'Invalid file should be printed in stderr' assert 'en/LC_MESSAGES/django.po' not in out, 'Source file should not be validated' - assert 'INVALID:' in err - assert 'hi/LC_MESSAGES/django.po' in err + assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err) + assert 'Errors:' in err assert '\'msgstr\' is not a valid Python brace format string, unlike \'msgid\'' in err assert 'FAILURE: Some translations are invalid.' in err @@ -57,11 +59,57 @@ def test_main_on_valid_files(capsys): Integration test for the `main` function but only for the Arabic translations which is valid. """ mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir/demo-xblock/conf/locale/ar') - exit_code = main(mock_translations_dir) + exit_code = validate_translation_files(mock_translations_dir) out, err = capsys.readouterr() assert 'VALID:' in out, 'Valid files should be printed in stdout' assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid' assert 'SUCCESS: All translation files are valid.' in out + assert 'Errors:' not in out, 'There should be no errors' assert not err.strip(), 'The stderr should be empty' assert exit_code == 0, 'Should succeed due in validating the ar/LC_MESSAGES/django.po file' + + +def test_main_on_valid_files_with_mark_fuzzy(capsys, tmp_path): + """ + Test the `main` function with the --mark-fuzzy option. + """ + original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir') + mock_translations_dir = tmp_path / 'translations' + shutil.copytree(original_mock_translations_dir, mock_translations_dir) + + exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True) + out, err = capsys.readouterr() + + assert 'VALID:' in out, 'Valid files should be printed in stdout' + assert re.match(r'FIXED: .*hi/LC_MESSAGES/django.po', err), 'Should print the FIXED files in stderr' + assert 'ar/LC_MESSAGES/django.po' in out, 'Arabic translation file is valid regardless' + assert 'NOTICE: Some translations were fixed.' in err, 'Should print NOTICE in stderr' + assert 'SUCCESS: All translation files are now valid.' in err, 'Should print SUCCESS in stderr' + assert 'Previous errors:' in err, 'Original errors should be printed' + assert 'New output:' in err, 'New output should be printed after the fix' + assert exit_code == 0, ( + 'Should succeed even though the in/LC_MESSAGES/django.po is invalid, because it was marked as fuzzy' + ) + + +def test_main_on_valid_files_with_mark_fuzzy_unfixable_files(capsys, tmp_path): + """ + Test the `main` function with the --mark-fuzzy option but the file is broken in an unknown way. + """ + original_mock_translations_dir = os.path.join(SCRIPT_DIR, 'mock_translations_dir') + mock_translations_dir = tmp_path / 'translations' + shutil.copytree(original_mock_translations_dir, mock_translations_dir) + hi_language_file = mock_translations_dir / 'demo-xblock/conf/locale/hi/LC_MESSAGES/django.po' + + with open(hi_language_file, 'a') as f: + f.write('<<<>>>') + + exit_code = validate_translation_files(translations_dir=mock_translations_dir, mark_fuzzy=True) + out, err = capsys.readouterr() + + assert 'VALID:' in out, 'Valid files should be printed in stdout regardless' + assert re.match(r'INVALID: .*hi/LC_MESSAGES/django.po', err), 'Should print the Error files in stderr' + assert 'Previous errors:' in err, 'Original errors should be printed' + assert 'New errors:' in err, 'New errors should be printed after the fix' + assert exit_code == 1, 'Should fail due to unfixable hi/LC_MESSAGES/django.po file' diff --git a/scripts/validate_translation_files.py b/scripts/validate_translation_files.py index 8d334e26656..7082b2738ee 100644 --- a/scripts/validate_translation_files.py +++ b/scripts/validate_translation_files.py @@ -1,7 +1,15 @@ -import sys +""" +Validate translation files using GNU gettext `msgfmt` command. + +This script is used to validate translation files in the Open edX platform and mark +invalid entries as fuzzy. +""" + +import argparse import os import os.path import subprocess +import sys def get_translation_files(translation_directory): @@ -34,12 +42,70 @@ def validate_translation_file(po_file): stderr = completed_process.stderr.decode(encoding='utf-8', errors='replace') return { + 'po_file': po_file, 'valid': completed_process.returncode == 0, - 'output': f'{stdout}\n{stderr}', + 'output': f'{stdout}\n{stderr}'.strip(), + 'stdout': stdout, + 'stderr': stderr, } -def main(translations_dir='translations'): +def get_invalid_msgstr_lines(validation_stderr): + """ + Parse the error output of `msgfmt` and return line numbers of invalid msgstr lines. + """ + invalid_msgstr_lines = [] + for line in validation_stderr.splitlines(): + if '.po:' in line: + _file_name, line_number, _rest = line.split(':', 2) + invalid_msgstr_lines.append(int(line_number)) + return invalid_msgstr_lines + + +def get_invalid_msgid_lines(po_file, invalid_msgstr_lines): + """ + Get the line numbers of invalid msgid lines by their msgstr line numbers. + """ + with open(po_file, mode='r', encoding='utf-8') as f: + pofile_lines = f.readlines() + + invalid_msgid_lines = [] + last_msgid_line = None + for line_number, line_text in enumerate(pofile_lines, start=1): + if line_text.startswith('msgid'): + last_msgid_line = line_number + + if line_text.startswith('msgstr') and line_number in invalid_msgstr_lines: + invalid_msgid_lines.append(last_msgid_line) + + return invalid_msgid_lines + + +def mark_invalid_entries_as_fuzzy(validation_result): + """ + Mark invalid entries as fuzzy. + """ + # Get line numbers of invalid msgstr lines + invalid_msgstr_lines = get_invalid_msgstr_lines(validation_result['stderr']) + invalid_msgid_lines = get_invalid_msgid_lines( + validation_result['po_file'], + invalid_msgstr_lines, + ) + + with open(validation_result['po_file'], mode='r', encoding='utf-8') as f: + pofile_lines = f.readlines() + + with open(validation_result['po_file'], mode='w', encoding='utf-8') as f: + for line_number, line_text in enumerate(pofile_lines, start=1): + if line_number in invalid_msgid_lines: + f.write('#, fuzzy\n') + f.write(line_text) + + +def validate_translation_files( + translations_dir='translations', + mark_fuzzy=False, +): """ Run GNU gettext `msgfmt` and print errors to stderr. @@ -49,37 +115,81 @@ def main(translations_dir='translations'): return 1 for invalid translations. """ translations_valid = True + translations_fixed = False - invalid_lines = [] + stderr_lines = [] po_files = get_translation_files(translations_dir) for po_file in po_files: - result = validate_translation_file(po_file) + first_attempt = validate_translation_file(po_file) - if result['valid']: + if first_attempt['valid']: print('VALID: ' + po_file) - print(result['output'], '\n' * 2) + print(first_attempt['output'], '\n' * 2) else: - invalid_lines.append('INVALID: ' + po_file) - invalid_lines.append(result['output'] + '\n' * 2) - translations_valid = False + if mark_fuzzy: + mark_invalid_entries_as_fuzzy(first_attempt) + + second_attempt = validate_translation_file(po_file) + + if second_attempt['valid']: + translations_fixed = True + stderr_lines.append('FIXED: ' + po_file) + stderr_lines.append('This file was invalid, but it was fixed by`make validate_translation_files`.') + stderr_lines.append('Previous errors:') + stderr_lines.append(first_attempt['output'] + '\n') + stderr_lines.append('New output:') + stderr_lines.append(second_attempt['output'] + '\n' * 2) + else: + stderr_lines.append('INVALID: ' + po_file) + stderr_lines.append('Attempted to fix the file, but it is still invalid.') + + if mark_fuzzy: + stderr_lines.append('Previous errors:') + else: + stderr_lines.append('Errors:') + + stderr_lines.append(first_attempt['output'] + '\n') + + if mark_fuzzy: + stderr_lines.append('New errors:') + stderr_lines.append(second_attempt['output'] + '\n' * 2) + translations_valid = False # Print validation errors in the bottom for easy reading - print('\n'.join(invalid_lines), file=sys.stderr) + print('\n'.join(stderr_lines), file=sys.stderr) if translations_valid: - print('-----------------------------------------') - print('SUCCESS: All translation files are valid.') - print('-----------------------------------------') exit_code = 0 + + if translations_fixed: + print('---------------------------------------------', file=sys.stderr) + print('NOTICE: Some translations were fixed.', file=sys.stderr) + print('SUCCESS: All translation files are now valid.', file=sys.stderr) + print('---------------------------------------------', file=sys.stderr) + else: + print('-----------------------------------------') + print('SUCCESS: All translation files are valid.') + print('-----------------------------------------') else: + exit_code = 1 print('---------------------------------------', file=sys.stderr) print('FAILURE: Some translations are invalid.', file=sys.stderr) print('---------------------------------------', file=sys.stderr) - exit_code = 1 return exit_code +def main(): # pragma: no cover + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--mark-fuzzy', dest='mark_fuzzy', action='store_true', default=False) + parser.add_argument('--dir', action='store', type=str, default='translations') + args = parser.parse_args() + sys.exit(validate_translation_files( + translations_dir=args.dir, + mark_fuzzy=args.mark_fuzzy, + )) + + if __name__ == '__main__': - sys.exit(main()) # pragma: no cover + main() # pragma: no cover