From 2f965c8d733883018afa1b20567aa307f2e0be90 Mon Sep 17 00:00:00 2001 From: Jean Aurambault Date: Wed, 27 Nov 2024 14:01:12 -0800 Subject: [PATCH] Ad hoc relaxation of the integrity checker for plural strings Only the CLDR 'other' form must have the exact same number of placeholders. For other forms, we allow one placeholder to be removed. Note that placeholders are stored in a set, so duplicate placeholders will count as one. This only target printflike check, so mainly targeting Android, and some gettext. This is pretty adhoc anyway --- .../PluralIntegrityCheckerRelaxer.java | 41 ++++++++++++ .../tm/TMTextUnitIntegrityCheckService.java | 19 +++++- .../TextUnitBatchImporterService.java | 56 ++++++++++------ .../PluralIntegrityCheckerRelaxerTest.java | 64 +++++++++++++++++++ 4 files changed, 158 insertions(+), 22 deletions(-) create mode 100644 webapp/src/main/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxer.java create mode 100644 webapp/src/test/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxerTest.java diff --git a/webapp/src/main/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxer.java b/webapp/src/main/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxer.java new file mode 100644 index 0000000000..13d08b167c --- /dev/null +++ b/webapp/src/main/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxer.java @@ -0,0 +1,41 @@ +package com.box.l10n.mojito.service.assetintegritychecker.integritychecker; + +import java.util.Set; +import org.springframework.stereotype.Component; + +@Component +public class PluralIntegrityCheckerRelaxer { + + /** + * This is very ad hoc! + * + *

There are cases where the plural form doesn’t retain the number. Right now, those are + * wrongly marked as rejected. Ideally, we should clearly identify which placeholder might be + * missing from the string, but that’s another level of complexity. Also, use the class name to + * target certain integrity checks to keep this hack constrained. + */ + public boolean shouldRelaxIntegrityCheck( + String source, String target, String pluralForm, TextUnitIntegrityChecker textUnitChecker) { + + boolean shouldRelax = false; + + if (pluralForm != null && !"other".equals(pluralForm)) { + if (textUnitChecker instanceof PrintfLikeIntegrityChecker + || textUnitChecker instanceof PrintfLikeIgnorePercentageAfterBracketsIntegrityChecker + || textUnitChecker instanceof PrintfLikeVariableTypeIntegrityChecker + || textUnitChecker instanceof SimplePrintfLikeIntegrityChecker) { + + RegexIntegrityChecker regexIntegrityChecker = (RegexIntegrityChecker) textUnitChecker; + + Set sourcePlaceholders = regexIntegrityChecker.getPlaceholders(source); + Set targetPlaceholders = regexIntegrityChecker.getPlaceholders(target); + + if (sourcePlaceholders.size() - targetPlaceholders.size() <= 1) { + shouldRelax = true; + } + } + } + + return shouldRelax; + } +} diff --git a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMTextUnitIntegrityCheckService.java b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMTextUnitIntegrityCheckService.java index ad45b4c186..587528acfe 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMTextUnitIntegrityCheckService.java +++ b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMTextUnitIntegrityCheckService.java @@ -7,6 +7,7 @@ import com.box.l10n.mojito.service.asset.AssetRepository; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckException; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory; +import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.PluralIntegrityCheckerRelaxer; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker; import java.util.Set; import org.slf4j.Logger; @@ -27,6 +28,8 @@ public class TMTextUnitIntegrityCheckService { @Autowired TMTextUnitRepository tmTextUnitRepository; + @Autowired PluralIntegrityCheckerRelaxer pluralIntegrityCheckerRelaxer; + /** * Checks the integrity of the content given the {@link com.box.l10n.mojito.entity.TMTextUnit#id} * @@ -46,7 +49,21 @@ public void checkTMTextUnitIntegrity(Long tmTextUnitId, String contentToCheck) logger.debug("No designated checker for this asset. Nothing to do"); } else { for (TextUnitIntegrityChecker textUnitChecker : textUnitCheckers) { - textUnitChecker.check(tmTextUnit.getContent(), contentToCheck); + try { + textUnitChecker.check(tmTextUnit.getContent(), contentToCheck); + } catch (IntegrityCheckException e) { + if (tmTextUnit.getPluralForm() != null && pluralIntegrityCheckerRelaxer.shouldRelaxIntegrityCheck( + tmTextUnit.getContent(), + contentToCheck, + tmTextUnit.getPluralForm().getName(), + textUnitChecker)) { + logger.debug( + "Relaxing the check for plural string with form: {}", + tmTextUnit.getPluralForm().getName()); + } else { + throw e; + } + } } } } diff --git a/webapp/src/main/java/com/box/l10n/mojito/service/tm/importer/TextUnitBatchImporterService.java b/webapp/src/main/java/com/box/l10n/mojito/service/tm/importer/TextUnitBatchImporterService.java index 564250faa0..3402a3ca4b 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/service/tm/importer/TextUnitBatchImporterService.java +++ b/webapp/src/main/java/com/box/l10n/mojito/service/tm/importer/TextUnitBatchImporterService.java @@ -22,6 +22,7 @@ import com.box.l10n.mojito.service.asset.ImportTextUnitJobInput; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckException; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory; +import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.PluralIntegrityCheckerRelaxer; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker; import com.box.l10n.mojito.service.locale.LocaleService; import com.box.l10n.mojito.service.pollableTask.PollableFuture; @@ -96,6 +97,8 @@ public class TextUnitBatchImporterService { @Autowired MeterRegistry meterRegistry; + @Autowired PluralIntegrityCheckerRelaxer pluralIntegrityCheckerRelaxer; + @Value("${l10n.textUnitBatchImporterService.quartz.schedulerName:" + DEFAULT_SCHEDULER_NAME + "}") String schedulerName; @@ -386,30 +389,41 @@ void applyIntegrityChecks( currentTextUnit.getSource(), textUnitForBatchImport.getContent(), ice); - textUnitForBatchImport.setIncludedInLocalizedFile(false); - textUnitForBatchImport.setStatus(Status.TRANSLATION_NEEDED); - - if (hasSameTarget) { - switch (integrityChecksType) { - case UNLESS_TAGGED_USE_INTEGRITY_CHECKER_STATUS: - if (!Strings.nullToEmpty(currentTextUnit.getTargetComment()) - .toLowerCase() - .contains(FALSE_POSITIVE_TAG_FOR_STATUS)) { + + if (pluralIntegrityCheckerRelaxer.shouldRelaxIntegrityCheck( + currentTextUnit.getSource(), + textUnitForBatchImport.getContent(), + textUnitForBatchImport.getCurrentTextUnit().getPluralForm(), + textUnitChecker)) { + logger.debug( + "Relaxing the check for plural string with form: {}", + textUnitForBatchImport.getCurrentTextUnit().getPluralForm()); + } else { + textUnitForBatchImport.setIncludedInLocalizedFile(false); + textUnitForBatchImport.setStatus(Status.TRANSLATION_NEEDED); + + if (hasSameTarget) { + switch (integrityChecksType) { + case UNLESS_TAGGED_USE_INTEGRITY_CHECKER_STATUS: + if (!Strings.nullToEmpty(currentTextUnit.getTargetComment()) + .toLowerCase() + .contains(FALSE_POSITIVE_TAG_FOR_STATUS)) { + break; + } + case KEEP_STATUS_IF_SAME_TARGET: + case KEEP_STATUS_IF_REJECTED_AND_SAME_TARGET: + textUnitForBatchImport.setIncludedInLocalizedFile( + currentTextUnit.isIncludedInLocalizedFile()); + textUnitForBatchImport.setStatus(currentTextUnit.getStatus()); break; - } - case KEEP_STATUS_IF_SAME_TARGET: - case KEEP_STATUS_IF_REJECTED_AND_SAME_TARGET: - textUnitForBatchImport.setIncludedInLocalizedFile( - currentTextUnit.isIncludedInLocalizedFile()); - textUnitForBatchImport.setStatus(currentTextUnit.getStatus()); - break; + } } - } - TMTextUnitVariantComment tmTextUnitVariantComment = new TMTextUnitVariantComment(); - tmTextUnitVariantComment.setSeverity(Severity.ERROR); - tmTextUnitVariantComment.setContent(ice.getMessage()); - textUnitForBatchImport.getTmTextUnitVariantComments().add(tmTextUnitVariantComment); + TMTextUnitVariantComment tmTextUnitVariantComment = new TMTextUnitVariantComment(); + tmTextUnitVariantComment.setSeverity(Severity.ERROR); + tmTextUnitVariantComment.setContent(ice.getMessage()); + textUnitForBatchImport.getTmTextUnitVariantComments().add(tmTextUnitVariantComment); + } } } } diff --git a/webapp/src/test/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxerTest.java b/webapp/src/test/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxerTest.java new file mode 100644 index 0000000000..7afeb4961d --- /dev/null +++ b/webapp/src/test/java/com/box/l10n/mojito/service/assetintegritychecker/integritychecker/PluralIntegrityCheckerRelaxerTest.java @@ -0,0 +1,64 @@ +package com.box.l10n.mojito.service.assetintegritychecker.integritychecker; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.Test; + +public class PluralIntegrityCheckerRelaxerTest { + + @Test + public void testShouldRelaxIntegrityCheck_whenPluralFormIsOther() { + PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer(); + boolean result = + relaxer.shouldRelaxIntegrityCheck( + "source %d", "target", "other", new PrintfLikeIntegrityChecker()); + + assertFalse(result, "Integrity check should not be relaxed for plural form 'other'."); + } + + @Test + public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByOne_One() { + PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer(); + boolean result = + relaxer.shouldRelaxIntegrityCheck( + "source %d", "target", "one", new PrintfLikeIntegrityChecker()); + + assertTrue( + result, "Integrity check should be relaxed when placeholders differ by one for 'one' form"); + } + + @Test + public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByOne_One_DuplicatePlaceholder() { + PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer(); + boolean result = + relaxer.shouldRelaxIntegrityCheck( + "source %d %d", "target", "one", new PrintfLikeIntegrityChecker()); + + assertTrue( + result, + "Integrity check should be relaxed when placeholders differ by one for 'one' form (as placeholders are stored in a set, ignoring duplicates)"); + } + + @Test + public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByTwo_One() { + PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer(); + boolean result = + relaxer.shouldRelaxIntegrityCheck( + "source %1d %2d", "target", "one", new PrintfLikeIntegrityChecker()); + + assertFalse( + result, + "Integrity check should not be relaxed when placeholders differ by two for 'one' form"); + } + + @Test + public void testShouldRelaxIntegrityCheck_withUnsupportedChecker() { + PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer(); + + boolean result = + relaxer.shouldRelaxIntegrityCheck("source %d", "target", "one", new URLIntegrityChecker()); + + assertFalse(result, "Integrity check should not be relaxed for unsupported checker types."); + } +}