Skip to content

Commit

Permalink
Ad hoc relaxation of the integrity checker for plural strings
Browse files Browse the repository at this point in the history
Only the CLDR 'other' form must have the exact same number of placeholders. For other forms, we allow one placeholder to be removed. Note that placeholders are stored in a set, so duplicate placeholders will count as one.

This only target printflike check, so mainly targeting Android, and some gettext. This is pretty adhoc anyway
  • Loading branch information
ja-openai committed Dec 6, 2024
1 parent af566ed commit 2f965c8
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.box.l10n.mojito.service.assetintegritychecker.integritychecker;

import java.util.Set;
import org.springframework.stereotype.Component;

@Component
public class PluralIntegrityCheckerRelaxer {

/**
* This is very ad hoc!
*
* <p>There are cases where the plural form doesn’t retain the number. Right now, those are
* wrongly marked as rejected. Ideally, we should clearly identify which placeholder might be
* missing from the string, but that’s another level of complexity. Also, use the class name to
* target certain integrity checks to keep this hack constrained.
*/
public boolean shouldRelaxIntegrityCheck(
String source, String target, String pluralForm, TextUnitIntegrityChecker textUnitChecker) {

boolean shouldRelax = false;

if (pluralForm != null && !"other".equals(pluralForm)) {
if (textUnitChecker instanceof PrintfLikeIntegrityChecker
|| textUnitChecker instanceof PrintfLikeIgnorePercentageAfterBracketsIntegrityChecker
|| textUnitChecker instanceof PrintfLikeVariableTypeIntegrityChecker
|| textUnitChecker instanceof SimplePrintfLikeIntegrityChecker) {

RegexIntegrityChecker regexIntegrityChecker = (RegexIntegrityChecker) textUnitChecker;

Set<String> sourcePlaceholders = regexIntegrityChecker.getPlaceholders(source);
Set<String> targetPlaceholders = regexIntegrityChecker.getPlaceholders(target);

if (sourcePlaceholders.size() - targetPlaceholders.size() <= 1) {
shouldRelax = true;
}
}
}

return shouldRelax;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.box.l10n.mojito.service.asset.AssetRepository;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckException;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.PluralIntegrityCheckerRelaxer;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker;
import java.util.Set;
import org.slf4j.Logger;
Expand All @@ -27,6 +28,8 @@ public class TMTextUnitIntegrityCheckService {

@Autowired TMTextUnitRepository tmTextUnitRepository;

@Autowired PluralIntegrityCheckerRelaxer pluralIntegrityCheckerRelaxer;

/**
* Checks the integrity of the content given the {@link com.box.l10n.mojito.entity.TMTextUnit#id}
*
Expand All @@ -46,7 +49,21 @@ public void checkTMTextUnitIntegrity(Long tmTextUnitId, String contentToCheck)
logger.debug("No designated checker for this asset. Nothing to do");
} else {
for (TextUnitIntegrityChecker textUnitChecker : textUnitCheckers) {
textUnitChecker.check(tmTextUnit.getContent(), contentToCheck);
try {
textUnitChecker.check(tmTextUnit.getContent(), contentToCheck);
} catch (IntegrityCheckException e) {
if (tmTextUnit.getPluralForm() != null && pluralIntegrityCheckerRelaxer.shouldRelaxIntegrityCheck(
tmTextUnit.getContent(),
contentToCheck,
tmTextUnit.getPluralForm().getName(),
textUnitChecker)) {
logger.debug(
"Relaxing the check for plural string with form: {}",
tmTextUnit.getPluralForm().getName());
} else {
throw e;
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.box.l10n.mojito.service.asset.ImportTextUnitJobInput;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckException;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.PluralIntegrityCheckerRelaxer;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker;
import com.box.l10n.mojito.service.locale.LocaleService;
import com.box.l10n.mojito.service.pollableTask.PollableFuture;
Expand Down Expand Up @@ -96,6 +97,8 @@ public class TextUnitBatchImporterService {

@Autowired MeterRegistry meterRegistry;

@Autowired PluralIntegrityCheckerRelaxer pluralIntegrityCheckerRelaxer;

@Value("${l10n.textUnitBatchImporterService.quartz.schedulerName:" + DEFAULT_SCHEDULER_NAME + "}")
String schedulerName;

Expand Down Expand Up @@ -386,30 +389,41 @@ void applyIntegrityChecks(
currentTextUnit.getSource(),
textUnitForBatchImport.getContent(),
ice);
textUnitForBatchImport.setIncludedInLocalizedFile(false);
textUnitForBatchImport.setStatus(Status.TRANSLATION_NEEDED);

if (hasSameTarget) {
switch (integrityChecksType) {
case UNLESS_TAGGED_USE_INTEGRITY_CHECKER_STATUS:
if (!Strings.nullToEmpty(currentTextUnit.getTargetComment())
.toLowerCase()
.contains(FALSE_POSITIVE_TAG_FOR_STATUS)) {

if (pluralIntegrityCheckerRelaxer.shouldRelaxIntegrityCheck(
currentTextUnit.getSource(),
textUnitForBatchImport.getContent(),
textUnitForBatchImport.getCurrentTextUnit().getPluralForm(),
textUnitChecker)) {
logger.debug(
"Relaxing the check for plural string with form: {}",
textUnitForBatchImport.getCurrentTextUnit().getPluralForm());
} else {
textUnitForBatchImport.setIncludedInLocalizedFile(false);
textUnitForBatchImport.setStatus(Status.TRANSLATION_NEEDED);

if (hasSameTarget) {
switch (integrityChecksType) {
case UNLESS_TAGGED_USE_INTEGRITY_CHECKER_STATUS:
if (!Strings.nullToEmpty(currentTextUnit.getTargetComment())
.toLowerCase()
.contains(FALSE_POSITIVE_TAG_FOR_STATUS)) {
break;
}
case KEEP_STATUS_IF_SAME_TARGET:
case KEEP_STATUS_IF_REJECTED_AND_SAME_TARGET:
textUnitForBatchImport.setIncludedInLocalizedFile(
currentTextUnit.isIncludedInLocalizedFile());
textUnitForBatchImport.setStatus(currentTextUnit.getStatus());
break;
}
case KEEP_STATUS_IF_SAME_TARGET:
case KEEP_STATUS_IF_REJECTED_AND_SAME_TARGET:
textUnitForBatchImport.setIncludedInLocalizedFile(
currentTextUnit.isIncludedInLocalizedFile());
textUnitForBatchImport.setStatus(currentTextUnit.getStatus());
break;
}
}
}

TMTextUnitVariantComment tmTextUnitVariantComment = new TMTextUnitVariantComment();
tmTextUnitVariantComment.setSeverity(Severity.ERROR);
tmTextUnitVariantComment.setContent(ice.getMessage());
textUnitForBatchImport.getTmTextUnitVariantComments().add(tmTextUnitVariantComment);
TMTextUnitVariantComment tmTextUnitVariantComment = new TMTextUnitVariantComment();
tmTextUnitVariantComment.setSeverity(Severity.ERROR);
tmTextUnitVariantComment.setContent(ice.getMessage());
textUnitForBatchImport.getTmTextUnitVariantComments().add(tmTextUnitVariantComment);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package com.box.l10n.mojito.service.assetintegritychecker.integritychecker;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.Test;

public class PluralIntegrityCheckerRelaxerTest {

@Test
public void testShouldRelaxIntegrityCheck_whenPluralFormIsOther() {
PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer();
boolean result =
relaxer.shouldRelaxIntegrityCheck(
"source %d", "target", "other", new PrintfLikeIntegrityChecker());

assertFalse(result, "Integrity check should not be relaxed for plural form 'other'.");
}

@Test
public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByOne_One() {
PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer();
boolean result =
relaxer.shouldRelaxIntegrityCheck(
"source %d", "target", "one", new PrintfLikeIntegrityChecker());

assertTrue(
result, "Integrity check should be relaxed when placeholders differ by one for 'one' form");
}

@Test
public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByOne_One_DuplicatePlaceholder() {
PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer();
boolean result =
relaxer.shouldRelaxIntegrityCheck(
"source %d %d", "target", "one", new PrintfLikeIntegrityChecker());

assertTrue(
result,
"Integrity check should be relaxed when placeholders differ by one for 'one' form (as placeholders are stored in a set, ignoring duplicates)");
}

@Test
public void testShouldRelaxIntegrityCheck_whenPlaceholdersDifferByTwo_One() {
PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer();
boolean result =
relaxer.shouldRelaxIntegrityCheck(
"source %1d %2d", "target", "one", new PrintfLikeIntegrityChecker());

assertFalse(
result,
"Integrity check should not be relaxed when placeholders differ by two for 'one' form");
}

@Test
public void testShouldRelaxIntegrityCheck_withUnsupportedChecker() {
PluralIntegrityCheckerRelaxer relaxer = new PluralIntegrityCheckerRelaxer();

boolean result =
relaxer.shouldRelaxIntegrityCheck("source %d", "target", "one", new URLIntegrityChecker());

assertFalse(result, "Integrity check should not be relaxed for unsupported checker types.");
}
}

0 comments on commit 2f965c8

Please sign in to comment.