diff --git a/build.gradle b/build.gradle index 4d042d9eac..471ad50c9f 100644 --- a/build.gradle +++ b/build.gradle @@ -300,6 +300,7 @@ dependencies { } runtimeOnly(libs.language.detector) runtimeOnly(libs.dumont.hunspell) + implementation(libs.icu4j) // Lucene for tokenizers implementation(libs.bundles.lucene) @@ -1702,7 +1703,7 @@ tasks.register('testAcceptance', Test) { classpath = sourceSets.testAcceptance.runtimeClasspath systemProperties = System.properties systemProperty 'java.util.logging.config.file', "${rootDir}/config/test/logger.properties" - + dependsOn firstStepsEn dependsOn ':aligner:jar' } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 2090ecac13..dd5389310a 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,7 +8,7 @@ commons_io = "2.16.1" commons_text = "1.11.0" commons_validator = "1.9.0" jsoup = "1.18.2" -icu4j = { require = "[70,73.2[", prefer = "72.1" } +icu4j = { require = "[71.1,76.1[", prefer = "74.2" } stax2api = "4.2.2" woodstox = "6.5.0" languagetool = "6.1" @@ -65,7 +65,7 @@ commons-lang3 = {group = "org.apache.commons", name = "commons-lang3", version.r commons-text = {group = "org.apache.commons", name = "commons-text", version.ref = "commons_text"} commons-validator = {group = "commons-validator", name = "commons-validator", version.ref = "commons_validator"} jsoup = {group = "org.jsoup", name = "jsoup", version.ref = "jsoup"} -icj4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} +icu4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} stax2-api = {group = "org.codehaus.woodstox", name = "stax2-api", version.ref = "stax2api"} woodstox-core = {group = "com.fasterxml.woodstox", name = "woodstox-core", version.ref = "woodstox"} languagetool-all = {group = "org.languagetool", name = "language-all", version.ref = "languagetool"} diff --git a/language-modules/ja/build.gradle b/language-modules/ja/build.gradle index 1855d1888f..4a1deeda4d 100644 --- 
a/language-modules/ja/build.gradle +++ b/language-modules/ja/build.gradle @@ -25,7 +25,7 @@ dependencies { exclude module: 'icu4j' } implementation(dependencies.variantOf(libs.lucene.gosen) { classifier("ipadic") }) - implementation(libs.icj4j) + compileOnly(libs.icu4j) } testImplementation(libs.junit4) @@ -43,7 +43,7 @@ dependencies { exclude module: 'icu4j' } testRuntimeOnly(dependencies.variantOf(libs.lucene.gosen) { classifier("ipadic") }) - testRuntimeOnly(libs.icj4j) + testRuntimeOnly(libs.icu4j) testImplementation(libs.assertj) testImplementation(testFixtures(project.rootProject)) diff --git a/src/org/omegat/gui/editor/EditorController.java b/src/org/omegat/gui/editor/EditorController.java index 8276a109cb..ac60c22bbf 100644 --- a/src/org/omegat/gui/editor/EditorController.java +++ b/src/org/omegat/gui/editor/EditorController.java @@ -704,9 +704,12 @@ protected void loadDocument() { doc.setDocumentFilter(new DocumentFilter3()); - // add locate for target language to editor + // add locales to editor Locale targetLocale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); editor.setLocale(targetLocale); + editor.setTargetLocale(targetLocale); + Locale sourceLocale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); + editor.setSourceLocale(sourceLocale); editor.setDocument(doc); @@ -1639,8 +1642,9 @@ public void changeCase(CHANGE_CASE_TO toWhat) { try { // no selection? make it the current word if (start == end) { - start = EditorUtils.getWordStart(editor, start); - end = EditorUtils.getWordEnd(editor, end); + Locale locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); + start = EditorUtils.getWordStart(editor, start, locale); + end = EditorUtils.getWordEnd(editor, end, locale); // adjust the bound again if (start < translationStart && end <= translationEnd) { @@ -1947,6 +1951,7 @@ private void createAdditionalPanes() { .setComponentOrientation(BiDiUtils.isRtl(language) ? 
ComponentOrientation.RIGHT_TO_LEFT : ComponentOrientation.LEFT_TO_RIGHT); introPane.setEditable(false); + introPane.setName("IntroPane"); DragTargetOverlay.apply(introPane, dropInfo); URI uri = Help.getHelpFileURI(OConsts.HELP_FIRST_STEPS_PREFIX, language, OConsts.HELP_FIRST_STEPS); if (uri != null) { @@ -1958,6 +1963,7 @@ private void createAdditionalPanes() { emptyProjectPaneTitle = OStrings.getString("TF_INTRO_EMPTYPROJECT_FILENAME"); emptyProjectPane = new JTextPane(); emptyProjectPane.setEditable(false); + emptyProjectPane.setName("EmptyProjectPane"); emptyProjectPane.setText(OStrings.getString("TF_INTRO_EMPTYPROJECT")); emptyProjectPane.setFont(mw.getApplicationFont()); DragTargetOverlay.apply(emptyProjectPane, dropInfo); diff --git a/src/org/omegat/gui/editor/EditorTextArea3.java b/src/org/omegat/gui/editor/EditorTextArea3.java index f7d7e4fe31..2461da1f73 100644 --- a/src/org/omegat/gui/editor/EditorTextArea3.java +++ b/src/org/omegat/gui/editor/EditorTextArea3.java @@ -42,6 +42,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; import javax.swing.JEditorPane; import javax.swing.JPopupMenu; @@ -141,6 +142,9 @@ public class EditorTextArea3 extends JEditorPane { */ protected boolean overtypeMode = false; + private Locale targetLocale; + private Locale sourceLocale; + public EditorTextArea3(EditorController controller) { this.controller = controller; setEditorKit(new StyledEditorKit() { @@ -165,10 +169,16 @@ protected void createInputAttributes(Element element, MutableAttributeSet set) { c.setBlinkRate(getCaret().getBlinkRate()); setCaret(c); + sourceLocale = getLocale(); + targetLocale = getLocale(); + addCaretListener(e -> { try { - int start = EditorUtils.getWordStart(EditorTextArea3.this, e.getMark()); - int end = EditorUtils.getWordEnd(EditorTextArea3.this, e.getMark()); + // Detection of target string locale. + // It uses a source or a target language as a processing locale. 
+ Locale locale = isInActiveTranslation(e.getMark()) ? targetLocale : sourceLocale; + int start = EditorUtils.getWordStart(EditorTextArea3.this, e.getMark(), locale); + int end = EditorUtils.getWordEnd(EditorTextArea3.this, e.getMark(), locale); if (end - start <= 0) { // word not defined return; @@ -200,6 +210,14 @@ public void setFont(Font font) { } } + void setTargetLocale(Locale targetLocale) { + this.targetLocale = targetLocale; + } + + void setSourceLocale(Locale sourceLocale) { + this.sourceLocale = sourceLocale; + } + /** * Return OmDocument instead just a Document. If editor was not initialized * with OmDocument, it will contains other Document implementation. In this diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 8e5f49da1c..7a97a30916 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -31,9 +31,13 @@ import java.util.Locale; import javax.swing.text.BadLocationException; +import javax.swing.text.Document; +import javax.swing.text.Element; import javax.swing.text.JTextComponent; import javax.swing.text.Utilities; +import com.ibm.icu.text.BreakIterator; + import org.omegat.core.Core; import org.omegat.core.data.ProtectedPart; import org.omegat.core.data.SourceTextEntry; @@ -61,15 +65,30 @@ private EditorUtils() { * Determines the start of a word for the given model location. This method * skips direction char. * - * TODO: change to use document's locale - * - * @param c - * @param offs - * @return + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @return position of word start on the text component. * @throws BadLocationException + * when there is no line found in the text component. 
*/ + @Deprecated public static int getWordStart(JTextComponent c, int offs) throws BadLocationException { - int result = Utilities.getWordStart(c, offs); + return getWordStart(c, offs, c.getLocale()); + } + + /** + * Determines the start of a word for the given model location. This method + * skips direction char. + * + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @param locale locale of the text. + * @return position of word start on the text component. + * @throws BadLocationException + * when there is no line found in the text component. + */ + public static int getWordStart(JTextComponent c, int offs, Locale locale) throws BadLocationException { + int result = getWordBoundary(c, offs, locale, false); char ch = c.getDocument().getText(result, 1).charAt(0); if (isDirectionChar(ch)) { result++; @@ -81,15 +100,30 @@ public static int getWordStart(JTextComponent c, int offs) throws BadLocationExc * Determines the end of a word for the given model location. This method * skips direction char. * - * TODO: change to use document's locale - * - * @param c - * @param offs - * @return + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @return position of the word end on the text component. * @throws BadLocationException + * when there is no line found in the text component. */ + @Deprecated public static int getWordEnd(JTextComponent c, int offs) throws BadLocationException { - int result = Utilities.getWordEnd(c, offs); + return getWordEnd(c, offs, c.getLocale()); + } + + /** + * Determines the end of a word for the given model location. This method + * skips direction char. + * + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @param locale locale of the text. + * @return position of the word end on the text component. + * @throws BadLocationException + * when there is no line found in the text component. 
+ */ + public static int getWordEnd(JTextComponent c, int offs, Locale locale) throws BadLocationException { + int result = getWordBoundary(c, offs, locale, true); if (result > 0) { char ch = c.getDocument().getText(result - 1, 1).charAt(0); if (isDirectionChar(ch)) { @@ -99,6 +133,46 @@ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationExcep return result; } + private static int getWordBoundary(JTextComponent c, int offs, Locale locale, boolean end) throws BadLocationException { + int result = offs; + Element line = Utilities.getParagraphElement(c, offs); + if (line == null) { + throw new BadLocationException("No word at " + offs, offs); + } + int lineStart = line.getStartOffset(); + Document doc = c.getDocument(); + int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); + if (lineEnd - lineStart > 0) { + String lineString = doc.getText(lineStart, lineEnd - lineStart); + result = lineStart + getWordBoundary(locale, lineString, offs - lineStart, end); + } + return result; + } + + /** + * Get word boundary. + *
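+ * When {@code end} is true, returns the first word boundary after the given position (the word end).
+ * Otherwise, returns the word boundary immediately before that one (the word start).
+ * @param locale locale used to select the word-breaking rules.
+ * @param lineString text of the line to search.
+ * @param wordPosition offset within {@code lineString} at which to look for the word.
+ * @param end {@code true} to return the end of the word, {@code false} to return its start.
+ * @return index of the word boundary, relative to the start of {@code lineString}.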
+ */
+ static int getWordBoundary(Locale locale, String lineString, int wordPosition, boolean end) {
+     BreakIterator words = BreakIterator.getWordInstance(locale);
+     words.setText(lineString);
+     int last = words.last();
+     if (last <= 0) {
+         return 0; // empty line: offset 0 is the only boundary; never leak BreakIterator.DONE (-1)
+     }
+     // Clamp into [0, last - 1] so that following() always has a real boundary to return.
+     int position = Math.max(0, Math.min(wordPosition, last - 1));
+     int wordEnd = words.following(position);
+     return end ? wordEnd : words.previous(); // previous() steps back from wordEnd to the word start
+ }
+
/**
* Check if char is direction char(u202A,u202B,u202C).
*
@@ -420,7 +494,7 @@ public static String addBidiAroundTags(String text, SourceTextEntry ste) {
StringBuilder s = new StringBuilder(text.length() * 12 / 10);
for (Tag t : tags) {
if (pos < t.pos) {
- s.append(text.substring(pos, t.pos));
+ s.append(text, pos, t.pos);
}
s.append(SegmentBuilder.BIDI_RLM_CHAR);
s.append(SegmentBuilder.BIDI_LRM_CHAR);
@@ -437,11 +511,8 @@ public static String addBidiAroundTags(String text, SourceTextEntry ste) {
public static boolean hasBidiAroundTag(String text, String tag, int pos) {
try {
- boolean has = true;
- if (text.charAt(pos - 1) != SegmentBuilder.BIDI_LRM_CHAR
- || text.charAt(pos - 2) != SegmentBuilder.BIDI_RLM_CHAR) {
- has = false;
- }
+ boolean has = text.charAt(pos - 1) == SegmentBuilder.BIDI_LRM_CHAR
+ && text.charAt(pos - 2) == SegmentBuilder.BIDI_RLM_CHAR;
if (text.charAt(pos + tag.length()) != SegmentBuilder.BIDI_LRM_CHAR
|| text.charAt(pos + tag.length() + 1) != SegmentBuilder.BIDI_RLM_CHAR) {
has = false;
diff --git a/test-acceptance/data/project_CN_JP/.gitignore b/test-acceptance/data/project_CN_JP/.gitignore
new file mode 100644
index 0000000000..2f3bf92e42
--- /dev/null
+++ b/test-acceptance/data/project_CN_JP/.gitignore
@@ -0,0 +1,3 @@
+project_stats.txt
+project_stats.json
+*.bak
diff --git a/test-acceptance/data/project_CN_JP/dictionary/.keep b/test-acceptance/data/project_CN_JP/dictionary/.keep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test-acceptance/data/project_CN_JP/glossary/glossary.txt b/test-acceptance/data/project_CN_JP/glossary/glossary.txt
new file mode 100644
index 0000000000..b53c81f5a7
--- /dev/null
+++ b/test-acceptance/data/project_CN_JP/glossary/glossary.txt
@@ -0,0 +1,3 @@
+# Glossary in tab-separated format -*- coding: utf-8 -*-
+介绍 紹介
+中的 中心的な
diff --git a/test-acceptance/data/project_CN_JP/omegat.project b/test-acceptance/data/project_CN_JP/omegat.project
new file mode 100644
index 0000000000..1f961b9d19
--- /dev/null
+++ b/test-acceptance/data/project_CN_JP/omegat.project
@@ -0,0 +1,33 @@
+
+