Skip to content

Commit

Permalink
1.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Bhlowe committed Jul 21, 2024
1 parent 334882c commit 1ba043a
Show file tree
Hide file tree
Showing 8 changed files with 501 additions and 160 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/.idea/
/target/
translate-properties.iml
/src/main/java/com/nuspectra/translation/OA.java
translate-properties.iml
13 changes: 9 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

<groupId>com.nuspectra.translation</groupId>
<artifactId>translate-properties</artifactId>
<version>1.1</version>
<version>1.3</version>
<packaging>jar</packaging>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.target>1.8</maven.compiler.target>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<maven.compiler.source>11</maven.compiler.source>
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
</properties>

Expand Down Expand Up @@ -45,7 +45,12 @@
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.4</version>
</dependency>


</dependencies>
</project>
</project>
59 changes: 38 additions & 21 deletions src/main/java/com/nuspectra/translation/FixTranslation.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,26 @@ public enum FixTranslation {

public int count(String text, String find) {
int index = 0, count = 0, length = find.length();
while( (index = text.indexOf(find, index)) != -1 ) {
index += length; count++;
while ((index = text.indexOf(find, index)) != -1) {
index += length;
count++;
}
return count;
}

int countMacros(String in)
{
int countMacros(String in) {
int c = 0;
for (String m:knownMacros)
{
for (String m : knownMacros) {
c += count(in, m);
}
return c;
}

boolean checkOutputOK(String translated, String orig)
{
boolean checkOutputOK(String translated, String orig) {
int oc = countMacros(orig);
int tc = countMacros(translated);
if (oc!=tc)
{
System.out.println("Macro Error!\n"+orig+"\n"+translated);
if (oc != tc) {
System.out.println("Macro Error!\n" + orig + "\n" + translated);
return false;
}
return true;
Expand All @@ -47,9 +44,24 @@ private String fixPercent(String s, char c) {
return s;
}

public String fixLine(String s, String inputString) {
/**
 * Applies {@code _fixLine} repeatedly until a pass leaves the text unchanged,
 * giving up after 100 passes.
 *
 * @param s           the translated text to clean up
 * @param inputString the text the translation was produced from; forwarded
 *                    unchanged to every {@code _fixLine} pass
 * @return the first result that a further pass does not change, or the
 *         result of the 100th pass if the text never settles
 */
public String fixLine(final String s, String inputString) {
    String current = s;

    // Bounded so that a pair of rules undoing each other cannot loop forever.
    for (int attempt = 0; attempt < 100; attempt++) {
        String fixed = _fixLine(current, inputString);
        if (fixed.equals(current)) {
            return fixed;
        }
        System.out.println("taking another attempt: " + s);
        current = fixed;
    }
    return current;
}

private String _fixLine(String s, String inputString) {
boolean ok = checkOutputOK(s, inputString);
assert(ok);
assert (ok);

s = s.replace("\\ N", "\\n");
s = s.replace("\\ n", "\\n");
Expand All @@ -67,21 +79,26 @@ public String fixLine(String s, String inputString) {
s = s.replace("&quot;", "\"");
s = s.replace("&#39;", "'");

if (s.contains("&"))
{

s = s.replace("' %d'", "'%d'");
s = s.replace("' %s'", "'%s'");

s = s.replace("( %", "(%");


if (s.contains("&")) {
// Unescape failed... ?
System.out.println("& found in output:"+s);
System.out.println("& found in output:" + s);
}

s = s.replace(" .", ".");
// s = s.replace(" .", "."); // This actually is correct in some languages.

s = s.trim();

boolean ok2 = checkOutputOK(s, inputString);
if (!ok2)
{
System.err.println("Failure with macro counts:"+s+" from "+inputString);
assert(ok2);
if (!ok2) {
System.err.println("Failure with macro counts:" + s + " from " + inputString);
assert (ok2);
}
return s;
}
Expand Down
90 changes: 17 additions & 73 deletions src/main/java/com/nuspectra/translation/GoogleTranslate.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,18 @@
import com.google.cloud.translate.Translate;
import com.google.cloud.translate.TranslateOptions;
import com.google.cloud.translate.Translation;
import org.apache.commons.lang3.StringEscapeUtils;

import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

// singleton class to access GoogleTranslate.
public enum GoogleTranslate {
instance;
private static final Log log = LogFactory.getLog(GoogleTranslate.class);

final HashMap<String, String> escapeMap = new HashMap<>(); // strings that shall not be translated. (such as %s)

// Simple translation from English ("en") as plain text, without text substitutions.
public String translate(String text, String targetLanguge) {
return translate(text, "en", targetLanguge, "text");
public String translate(String text, String targetLanguage) {
return translate(text, "en", targetLanguage, "text");
}

public String translate(String text, String sourceLanguage, String targetLanguage, String format) {
Expand All @@ -34,85 +32,31 @@ public String translate(String text, String sourceLanguage, String targetLanguag
return out;
}

public String translateFormattedString(String text, String sourceLanguage, String targetLanguage) throws Exception {
String translated = translate(text, sourceLanguage, targetLanguage, "text");
return translated;

public String translateFormattedString(String string, String sourceLanguage, String targetLanguage) throws Exception {
String text = escapeString(string);

try {

String translated = translate(text, sourceLanguage, targetLanguage, "html");
boolean ok1 = FixTranslation.instance.checkOutputOK(string, translated);
String unescaped = unescapeString(translated);
unescaped = FixTranslation.instance.fixLine(unescaped, text);
boolean ok2 = FixTranslation.instance.checkOutputOK(string, translated);

if (!ok1 || !ok2)
throw new Exception("Failed to preserve macros... failing");

return unescaped;
} catch (Exception th) {
System.err.println("Error translating to " + targetLanguage + " " + text);
throw th;
}
}


GoogleTranslate() {
initEscapedSubstitutionMap();
}

// this is not an all-inclusive list. too many to know: %-d, % s, etc.
// Typical string substitutions. you need to make sure the ones used in your properties file are in this list.
// TODO: Add logic to detect when a string contains an unmapped substitution
protected void initEscapedSubstitutionMap() {
String format_specifiers[] = {"%s", "%d", "%f", "%n", "%a", "%%", "%S", "%e", "%E"};
for (String s : format_specifiers) {
doNotTranslate(s);
}
escapeMap.put("\\n", "<br>");
escapeMap.put("\\t", " ");

// Alternate method, but could fail (?) if non-arabic numerals are translated.
// But might give better translations so google knows a number is part of the translation
if (false) {
escapeMap.put("%f", "12345.98765");
escapeMap.put("%d", "54321");
}

}

// Call this to add text that shouldn't be translated.
public void doNotTranslate(String text)
{
escapeMap.put(text, "<span translate=\"no\">" + text + "+</span>");
}

public String escapeString(String line) {
if (escapeMap.isEmpty()) initEscapedSubstitutionMap();

String out = line;
for (Map.Entry<String, String> e : escapeMap.entrySet()) {
out = out.replace(e.getKey(), e.getValue());
}

// now there may be other strings to replace..
// for instance, '%s' often gets translated to ' %s ' which we can "fix" here if we know all of the quirks of google translate.
// so replace ' %s ', "'%s'"


return out;
}
public String translateHTML(String srcHtml, String sourceLanguage, String targetLanguage) {

public String unescapeString(String line) {
String out = StringEscapeUtils.unescapeHtml4(line);
Translate translate = TranslateOptions.getDefaultInstance().getService();
Translation translation =
translate.translate(
srcHtml,
Translate.TranslateOption.sourceLanguage(sourceLanguage),
Translate.TranslateOption.targetLanguage(targetLanguage),
Translate.TranslateOption.format("html"));
String out = translation.getTranslatedText();

for (Map.Entry<String, String> e : escapeMap.entrySet()) {
out = out.replace(e.getValue(), e.getKey());
}
// convert &#nnn; escaped characters.
return out;
}



}
86 changes: 86 additions & 0 deletions src/main/java/com/nuspectra/translation/PropertiesToHTML.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package com.nuspectra.translation;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;


/**
 * Converts {@code key=value} properties text to a minimal HTML document and
 * back again.
 *
 * <p>Each property becomes one {@code <pre id='KEY'>VALUE</pre>} element, so
 * the key travels in the {@code id} attribute while the value is the
 * element's content.
 *
 * <p>Only single-line {@code key=value} entries are understood. Backslash
 * line continuations and the {@code :} / whitespace separators of the full
 * properties format are not handled.
 */
public class PropertiesToHTML {
    private static final Log log = LogFactory.getLog(PropertiesToHTML.class);

    /**
     * Renders properties text as an HTML document with one {@code <pre>}
     * element per property.
     *
     * <p>Blank lines, comment lines (first non-blank character {@code #} or
     * {@code !}) and lines without any {@code =} are skipped. A line is split
     * at its <em>first</em> {@code =}, so values that themselves contain
     * {@code =} are kept intact, and an entry with an empty value is emitted
     * as an empty element instead of being dropped.
     *
     * @param content the properties text, one entry per line
     * @return the HTML document
     */
    public static String toHTML(String content) {
        StringBuilder html = new StringBuilder("<html><body>");
        for (String rawLine : content.split("\n")) {
            // trim() also removes the '\r' left behind by CRLF line endings.
            String line = rawLine.trim();
            if (line.isEmpty() || line.startsWith("#") || line.startsWith("!")) {
                continue;
            }
            int separator = line.indexOf('=');
            if (separator < 0) {
                continue;
            }
            String key = line.substring(0, separator).trim();
            String value = line.substring(separator + 1).trim();
            html.append("<pre id='").append(escapeAttribute(key)).append("'>")
                    .append(escape(value))
                    .append("</pre>\n");
        }
        return html.append("</body></html>").toString();
    }

    /**
     * Escapes the two characters that would otherwise end or corrupt a
     * single-quoted HTML attribute value.
     */
    private static String escapeAttribute(String key) {
        return key.replace("&", "&amp;").replace("'", "&#39;");
    }

    /**
     * Hook for escaping a property value before it is embedded in the HTML.
     * Currently a pass-through: the value is returned unchanged.
     *
     * NOTE(review): values containing {@code <} or {@code &} are therefore
     * written as raw markup; confirm whether that is intended before adding
     * escaping here.
     */
    private static String escape(String value) {
        return value;
    }

    /**
     * Reads a properties file as UTF-8 and renders it with
     * {@link #toHTML(String)}.
     *
     * @param propertiesFile the file to read
     * @return the HTML document
     * @throws IOException if the file cannot be read
     */
    public static String toHTML(File propertiesFile) throws IOException {
        return toHTML(FileUtils.readFileToString(propertiesFile, StandardCharsets.UTF_8));
    }

    /** Parses the HTML and selects every {@code <pre>} element in it. */
    private static Elements selectPreTags(String html) {
        Document doc = Jsoup.parse(html);
        return doc.select("pre");
    }

    /**
     * Collects the {@code <pre>} elements of an HTML document into a map from
     * element id to trimmed element text.
     *
     * <p>If several elements share an id, the last one wins.
     *
     * @param html the HTML document
     * @return a new map of id to text
     */
    public static HashMap<String, String> htmlToMap(String html) {
        HashMap<String, String> preTagsMap = new HashMap<>();
        for (Element preTag : selectPreTags(html)) {
            preTagsMap.put(preTag.id(), preTag.text().trim());
        }
        return preTagsMap;
    }

    /**
     * Turns the {@code <pre>} elements of an HTML document back into
     * properties text, one {@code id=text} line per element, each terminated
     * by a newline.
     *
     * @param html the HTML document
     * @return the properties text
     */
    public static String htmlToProperties(String html) {
        StringBuilder out = new StringBuilder();
        for (Element preTag : selectPreTags(html)) {
            out.append(preTag.id()).append('=').append(preTag.text().trim()).append('\n');
        }
        return out.toString();
    }
}
Loading

0 comments on commit 1ba043a

Please sign in to comment.