From 1ba043af77f068c16d1e76e12c573c86e6cb8d90 Mon Sep 17 00:00:00 2001 From: openaudible Date: Sun, 21 Jul 2024 16:57:15 -0700 Subject: [PATCH] 1.3 --- .gitignore | 2 + pom.xml | 13 +- .../nuspectra/translation/FixTranslation.java | 59 ++++--- .../translation/GoogleTranslate.java | 90 ++--------- .../translation/PropertiesToHTML.java | 86 ++++++++++ .../nuspectra/translation/PropertyUtils.java | 108 ++++++++++--- .../translation/TranslateProperties.java | 151 ++++++++++++----- .../translation/TranslateWebsite.java | 152 ++++++++++++++++++ 8 files changed, 501 insertions(+), 160 deletions(-) create mode 100644 src/main/java/com/nuspectra/translation/PropertiesToHTML.java create mode 100644 src/main/java/com/nuspectra/translation/TranslateWebsite.java diff --git a/.gitignore b/.gitignore index fd0bf9a..da67a0b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /.idea/ /target/ translate-properties.iml +/src/main/java/com/nuspectra/translation/OA.java +translate-properties.iml diff --git a/pom.xml b/pom.xml index 46745fd..795413a 100644 --- a/pom.xml +++ b/pom.xml @@ -6,14 +6,14 @@ com.nuspectra.translation translate-properties - 1.1 + 1.3 jar UTF-8 UTF-8 - 1.8 - 1.8 + 11 + 11 3.8.1 @@ -45,7 +45,12 @@ commons-io 2.6 + + org.jsoup + jsoup + 1.15.4 + - \ No newline at end of file + diff --git a/src/main/java/com/nuspectra/translation/FixTranslation.java b/src/main/java/com/nuspectra/translation/FixTranslation.java index 2349952..d786f76 100644 --- a/src/main/java/com/nuspectra/translation/FixTranslation.java +++ b/src/main/java/com/nuspectra/translation/FixTranslation.java @@ -7,29 +7,26 @@ public enum FixTranslation { public int count(String text, String find) { int index = 0, count = 0, length = find.length(); - while( (index = text.indexOf(find, index)) != -1 ) { - index += length; count++; + while ((index = text.indexOf(find, index)) != -1) { + index += length; + count++; } return count; } - int countMacros(String in) - { + int countMacros(String in) { int c = 0; - for (String m:knownMacros) - { + for (String m : knownMacros) { c += count(in, m); } return c; } - boolean checkOutputOK(String translated, String orig) - { + boolean checkOutputOK(String translated, String orig) { int oc = countMacros(orig); int tc = countMacros(translated); - if (oc!=tc) - { - System.out.println("Macro Error!\n"+orig+"\n"+translated); + if (oc != tc) { + System.out.println("Macro Error!\n" + orig + "\n" + translated); return false; } return true; @@ -47,9 +44,24 @@ private String fixPercent(String s, char c) { return s; } - public String fixLine(String s, String inputString) { + public String fixLine(final String s, String inputString) { + String copy = s; + int count = 0; + + for (int x = 0; x < 100; x++) { + String out = _fixLine(copy, inputString); + if (out.equals(copy)) + return out; + count++; + System.out.println("taking another attempt: " + s); + copy = out; + } + return copy; + } + + private String _fixLine(String s, String inputString) { boolean ok = checkOutputOK(s, inputString); - assert(ok); + assert (ok); s = s.replace("\\ N", "\\n"); s = s.replace("\\ n", "\\n"); @@ -67,21 +79,26 @@ public String fixLine(String s, String inputString) { s = s.replace(""", "\""); s = s.replace("'", "'"); - if (s.contains("&")) - { + + s = s.replace("' %d'", "'%d'"); + s = s.replace("' %s'", "'%s'"); + + s = s.replace("( %", "(%"); + + + if (s.contains("&")) { // Unescape failed... ? - System.out.println("& found in output:"+s); + System.out.println("& found in output:" + s); } - s = s.replace(" .", "."); + // s = s.replace(" .", "."); // This actually is correct in some languages. s = s.trim(); boolean ok2 = checkOutputOK(s, inputString); - if (!ok2) - { - System.err.println("Failure with macro counts:"+s+" from "+inputString); - assert(ok2); + if (!ok2) { + System.err.println("Failure with macro counts:" + s + " from " + inputString); + assert (ok2); } return s; } diff --git a/src/main/java/com/nuspectra/translation/GoogleTranslate.java b/src/main/java/com/nuspectra/translation/GoogleTranslate.java index fed2e6a..06a5589 100644 --- a/src/main/java/com/nuspectra/translation/GoogleTranslate.java +++ b/src/main/java/com/nuspectra/translation/GoogleTranslate.java @@ -3,20 +3,18 @@ import com.google.cloud.translate.Translate; import com.google.cloud.translate.TranslateOptions; import com.google.cloud.translate.Translation; -import org.apache.commons.lang3.StringEscapeUtils; - -import java.util.HashMap; -import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; // singleton class to access GoogleTranslate. public enum GoogleTranslate { instance; + private static final Log log = LogFactory.getLog(GoogleTranslate.class); - final HashMap escapeMap = new HashMap<>(); // strings that shall not be translated. (such as %s) // simple english translation without text substitutions - public String translate(String text, String targetLanguge) { - return translate(text, "en", targetLanguge, "text"); + public String translate(String text, String targetLanguage) { + return translate(text, "en", targetLanguage, "text"); } public String translate(String text, String sourceLanguage, String targetLanguage, String format) { @@ -34,85 +32,31 @@ public String translate(String text, String sourceLanguage, String targetLanguag return out; } + public String translateFormattedString(String text, String sourceLanguage, String targetLanguage) throws Exception { + String translated = translate(text, sourceLanguage, targetLanguage, "text"); + return translated; - public String translateFormattedString(String string, String sourceLanguage, String targetLanguage) throws Exception { - String text = escapeString(string); - - try { - - String translated = translate(text, sourceLanguage, targetLanguage, "html"); - boolean ok1 = FixTranslation.instance.checkOutputOK(string, translated); - String unescaped = unescapeString(translated); - unescaped = FixTranslation.instance.fixLine(unescaped, text); - boolean ok2 = FixTranslation.instance.checkOutputOK(string, translated); - - if (!ok1 || !ok2) - throw new Exception("Failed to preserve macros... failing"); - - return unescaped; - } catch (Exception th) { - System.err.println("Error translating to " + targetLanguage + " " + text); - throw th; - } } GoogleTranslate() { - initEscapedSubstitutionMap(); } - // this is not an all-inclusive list. too many to know: %-d, % s, etc. - // Typical string substitutions. you need to make sure the ones used in your properties file are in this list. - // TODO: Add logic to detect when a string contains an unmapped substitution - protected void initEscapedSubstitutionMap() { - String format_specifiers[] = {"%s", "%d", "%f", "%n", "%a", "%%", "%S", "%e", "%E"}; - for (String s : format_specifiers) { - doNotTranslate(s); - } - escapeMap.put("\\n", "
"); - escapeMap.put("\\t", " "); - - // Alternate method, but could fail (?) if non-arabic numerals are translated. - // But might give better translations so google knows a number is part of the translation - if (false) { - escapeMap.put("%f", "12345.98765"); - escapeMap.put("%d", "54321"); - } - - } // Call this to add text that shouldn't be translated. - public void doNotTranslate(String text) - { - escapeMap.put(text, "" + text + "+"); - } - - public String escapeString(String line) { - if (escapeMap.isEmpty()) initEscapedSubstitutionMap(); - - String out = line; - for (Map.Entry e : escapeMap.entrySet()) { - out = out.replace(e.getKey(), e.getValue()); - } - - // now there may be other strings to replace.. - // for instance, '%s' often gets translated to ' %s ' which we can "fix" here if we know all of the quirks of google translate. - // so replace ' %s ', "'%s'" - return out; - } + public String translateHTML(String srcHtml, String sourceLanguage, String targetLanguage) { - public String unescapeString(String line) { - String out = StringEscapeUtils.unescapeHtml4(line); + Translate translate = TranslateOptions.getDefaultInstance().getService(); + Translation translation = + translate.translate( + srcHtml, + Translate.TranslateOption.sourceLanguage(sourceLanguage), + Translate.TranslateOption.targetLanguage(targetLanguage), + Translate.TranslateOption.format("html")); + String out = translation.getTranslatedText(); - for (Map.Entry e : escapeMap.entrySet()) { - out = out.replace(e.getValue(), e.getKey()); - } - // convert &#nnn; escaped characters. return out; } - - - } diff --git a/src/main/java/com/nuspectra/translation/PropertiesToHTML.java b/src/main/java/com/nuspectra/translation/PropertiesToHTML.java new file mode 100644 index 0000000..0eda78b --- /dev/null +++ b/src/main/java/com/nuspectra/translation/PropertiesToHTML.java @@ -0,0 +1,86 @@ +package com.nuspectra.translation; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; + + +public class PropertiesToHTML { + private static final Log log = LogFactory.getLog(PropertiesToHTML.class); + + public static String toHTML(String content) { + String html = ""; + for (String line : content.split("\n")) { + if (line.startsWith("#")) continue; + if (line.trim().isEmpty()) continue; + String[] parts = line.split("="); + if (parts.length != 2) continue; + String key = parts[0].trim(); + String value = parts[1].trim(); + String entry = "
";
+            String escaped = escape(value);
+            entry += escaped;
+            entry += "
"; + html += entry + "\n"; + } + html += ""; + return html; + } + + private static String escape(String value) { + + + if (value.contains("%")) { + // log.info("percent: "+value); + } + + + return value; + } + + public static String toHTML(File propertiesFile) throws IOException { + return toHTML(FileUtils.readFileToString(propertiesFile, StandardCharsets.UTF_8)); + } + + + public static HashMap htmlToMap(String html) { + HashMap preTagsMap = new HashMap<>(); + Document doc = Jsoup.parse(html); + + Elements preTags = doc.select("pre"); + + for (Element preTag : preTags) { + String id = preTag.id(); + String text = preTag.text().trim(); + preTagsMap.put(id, text); + } + + return preTagsMap; + } + + public static String htmlToProperties(String html) { + String out = ""; + Document doc = Jsoup.parse(html); + + Elements preTags = doc.select("pre"); + + for (Element preTag : preTags) { + String id = preTag.id(); + String text = preTag.text().trim(); + out += id + "=" + text + "\n"; + } + + return out; + } + + +} diff --git a/src/main/java/com/nuspectra/translation/PropertyUtils.java b/src/main/java/com/nuspectra/translation/PropertyUtils.java index dfedead..b01862a 100644 --- a/src/main/java/com/nuspectra/translation/PropertyUtils.java +++ b/src/main/java/com/nuspectra/translation/PropertyUtils.java @@ -1,47 +1,107 @@ package com.nuspectra.translation; -import java.io.*; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; +import org.apache.commons.io.FileUtils; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.*; public class PropertyUtils { public static Properties readProperties(File propertyFile) throws IOException { - final Properties basePropery = new Properties(); + final Properties props = new Properties(); InputStream is = new FileInputStream(propertyFile); if (is == null) throw new IOException("Property file could not be read " + propertyFile.getAbsolutePath()); + String contents = FileUtils.readFileToString(propertyFile, StandardCharsets.UTF_8); + int lineNum = 0; - try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(propertyFile), "UTF-8"))) { - String line; - while ((line = bufferedReader.readLine()) != null) { - line = line.trim(); - if (line.isEmpty()) continue; - if (line.startsWith("#")) continue; - if (!line.contains("=")) - throw new IOException("Expected = in line:"+line); - - String[] propertyTokens = line.split("="); - basePropery.put(propertyTokens[0], propertyTokens[1]); - } + for (String line : contents.split("\n")) { + lineNum++; + line = line.trim(); + if (line.isEmpty()) continue; + if (line.startsWith("#")) continue; + if (!line.contains("=")) + throw new IOException("Expected = in line:" + propertyFile.getAbsolutePath() + ":" + lineNum + " " + line); + + String[] propertyTokens = line.split("="); + String key = propertyTokens[0]; + String val = propertyTokens.length > 1 ? propertyTokens[1] : ""; + props.put(key, val); } - return basePropery; + + return props; } public static void writeProperties(final Properties properties, File destinationFile, boolean sort) throws IOException { List keyList = new ArrayList<>(); - for (Object k:properties.keySet()) + for (Object k : properties.keySet()) keyList.add(k.toString()); if (sort) Collections.sort(keyList); - try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(destinationFile))) { - for (String k : keyList) { - bufferedWriter.write(k + "=" + properties.getProperty(k) + "\n"); - } + String out = ""; + for (String k : keyList) { + out += k + "=" + properties.getProperty(k) + "\n"; + } + FileUtils.writeStringToFile(destinationFile, out, StandardCharsets.UTF_8); + String check = FileUtils.readFileToString(destinationFile, StandardCharsets.UTF_8); + if (!out.equals(check)) { + throw new IOException("Expected file to be same: " + out + "\n" + check); } } + public static String compareProperties(File f1, File f2) throws IOException { + String out = ""; + out += "Comparing " + f1.getAbsolutePath() + " and " + f2.getAbsolutePath() + "\n"; + Properties p1 = readProperties(f1); + Properties p2 = readProperties(f2); + Set set = new HashSet<>(); + set.addAll(p1.stringPropertyNames()); + set.addAll(p2.stringPropertyNames()); + + List keys = new ArrayList<>(); + keys.addAll(set); + Collections.sort(keys); + if (p1.size() == p2.size()) + out += "Same size: " + p1.size() + "\n"; + else { + out += "Diff size: " + p1.size() + " " + p2.size() + "\n"; + + for (String k : set) { + if (p1.get(k) == null) + out += "missing from p1: " + k + "\n"; + if (p2.get(k) == null) + out += "missing from p2: " + k + "\n"; + } + } + int differences = 0; + + for (String k : keys) { + String s1 = p1.getProperty(k); + String s2 = p2.getProperty(k); + if (s1 == null) { + s1 = ""; + out += "MISSING1:"; + } + if (s2 == null) { + s2 = ""; + out += "MISSING2:"; + } + + if (!s1.equals(s2)) { + out += "diff: " + k + " " + s1 + "!=" + s2 + "\n"; + differences++; + } + } + + out += "differences=" + differences; + + return out; + } } + + diff --git a/src/main/java/com/nuspectra/translation/TranslateProperties.java b/src/main/java/com/nuspectra/translation/TranslateProperties.java index 48a8ae6..a8dcef7 100644 --- a/src/main/java/com/nuspectra/translation/TranslateProperties.java +++ b/src/main/java/com/nuspectra/translation/TranslateProperties.java @@ -1,8 +1,5 @@ package com.nuspectra.translation; -import com.google.cloud.translate.Translate; -import com.google.cloud.translate.TranslateOptions; - import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -13,8 +10,7 @@ public class TranslateProperties { final Properties baseProperties; // input key=value properties - final Translate translate = TranslateOptions.getDefaultInstance().getService(); - String defaultLanguages[] = {"ar", "fr", "es", "ru", "hi", "de"}; // jp is failing + String[] defaultLanguages = {"ar", "fr", "es", "ru", "hi", "de"}; // jp is failing final File dir; // directory of propertyFiles to work on. final File inputFile; // basePropertyFile @@ -22,6 +18,7 @@ public class TranslateProperties { boolean verbose = true; int count; HashSet languages = new HashSet<>(); + boolean requireUpperCaseKeys = true; // use for tests to check if key exists. public static boolean hasAPIKey() { @@ -33,6 +30,7 @@ public static boolean hasAPIKey() { return hasKey; } + // TranslateProperties.main([file], [languages]) // File of base properties file to translate (defaults to test directory.) // Languages is a comma separated list of language identifiers. (defaults to defaultLanguages) @@ -54,6 +52,9 @@ public static void main(String[] args) { } } + public static Set updateProperties(File mainProperty, String languages) throws Exception { + return updateProperties(mainProperty, languages, true, true, true); + } public static Set updateProperties(File mainProperty, String languages, boolean fix, boolean update, boolean saveChanges) throws Exception { @@ -106,17 +107,29 @@ private Set run(boolean fix, boolean update, boolean saveChanges) throws // For all languages desired.. translate the keys. for (String l : getLanguages()) - if (!l.equals(getSourceLanguage())) - translateStrings(l, keySet, saveChanges); + translateStrings(l, keySet, saveChanges); } // save a copy of the properties so we know which have been modified next time run. if (saveChanges) writeProperties(baseProperties, getChangedFile()); - System.out.println("Updated. Translated " + count); + System.out.println("write=" + saveChanges + " fix=" + fix + " translated " + count + "\n" + inspect()); return keySet; } + public String inspect() throws IOException { + + String out = getSourceLanguage() + "\t" + baseProperties.size() + "\t" + wordCount(baseProperties) + "\n"; + + for (String l : getLanguages()) { + Properties p = getProperties(l); + out += l + "\t" + p.size() + "\t" + wordCount(p) + "\n"; + } + + return out; + + } + // string of comma separated languages... eg. en, fr, es public void setLanguages(String languages) { this.languages.clear(); @@ -196,33 +209,72 @@ private Set translateOrUpdateAll(boolean updateOnly, boolean cleanOnly) return keySet; }*/ + public Properties getProperties(String lang) throws IOException { + if (lang.isEmpty()) + return baseProperties; + assert (languages.contains(lang)); + File file = getFileDest(lang); + if (file.exists()) { + return PropertyUtils.readProperties(file); + } + return new Properties(); + } + + public Collection fixTranslations(boolean saveChanges) throws IOException { Collection changes = new HashSet<>(); - for (String l : getLanguages()) - if (!l.equals(getSourceLanguage())) { - File file = getFileDest(l); - if (file.exists()) { - Properties props = PropertyUtils.readProperties(file); - - - int changed = fixTranslations(props); - if (changed > 0) { - changes.add(l); - System.out.println("Changed " + file.getName() + " = " + changed); - if (saveChanges) { - // write the properties. - writeProperties(props, file); - } + for (String l : getLanguages()) { + File file = getFileDest(l); + if (file.exists()) { + Properties props = PropertyUtils.readProperties(file); + + + int changed = fixTranslations(file.getName(), props); + if (changed > 0) { + changes.add(l); + System.out.println("Changed " + file.getAbsolutePath() + " = " + changed + ", saveChanges=" + saveChanges); + if (saveChanges) { + // write the properties. + writeProperties(props, file); } } } + } return changes; } + public Collection checkInputs() throws Exception { + Collection changes = new HashSet<>(); + + + for (String l : getLanguages()) { + File file = getFileDest(l); + if (file.exists()) { + checkInput(file); + } + } + + + return changes; + } + + private void checkInput(File file) throws Exception { + if (file.exists()) { + Properties props = PropertyUtils.readProperties(file); + for (Object k : props.keySet()) { + String key = k.toString(); + if (!key.equals(key.toUpperCase())) + throw new Exception("Expected uppercase key:" + key + " in " + file); + + } + } + } + + private int wordCount(Properties properties) { int wc = 0; for (Object k : properties.keySet()) { @@ -230,7 +282,6 @@ private int wordCount(Properties properties) { if (!v.isEmpty()) { if (v.contains(" ")) { v = v.replace(" ", " "); - assert (false); } wc++; for (char c : v.toCharArray()) { @@ -243,31 +294,46 @@ private int wordCount(Properties properties) { } - private int fixTranslations(Properties properties) { + private int fixTranslations(String which, Properties properties) { int count = 0; HashSet toDelete = new HashSet<>(); for (Object k : properties.keySet()) { - String v = properties.getProperty(k.toString()); - String o = baseProperties.getProperty(k.toString()); + String key = k.toString(); + boolean badCase = false; + if (!key.equals(key.toUpperCase())) { + badCase = true; + if (requireUpperCaseKeys) + key = key.toUpperCase(); + } + + + String v = properties.getProperty(key); + String o = baseProperties.getProperty(key); if (o == null) { - System.out.println("Key no longer in base properties:" + k + " but exists in translated file. Will be deleted:\n" + v); + System.out.println("Key no longer in base properties:" + k + " but exists in translated file. Will be deleted:\n" + k + "->" + v); toDelete.add(k.toString()); count++; continue; } String c = FixTranslation.instance.fixLine(v, baseProperties.getProperty(k.toString())); - if (!v.equals(c)) { + if (!v.equals(c) || badCase) { count++; if (verbose) - System.out.println(v + "\n" + c); + System.out.println(which + " " + k + " fixed from/to\n" + v + "\n" + c); properties.put(k.toString(), c); } + + if (c.contains(" !")) { + System.out.println("Found non-breaking ( !) space in " + k+" for "+which); + toDelete.add(k.toString()); + } + } - for (String k:toDelete) + for (String k : toDelete) properties.remove(k); return count; @@ -291,10 +357,12 @@ public File getFileDest(String language_code) { return targetFile; } + // Do translations for all selected languages and save as a properties file. // key set is either a list of keys that have changed translations (and need to be re-translated) // or it is all keys. - public Properties translateStrings(String targetLanguage, Set mustUpdateKeySet, boolean saveChanges) throws Exception { + public Properties translateStrings(String targetLanguage, Set mustUpdateKeySet, boolean saveChanges) throws + Exception { File targetFile = getFileDest(targetLanguage); Properties p = targetFile.exists() ? PropertyUtils.readProperties(targetFile) : new Properties(); assert (!targetLanguage.equals(getSourceLanguage())); @@ -303,14 +371,20 @@ public Properties translateStrings(String targetLanguage, Set mustUpdate String key = e.getKey().toString(); String value = e.getValue().toString(); String foreignValue = p.getProperty(key); - boolean needTranslation = mustUpdateKeySet.contains(key) || foreignValue == null || foreignValue.contains("&#"); + boolean needTranslation = mustUpdateKeySet.contains(key) || foreignValue == null || foreignValue.contains("&#") || foreignValue.isEmpty(); + if (key.equals("COMMAND_COPY") && targetLanguage.equals("ja")) + System.currentTimeMillis(); if (needTranslation) { - String translatedString = GoogleTranslate.instance.translateFormattedString(value, getSourceLanguage(), targetLanguage); - p.put(key, translatedString); - count++; - if (verbose) { - System.out.println(count + ". " + key + ":" + value + " -> " + translatedString); + try { + String translatedString = GoogleTranslate.instance.translateFormattedString(value, getSourceLanguage(), targetLanguage); + p.put(key, translatedString); + count++; + if (verbose) { + System.out.println(count + ". " + key + ":" + value + " -> " + translatedString); + } + } catch (Throwable th) { + System.err.println("Unable to translate:" + value); } } } @@ -320,5 +394,6 @@ public Properties translateStrings(String targetLanguage, Set mustUpdate return p; } + } diff --git a/src/main/java/com/nuspectra/translation/TranslateWebsite.java b/src/main/java/com/nuspectra/translation/TranslateWebsite.java new file mode 100644 index 0000000..edc8bee --- /dev/null +++ b/src/main/java/com/nuspectra/translation/TranslateWebsite.java @@ -0,0 +1,152 @@ +package com.nuspectra.translation; + +import com.google.cloud.translate.Translate; +import com.google.cloud.translate.TranslateOptions; +import org.apache.commons.io.FileUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.*; + + +public class TranslateWebsite { + + final Translate translate = TranslateOptions.getDefaultInstance().getService(); + String[] defaultLanguages = {"ar", "fr", "es", "ru", "hi", "de", "it"}; // jp is failing + + final List files; + final File destDir; // where to save the translated files. Each will be in a subdirectory named for the language. + + private static final Log log = LogFactory.getLog(TranslateWebsite.class); + + boolean verbose = true; + int count; + HashSet languages = new HashSet<>(); + + // use for tests to check if key exists. + + + public static Set updateHTML(ArrayList files, File destFile, String langs, boolean fix, boolean update, boolean write) throws Exception { + TranslateWebsite t = new TranslateWebsite(files, destFile, langs); + return t.run(); + } + + + Set getChangedFiles() throws IOException { + HashSet changed = new HashSet<>(); // Keys to update. + for (File f : files) { + File cache = new File(f.getParentFile(), "en"); + String html = FileUtils.readFileToString(f, StandardCharsets.UTF_8); + String prev = cache.exists() ? FileUtils.readFileToString(cache, StandardCharsets.UTF_8) : null; + if (html.equals(prev)) + continue; + changed.add(f); + } + return changed; + } + + private Set run() throws Exception { + + + Set changedFiles = getChangedFiles(); + + + for (File f : files) { + + File cache = getDestFile(f, "en"); + + + // For all languages desired.. translate the keys. + for (String l : getLanguages()) { + File dest = getDestFile(f, l); + if (!dest.exists() || changedFiles.contains(f)) { + boolean changed = translateHtml(l, f, dest); + + if (changed) + changedFiles.add(dest); + + } else { + log.info("Skip unchanged " + dest.getAbsolutePath()); + } + } + + FileUtils.writeStringToFile(cache, FileUtils.readFileToString(f, StandardCharsets.UTF_8), StandardCharsets.UTF_8); + + } + + return changedFiles; + } + + + // string of comma separated languages... eg. en, fr, es + public void setLanguages(String languages) { + this.languages.clear(); + for (String l : languages.trim().split(",")) + this.languages.add(l.trim()); + } + + // override or change if source language is not english. + public String getSourceLanguage() { + return "en"; + } + + // return list of ISO-639-1 language identifiers, from: + // https://cloud.google.com/translate/docs/languages + // This should NOT return the source language (usually english!) + public Collection getLanguages() { + if (languages.isEmpty()) + languages.addAll(Arrays.asList(defaultLanguages.clone())); + + assert (!languages.contains(getSourceLanguage())); + + return languages; + } + + + // this saves the {baseName}_en.properties file. It could be saved elsewhere + protected File getDestFile(File f, String lang) { + File dir = new File(destDir, lang); + dir.mkdirs(); + File out = new File(dir, f.getName()); + return out; + } + + public TranslateWebsite(List files, File destDir, String langs) { + this.files = files; + this.destDir = destDir; + this.languages.addAll(Arrays.asList(langs.split(","))); + } + + + // Do translations for all selected languages and save as a properties file. + // key set is either a list of keys that have changed translations (and need to be re-translated) + // or it is all keys. + public boolean translateHtml(String targetLanguage, File sourceFile, File destFile) throws + Exception { + + String srcHtml = FileUtils.readFileToString(sourceFile, "UTF-8"); + String dstHtml = srcHtml; + // File hashFile = hashFile(destFile); // possible hash of srcHTML + + if (targetLanguage.equals(getSourceLanguage())) { + log.info("Skipping source language " + targetLanguage); + return false; + } + + + dstHtml = GoogleTranslate.instance.translateHTML(srcHtml, getSourceLanguage(), targetLanguage); + + + FileUtils.writeStringToFile(destFile, dstHtml, StandardCharsets.UTF_8); + // writeHash(hashFile, srcHtml); + log.info("Saved " + destFile.getAbsolutePath()); + + + return true; + } + +} +