Skip to content

Commit

Permalink
Rebase with origin/main, fix JUnit 5 migration errors
Browse files Browse the repository at this point in the history
  • Loading branch information
rzo1 committed Nov 29, 2023
1 parent 49f51ef commit 342a025
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

import opennlp.tools.sentdetect.segment.LanguageRule;
import opennlp.tools.sentdetect.segment.Rule;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.XmlUtil;
import opennlp.tools.util.model.ArtifactSerializer;
Expand Down Expand Up @@ -521,4 +524,79 @@ public boolean getBool(String name, boolean defValue) throws InvalidFormatExcept
*/
public abstract AdaptiveFeatureGenerator create() throws InvalidFormatException;
}

/**
 * Builds the language-rule table described by an XML descriptor.
 * <p>
 * The stream is parsed via {@code createDOM}. When the document element is named
 * {@code languageRules}, each direct child element named {@code languageRule} is
 * handed to {@link #getRules(Map, Element)}, which fills the returned map. Any other
 * child node is ignored. A document with a different root tag produces an empty map.
 *
 * @param xmlDescriptionIn the stream holding the XML descriptor.
 * @return a mutable map filled by {@link #getRules(Map, Element)}; never {@code null}.
 * @throws IOException propagated from {@code createDOM}.
 */
public static Map<String, LanguageRule> getLanguageRules(InputStream xmlDescriptionIn) throws IOException {
  final Element root = createDOM(xmlDescriptionIn).getDocumentElement();
  final String rootTag = root.getTagName();

  final Map<String, LanguageRule> rulesByLanguage = new HashMap<>();
  if (!"languageRules".equals(rootTag)) {
    // Unknown root element: nothing to collect.
    return rulesByLanguage;
  }

  // Walk the direct children through the sibling chain; only <languageRule> elements matter.
  for (Node child = root.getFirstChild(); child != null; child = child.getNextSibling()) {
    if (!(child instanceof Element)) {
      continue;
    }
    final Element candidate = (Element) child;
    if ("languageRule".equals(candidate.getTagName())) {
      getRules(rulesByLanguage, candidate);
    }
  }
  return rulesByLanguage;
}

/**
 * Reads one {@code languageRule} element and registers the resulting
 * {@link LanguageRule} in {@code map} under the value of its {@code name} attribute.
 * <p>
 * Every direct child element named {@code rule} is passed to
 * {@link #getRule(LanguageRule, Element)}; other child nodes are ignored.
 * An element without a (non-empty) {@code name} attribute is skipped entirely,
 * leaving {@code map} untouched.
 *
 * @param map     the map that receives the parsed rule set, keyed by language rule name.
 * @param element the {@code languageRule} element to read.
 */
static void getRules(Map<String, LanguageRule> map, Element element) {
  String name = element.getAttribute("name");
  // Element#getAttribute yields "" (not null) for a missing attribute, so a plain
  // null check would register unnamed rule sets under the empty key.
  if (name == null || name.isEmpty()) {
    return;
  }

  LanguageRule languageRule = new LanguageRule(name);
  NodeList nodes = element.getChildNodes();
  for (int i = 0; i < nodes.getLength(); i++) {
    Node node = nodes.item(i);
    if (node instanceof Element) {
      Element childElem = (Element) node;
      if ("rule".equals(childElem.getTagName())) {
        getRule(languageRule, childElem);
      }
    }
  }
  map.put(name, languageRule);
}

/**
 * Reads one {@code rule} element and, if its {@code break} attribute is
 * {@code "yes"} or {@code "no"}, adds a corresponding {@link Rule} to
 * {@code languageRule}.
 * <p>
 * The text of the direct child elements {@code beforeBreak} and {@code afterBreak}
 * supplies the two patterns handed to the {@link Rule} constructor; a missing or
 * empty child contributes the empty string. If a child element occurs more than
 * once, the last occurrence wins. Elements whose {@code break} attribute has any
 * other value (including absent) are ignored.
 *
 * @param languageRule the rule set that receives the parsed rule.
 * @param element      the {@code rule} element to read.
 */
static void getRule(LanguageRule languageRule, Element element) {
  // Element#getAttribute yields "" for a missing attribute, so validate the value itself.
  String breaking = element.getAttribute("break");
  final boolean isBreak;
  if ("yes".equals(breaking)) {
    isBreak = true;
  } else if ("no".equals(breaking)) {
    isBreak = false;
  } else {
    return;
  }

  String beforeBreak = "";
  String afterBreak = "";
  NodeList nodes = element.getChildNodes();
  for (int i = 0; i < nodes.getLength(); i++) {
    Node node = nodes.item(i);
    if (!(node instanceof Element)) {
      continue;
    }
    Element childElem = (Element) node;
    String tagName = childElem.getTagName();
    if ("beforeBreak".equals(tagName)) {
      beforeBreak = readBreakPattern(childElem);
    } else if ("afterBreak".equals(tagName)) {
      afterBreak = readBreakPattern(childElem);
    }
  }

  languageRule.addRule(new Rule(isBreak, beforeBreak, afterBreak));
}

/**
 * Returns the textual content of a {@code beforeBreak}/{@code afterBreak} element.
 * <p>
 * Uses {@link Node#getTextContent()} instead of casting the first child to
 * {@link Text}, so a leading comment or processing instruction no longer causes a
 * {@link ClassCastException}.
 *
 * @param patternElem the element holding the pattern text.
 * @return the element's text, or the empty string if it has none.
 */
private static String readBreakPattern(Element patternElem) {
  String content = patternElem.getTextContent();
  return content != null ? content : "";
}
}
Loading

0 comments on commit 342a025

Please sign in to comment.