From b3e43b699bbf113569ff8703519aa27008f7213b Mon Sep 17 00:00:00 2001 From: Weronika Pecio Date: Thu, 13 Jul 2023 14:20:06 +0200 Subject: [PATCH] Rename hyphenator properties --- README.md | 27 +++++++++++------- .../nianna/api/HyphenatorProperties.java | 28 +++++++++---------- .../nianna/internal/HyphenIndexFinder.java | 4 +-- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 53e4b95..da01147 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,18 @@ Hyphenation preserves the case of the original text and hyphenates words with no ## Requirements -To use this tool, you need to have a hyphenation pattern dictionary for the desired language which can be downloaded e.g. from [LibreOffice repositories](https://github.com/LibreOffice/dictionaries). -Find the file with "hyph" prefix e.g. hyph_pl_PL.dic for Polish language. +To use this tool, you need a hyphenation pattern dictionary for the desired language. +It can be downloaded from the [TeX hyphenation repository](https://github.com/hyphenation/tex-hyphen/tree/master/hyph-utf8/tex/generic/hyph-utf8/patterns/txt). +Choose the *.pat.txt file. -## Hyph dic file format -Hyph dictionaries contain metadata followed by a list of patterns. Make sure to pass only the patterns themselves when creating _Hyphenator_ instance. +Alternatively, you can download the dictionary e.g. from [LibreOffice repositories](https://github.com/LibreOffice/dictionaries). +In this case choose the file with the "hyph" prefix, e.g. hyph_pl_PL.dic for the Polish language. +Make sure to remove the tags at the beginning of the file and only pass the patterns themselves to the _Hyphenator_. 
``` UTF-8 <---- encoding info, use it to load file -LEFTHYPHENMIN 2 <------ minimium prefix length, the value can be passed to the Hyphenator in HyphenatorProperties -RIGHTHYPHENMIN 2 <------ minimium suffix length, the value can be passed to the Hyphenator in HyphenatorProperties -.ć8 <--- example pattern +LEFTHYPHENMIN 2 <------ this value can be passed to the Hyphenator as minLeadingLength +RIGHTHYPHENMIN 2 <------ this value can be passed to the Hyphenator as minTrailingLength +.ć8 <--- pattern .4ć3ć8 .ćł8 .2ć1ń8 @@ -28,7 +30,9 @@ RIGHTHYPHENMIN 2 <------ minimium suffix length, the value can be passed to the ## Example usage ### Using defaults -By default input text is automatically split into tokens. After hyphenation each word's prefix and suffix must be at least 2 characters long. Space is used as word separator and hyphen as syllables separator. +Input text is automatically split into tokens. +By default the first and last chunk after hyphenation must be at least 2 characters long. +Space is used as word separator and hyphen as syllables separator. ``` List patterns = ... // load the patterns from hyph dictionary file Hyphenator hyphenator = new Hyphenator(patterns); @@ -47,8 +51,11 @@ System.out.println(hyphenatedText); // prints "Test-ing" System.out.println(result.hyphenIndexes()); // prints [4] ``` -### Customizing minimum prefix and suffix lengths -To customize minimum prefix and suffix length pass custom _HyphenatorProperties_ when creating _Hyphenator_ instance. +### Customizing hyphenation +To skip some hyphens you can specify the following properties while creating _Hyphenator_ instance. + * minLeadingLength (default: 2) - hyphen can be placed only after first _minLeadingLength_ characters + * minTrailingLength (default: 2) - hyphen can be placed only before last _minTrailingLength_ characters + ``` List patterns = ... 
// load the patterns from hyph dictionary file HyphenatorProperties properties = new HyphenatorProperties(3, 4); diff --git a/src/main/java/com/github/nianna/api/HyphenatorProperties.java b/src/main/java/com/github/nianna/api/HyphenatorProperties.java index 43aa021..2465e72 100644 --- a/src/main/java/com/github/nianna/api/HyphenatorProperties.java +++ b/src/main/java/com/github/nianna/api/HyphenatorProperties.java @@ -4,31 +4,31 @@ public class HyphenatorProperties { - public static int DEFAULT_MIN_PREFIX_LENGTH = 2; + public static int DEFAULT_MIN_LEADING_LENGTH = 2; - public static int DEFAULT_MIN_SUFFIX_LENGTH = 2; + public static int DEFAULT_MIN_TRAILING_LENGTH = 2; - private final int minPrefixLength; + private final int minLeadingLength; - private final int minSuffixLength; + private final int minTrailingLength; - public HyphenatorProperties(int minPrefixLength, int minSuffixLength) { - checkArgument(minPrefixLength > 0, "Prefix must be at least 1 character long"); - checkArgument(minSuffixLength > 0, "Suffix must be at least 1 character long"); - this.minPrefixLength = minPrefixLength; - this.minSuffixLength = minSuffixLength; + public HyphenatorProperties(int minLeadingLength, int minTrailingLength) { + checkArgument(minLeadingLength > 0, "Min leading length must be at least 1"); + checkArgument(minTrailingLength > 0, "Min trailing length must be at least 1"); + this.minLeadingLength = minLeadingLength; + this.minTrailingLength = minTrailingLength; } public HyphenatorProperties() { - this(DEFAULT_MIN_PREFIX_LENGTH, DEFAULT_MIN_SUFFIX_LENGTH); + this(DEFAULT_MIN_LEADING_LENGTH, DEFAULT_MIN_TRAILING_LENGTH); } - public int getMinPrefixLength() { - return minPrefixLength; + public int getMinLeadingLength() { + return minLeadingLength; } - public int getMinSuffixLength() { - return minSuffixLength; + public int getMinTrailingLength() { + return minTrailingLength; } } diff --git a/src/main/java/com/github/nianna/internal/HyphenIndexFinder.java 
b/src/main/java/com/github/nianna/internal/HyphenIndexFinder.java index 4a7c2fc..e12c897 100644 --- a/src/main/java/com/github/nianna/internal/HyphenIndexFinder.java +++ b/src/main/java/com/github/nianna/internal/HyphenIndexFinder.java @@ -93,8 +93,8 @@ private Stream getIndexesWithOddPriorities(String token, Map isOdd(entry.getValue())) .map(Map.Entry::getKey) - .filter(index -> index <= token.length() - hyphenatorProperties.getMinSuffixLength()) - .filter(index -> index >= hyphenatorProperties.getMinPrefixLength()); + .filter(index -> index <= token.length() - hyphenatorProperties.getMinTrailingLength()) + .filter(index -> index >= hyphenatorProperties.getMinLeadingLength()); } private List append(List collector, String newValue) {