Skip to content

Commit

Permalink
Fix #5 by sorting indexes in HyphenIndexFinder (#6)
Browse files Browse the repository at this point in the history
* Fix #5 by sorting indexes in `HyphenIndexFinder`.

* Refactor common loading code
  • Loading branch information
jespersm authored Sep 12, 2024
1 parent ad81d3d commit 0cd8dca
Show file tree
Hide file tree
Showing 5 changed files with 1,207 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@

import io.github.nianna.api.HyphenatorProperties;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -146,7 +142,8 @@ private Stream<Integer> getIndexesWithOddPriorities(int tokenLength, Map<Integer
.filter(entry -> Utils.isOdd(entry.getValue()))
.map(Map.Entry::getKey)
.filter(index -> index <= tokenLength - hyphenatorProperties.minTrailingLength())
.filter(index -> index >= hyphenatorProperties.minLeadingLength());
.filter(index -> index >= hyphenatorProperties.minLeadingLength())
.sorted();
}

private List<String> append(List<String> collector, String newValue) {
Expand Down
10 changes: 9 additions & 1 deletion src/test/java/io/github/nianna/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,16 @@
public class TestUtil {

/**
 * Loads the hyphenation patterns for the {@code pl_PL} tag.
 *
 * @return whatever {@code loadPatterns} returns for the tag {@code "pl_PL"}
 */
public static List<String> loadPlPatterns() {
    final String polishTag = "pl_PL";
    return loadPatterns(polishTag);
}

/**
 * Loads the hyphenation patterns for the {@code da_DK} tag.
 *
 * @return whatever {@code loadPatterns} returns for the tag {@code "da_DK"}
 */
public static List<String> loadDaPatterns() {
    final String danishTag = "da_DK";
    return loadPatterns(danishTag);
}

private static List<String> loadPatterns(String tag) {
try {
Path patternsPath = Path.of(TestUtil.class.getResource("/hyph_pl_PL.dic").toURI());
Path patternsPath = Path.of(TestUtil.class.getResource("/hyph_" + tag + ".dic").toURI());
return Files.readAllLines(patternsPath);
} catch (IOException | URISyntaxException e) {
e.printStackTrace();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ void shouldHyphenateEvenWithLeadingAndTrailingNonAlphabeticCharacters() {
assertEquals(List.of(5,7,9), result);
}

@Test
void shouldHyphenateReallyLongWordsInOrder() {
    // Expected break points, in ascending order: Tra-fik-sik-ker-heds-kampag-ne-pla-kat
    List<Integer> expectedIndexes = List.of(3, 6, 9, 12, 16, 22, 24, 27);

    List<String> danishPatterns = TestUtil.loadDaPatterns();
    HyphenIndexFinder danishFinder = new HyphenIndexFinder(danishPatterns, hyphenatorProperties);
    List<Integer> actualIndexes = danishFinder.findIndexes("Trafiksikkerhedskampagneplakat");

    assertEquals(expectedIndexes, actualIndexes);
}

@Test
void shouldReturnEmptyListIfWordIsNotAlphabetic() {
HyphenIndexFinder finder = new HyphenIndexFinder(List.of(), hyphenatorProperties);
Expand Down
43 changes: 43 additions & 0 deletions src/test/resources/README_da.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
title: Hyphenation patterns for Danish
% copyright: Copyright (C) 1994 Frank Jensen
% notice: This file is part of the hyph-utf8 package.
% See http://www.hyphenation.org/tex for more information.
% language:
% name: Danish
% tag: da
% version: 2011-01-11
% authors:
% -
% name: Frank Jensen
% contact: frank.jensen (at) hugin.com
% licence:
% - This file is available under any of these licences:
% -
% name: LPPL
% version: 1.3
% or_later: true
% url: http://www.latex-project.org/lppl/lppl-1-3.html
% -
% name: MIT
% url: https://opensource.org/licenses/MIT
% text: >
% Permission is hereby granted, free of charge, to any person
% obtaining a copy of this software and associated documentation
% files (the "Software"), to deal in the Software without
% restriction, including without limitation the rights to use,
% copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the
% Software is furnished to do so, subject to the following
% conditions:
%
% The above copyright notice and this permission notice shall be
% included in all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
% OTHER DEALINGS IN THE SOFTWARE.
Loading

0 comments on commit 0cd8dca

Please sign in to comment.