-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #48 from MighTguY/feature/checkStyleFix
updating travis and adding german qwertzkeyboard support
- Loading branch information
Showing
5 changed files
with
100,275 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,23 @@ | ||
|
||
language: java | ||
jdk: | ||
- openjdk8 | ||
branches: | ||
only: | ||
- develop | ||
- master | ||
|
||
addons: | ||
sonarcloud: | ||
organization: "sonarcloud" | ||
organization: sonarcloud | ||
token: | ||
secure: "ASXWKuOdMi0AqVIS4iW3xvik39qs2S4CTb6H4cuCSvL09kqwxqGRWrk7ZvK2F4cpqUL+aaLO6pIdIZvbM3CXwSdqbNoADv2Qyui0VKTKBZ9rvG2c/ouNnrPKkvyydVA4VcfKiJnnU9h6kH072XQfQg5oWdjFFPX9iyxT9Mpo+FPfjmoEEBt9j6/5BSZvusBOwkcRKwRBLGV2emR2dKSt/zv/fFThxbXXo7pikVfISJmp9leYqOFuYj32WVmiwJycj0n2m1+Ks44Unq4nolEqWDngDKottOCN2PNMf6Av3jMULbUfRkIMT3n1/J14lsRWbp694E4p0wEXZmCM9f6GkZZoOCNtyeyGu9kK9hRZK9UqjckJAA4aggfiUK2BZK2iWMMK67Wgrvtjf+IqBjFy6Lfgra3d6C29OOxU0nu9nSYo50prgeGkl9KfkCqqr0UioNFNJLwqFcQS2cPNkCvc7oJrOkhxmkxAfDsYexsQhS7VQnsd+PHuxU1o2mtwIe0nadVio2T3ooBiulDzL6XTcmFYWrsCCNkNfbaq5JNTJcTanA7OonSUO6dFK+8eMunNpAc2H4WYlHW5T/BgfvtBAaihdiR6YEKEhCnEe25AmZBryMTc5gTPxPj9up+WevyD8Kj4y9xXyD0py4CyFkUxakVMd0U9jncdKWRwUQ1F3mU=" | ||
|
||
secure: ASXWKuOdMi0AqVIS4iW3xvik39qs2S4CTb6H4cuCSvL09kqwxqGRWrk7ZvK2F4cpqUL+aaLO6pIdIZvbM3CXwSdqbNoADv2Qyui0VKTKBZ9rvG2c/ouNnrPKkvyydVA4VcfKiJnnU9h6kH072XQfQg5oWdjFFPX9iyxT9Mpo+FPfjmoEEBt9j6/5BSZvusBOwkcRKwRBLGV2emR2dKSt/zv/fFThxbXXo7pikVfISJmp9leYqOFuYj32WVmiwJycj0n2m1+Ks44Unq4nolEqWDngDKottOCN2PNMf6Av3jMULbUfRkIMT3n1/J14lsRWbp694E4p0wEXZmCM9f6GkZZoOCNtyeyGu9kK9hRZK9UqjckJAA4aggfiUK2BZK2iWMMK67Wgrvtjf+IqBjFy6Lfgra3d6C29OOxU0nu9nSYo50prgeGkl9KfkCqqr0UioNFNJLwqFcQS2cPNkCvc7oJrOkhxmkxAfDsYexsQhS7VQnsd+PHuxU1o2mtwIe0nadVio2T3ooBiulDzL6XTcmFYWrsCCNkNfbaq5JNTJcTanA7OonSUO6dFK+8eMunNpAc2H4WYlHW5T/BgfvtBAaihdiR6YEKEhCnEe25AmZBryMTc5gTPxPj9up+WevyD8Kj4y9xXyD0py4CyFkUxakVMd0U9jncdKWRwUQ1F3mU= | ||
cache: | ||
directories: | ||
- ~/.m2/repository | ||
- ~/.sonar/cache | ||
|
||
- "~/.m2/repository" | ||
- "~/.sonar/cache" | ||
jobs: | ||
include: | ||
- stage: build_master | ||
if: branch = master AND type = push | ||
|
||
before_install: | ||
- echo "Build Started for Master" | ||
- export GPG_TTY=$(tty) | ||
|
@@ -33,31 +28,29 @@ jobs: | |
- sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo | ||
tee /etc/hosts | ||
- cat /etc/hosts | ||
- mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-' -f 1 |cut -d'.' -f1-2 | ||
- export project_version=$(mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-' -f 1 | cut -d'.' -f1-2) | ||
- mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-' | ||
-f 1 |cut -d'.' -f1-2 | ||
- export project_version=$(mvn help:evaluate -N -Dexpression=project.version|grep | ||
-v '\[' | cut -d'-' -f 1 | cut -d'.' -f1-2) | ||
- git config --local user.name "travis-ci" | ||
- git config --local user.email "[email protected]" | ||
- export TRAVIS_JOB_NUMBER1=${TRAVIS_JOB_NUMBER:-$(date +'%Y%m%d%H%M%S')-$(git log --format=%h -1)} | ||
- export TRAVIS_JOB_NUMBER1=${TRAVIS_JOB_NUMBER:-$(date +'%Y%m%d%H%M%S')-$(git | ||
log --format=%h -1)} | ||
- TRAVIS_JOB_NUMBER1=$(echo $TRAVIS_JOB_NUMBER1 | cut -d'.' -f1) | ||
- export NEW_VERSION=$project_version.$TRAVIS_JOB_NUMBER1 | ||
- mvn versions:set -DnewVersion=$NEW_VERSION | ||
|
||
install: | ||
- mvn --settings .maven.xml install -Dgpg.skip -B -V | ||
|
||
script: | ||
- mvn clean deploy --settings .maven.xml -DskipTests=true -B -U -Prelease | ||
|
||
after_success: | ||
- mvn coveralls:report | ||
|
||
before_deploy: | ||
- git tag $NEW_VERSION | ||
|
||
deploy: | ||
provider: releases | ||
api_key: | ||
secure: vaz/OrLwayipY8axrE0lVexyV0MIowN4wggkAwZPn3t77g3uGB7hD3drPklkptLdIeXRTKpFu8Cduz4n/v5a5JnhhsR3cDmQatGa4/3NzpJNjUEs3NEcFww7KEFMqsuiTh+3cazWT02b9wmwyaJ6tGW2VXI+r8S6qeQVbguEHH4SKoptU6Nf/ONUMIkCGgCnWji9WlnvgMsxUXA2Q9c31MUq5Pi/BVLEWAm+SVMU1AJRDQIXd+qJduo0vEVH7vaHFgS3jO78PyISzZdKaKw9aQGutFuiEp3TzkD9KcmS6J13Akk23ZwCE7U/U3wLnvDBDn26QUYmJPMRZcH+MmXeCL4w9D5s/Rt7ifFcosh0Xg08JSsmgsB0F9Tjpn4oeWip9u2c7bf3fIm4rHf1g0NjPsUJD9OX2aQ9MyR+Woyv8f0o/FSRQd8iAVwX/6BmaSG86R9TzQIz+6K0z+JvAfRXmYotU/XBtr9k0OiDJN+zxEAuSBp78GKtFxyzpulxs6cRf4bOnzKx7pOMHFeLFMbdYkOoebQd4g2u3BT+CLyCjcfxNbyLwYazlt+swd9MawddojA2JTK9NRjosYrZ/OxnggOExH9P+GESM3ivUFhT+g2AavkliWU8zeF8Sx5+emPM+Jxawi8v9rRIeicY7KTHLRaBHpq0C6J1zmQp1bkPAHg= | ||
secure: CTIvEHKyxk8PW3O6AbJCANQwqb5sE9ZLmPg0Fq9TvGI++WgA2uOLWY3GY7ZcUeXWj3/IdUu34NIqy7Q0tn1zGIY6/6Ih/+79NBUWjTl25Bqv4ZGYSVis9JCjkfP0ua4raIfFojctB/SqxUiuiVUlyJZfUcs4m7O2EWP+7iifF3AzAgxs9TeuyxGyAx4f1esqQjsvd32v24wB+CiXq9V5KG8ca8PzijhYwfrfVo45DLIyMQk6DC9RzYKYuIOQ7Jmd1Mh+Uk3wHlrf8h6DxLBymCmwSCuDlJdhXovExwUdNQaRzkNISInoZDCfth00qIHaMnowLclaZTNKEEqIM030CA0NPHjW4FNmcYtOH5U93bG7bD2UPBu6aAVDtxEeCCjmeM3jQnh8AH16I29kRVJmNP55hdp0MeuU9T9o3csM4Q1rwe/+GDkOFFnBK962T4nR1pFRmMs7jfhE/VP+FlcJsGN8mAINol/GYFxUXN4ggIAVUxEGHSLbh3qZCtKLsNo0IY6UYflstbpID7ZUUycy0s86kpto1KQ1fO/sat9FT7F7wMtcxsjHM1l7zqgXZjrKj/1mMoNAjZ9rmd1aVFblz9Njyxke/OQDwRciPbjsw/g7UOE6yLe3MjJdHj7tbdBXEC8TkGSfBLcaDJzaLaut+jhE9ADLbnqplWrbj+xDhzw= | ||
file: | ||
- symspell-lib/target/symspell-lib-$NEW_VERSION.jar | ||
on: | ||
|
@@ -84,7 +77,6 @@ jobs: | |
- mvn --settings .maven.xml install -Dgpg.skip -B -V | ||
script: | ||
- mvn cobertura:cobertura | ||
|
||
after_success: | ||
- bash <(curl -s https://codecov.io/bash) | ||
- stage: build_pr | ||
|
@@ -101,6 +93,3 @@ jobs: | |
- cat /etc/hosts | ||
install: | ||
- mvn --settings .maven.xml install -Dgpg.skip -B -V | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
142 changes: 142 additions & 0 deletions
142
symspell-lib/src/main/java/io/github/mightguy/spellcheck/symspell/common/QwertzDistance.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package io.github.mightguy.spellcheck.symspell.common; | ||
|
||
import io.github.mightguy.spellcheck.symspell.api.CharDistance; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
public class QwertzDistance implements CharDistance { | ||
|
||
double directConnect = 0.1f; | ||
double diagonalConnect = 0.4f; | ||
double defaultValue = 1f; | ||
|
||
Map<String, Double> operationCost = new HashMap<>(); | ||
|
||
public QwertzDistance() { | ||
this.initializeCostMatrix(); | ||
} | ||
|
||
@Override | ||
public double distance(char a, char b) { | ||
if (a == b) { | ||
return 0; | ||
} | ||
return operationCost | ||
.getOrDefault(a + "-" + b, operationCost.getOrDefault(b + "-" + a, defaultValue)); | ||
} | ||
|
||
/** | ||
* Initializing the cost matrix | ||
*/ | ||
public void initializeCostMatrix() { | ||
|
||
//Middle row | ||
addReplaceWeight('a', "s", directConnect); | ||
addReplaceWeight('a', "qwy", diagonalConnect); | ||
|
||
addReplaceWeight('s', "ad", directConnect); | ||
addReplaceWeight('s', "wexy", diagonalConnect); | ||
|
||
addReplaceWeight('d', "sf", directConnect); | ||
addReplaceWeight('d', "erxc", diagonalConnect); | ||
|
||
addReplaceWeight('f', "sf", directConnect); | ||
addReplaceWeight('f', "erxc", diagonalConnect); | ||
|
||
addReplaceWeight('g', "fh", directConnect); | ||
addReplaceWeight('g', "tzvb", diagonalConnect); | ||
|
||
addReplaceWeight('h', "gj", directConnect); | ||
addReplaceWeight('h', "zubn", diagonalConnect); | ||
|
||
addReplaceWeight('j', "hk", directConnect); | ||
addReplaceWeight('j', "uinm", diagonalConnect); | ||
|
||
addReplaceWeight('k', "jl", directConnect); | ||
addReplaceWeight('k', "iom", diagonalConnect); | ||
|
||
addReplaceWeight('l', "kö", directConnect); | ||
addReplaceWeight('l', "op", diagonalConnect); | ||
|
||
addReplaceWeight('ö', "lä", directConnect); | ||
addReplaceWeight('ö', "pü", diagonalConnect); | ||
|
||
addReplaceWeight('ä', "ö", directConnect); | ||
addReplaceWeight('ä', "ü", diagonalConnect); | ||
|
||
//Top Row | ||
|
||
addReplaceWeight('q', "w", directConnect); | ||
addReplaceWeight('q', "a", diagonalConnect); | ||
|
||
addReplaceWeight('w', "qe", directConnect); | ||
addReplaceWeight('w', "as", diagonalConnect); | ||
|
||
addReplaceWeight('e', "wr", directConnect); | ||
addReplaceWeight('e', "sd", diagonalConnect); | ||
|
||
addReplaceWeight('r', "et", directConnect); | ||
addReplaceWeight('r', "df", diagonalConnect); | ||
|
||
addReplaceWeight('t', "rz", directConnect); | ||
addReplaceWeight('t', "fg", diagonalConnect); | ||
|
||
addReplaceWeight('z', "tu", directConnect); | ||
addReplaceWeight('z', "gh", diagonalConnect); | ||
|
||
addReplaceWeight('u', "zi", directConnect); | ||
addReplaceWeight('u', "hj", diagonalConnect); | ||
|
||
addReplaceWeight('i', "uo", directConnect); | ||
addReplaceWeight('i', "jk", diagonalConnect); | ||
|
||
addReplaceWeight('o', "ip", directConnect); | ||
addReplaceWeight('o', "kl", diagonalConnect); | ||
|
||
addReplaceWeight('p', "oü", directConnect); | ||
addReplaceWeight('p', "lö", diagonalConnect); | ||
|
||
addReplaceWeight('ü', "p", directConnect); | ||
addReplaceWeight('ü', "öä", diagonalConnect); | ||
|
||
//Bottom Row | ||
|
||
addReplaceWeight('y', "x", directConnect); | ||
addReplaceWeight('y', "sa", diagonalConnect); | ||
|
||
addReplaceWeight('x', "yc", directConnect); | ||
addReplaceWeight('x', "sd", diagonalConnect); | ||
|
||
addReplaceWeight('c', "xv", directConnect); | ||
addReplaceWeight('c', "df", diagonalConnect); | ||
|
||
addReplaceWeight('v', "bc", directConnect); | ||
addReplaceWeight('v', "fg", diagonalConnect); | ||
|
||
addReplaceWeight('b', "vn", directConnect); | ||
addReplaceWeight('b', "gh", diagonalConnect); | ||
|
||
addReplaceWeight('n', "bm", directConnect); | ||
addReplaceWeight('n', "hj", diagonalConnect); | ||
|
||
addReplaceWeight('m', "n", directConnect); | ||
addReplaceWeight('m', "jk", diagonalConnect); | ||
|
||
} | ||
|
||
/** | ||
* | ||
* @param a | ||
* @param listOfChars | ||
* @param connectWeight | ||
*/ | ||
private void addReplaceWeight(char a, String listOfChars, double connectWeight) { | ||
for (char ch : listOfChars.toCharArray()) { | ||
operationCost.put(a + "-" + ch, connectWeight); | ||
} | ||
} | ||
|
||
public void addReplaceWeight(char a, char b, double connectWeight) { | ||
operationCost.put(a + "-" + b, connectWeight); | ||
} | ||
} |
120 changes: 120 additions & 0 deletions
120
...pell-lib/src/test/java/io/github/mightguy/spellcheck/symspell/GermanLangSpellChecker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
package io.github.mightguy.spellcheck.symspell; | ||
|
||
import io.github.mightguy.spellcheck.symspell.api.DataHolder; | ||
import io.github.mightguy.spellcheck.symspell.common.DictionaryItem; | ||
import io.github.mightguy.spellcheck.symspell.common.Murmur3HashFunction; | ||
import io.github.mightguy.spellcheck.symspell.common.QwertzDistance; | ||
import io.github.mightguy.spellcheck.symspell.common.SpellCheckSettings; | ||
import io.github.mightguy.spellcheck.symspell.common.SuggestionItem; | ||
import io.github.mightguy.spellcheck.symspell.common.Verbosity; | ||
import io.github.mightguy.spellcheck.symspell.common.WeightedDamerauLevenshteinDistance; | ||
import io.github.mightguy.spellcheck.symspell.exception.SpellCheckException; | ||
import io.github.mightguy.spellcheck.symspell.impl.InMemoryDataHolder; | ||
import io.github.mightguy.spellcheck.symspell.impl.SymSpellCheck; | ||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import org.junit.Assert; | ||
import org.junit.BeforeClass; | ||
import org.junit.Test; | ||
|
||
public class GermanLangSpellChecker { | ||
|
||
static DataHolder dataHolder1; | ||
static DataHolder dataHolder2; | ||
static SymSpellCheck symSpellCheck; | ||
static SymSpellCheck qwertzSymSpellCheck; | ||
static WeightedDamerauLevenshteinDistance weightedDamerauLevenshteinDistance; | ||
static WeightedDamerauLevenshteinDistance qwertzWeightedDamerauLevenshteinDistance; | ||
|
||
@BeforeClass | ||
public static void setup() throws IOException, SpellCheckException { | ||
|
||
ClassLoader classLoader = SymSpellTest.class.getClassLoader(); | ||
|
||
SpellCheckSettings spellCheckSettings = SpellCheckSettings.builder() | ||
.countThreshold(1) | ||
.deletionWeight(1) | ||
.insertionWeight(1) | ||
.replaceWeight(1) | ||
.maxEditDistance(2) | ||
.transpositionWeight(1) | ||
.topK(5) | ||
.prefixLength(10) | ||
.verbosity(Verbosity.ALL).build(); | ||
|
||
weightedDamerauLevenshteinDistance = | ||
new WeightedDamerauLevenshteinDistance(spellCheckSettings.getDeletionWeight(), | ||
spellCheckSettings.getInsertionWeight(), spellCheckSettings.getReplaceWeight(), | ||
spellCheckSettings.getTranspositionWeight(), null); | ||
|
||
qwertzWeightedDamerauLevenshteinDistance = | ||
new WeightedDamerauLevenshteinDistance(spellCheckSettings.getDeletionWeight(), | ||
spellCheckSettings.getInsertionWeight(), spellCheckSettings.getReplaceWeight(), | ||
spellCheckSettings.getTranspositionWeight(), new QwertzDistance()); | ||
|
||
dataHolder1 = new InMemoryDataHolder(spellCheckSettings, new Murmur3HashFunction()); | ||
dataHolder2 = new InMemoryDataHolder(spellCheckSettings, new Murmur3HashFunction()); | ||
|
||
symSpellCheck = new SymSpellCheck(dataHolder1, weightedDamerauLevenshteinDistance, | ||
spellCheckSettings); | ||
|
||
qwertzSymSpellCheck = new SymSpellCheck(dataHolder2, qwertzWeightedDamerauLevenshteinDistance, | ||
spellCheckSettings); | ||
|
||
List<String> result = new ArrayList<>(); | ||
loadUniGramFile( | ||
new File(classLoader.getResource("de-100k.txt").getFile())); | ||
|
||
} | ||
|
||
private static void loadUniGramFile(File file) throws IOException, SpellCheckException { | ||
BufferedReader br = new BufferedReader(new FileReader(file)); | ||
String line; | ||
while ((line = br.readLine()) != null) { | ||
String[] arr = line.split("\\s+"); | ||
dataHolder1.addItem(new DictionaryItem(arr[0], Double.parseDouble(arr[1]), -1.0)); | ||
dataHolder2.addItem(new DictionaryItem(arr[0], Double.parseDouble(arr[1]), -1.0)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testMultiWordCorrection() throws SpellCheckException { | ||
|
||
assertTypoAndCorrected(symSpellCheck, | ||
"entwick lung".toLowerCase(), | ||
"entwicklung".toLowerCase(), | ||
2); | ||
|
||
assertTypoEdAndCorrected(symSpellCheck, | ||
"nömlich".toLowerCase(), | ||
"nämlich".toLowerCase(), | ||
2, 1); | ||
|
||
assertTypoEdAndCorrected(qwertzSymSpellCheck, | ||
"nömlich".toLowerCase(), | ||
"nämlich".toLowerCase(), | ||
2, 0.10); | ||
|
||
} | ||
|
||
public static void assertTypoAndCorrected(SymSpellCheck spellCheck, String typo, String correct, | ||
double maxEd) throws SpellCheckException { | ||
List<SuggestionItem> suggestionItems = spellCheck | ||
.lookupCompound(typo.toLowerCase().trim(), maxEd); | ||
Assert.assertTrue(suggestionItems.size() > 0); | ||
Assert.assertEquals(correct.toLowerCase().trim(), suggestionItems.get(0).getTerm().trim()); | ||
} | ||
|
||
public static void assertTypoEdAndCorrected(SymSpellCheck spellCheck, String typo, String correct, | ||
double maxEd, double expED) throws SpellCheckException { | ||
List<SuggestionItem> suggestionItems = spellCheck | ||
.lookupCompound(typo.toLowerCase().trim(), maxEd); | ||
Assert.assertTrue(suggestionItems.size() > 0); | ||
Assert.assertEquals(correct.toLowerCase().trim(), suggestionItems.get(0).getTerm().trim()); | ||
Assert.assertEquals(suggestionItems.get(0).getDistance(), expED, 0.12); | ||
} | ||
} |
Oops, something went wrong.