Skip to content

Commit

Permalink
Merge pull request #48 from MighTguY/feature/checkStyleFix
Browse files Browse the repository at this point in the history
updating travis and adding german qwertzkeyboard support
  • Loading branch information
MighTguY authored May 10, 2020
2 parents 75b239f + 3b77d81 commit 78b03bf
Show file tree
Hide file tree
Showing 5 changed files with 100,275 additions and 23 deletions.
33 changes: 11 additions & 22 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,28 +1,23 @@

language: java
jdk:
- openjdk8
branches:
only:
- develop
- master

addons:
sonarcloud:
organization: "sonarcloud"
organization: sonarcloud
token:
secure: "ASXWKuOdMi0AqVIS4iW3xvik39qs2S4CTb6H4cuCSvL09kqwxqGRWrk7ZvK2F4cpqUL+aaLO6pIdIZvbM3CXwSdqbNoADv2Qyui0VKTKBZ9rvG2c/ouNnrPKkvyydVA4VcfKiJnnU9h6kH072XQfQg5oWdjFFPX9iyxT9Mpo+FPfjmoEEBt9j6/5BSZvusBOwkcRKwRBLGV2emR2dKSt/zv/fFThxbXXo7pikVfISJmp9leYqOFuYj32WVmiwJycj0n2m1+Ks44Unq4nolEqWDngDKottOCN2PNMf6Av3jMULbUfRkIMT3n1/J14lsRWbp694E4p0wEXZmCM9f6GkZZoOCNtyeyGu9kK9hRZK9UqjckJAA4aggfiUK2BZK2iWMMK67Wgrvtjf+IqBjFy6Lfgra3d6C29OOxU0nu9nSYo50prgeGkl9KfkCqqr0UioNFNJLwqFcQS2cPNkCvc7oJrOkhxmkxAfDsYexsQhS7VQnsd+PHuxU1o2mtwIe0nadVio2T3ooBiulDzL6XTcmFYWrsCCNkNfbaq5JNTJcTanA7OonSUO6dFK+8eMunNpAc2H4WYlHW5T/BgfvtBAaihdiR6YEKEhCnEe25AmZBryMTc5gTPxPj9up+WevyD8Kj4y9xXyD0py4CyFkUxakVMd0U9jncdKWRwUQ1F3mU="

secure: ASXWKuOdMi0AqVIS4iW3xvik39qs2S4CTb6H4cuCSvL09kqwxqGRWrk7ZvK2F4cpqUL+aaLO6pIdIZvbM3CXwSdqbNoADv2Qyui0VKTKBZ9rvG2c/ouNnrPKkvyydVA4VcfKiJnnU9h6kH072XQfQg5oWdjFFPX9iyxT9Mpo+FPfjmoEEBt9j6/5BSZvusBOwkcRKwRBLGV2emR2dKSt/zv/fFThxbXXo7pikVfISJmp9leYqOFuYj32WVmiwJycj0n2m1+Ks44Unq4nolEqWDngDKottOCN2PNMf6Av3jMULbUfRkIMT3n1/J14lsRWbp694E4p0wEXZmCM9f6GkZZoOCNtyeyGu9kK9hRZK9UqjckJAA4aggfiUK2BZK2iWMMK67Wgrvtjf+IqBjFy6Lfgra3d6C29OOxU0nu9nSYo50prgeGkl9KfkCqqr0UioNFNJLwqFcQS2cPNkCvc7oJrOkhxmkxAfDsYexsQhS7VQnsd+PHuxU1o2mtwIe0nadVio2T3ooBiulDzL6XTcmFYWrsCCNkNfbaq5JNTJcTanA7OonSUO6dFK+8eMunNpAc2H4WYlHW5T/BgfvtBAaihdiR6YEKEhCnEe25AmZBryMTc5gTPxPj9up+WevyD8Kj4y9xXyD0py4CyFkUxakVMd0U9jncdKWRwUQ1F3mU=
cache:
directories:
- ~/.m2/repository
- ~/.sonar/cache

- "~/.m2/repository"
- "~/.sonar/cache"
jobs:
include:
- stage: build_master
if: branch = master AND type = push

before_install:
- echo "Build Started for Master"
- export GPG_TTY=$(tty)
Expand All @@ -33,31 +28,29 @@ jobs:
- sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo
tee /etc/hosts
- cat /etc/hosts
- mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-' -f 1 |cut -d'.' -f1-2
- export project_version=$(mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-' -f 1 | cut -d'.' -f1-2)
- mvn help:evaluate -N -Dexpression=project.version|grep -v '\[' | cut -d'-'
-f 1 |cut -d'.' -f1-2
- export project_version=$(mvn help:evaluate -N -Dexpression=project.version|grep
-v '\[' | cut -d'-' -f 1 | cut -d'.' -f1-2)
- git config --local user.name "travis-ci"
- git config --local user.email "[email protected]"
- export TRAVIS_JOB_NUMBER1=${TRAVIS_JOB_NUMBER:-$(date +'%Y%m%d%H%M%S')-$(git log --format=%h -1)}
- export TRAVIS_JOB_NUMBER1=${TRAVIS_JOB_NUMBER:-$(date +'%Y%m%d%H%M%S')-$(git
log --format=%h -1)}
- TRAVIS_JOB_NUMBER1=$(echo $TRAVIS_JOB_NUMBER1 | cut -d'.' -f1)
- export NEW_VERSION=$project_version.$TRAVIS_JOB_NUMBER1
- mvn versions:set -DnewVersion=$NEW_VERSION

install:
- mvn --settings .maven.xml install -Dgpg.skip -B -V

script:
- mvn clean deploy --settings .maven.xml -DskipTests=true -B -U -Prelease

after_success:
- mvn coveralls:report

before_deploy:
- git tag $NEW_VERSION

deploy:
provider: releases
api_key:
secure: vaz/OrLwayipY8axrE0lVexyV0MIowN4wggkAwZPn3t77g3uGB7hD3drPklkptLdIeXRTKpFu8Cduz4n/v5a5JnhhsR3cDmQatGa4/3NzpJNjUEs3NEcFww7KEFMqsuiTh+3cazWT02b9wmwyaJ6tGW2VXI+r8S6qeQVbguEHH4SKoptU6Nf/ONUMIkCGgCnWji9WlnvgMsxUXA2Q9c31MUq5Pi/BVLEWAm+SVMU1AJRDQIXd+qJduo0vEVH7vaHFgS3jO78PyISzZdKaKw9aQGutFuiEp3TzkD9KcmS6J13Akk23ZwCE7U/U3wLnvDBDn26QUYmJPMRZcH+MmXeCL4w9D5s/Rt7ifFcosh0Xg08JSsmgsB0F9Tjpn4oeWip9u2c7bf3fIm4rHf1g0NjPsUJD9OX2aQ9MyR+Woyv8f0o/FSRQd8iAVwX/6BmaSG86R9TzQIz+6K0z+JvAfRXmYotU/XBtr9k0OiDJN+zxEAuSBp78GKtFxyzpulxs6cRf4bOnzKx7pOMHFeLFMbdYkOoebQd4g2u3BT+CLyCjcfxNbyLwYazlt+swd9MawddojA2JTK9NRjosYrZ/OxnggOExH9P+GESM3ivUFhT+g2AavkliWU8zeF8Sx5+emPM+Jxawi8v9rRIeicY7KTHLRaBHpq0C6J1zmQp1bkPAHg=
secure: CTIvEHKyxk8PW3O6AbJCANQwqb5sE9ZLmPg0Fq9TvGI++WgA2uOLWY3GY7ZcUeXWj3/IdUu34NIqy7Q0tn1zGIY6/6Ih/+79NBUWjTl25Bqv4ZGYSVis9JCjkfP0ua4raIfFojctB/SqxUiuiVUlyJZfUcs4m7O2EWP+7iifF3AzAgxs9TeuyxGyAx4f1esqQjsvd32v24wB+CiXq9V5KG8ca8PzijhYwfrfVo45DLIyMQk6DC9RzYKYuIOQ7Jmd1Mh+Uk3wHlrf8h6DxLBymCmwSCuDlJdhXovExwUdNQaRzkNISInoZDCfth00qIHaMnowLclaZTNKEEqIM030CA0NPHjW4FNmcYtOH5U93bG7bD2UPBu6aAVDtxEeCCjmeM3jQnh8AH16I29kRVJmNP55hdp0MeuU9T9o3csM4Q1rwe/+GDkOFFnBK962T4nR1pFRmMs7jfhE/VP+FlcJsGN8mAINol/GYFxUXN4ggIAVUxEGHSLbh3qZCtKLsNo0IY6UYflstbpID7ZUUycy0s86kpto1KQ1fO/sat9FT7F7wMtcxsjHM1l7zqgXZjrKj/1mMoNAjZ9rmd1aVFblz9Njyxke/OQDwRciPbjsw/g7UOE6yLe3MjJdHj7tbdBXEC8TkGSfBLcaDJzaLaut+jhE9ADLbnqplWrbj+xDhzw=
file:
- symspell-lib/target/symspell-lib-$NEW_VERSION.jar
on:
Expand All @@ -84,7 +77,6 @@ jobs:
- mvn --settings .maven.xml install -Dgpg.skip -B -V
script:
- mvn cobertura:cobertura

after_success:
- bash <(curl -s https://codecov.io/bash)
- stage: build_pr
Expand All @@ -101,6 +93,3 @@ jobs:
- cat /etc/hosts
install:
- mvn --settings .maven.xml install -Dgpg.skip -B -V



3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
* added keyboard-distance to get a dynamic replacement weight (since letters close to each other are more likely to be replaced)
* do some query normalization before search

## Keyboard based Qwerty Distance
## Keyboard based Qwerty/Qwertz Distance

There are two keyboard implementations: one based on the English QWERTY layout and one based on the German QWERTZ layout.
We use the adjacency graph of the keyboard to assign the weights between connected nodes.
<img src="qwerty.png" align="center">

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package io.github.mightguy.spellcheck.symspell.common;

import io.github.mightguy.spellcheck.symspell.api.CharDistance;
import java.util.HashMap;
import java.util.Map;

/**
 * {@link CharDistance} implementation that derives the replacement cost of two characters from
 * their proximity on a German QWERTZ keyboard.
 *
 * <p>Keys that sit next to each other in the same row get {@link #directConnect}, keys that touch
 * diagonally in the row above or below get {@link #diagonalConnect}, and every other pair gets
 * {@link #defaultValue}. Costs are stored in {@link #operationCost} under the key
 * {@code "<a>-<b>"}; {@link #distance(char, char)} checks both orderings of a pair.
 */
public class QwertzDistance implements CharDistance {

  // Plain double literals: a float literal such as 0.1f widens to 0.10000000149011612.
  double directConnect = 0.1;
  double diagonalConnect = 0.4;
  double defaultValue = 1.0;

  Map<String, Double> operationCost = new HashMap<>();

  public QwertzDistance() {
    this.initializeCostMatrix();
  }

  /**
   * Returns the replacement cost between two characters.
   *
   * @param a first character
   * @param b second character
   * @return {@code 0} when both characters are equal, the registered cost for {@code a-b} (or,
   *     failing that, for {@code b-a}) when one exists, otherwise {@link #defaultValue}
   */
  @Override
  public double distance(char a, char b) {
    if (a == b) {
      return 0;
    }
    Double forward = operationCost.get(key(a, b));
    if (forward != null) {
      return forward;
    }
    // The matrix is filled per key, so fall back to the reverse ordering before giving up.
    return operationCost.getOrDefault(key(b, a), defaultValue);
  }

  /**
   * Initializing the cost matrix with the adjacency graph of the QWERTZ layout.
   *
   * <pre>
   *   q w e r t z u i o p ü
   *    a s d f g h j k l ö ä
   *      y x c v b n m
   * </pre>
   */
  public void initializeCostMatrix() {

    //Middle row
    addReplaceWeight('a', "s", directConnect);
    addReplaceWeight('a', "qwy", diagonalConnect);

    addReplaceWeight('s', "ad", directConnect);
    addReplaceWeight('s', "wexy", diagonalConnect);

    addReplaceWeight('d', "sf", directConnect);
    addReplaceWeight('d', "erxc", diagonalConnect);

    // 'f' sits between 'd' and 'g', below 'r'/'t' and above 'c'/'v'.
    addReplaceWeight('f', "dg", directConnect);
    addReplaceWeight('f', "rtcv", diagonalConnect);

    addReplaceWeight('g', "fh", directConnect);
    addReplaceWeight('g', "tzvb", diagonalConnect);

    addReplaceWeight('h', "gj", directConnect);
    addReplaceWeight('h', "zubn", diagonalConnect);

    addReplaceWeight('j', "hk", directConnect);
    addReplaceWeight('j', "uinm", diagonalConnect);

    addReplaceWeight('k', "jl", directConnect);
    addReplaceWeight('k', "iom", diagonalConnect);

    addReplaceWeight('l', "kö", directConnect);
    addReplaceWeight('l', "op", diagonalConnect);

    addReplaceWeight('ö', "lä", directConnect);
    addReplaceWeight('ö', "pü", diagonalConnect);

    addReplaceWeight('ä', "ö", directConnect);
    addReplaceWeight('ä', "ü", diagonalConnect);

    //Top Row

    addReplaceWeight('q', "w", directConnect);
    addReplaceWeight('q', "a", diagonalConnect);

    addReplaceWeight('w', "qe", directConnect);
    addReplaceWeight('w', "as", diagonalConnect);

    addReplaceWeight('e', "wr", directConnect);
    addReplaceWeight('e', "sd", diagonalConnect);

    addReplaceWeight('r', "et", directConnect);
    addReplaceWeight('r', "df", diagonalConnect);

    addReplaceWeight('t', "rz", directConnect);
    addReplaceWeight('t', "fg", diagonalConnect);

    addReplaceWeight('z', "tu", directConnect);
    addReplaceWeight('z', "gh", diagonalConnect);

    addReplaceWeight('u', "zi", directConnect);
    addReplaceWeight('u', "hj", diagonalConnect);

    addReplaceWeight('i', "uo", directConnect);
    addReplaceWeight('i', "jk", diagonalConnect);

    addReplaceWeight('o', "ip", directConnect);
    addReplaceWeight('o', "kl", diagonalConnect);

    addReplaceWeight('p', "oü", directConnect);
    addReplaceWeight('p', "lö", diagonalConnect);

    addReplaceWeight('ü', "p", directConnect);
    addReplaceWeight('ü', "öä", diagonalConnect);

    //Bottom Row

    addReplaceWeight('y', "x", directConnect);
    addReplaceWeight('y', "sa", diagonalConnect);

    addReplaceWeight('x', "yc", directConnect);
    addReplaceWeight('x', "sd", diagonalConnect);

    addReplaceWeight('c', "xv", directConnect);
    addReplaceWeight('c', "df", diagonalConnect);

    addReplaceWeight('v', "bc", directConnect);
    addReplaceWeight('v', "fg", diagonalConnect);

    addReplaceWeight('b', "vn", directConnect);
    addReplaceWeight('b', "gh", diagonalConnect);

    addReplaceWeight('n', "bm", directConnect);
    addReplaceWeight('n', "hj", diagonalConnect);

    addReplaceWeight('m', "n", directConnect);
    addReplaceWeight('m', "jk", diagonalConnect);

  }

  /**
   * Registers the same replacement cost from one character to each character of a string.
   *
   * @param a source character
   * @param listOfChars every character of this string is registered as a neighbour of {@code a}
   * @param connectWeight cost stored for each {@code a-<neighbour>} pair
   */
  private void addReplaceWeight(char a, String listOfChars, double connectWeight) {
    for (char ch : listOfChars.toCharArray()) {
      addReplaceWeight(a, ch, connectWeight);
    }
  }

  /**
   * Registers (or overwrites) the replacement cost for a single ordered pair.
   *
   * @param a source character
   * @param b target character
   * @param connectWeight cost stored for the {@code a-b} pair
   */
  public void addReplaceWeight(char a, char b, double connectWeight) {
    operationCost.put(key(a, b), connectWeight);
  }

  /** Builds the {@link #operationCost} map key for an ordered character pair. */
  private static String key(char a, char b) {
    return a + "-" + b;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package io.github.mightguy.spellcheck.symspell;

import io.github.mightguy.spellcheck.symspell.api.DataHolder;
import io.github.mightguy.spellcheck.symspell.common.DictionaryItem;
import io.github.mightguy.spellcheck.symspell.common.Murmur3HashFunction;
import io.github.mightguy.spellcheck.symspell.common.QwertzDistance;
import io.github.mightguy.spellcheck.symspell.common.SpellCheckSettings;
import io.github.mightguy.spellcheck.symspell.common.SuggestionItem;
import io.github.mightguy.spellcheck.symspell.common.Verbosity;
import io.github.mightguy.spellcheck.symspell.common.WeightedDamerauLevenshteinDistance;
import io.github.mightguy.spellcheck.symspell.exception.SpellCheckException;
import io.github.mightguy.spellcheck.symspell.impl.InMemoryDataHolder;
import io.github.mightguy.spellcheck.symspell.impl.SymSpellCheck;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Checks compound lookups against the German unigram dictionary {@code de-100k.txt}, once with
 * the plain weighted Damerau-Levenshtein distance and once with the QWERTZ keyboard distance.
 *
 * <p>NOTE(review): the class name does not end in {@code Test}; with Maven Surefire's default
 * include patterns it may not be picked up automatically - confirm against the build config.
 */
public class GermanLangSpellChecker {

  static DataHolder dataHolder1;
  static DataHolder dataHolder2;
  static SymSpellCheck symSpellCheck;
  static SymSpellCheck qwertzSymSpellCheck;
  static WeightedDamerauLevenshteinDistance weightedDamerauLevenshteinDistance;
  static WeightedDamerauLevenshteinDistance qwertzWeightedDamerauLevenshteinDistance;

  /**
   * Builds two spell checkers that share the same settings but differ in the character distance
   * (none vs. {@link QwertzDistance}) and fills both data holders from the dictionary file.
   */
  @BeforeClass
  public static void setup() throws IOException, SpellCheckException {

    ClassLoader classLoader = GermanLangSpellChecker.class.getClassLoader();

    SpellCheckSettings spellCheckSettings = SpellCheckSettings.builder()
        .countThreshold(1)
        .deletionWeight(1)
        .insertionWeight(1)
        .replaceWeight(1)
        .maxEditDistance(2)
        .transpositionWeight(1)
        .topK(5)
        .prefixLength(10)
        .verbosity(Verbosity.ALL).build();

    weightedDamerauLevenshteinDistance =
        new WeightedDamerauLevenshteinDistance(spellCheckSettings.getDeletionWeight(),
            spellCheckSettings.getInsertionWeight(), spellCheckSettings.getReplaceWeight(),
            spellCheckSettings.getTranspositionWeight(), null);

    qwertzWeightedDamerauLevenshteinDistance =
        new WeightedDamerauLevenshteinDistance(spellCheckSettings.getDeletionWeight(),
            spellCheckSettings.getInsertionWeight(), spellCheckSettings.getReplaceWeight(),
            spellCheckSettings.getTranspositionWeight(), new QwertzDistance());

    dataHolder1 = new InMemoryDataHolder(spellCheckSettings, new Murmur3HashFunction());
    dataHolder2 = new InMemoryDataHolder(spellCheckSettings, new Murmur3HashFunction());

    symSpellCheck = new SymSpellCheck(dataHolder1, weightedDamerauLevenshteinDistance,
        spellCheckSettings);

    qwertzSymSpellCheck = new SymSpellCheck(dataHolder2, qwertzWeightedDamerauLevenshteinDistance,
        spellCheckSettings);

    loadUniGramFile(
        new File(classLoader.getResource("de-100k.txt").getFile()));

  }

  /**
   * Reads a whitespace-separated "term frequency" file and adds every entry to both data holders.
   *
   * @param file dictionary file to read
   * @throws IOException if the file cannot be read
   * @throws SpellCheckException if a data holder rejects an item
   */
  private static void loadUniGramFile(File file) throws IOException, SpellCheckException {
    // NOTE(review): FileReader decodes with the platform default charset; the dictionary contains
    // umlauts, so this presumably relies on a UTF-8 default - confirm the file encoding.
    // try-with-resources closes the reader even when parsing or addItem fails.
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
      String line;
      while ((line = br.readLine()) != null) {
        if (line.trim().isEmpty()) {
          // A blank line has no frequency column and would otherwise fail on arr[1].
          continue;
        }
        String[] arr = line.split("\\s+");
        dataHolder1.addItem(new DictionaryItem(arr[0], Double.parseDouble(arr[1]), -1.0));
        dataHolder2.addItem(new DictionaryItem(arr[0], Double.parseDouble(arr[1]), -1.0));
      }
    }
  }

  @Test
  public void testMultiWordCorrection() throws SpellCheckException {

    assertTypoAndCorrected(symSpellCheck,
        "entwick lung",
        "entwicklung",
        2);

    // Without a keyboard distance, replacing 'ö' by 'ä' costs the full replace weight of 1.
    assertTypoEdAndCorrected(symSpellCheck,
        "nömlich",
        "nämlich",
        2, 1);

    // 'ö' and 'ä' are direct neighbours on QWERTZ, so the replacement is much cheaper.
    assertTypoEdAndCorrected(qwertzSymSpellCheck,
        "nömlich",
        "nämlich",
        2, 0.10);

  }

  /**
   * Asserts that the best compound suggestion for {@code typo} is {@code correct}.
   *
   * @param spellCheck spell checker under test
   * @param typo misspelled input; lower-cased and trimmed before lookup
   * @param correct expected top suggestion; compared lower-cased and trimmed
   * @param maxEd maximum edit distance passed to the lookup
   */
  public static void assertTypoAndCorrected(SymSpellCheck spellCheck, String typo, String correct,
      double maxEd) throws SpellCheckException {
    topSuggestion(spellCheck, typo, correct, maxEd);
  }

  /**
   * Same as {@link #assertTypoAndCorrected} and additionally asserts the distance of the top
   * suggestion.
   *
   * @param spellCheck spell checker under test
   * @param typo misspelled input; lower-cased and trimmed before lookup
   * @param correct expected top suggestion; compared lower-cased and trimmed
   * @param maxEd maximum edit distance passed to the lookup
   * @param expED expected distance of the top suggestion
   */
  public static void assertTypoEdAndCorrected(SymSpellCheck spellCheck, String typo, String correct,
      double maxEd, double expED) throws SpellCheckException {
    SuggestionItem best = topSuggestion(spellCheck, typo, correct, maxEd);
    // JUnit's order is (expected, actual, delta).
    // NOTE(review): the 0.12 delta is wider than the 0.10 expectation used above, so a distance
    // of 0 would pass as well - consider tightening it.
    Assert.assertEquals(expED, best.getDistance(), 0.12);
  }

  /** Runs the compound lookup, asserts the top term equals {@code correct} and returns it. */
  private static SuggestionItem topSuggestion(SymSpellCheck spellCheck, String typo,
      String correct, double maxEd) throws SpellCheckException {
    List<SuggestionItem> suggestionItems = spellCheck
        .lookupCompound(typo.toLowerCase().trim(), maxEd);
    Assert.assertTrue(suggestionItems.size() > 0);
    SuggestionItem best = suggestionItems.get(0);
    Assert.assertEquals(correct.toLowerCase().trim(), best.getTerm().trim());
    return best;
  }
}
Loading

0 comments on commit 78b03bf

Please sign in to comment.