Skip to content

Commit

Permalink
Changes for the bigram as unigram support
Browse files Browse the repository at this point in the history
  • Loading branch information
MighTguY committed May 11, 2020
1 parent bdb1b68 commit 6b02c25
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,16 @@ public List<SuggestionItem> lookupCompound(String word) throws SpellCheckExcepti
return lookupCompound(word, spellCheckSettings.getMaxEditDistance());
}

public abstract List<SuggestionItem> lookupCompound(String word, double editDistance)

/**
 * Performs a compound lookup for the supplied input and returns the resulting suggestions.
 *
 * @param word the input text to correct
 * @param editDistance the maximum edit distance requested for this lookup
 *     (NOTE(review): how values above the configured maximum are handled is up to the
 *     implementation - confirm against the concrete spell checker)
 * @param tokenizeOnWhiteSpace whether the input should be split into whitespace-separated
 *     tokens before lookup; when {@code false} the input is presumably handled as a single
 *     token - verify against the concrete implementation
 * @return the suggestions produced for the input
 * @throws SpellCheckException if the lookup fails
 */
public abstract List<SuggestionItem> lookupCompound(String word, double editDistance,
boolean tokenizeOnWhiteSpace)
throws SpellCheckException;

/**
 * Performs a compound lookup for the supplied input using the caller-provided edit distance,
 * with whitespace tokenization enabled.
 *
 * <p>Delegates to {@link #lookupCompound(String, double, boolean)} with
 * {@code tokenizeOnWhiteSpace = true}.
 *
 * @param word the input text to correct
 * @param editDistance the maximum edit distance to use for this lookup
 * @return the suggestions produced for the input
 * @throws SpellCheckException if the delegated lookup fails
 */
public List<SuggestionItem> lookupCompound(String word, double editDistance)
    throws SpellCheckException {
  // Forward the caller's editDistance; substituting the configured maximum here would
  // silently discard the argument and make this overload identical to lookupCompound(word).
  return lookupCompound(word, editDistance, true);
}


public Composition wordBreakSegmentation(String phrase) throws SpellCheckException {
return wordBreakSegmentation(phrase, spellCheckSettings.getPrefixLength(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,10 @@ public class SpellCheckSettings {
@Builder.Default
private double editFactor = 0.3;

@Builder.Default
private boolean doKeySplit = true;

@Builder.Default
private String keySplitRegex = "\\s+";

}
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ public boolean addItem(final DictionaryItem dictionaryItem) throws SpellCheckExc


private boolean addToDictionary(String key, double frequency) {
if (key.split("\\s+").length > 1) {
if (spellCheckSettings.isDoKeySplit()
&& key.split(spellCheckSettings.getKeySplitRegex()).length > 1) {
bigramsDictionary.put(key, frequency);
if (frequency < spellCheckSettings.getBigramCountMin()) {
spellCheckSettings.setBigramCountMin(frequency);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ public SymSpellCheck(DataHolder dataHolder,
* the input string.
*/
@Override
public List<SuggestionItem> lookupCompound(String phrase, double maxEditDistance)
public List<SuggestionItem> lookupCompound(String phrase, double maxEditDistance,
boolean tokenizeOnWhiteSpace)
throws SpellCheckException {

if (maxEditDistance > spellCheckSettings.getMaxEditDistance()) {
Expand All @@ -63,7 +64,12 @@ public List<SuggestionItem> lookupCompound(String phrase, double maxEditDistance
if (spellCheckSettings.isLowerCaseTerms()) {
phrase = phrase.toLowerCase();
}
String[] items = SpellHelper.tokenizeOnWhiteSpace(phrase);
String[] items;
if (tokenizeOnWhiteSpace) {
items = SpellHelper.tokenizeOnWhiteSpace(phrase);
} else {
items = new String[]{phrase};
}
List<SuggestionItem> suggestions = new ArrayList<>();
List<SuggestionItem> suggestionParts = new ArrayList<>();
boolean isLastCombi = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,14 @@ public void process(ResponseBuilder rb) throws IOException {
}
SolrParams params = rb.req.getParams();
String q = params.get(Constants.SPELLCHECK_Q, params.get(CommonParams.Q));
boolean sow = params.getBool(Constants.SPELLCHECK_SOW, true);
List<SuggestionItem> suggestions;
try {
List<SuggestionItem> suggestions = spellChecker.lookupCompound(q);
if (sow) {
suggestions = spellChecker.lookupCompound(q);
} else {
suggestions = spellChecker.lookupCompound(q, 2, false);
}
if (!CollectionUtils.isEmpty(suggestions)) {
addToResponse(rb, suggestions);
}
Expand Down Expand Up @@ -183,6 +189,10 @@ private void addSpellChecker(SolrCore core, NamedList spellcheckerNL) {
SearchRequestUtil
.getFromNamedList(spellcheckerNL, "verbosity", Verbosity.ALL.name())))
.countThreshold(SearchRequestUtil.getFromNamedList(spellcheckerNL, "countThreshold", 10))
.doKeySplit(
SearchRequestUtil.getFromNamedList(spellcheckerNL, "createBigram", true))
.keySplitRegex(
SearchRequestUtil.getFromNamedList(spellcheckerNL, "bigramSplitRegex", "\\s+"))
.build();

StringDistance stringDistance = getStringDistance(spellcheckerNL, spellCheckSettings, core);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ private Constants() {
public static final String SPELLCHECK_THRESHOLD = SPELLCHECK_PREFIX + "threshold";
public static final String SPELLCHECK_ENABLE = SPELLCHECK_PREFIX + "enable";
public static final String SPELLCHECK_BUILD = SPELLCHECK_PREFIX + "build";
public static final String SPELLCHECK_SOW = SPELLCHECK_PREFIX + "sow";
}

0 comments on commit 6b02c25

Please sign in to comment.