Skip to content

Commit

Permalink
Drop unnecessary ngram files for Chinese, Japanese and Korean (#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
pemistahl committed Dec 16, 2021
1 parent 4ca58ea commit 444aaa0
Show file tree
Hide file tree
Showing 13 changed files with 5 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -446,26 +446,26 @@ class LanguageDetector internal constructor(

val model = loadLanguageModels(languageModels, language, ngramLength)

return model.getRelativeFrequency(ngram)
return model?.getRelativeFrequency(ngram) ?: 0.0
}

// NOTE(review): this span appears to be a rendered diff hunk whose +/- markers were lost.
// Each changed line shows up twice in a row: the non-null form first, then the nullable
// form that seems to replace it. Read only one line of each pair as the live code.
/**
 * Returns the model for [language], looking it up in the [languageModels] cache before
 * falling back to [loadLanguageModel].
 *
 * A freshly loaded model is stored in [languageModels] under [language] and then returned.
 * The cache is keyed by language only; [ngramLength] is merely forwarded to the loader
 * (presumably the caller passes one map per n-gram length; verify against caller).
 *
 * In the nullable form, `null` is returned when [loadLanguageModel] yields no model.
 */
private fun loadLanguageModels(
languageModels: MutableMap<Language, TrainingDataLanguageModel>,
language: Language,
ngramLength: Int
): TrainingDataLanguageModel {
): TrainingDataLanguageModel? {
// Cache hit: hand back the model loaded earlier for this language.
if (languageModels.containsKey(language)) {
return languageModels.getValue(language)
}
val model = loadLanguageModel(language, ngramLength)
// Nullable form: bail out before touching the cache, so a missing model is never
// stored and the load is attempted again on the next call for this language.
val model = loadLanguageModel(language, ngramLength) ?: return null
languageModels[language] = model
return model
}

// NOTE(review): this span appears to be a rendered diff hunk whose +/- markers were lost.
// Each changed line shows up twice in a row: the non-null form first, then the nullable
// form that seems to replace it. Read only one line of each pair as the live code.
/**
 * Reads the JSON model file for [language] and [ngramLength] from the classpath and parses
 * it with `TrainingDataLanguageModel.fromJson`.
 *
 * The resource path is `/language-models/<isoCode639_1>/<ngram name>s.json`, where the
 * n-gram name comes from `Ngram.getNgramNameByLength(ngramLength)`.
 *
 * In the nullable form, `null` is returned when no resource exists at that path.
 */
private fun loadLanguageModel(language: Language, ngramLength: Int): TrainingDataLanguageModel {
private fun loadLanguageModel(language: Language, ngramLength: Int): TrainingDataLanguageModel? {
val fileName = "${Ngram.getNgramNameByLength(ngramLength)}s.json"
val filePath = "/language-models/${language.isoCode639_1}/$fileName"
val inputStream = Language::class.java.getResourceAsStream(filePath)
// Nullable form: a missing resource is treated as "no model" instead of failing below.
val inputStream = Language::class.java.getResourceAsStream(filePath) ?: return null
// Read the whole file as UTF-8 text; `use` closes the reader once the text is read.
val jsonContent = inputStream.bufferedReader(Charsets.UTF_8).use { it.readText() }
return TrainingDataLanguageModel.fromJson(jsonContent)
}
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/language-models/ja/bigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ja/fivegrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ja/quadrigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ja/trigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ko/bigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ko/fivegrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ko/quadrigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/ko/trigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/zh/bigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/zh/fivegrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/zh/quadrigrams.json

This file was deleted.

1 change: 0 additions & 1 deletion src/main/resources/language-models/zh/trigrams.json

This file was deleted.

0 comments on commit 444aaa0

Please sign in to comment.