Skip to content

Commit

Permalink
More lang detect changes
Browse files Browse the repository at this point in the history
  • Loading branch information
aviks committed Aug 4, 2018
1 parent eb4e6ff commit 19d58a5
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/Languages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ using Match
export prepositions
export pronouns
export stopwords
export LanguageDetector

cache = Dict()

Expand Down
7 changes: 6 additions & 1 deletion src/whatlang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,12 @@ function calculate_distance(lang_trigrams, text_trigrams)
total_dist
end

function detect(text::AbstractString, options=default_options())
Base.@deprecate detect(text::AbstractString, options=default_options()) LanguageDetector()(text, options)

"""
    LanguageDetector()

Stateless, callable language detector. An instance is invoked like a function,
`LanguageDetector()(text)` or `LanguageDetector()(text, options)`; the detection
logic itself lives in the call method defined for this type.

The type has no fields, so it is declared as an immutable `struct`: every
instance is interchangeable and compares identical with `===`.
"""
struct LanguageDetector
end

function(m::LanguageDetector)(text::AbstractString, options=default_options())
if text==""; throw(ArgumentError("Cannot detect language for empty text")); end
script = detect_script(text)
if script == nothing; return (nothing, nothing, 0); end
Expand Down
14 changes: 9 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@ using Base.Test
@test length(stopwords(lang)) == 488
end

@test Languages.from_code("ben") == Languages.Bengali()
@test Languages.from_code("Ben") == Languages.Bengali()
@test Languages.from_code("BEn") == Languages.Bengali()
# Language lookup by code: the same three-letter code is expected to resolve to
# the same language regardless of letter case, and an unknown code is expected
# to yield `nothing`.
@testset "lang code" begin
    @test Languages.from_code("ben") == Languages.Bengali()
    @test Languages.from_code("Ben") == Languages.Bengali()
    @test Languages.from_code("BEn") == Languages.Bengali()

    # Identity comparison is the reliable way to test for `nothing`.
    @test Languages.from_code("abc") === nothing
end

include("whatlang.jl")
# Run the detection tests from whatlang.jl inside their own test set, so their
# results are grouped under the "lang detect" name.
@testset "lang detect" begin
include("whatlang.jl")
end
13 changes: 7 additions & 6 deletions test/whatlang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,19 @@ res = Languages.get_trigrams_with_positions("xaaaaabbbbd")
@test res["bbb"] == 2

# Detection
# Each case feeds a short sample text to the detector and checks the first
# element of the result (the language) and, where asserted, the second element
# (the script).
# NOTE(review): every case assigns `output` twice in a row, first from the
# deprecated `Languages.detect` and then from the callable detector `d`; only
# the second result reaches the `@test` lines. Confirm whether the first call
# is still wanted.
d = LanguageDetector()
# Spanish sample, expected to be reported as Latin script.
text = "Además de todo lo anteriormente dicho, también encontramos..."
output = Languages.detect(text)
output = d(text)
@test output[1] == Languages.Spanish()
@test output[2] == Languages.LatinScript()

# Ukrainian sample, expected to be reported as Cyrillic script.
text = "Та нічого, все нормально. А в тебе як?"
output = Languages.detect(text)
output = d(text)
@test output[1] == Languages.Ukrainian()
@test output[2] == Languages.CyrillicScript()

# Short sentence that the detector is expected to classify as Tagalog.
text = "I am begging pardon";
output = Languages.detect(text)
output = d(text)
@test output[1] == Languages.Tagalog()

text = """
Expand All @@ -136,14 +137,14 @@ text = """
И лучше выдумать не мог.
"""

output = Languages.detect(text)
output = d(text)
@test output[1] == Languages.Russian()

# Test all languages!
# Load the sample texts from the examples.json file that sits next to this test
# file. The loop below treats each key as a language code understood by
# `Languages.from_code` and each value as sample text in that language.
examples = JSON.parse(readstring(joinpath(dirname(@__FILE__), "examples.json")))
# The results of these two calls are discarded.
# NOTE(review): presumably a smoke check on the German sample — confirm.
Languages.detect(examples["deu"])
d(examples["deu"])

# For every sample, the detected language must equal the language its key
# resolves to. `output` is overwritten by `d(val)`, so only that result is tested.
for (key, val) in examples
output = Languages.detect(val)
output = d(val)
@test output[1] == Languages.from_code(key)
end

0 comments on commit 19d58a5

Please sign in to comment.