diff --git a/src/Languages.jl b/src/Languages.jl index 22f4b3f..8304cbc 100644 --- a/src/Languages.jl +++ b/src/Languages.jl @@ -8,6 +8,7 @@ using Match export prepositions export pronouns export stopwords + export LanguageDetector cache = Dict() diff --git a/src/whatlang.jl b/src/whatlang.jl index a6f6819..f73392d 100644 --- a/src/whatlang.jl +++ b/src/whatlang.jl @@ -409,7 +409,12 @@ function calculate_distance(lang_trigrams, text_trigrams) total_dist end -function detect(text::AbstractString, options=default_options()) +Base.@deprecate detect(text::AbstractString, options=default_options()) LanguageDetector()(text, options) + +type LanguageDetector +end + +function(m::LanguageDetector)(text::AbstractString, options=default_options()) if text==""; throw(ArgumentError("Cannot detect language for empty text")); end script = detect_script(text) if script == nothing; return (nothing, nothing, 0); end diff --git a/test/runtests.jl b/test/runtests.jl index f25f130..ca9bb79 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,10 +10,14 @@ using Base.Test @test length(stopwords(lang)) == 488 end -@test Languages.from_code("ben") == Languages.Bengali() -@test Languages.from_code("Ben") == Languages.Bengali() -@test Languages.from_code("BEn") == Languages.Bengali() +@testset "lang code" begin + @test Languages.from_code("ben") == Languages.Bengali() + @test Languages.from_code("Ben") == Languages.Bengali() + @test Languages.from_code("BEn") == Languages.Bengali() -@test Languages.from_code("abc") == nothing + @test Languages.from_code("abc") == nothing +end -include("whatlang.jl") +@testset "lang detect" begin + include("whatlang.jl") +end diff --git a/test/whatlang.jl b/test/whatlang.jl index 4f4c4af..907fa30 100644 --- a/test/whatlang.jl +++ b/test/whatlang.jl @@ -115,18 +115,19 @@ res = Languages.get_trigrams_with_positions("xaaaaabbbbd") @test res["bbb"] == 2 #Detection +d = LanguageDetector() text = "Además de todo lo anteriormente dicho, también encontramos..." -output = Languages.detect(text) +output = d(text) @test output[1] == Languages.Spanish() @test output[2] == Languages.LatinScript() text = "Та нічого, все нормально. А в тебе як?" -output = Languages.detect(text) +output = d(text) @test output[1] == Languages.Ukrainian() @test output[2] == Languages.CyrillicScript() text = "I am begging pardon"; -output = Languages.detect(text) +output = d(text) @test output[1] == Languages.Tagalog() text = """ @@ -136,14 +137,14 @@ text = """ И лучше выдумать не мог. """ -output = Languages.detect(text) +output = d(text) @test output[1] == Languages.Russian() #Test all languages! examples = JSON.parse(readstring(joinpath(dirname(@__FILE__), "examples.json"))) -Languages.detect(examples["deu"]) +d(examples["deu"]) for (key, val) in examples - output = Languages.detect(val) + output = d(val) @test output[1] == Languages.from_code(key) end