diff --git a/src/LM/langmodel.jl b/src/LM/langmodel.jl
index e1cc29dc..6450d49c 100644
--- a/src/LM/langmodel.jl
+++ b/src/LM/langmodel.jl
@@ -99,12 +99,10 @@ To get probability of word given that context
 In other words, for given context calculate frequency distribution of word
 """
-function prob(m::Langmodel, templ_lm::DefaultDict, word, context=nothing)
-    if context == nothing || context == ""
-        return(1/float(length(templ_lm))) #provide distribution
-    else
-        accum = templ_lm[context]
-    end
+function prob(m::Langmodel, templ_lm::DefaultDict, word, context=nothing)::Float64
+    (isnothing(context) || isempty(context)) && return 1.0/length(templ_lm) #provide distribution
+
+    accum = templ_lm[context]
     s = float(sum(accum))
     for (text, count) in accum
         if text == word
@@ -112,7 +110,7 @@ function prob(m::Langmodel, templ_lm::DefaultDict, word, context=nothing)
         end
     end
     if context in keys(m.vocab.vocab)
-        return(0)
+        return 0.0
     end
     return(Inf)
 end
@@ -186,9 +184,8 @@ depending upon the sub-Type
 """
 function score(m::InterpolatedLanguageModel, temp_lm::DefaultDict, word, context=nothing)
-    if context == nothing || context == ""
-        return prob(m, temp_lm, word, context)
-    end
+    (isnothing(context) || isempty(context)) && return prob(m, temp_lm, word)
+
     if context in keys(temp_lm)
         alpha,gamma = alpha_gammma(m, temp_lm, word, context)
         return (alpha + gamma*score(m, temp_lm, word, context_reduce(context)))
@@ -242,5 +239,3 @@ function alpha_gammma(m::KneserNeyInterpolated, templ_lm::DefaultDict, word, con
     gamma = (m.discount * count_non_zero_vals(accum) /s)
     return alpha, gamma
 end
-
-
diff --git a/src/deprecations.jl b/src/deprecations.jl
index 4c2cf7f2..483f0661 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -1,7 +1,7 @@
 ## Deprecations for Languages
 
-function WordTokenizers.tokenize(::Type{S}, s::T) where {S <: Language, T <: AbstractString}
+function tokenize(::Type{S}, s::T) where {S <: Language, T <: AbstractString}
     depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
     tokenize(S(), s)
 end
diff --git a/src/tokenizer.jl b/src/tokenizer.jl
index 18ef1af8..78ef1c9f 100644
--- a/src/tokenizer.jl
+++ b/src/tokenizer.jl
@@ -16,7 +16,7 @@ julia> tokenize(Languages.English(), "Too foo words!")
 
 See also: [`sentence_tokenize`](@ref)
 """
-WordTokenizers.tokenize(lang::S, s::T) where {S <: Language, T <: AbstractString} = WordTokenizers.tokenize(s)
+tokenize(lang::S, s::T) where {S <: Language, T <: AbstractString} = WordTokenizers.tokenize(s)
 
 """
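Side note on the `langmodel.jl` hunks: the new one-line guard `(isnothing(context) || isempty(context)) && return ...` replaces the old `if context == nothing || context == ""` blocks in both `prob` and `score`, and also catches empty token sequences, while the `::Float64` annotation keeps the return type stable across branches. A minimal standalone sketch of that guard pattern (`toy_prob` and the toy counts table are hypothetical illustrations, not the TextAnalysis API):

```julia
# Toy stand-in for the langmodel tables: one context mapping words to counts.
# (The real code stores DataStructures.Accumulator values; a Dict suffices here.)
using DataStructures

counts = DefaultDict{String, Dict{String, Int}}(() -> Dict{String, Int}())
counts["the"] = Dict("cat" => 3, "dog" => 1)

function toy_prob(templ_lm::DefaultDict, word, context=nothing)::Float64
    # The new guard short-circuits on `nothing`, "", and empty collections,
    # where the old `context == nothing || context == ""` missed empty arrays.
    (isnothing(context) || isempty(context)) && return 1.0 / length(templ_lm)

    accum = templ_lm[context]
    s = float(sum(values(accum)))
    # Simplified lookup; the real `prob` falls through to the 0.0/Inf
    # vocab handling shown in the patch, which is omitted here.
    return get(accum, word, 0) / s
end

toy_prob(counts, "cat", "the")  # 0.75
toy_prob(counts, "cat")         # 1.0 (uniform: only one context stored)
```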