diff --git a/src/StatsFuns.jl b/src/StatsFuns.jl index 66d31a6..5ce491e 100644 --- a/src/StatsFuns.jl +++ b/src/StatsFuns.jl @@ -219,6 +219,18 @@ export tdistinvlogcdf, # inverse-logcdf of student's t distribution tdistinvlogccdf, # inverse-logccdf of student's t distribution + # distrs/signrank + signrankpdf, + signranklogpdf, + signrankcdf, + signranklogcdf, + signrankccdf, + signranklogccdf, + signrankinvcdf, + signrankinvccdf, + signrankinvlogcdf, + signrankinvlogccdf, + # distrs/srdist srdistcdf, # cdf of studentized range distribution srdistccdf, # ccdf of studentized range distribution @@ -258,6 +270,7 @@ include(joinpath("distrs", "norm.jl")) include(joinpath("distrs", "ntdist.jl")) include(joinpath("distrs", "pois.jl")) include(joinpath("distrs", "tdist.jl")) +include(joinpath("distrs", "signrank.jl")) include(joinpath("distrs", "srdist.jl")) if !isdefined(Base, :get_extension) diff --git a/src/distrs/signrank.jl b/src/distrs/signrank.jl new file mode 100644 index 0000000..6ac0eb2 --- /dev/null +++ b/src/distrs/signrank.jl @@ -0,0 +1,95 @@ +#= +The signrank distribution is equivalent to the problem +of finding the number of subsets of {1,2,...,n} summing to W, +relative to the total number of subsets. +The empty subset is defined to sum to zero. +This can be calculated using the recursion: +either n is in the subset in which case we need to calculate +the number of subsets of {1,2,...,n-1} summing to W-n, +or n is not in the subset in which case we need to calculate +the number of subsets of {1,2,...,n-1} summing to W. +This can be calculated bottom up using dynamic programming. + +The i'th element of DP in the j'th outer loop iteration represents: +the number of ways {1,2,...,j} can sum to W-i+1. + =# + +@inline function signrankDP(n, W) + DP = zeros(Int, W + 1) + DP[W+1] = 1 + for j in 1:n + for i in 1:(W+1-j) + DP[i] += DP[i+j] + end + end + return DP +end + +function signrankpdf(n::Int, W::Union{Float64,Int}) + W = round(Int, W) + max_W = (n * (n + 1)) >> 1 + if W < 0 + return 0.0 + elseif W >= max_W + 1 + return 0.0 + elseif W > max_W >> 1 + return signrankpdf(n, max_W - W) + end + DP = signrankDP(n, W) + return ldexp(float(DP[1]), -n) +end + +function signranklogpdf(n::Int, W::Union{Float64,Int}, ) + return log(signrankpdf(n, W)) +end + +function signrankcdf(n::Int, W::Union{Float64,Int}, ) + W = round(Int, W) + max_W = (n * (n + 1)) >> 1 + if W < 0 + return 0.0 + elseif W >= max_W + return 1.0 + elseif W > max_W >> 1 + return 1.0 - signrankcdf(n, max_W - W - 1, ) + end + DP = signrankDP(n, W) + return sum(Base.Fix2(ldexp, -n) ∘ float, DP) +end + +function signranklogcdf(n::Int, W::Union{Float64,Int}, ) + return log(signrankcdf(n, W)) +end + +function signrankccdf(n::Int, W::Union{Float64,Int}, ) + W = round(Int, W) + max_W = (n * (n + 1)) >> 1 + return signrankcdf(n, max_W - W - 1, ) +end + +function signranklogccdf(n::Int, W::Union{Float64,Int},) + return log(signrankccdf(n, W)) +end + +function signrankinvcdf(n::Int, p::Float64) + if p < 0.0 || p > 1.0 + return NaN + end + W = 0 + while signrankcdf(n, W) < p # TODO binary search and symmetry + W += 1 + end + return float(W) +end + +function signrankinvlogcdf(n::Int, logp::Float64) + signrankinvcdf(n, exp(logp)) +end + +function signrankinvccdf(n::Int, p::Float64) + signrankinvcdf(n, 1 - p) +end + +function signrankinvlogccdf(n::Int, logp::Float64) + signrankinvccdf(n, exp(logp)) +end diff --git a/src/rmath.jl b/src/rmath.jl index 595babf..e84086c 100644 --- a/src/rmath.jl +++ b/src/rmath.jl @@ -131,6 +131,7 @@ end ### Import specific functions +@import_rmath signrank signrank n @import_rmath beta beta α β @import_rmath binom binom n p @import_rmath chisq chisq k diff --git a/test/rmath.jl b/test/rmath.jl index 0276353..1a5c01c 100644 --- a/test/rmath.jl +++ b/test/rmath.jl @@ -215,6 +215,11 @@ end @test @inferred(betainvccdf(α, 0, p)) === 1f0 end end + + rmathcomp_tests("signrank", [ + ((4),-2:12), + #((10),-2:57), + ]) rmathcomp_tests("binom", [ ((1, 0.5), 0.0:1.0),