Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add stderror and meandiff functions for TTest #151

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
11 changes: 9 additions & 2 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase
using Combinatorics: combinations, permutations
using Rmath: pwilcox, psignrank

import StatsBase.confint
import StatsBase.confint, StatsBase.stderror

export testname, pvalue, confint
export testname, pvalue, confint, stderror
abstract type HypothesisTest end

check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y)
Expand Down Expand Up @@ -68,6 +68,13 @@ If `tail` is `:both` (default), then the p-value for the two-sided test is retur
"""
function pvalue end

"""
stderror(test::HypothesisTest)

Compute the standard error for the point estimate of interest for a test.
"""
function stderror end

# Basic function for finding a p-value given a distribution and tail
pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
if tail == :both
Expand Down
7 changes: 6 additions & 1 deletion src/binomial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ probability is not equal to `p`.

Computed confidence intervals ([`confint`](@ref)) by default are Clopper-Pearson intervals.

Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref)
Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref), [`stderror`](@ref)
"""
# Convenience constructor: treat a Bool vector as successes/trials.
function BinomialTest(x::AbstractVector{Bool}, p=0.5)
    return BinomialTest(sum(x), length(x), p)
end
Expand All @@ -65,6 +65,11 @@ function show_params(io::IO, x::BinomialTest, ident="")
end

# p-value of the observed success count under the null Binomial(n, p).
function pvalue(x::BinomialTest; tail=:both)
    return pvalue(Binomial(x.n, x.p), x.x; tail=tail)
end
# Standard error of the sample proportion: sqrt(p̂(1 - p̂)/n).
function StatsBase.stderror(b::BinomialTest)
    trials = b.n
    p̂ = b.x / trials
    return sqrt(p̂ * (1 - p̂) / trials)
end

# Confidence interval
"""
Expand Down
3 changes: 2 additions & 1 deletion src/correlation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ of vectors `x` and `y` is zero.
Perform a t-test for the hypothesis that ``\\text{Cor}(x,y|Z=z) = 0``, i.e. the partial
correlation of vectors `x` and `y` given the matrix `Z` is zero.

Implements `pvalue` for the t-test.
Implements `pvalue` for the t-test, as well as `stderror`.
Implements `confint` using an approximate confidence interval based on Fisher's
``z``-transform.

Expand Down Expand Up @@ -82,6 +82,7 @@ end

# Correlation tests default to a two-sided alternative.
default_tail(::CorrelationTest) = :both
# p-value of the t-statistic under a Student-t null with the test's dof.
function pvalue(test::CorrelationTest; tail=:both)
    return pvalue(TDist(dof(test)), test.t, tail=tail)
end
# Standard error of the correlation estimate: 1/sqrt(dof).
# NOTE(review): presumably tied to the Fisher z approximation — confirm against docs.
function StatsBase.stderror(test::CorrelationTest)
    ν = dof(test)
    return sqrt(inv(ν))
end

function show_params(io::IO, test::CorrelationTest, indent="")
println(io, indent, "number of observations: ", nobs(test))
Expand Down
12 changes: 7 additions & 5 deletions src/t.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ function StatsBase.confint(x::TTest; level::Float64=0.95, tail=:both)
end
end

# Standard error of the difference, as stored on the test at construction.
function StatsBase.stderror(x::TTest)
    return x.stderr
end

## ONE SAMPLE T-TEST

Expand Down Expand Up @@ -77,7 +79,7 @@ Perform a one sample t-test of the null hypothesis that `n` values with mean `xb
sample standard deviation `stddev` come from a distribution with mean `μ0` against the
alternative hypothesis that the distribution does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0)
stderr = stddev/sqrt(n)
Expand All @@ -93,7 +95,7 @@ Perform a one sample t-test of the null hypothesis that the data in vector `v` c
a distribution with mean `μ0` against the alternative hypothesis that the distribution
does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
# Vector convenience method: summarize `v` and forward to the summary-stat constructor.
function OneSampleTTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real}
    return OneSampleTTest(mean(v), std(v), length(v), μ0)
end

Expand All @@ -104,7 +106,7 @@ Perform a paired sample t-test of the null hypothesis that the differences betwe
values in vectors `x` and `y` come from a distribution with mean `μ0` against the
alternative hypothesis that the distribution does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function OneSampleTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real}
check_same_length(x, y)
Expand Down Expand Up @@ -142,7 +144,7 @@ Perform a two-sample t-test of the null hypothesis that `x` and `y` come from di
with equal means and variances against the alternative hypothesis that the distributions
have different means but equal variances.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function EqualVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real}
nx, ny = length(x), length(y)
Expand Down Expand Up @@ -184,7 +186,7 @@ equation:
\\frac{(k_i s_i^2)^2}{ν_i}}
```

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function UnequalVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real}
nx, ny = length(x), length(y)
Expand Down
12 changes: 7 additions & 5 deletions src/z.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ function StatsBase.confint(x::ZTest; level::Float64=0.95, tail=:both)
end
end

# Standard error of the difference, as stored on the test at construction.
function StatsBase.stderror(x::ZTest)
    return x.stderr
end

## ONE SAMPLE Z-TEST

Expand Down Expand Up @@ -75,7 +77,7 @@ Perform a one sample z-test of the null hypothesis that `n` values with mean `xb
population standard deviation `stddev` come from a distribution with mean `μ0` against the
alternative hypothesis that the distribution does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function OneSampleZTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0)
stderr = stddev/sqrt(n)
Expand All @@ -90,7 +92,7 @@ Perform a one sample z-test of the null hypothesis that the data in vector `v` c
a distribution with mean `μ0` against the alternative hypothesis that the distribution
does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
# Vector convenience method: summarize `v` and forward to the summary-stat constructor.
function OneSampleZTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real}
    return OneSampleZTest(mean(v), std(v), length(v), μ0)
end

Expand All @@ -101,7 +103,7 @@ Perform a paired sample z-test of the null hypothesis that the differences betwe
values in vectors `x` and `y` come from a distribution with mean `μ0` against the
alternative hypothesis that the distribution does not have mean `μ0`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function OneSampleZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real}
check_same_length(x, y)
Expand Down Expand Up @@ -137,7 +139,7 @@ Perform a two-sample z-test of the null hypothesis that `x` and `y` come from di
with equal means and variances against the alternative hypothesis that the distributions
have different means but equal variances.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function EqualVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real}
nx, ny = length(x), length(y)
Expand Down Expand Up @@ -169,7 +171,7 @@ Perform an unequal variance two-sample z-test of the null hypothesis that `x` an
from distributions with equal means against the alternative hypothesis that the
distributions have different means.

Implements: [`pvalue`](@ref), [`confint`](@ref)
Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref)
"""
function UnequalVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real}
nx, ny = length(x), length(y)
Expand Down
2 changes: 2 additions & 0 deletions test/binomial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using HypothesisTests: default_tail
@test pvalue(t) ≈ 0.004334880883507431
@test pvalue(t, tail=:left) ≈ 0.002167440441753716
@test pvalue(t, tail=:right) ≈ 0.9989844298129187
@test stderror(t) ≈ 0.05337605126836238
@test default_tail(t) == :both
@test_ci_approx confint(t) (0.23058523962930383, 0.4491666887959782)
@test_ci_approx confint(t, tail=:left) (0.0, 0.4313047758370174)
Expand All @@ -32,6 +33,7 @@ using HypothesisTests: default_tail
@test pvalue(t) ≈ 0.5078125000000002
@test pvalue(t, tail=:left) ≈ 0.91015625
@test pvalue(t, tail=:right) ≈ 0.2539062500000001
@test stderror(t) ≈ 0.15713484026367724
@test_ci_approx confint(t) (0.2992950562085405, 0.9251453685803082)
@test_ci_approx confint(t, tail=:left) (0.0, 0.9022531865607242)
@test_ci_approx confint(t, tail=:right) (0.3449413659437032, 1.0)
Expand Down
2 changes: 2 additions & 0 deletions test/correlation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ end
@test nobs(w) == 37
@test dof(w) == 33
@test pvalue(w) < 0.00001
@test stderror(w) ≈ 0.17407765595569785

X = [ 2 1 0
4 2 0
Expand All @@ -52,4 +53,5 @@ end
@test nobs(x) == 4
@test dof(x) == 1
@test pvalue(x) ≈ 0.25776212 atol=1e-6
@test stderror(x) ≈ 1.0
end
5 changes: 5 additions & 0 deletions test/t.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ using HypothesisTests: default_tail
@test abs(pvalue(tst; tail=:left) - 0.9735) <= 1e-4
@test abs(pvalue(tst; tail=:right) - 0.0265) <= 1e-4
@test default_tail(tst) == :both
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)

tst = OneSampleTTest(mean(-5:10), std(-5:10), 16)
Expand All @@ -24,6 +25,7 @@ using HypothesisTests: default_tail
c = confint(tst; tail=:right)
@test abs(c[1] - 0.4135) .<= 1e-4
@test c[2] == Inf
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)

tst = OneSampleTTest(-10:5)
Expand All @@ -33,6 +35,7 @@ using HypothesisTests: default_tail
@test all(abs.([confint(tst)...] - [-5.0369, 0.0369]) .<= 1e-4)
@test abs.(confint(tst; tail=:left)[2] - (-0.4135)) .<= 1e-4
@test abs.(confint(tst; tail=:right)[1] - (-4.5865)) .<= 1e-4
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)
end

Expand All @@ -52,6 +55,7 @@ end
@test abs(pvalue(tst) - 0.078) <= 1e-3
@test all(abs.([confint(tst)...] - [-0.0131, 0.2031]) .<= 1e-4)
@test default_tail(tst) == :both
@test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015
show(IOBuffer(), tst)

tst = UnequalVarianceTTest(a1, a2)
Expand All @@ -60,6 +64,7 @@ end
@test abs(pvalue(tst) - 0.091) <= 1e-3
@test all(abs.([confint(tst)...] - [-0.0196, 0.2096]) .<= 1e-4)
@test default_tail(tst) == :both
@test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015
show(IOBuffer(), tst)
end
end
7 changes: 7 additions & 0 deletions test/z.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ null = Normal(0.0, 1.0)
@test pvalue(tst; tail=:left) ≈ cdf(null, z)
@test pvalue(tst; tail=:right) ≈ ccdf(null, z)
@test default_tail(tst) == :both
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)

tst = OneSampleZTest(m, s, n)
Expand All @@ -33,6 +34,7 @@ null = Normal(0.0, 1.0)
@test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se
@test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se
@test confint(tst; tail=:right)[2] ≈ Inf
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)

x = -10:5
Expand All @@ -44,6 +46,7 @@ null = Normal(0.0, 1.0)
@test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z))
@test pvalue(tst; tail=:left) ≈ cdf(null, z)
@test pvalue(tst; tail=:right) ≈ ccdf(null, z)
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)

tst = OneSampleZTest(m, s, n)
Expand All @@ -56,6 +59,7 @@ null = Normal(0.0, 1.0)
@test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se
@test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se
@test confint(tst; tail=:right)[2] ≈ Inf
@test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083
show(IOBuffer(), tst)
end

Expand All @@ -66,6 +70,7 @@ end
z = (m - 0) / se
tst = OneSampleZTest(x, y)
@test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z))
@test stderror(tst) ≈ tst.stderr ≈ 0.24494897427831783
end

@testset "Two sample" begin
Expand All @@ -87,6 +92,7 @@ end
@test default_tail(tst) == :both
@test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se
@test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se
@test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015
show(IOBuffer(), tst)

tst = UnequalVarianceZTest(a1, a2)
Expand All @@ -98,6 +104,7 @@ end
@test pvalue(tst; tail=:right) ≈ ccdf(null, z)
@test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se
@test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se
@test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015
show(IOBuffer(), tst)
end
end