From 13ae82ac6b13cb46ce0aae0ec42f6f5fcd5232aa Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:25:04 -0600 Subject: [PATCH 01/12] Add stderror and meandiff functions for TTest --- src/t.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/t.jl b/src/t.jl index e80eeef3..19dd7870 100644 --- a/src/t.jl +++ b/src/t.jl @@ -48,6 +48,11 @@ function StatsBase.confint(x::TTest; level::Float64=0.95, tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar ## ONE SAMPLE T-TEST From adb6fbe4c5d639cd52eb36513801a6e956e63780 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:30:52 -0600 Subject: [PATCH 02/12] Same for ZTest --- src/z.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/z.jl b/src/z.jl index 32d4f1f7..8c6454cf 100644 --- a/src/z.jl +++ b/src/z.jl @@ -48,6 +48,12 @@ function StatsBase.confint(x::ZTest; level::Float64=0.95, tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar + ## ONE SAMPLE Z-TEST From 3cd11c2b3200032e6528f11d8b11e564bf6133d3 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 3 Jan 2020 19:04:02 -0500 Subject: [PATCH 03/12] test squashing --- src/HypothesisTests.jl | 4 ++-- src/correlation.jl | 1 + src/t.jl | 3 --- src/z.jl | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 4a5cf229..26e9479f 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase using Combinatorics: combinations, permutations using Rmath: pwilcox, psignrank -import StatsBase.confint +import StatsBase.confint, StatsBase.stderror -export testname, pvalue, confint +export testname, pvalue, confint, stderror abstract type HypothesisTest end check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y) diff --git a/src/correlation.jl b/src/correlation.jl index f1dcb2f1..3c89ea3a 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -69,6 +69,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) +StatsBase.stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test)) diff --git a/src/t.jl b/src/t.jl index 19dd7870..0da81d15 100644 --- a/src/t.jl +++ b/src/t.jl @@ -51,9 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - ## ONE SAMPLE T-TEST struct OneSampleTTest <: TTest diff --git a/src/z.jl b/src/z.jl index 8c6454cf..6fe0c776 100644 --- a/src/z.jl +++ b/src/z.jl @@ -51,10 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - - ## ONE SAMPLE Z-TEST struct OneSampleZTest <: ZTest From caf91fe26660722ec23668f044a3f6cb96330004 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 3 Jan 2020 19:17:09 -0500 Subject: [PATCH 04/12] binomial --- src/binomial.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/binomial.jl b/src/binomial.jl index abea8dcf..15f2befd 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -65,6 +65,11 @@ function show_params(io::IO, 
x::BinomialTest, ident="") end pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail) +function StatsBase.stderror(b::BinomialTest) + n = b.n + phat = b.x / n + sqrt((phat * (1 - phat)) / n) +end # Confidence interval """ From d3993fae15a64007c12a67993cd9c2e32beb8c4b Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 13 Mar 2020 12:38:36 -0400 Subject: [PATCH 05/12] add tests --- src/z.jl | 2 +- test/binomial.jl | 2 ++ test/correlation.jl | 2 ++ test/t.jl | 5 +++++ test/z.jl | 7 +++++++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/z.jl b/src/z.jl index 6fe0c776..1f089ea7 100644 --- a/src/z.jl +++ b/src/z.jl @@ -49,7 +49,7 @@ function StatsBase.confint(x::ZTest; level::Float64=0.95, tail=:both) end # The standard error of the difference -StatsBase.stderror(x::TTest) = x.stderr +StatsBase.stderror(x::ZTest) = x.stderr ## ONE SAMPLE Z-TEST diff --git a/test/binomial.jl b/test/binomial.jl index b0f5197d..902204ea 100644 --- a/test/binomial.jl +++ b/test/binomial.jl @@ -6,6 +6,7 @@ using HypothesisTests: default_tail @test pvalue(t) ≈ 0.004334880883507431 @test pvalue(t, tail=:left) ≈ 0.002167440441753716 @test pvalue(t, tail=:right) ≈ 0.9989844298129187 + @test stderror(t) ≈ 0.05337605126836238 @test default_tail(t) == :both @test_ci_approx confint(t) (0.23058523962930383, 0.4491666887959782) @test_ci_approx confint(t, tail=:left) (0.0, 0.4313047758370174) @@ -32,6 +33,7 @@ using HypothesisTests: default_tail @test pvalue(t) ≈ 0.5078125000000002 @test pvalue(t, tail=:left) ≈ 0.91015625 @test pvalue(t, tail=:right) ≈ 0.2539062500000001 + @test stderror(t) ≈ 0.15713484026367724 @test_ci_approx confint(t) (0.2992950562085405, 0.9251453685803082) @test_ci_approx confint(t, tail=:left) (0.0, 0.9022531865607242) @test_ci_approx confint(t, tail=:right) (0.3449413659437032, 1.0) diff --git a/test/correlation.jl b/test/correlation.jl index 4c27ed2b..8ee31296 100644 --- a/test/correlation.jl +++ b/test/correlation.jl @@ -17,6 +17,7 @@ using StatsBase @test nobs(w) == 37 @test dof(w) == 33 @test pvalue(w) < 0.00001 + @test stderror(w) ≈ 0.17407765595569785 X = [ 2 1 0 4 2 0 @@ -28,4 +29,5 @@ using StatsBase @test nobs(x) == 4 @test dof(x) == 1 @test pvalue(x) ≈ 0.25776212 atol=1e-6 + @test stderror(x) ≈ 1.0 end diff --git a/test/t.jl b/test/t.jl index 1f0ccb4b..37fad3f3 100644 --- a/test/t.jl +++ b/test/t.jl @@ -11,6 +11,7 @@ using HypothesisTests: default_tail @test abs(pvalue(tst; tail=:left) - 0.9735) <= 1e-4 @test abs(pvalue(tst; tail=:right) - 0.0265) <= 1e-4 @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleTTest(mean(-5:10), std(-5:10), 16) @@ -24,6 +25,7 @@ using HypothesisTests: default_tail c = confint(tst; tail=:right) @test abs(c[1] - 0.4135) .<= 1e-4 @test c[2] == Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleTTest(-10:5) @@ -33,6 +35,7 @@ using HypothesisTests: default_tail @test all(abs.([confint(tst)...] - [-5.0369, 0.0369]) .<= 1e-4) @test abs.(confint(tst; tail=:left)[2] - (-0.4135)) .<= 1e-4 @test abs.(confint(tst; tail=:right)[1] - (-4.5865)) .<= 1e-4 + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) end @@ -52,6 +55,7 @@ end @test abs(pvalue(tst) - 0.078) <= 1e-3 @test all(abs.([confint(tst)...] 
- [-0.0131, 0.2031]) .<= 1e-4) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) tst = UnequalVarianceTTest(a1, a2) @@ -60,6 +64,7 @@ end @test abs(pvalue(tst) - 0.091) <= 1e-3 @test all(abs.([confint(tst)...] - [-0.0196, 0.2096]) .<= 1e-4) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) end end diff --git a/test/z.jl b/test/z.jl index faf6eaa8..cb872358 100644 --- a/test/z.jl +++ b/test/z.jl @@ -21,6 +21,7 @@ null = Normal(0.0, 1.0) @test pvalue(tst; tail=:left) ≈ cdf(null, z) @test pvalue(tst; tail=:right) ≈ ccdf(null, z) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleZTest(m, s, n) @@ -33,6 +34,7 @@ null = Normal(0.0, 1.0) @test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se @test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se @test confint(tst; tail=:right)[2] ≈ Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) x = -10:5 @@ -44,6 +46,7 @@ null = Normal(0.0, 1.0) @test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z)) @test pvalue(tst; tail=:left) ≈ cdf(null, z) @test pvalue(tst; tail=:right) ≈ ccdf(null, z) + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleZTest(m, s, n) @@ -56,6 +59,7 @@ null = Normal(0.0, 1.0) @test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se @test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se @test confint(tst; tail=:right)[2] ≈ Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) end @@ -66,6 +70,7 @@ end z = (m - 0) / se tst = OneSampleZTest(x, y) @test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z)) + @test stderror(tst) ≈ tst.stderr ≈ 0.24494897427831783 end @testset "Two sample" begin @@ -87,6 +92,7 @@ end @test default_tail(tst) == :both @test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se @test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) tst = UnequalVarianceZTest(a1, a2) @@ -98,6 +104,7 @@ end @test pvalue(tst; tail=:right) ≈ ccdf(null, z) @test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se @test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) end end From af6965dc78a7c8acbd5e15fd68770f9e8bb25f56 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Sun, 29 Mar 2020 16:36:33 -0400 Subject: [PATCH 06/12] Add documentation --- src/HypothesisTests.jl | 7 +++++++ src/binomial.jl | 2 +- src/correlation.jl | 2 +- src/t.jl | 10 +++++----- src/z.jl | 10 +++++----- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 26e9479f..15ab7c62 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -68,6 +68,13 @@ If `tail` is `:both` (default), then the p-value for the two-sided test is retur """ function pvalue end +""" + stderror(test::HypothesisTest) + +Compute the standard error for the point estimate of interest for a test. 
+""" +function stderror end + # Basic function for finding a p-value given a distribution and tail pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) = if tail == :both diff --git a/src/binomial.jl b/src/binomial.jl index 15f2befd..f1bcac6e 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -45,7 +45,7 @@ probability is not equal to `p`. Computed confidence intervals ([`confint`](@ref)) by default are Clopper-Pearson intervals. -Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref) +Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref), [`stderror`](@ref) """ BinomialTest(x::AbstractVector{Bool}, p=0.5) = BinomialTest(sum(x), length(x), p) diff --git a/src/correlation.jl b/src/correlation.jl index 3c89ea3a..f61981a5 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -11,7 +11,7 @@ export CorrelationTest Perform a t-test for the hypothesis that ``\\text{Cor}(x,y|Z=z) = 0``, i.e. the partial correlation of vectors `x` and `y` given the matrix `Z` is zero. -Implements `pvalue` for the t-test. +Implements `pvalue` for the t-test, as well as `stderror`. Implements `confint` using an approximate confidence interval based on Fisher's ``z``-transform. diff --git a/src/t.jl b/src/t.jl index 0da81d15..dc6fbc19 100644 --- a/src/t.jl +++ b/src/t.jl @@ -79,7 +79,7 @@ Perform a one sample t-test of the null hypothesis that `n` values with mean `xb sample standard deviation `stddev` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0) stderr = stddev/sqrt(n) @@ -95,7 +95,7 @@ Perform a one sample t-test of the null hypothesis that the data in vector `v` c a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ OneSampleTTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real} = OneSampleTTest(mean(v), std(v), length(v), μ0) @@ -106,7 +106,7 @@ Perform a paired sample t-test of the null hypothesis that the differences betwe values in vectors `x` and `y` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real} check_same_length(x, y) @@ -144,7 +144,7 @@ Perform a two-sample t-test of the null hypothesis that `x` and `y` come from di with equal means and variances against the alternative hypothesis that the distributions have different means but equal variances. 
-Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function EqualVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) @@ -186,7 +186,7 @@ equation: \\frac{(k_i s_i^2)^2}{ν_i}} ``` -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function UnequalVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) diff --git a/src/z.jl b/src/z.jl index 1f089ea7..dc68cfa8 100644 --- a/src/z.jl +++ b/src/z.jl @@ -77,7 +77,7 @@ Perform a one sample z-test of the null hypothesis that `n` values with mean `xb population standard deviation `stddev` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleZTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0) stderr = stddev/sqrt(n) @@ -92,7 +92,7 @@ Perform a one sample z-test of the null hypothesis that the data in vector `v` c a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ OneSampleZTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real} = OneSampleZTest(mean(v), std(v), length(v), μ0) @@ -103,7 +103,7 @@ Perform a paired sample z-test of the null hypothesis that the differences betwe values in vectors `x` and `y` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real} check_same_length(x, y) @@ -139,7 +139,7 @@ Perform a two-sample z-test of the null hypothesis that `x` and `y` come from di with equal means and variances against the alternative hypothesis that the distributions have different means but equal variances. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function EqualVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) @@ -171,7 +171,7 @@ Perform an unequal variance two-sample z-test of the null hypothesis that `x` an from distributions with equal means against the alternative hypothesis that the distributions have different means. 
-Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function UnequalVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) From ad0f7d3ce098fc1b376dd852f7d52aefae9f2449 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:25:04 -0600 Subject: [PATCH 07/12] Add stderror and meandiff functions for TTest --- src/t.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/t.jl b/src/t.jl index e80eeef3..19dd7870 100644 --- a/src/t.jl +++ b/src/t.jl @@ -48,6 +48,11 @@ function StatsBase.confint(x::TTest; level::Float64=0.95, tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar ## ONE SAMPLE T-TEST From 2417e94e963848285588bf3daceef0ad5caa63bb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:30:52 -0600 Subject: [PATCH 08/12] Same for ZTest --- src/z.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/z.jl b/src/z.jl index af3370ef..c48269ca 100644 --- a/src/z.jl +++ b/src/z.jl @@ -48,6 +48,12 @@ function StatsBase.confint(x::ZTest; level::Float64=0.95, tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar + ## ONE SAMPLE Z-TEST From 6a35b167a2ff57ef3c380671f2491da4dfc224e1 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 3 Jan 2020 19:04:02 -0500 Subject: [PATCH 09/12] test squashing --- src/HypothesisTests.jl | 4 ++-- src/correlation.jl | 1 + src/t.jl | 3 --- src/z.jl | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 8939ca00..63696b86 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase using Combinatorics: combinations, permutations using Rmath: pwilcox, psignrank -import StatsBase.confint +import StatsBase.confint, StatsBase.stderror -export testname, pvalue, confint +export testname, pvalue, confint, stderror abstract type HypothesisTest end check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y) diff --git a/src/correlation.jl b/src/correlation.jl index 4bbf3233..d5f804c0 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -82,6 +82,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) +StatsBase.stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test)) diff --git a/src/t.jl b/src/t.jl index 19dd7870..0da81d15 100644 --- a/src/t.jl +++ b/src/t.jl @@ -51,9 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - ## ONE SAMPLE T-TEST struct OneSampleTTest <: TTest diff --git a/src/z.jl b/src/z.jl index c48269ca..039dc036 100644 --- a/src/z.jl +++ b/src/z.jl @@ -51,10 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - - ## ONE SAMPLE Z-TEST struct OneSampleZTest <: ZTest From 81596606310434a778918d332128cfab8ca77ecb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 3 Jan 2020 19:17:09 -0500 Subject: 
[PATCH 10/12] binomial --- src/binomial.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/binomial.jl b/src/binomial.jl index abea8dcf..15f2befd 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -65,6 +65,11 @@ function show_params(io::IO, x::BinomialTest, ident="") end pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail) +function StatsBase.stderror(b::BinomialTest) + n = b.n + phat = b.x / n + sqrt((phat * (1 - phat)) / n) +end # Confidence interval """ From dfed179e3498e3c5000d7c7dbccefe4b72ac5ebc Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 13 Mar 2020 12:38:36 -0400 Subject: [PATCH 11/12] add tests --- src/z.jl | 2 +- test/binomial.jl | 2 ++ test/correlation.jl | 2 ++ test/t.jl | 5 +++++ test/z.jl | 7 +++++++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/z.jl b/src/z.jl index 039dc036..567e4201 100644 --- a/src/z.jl +++ b/src/z.jl @@ -49,7 +49,7 @@ function StatsBase.confint(x::ZTest; level::Float64=0.95, tail=:both) end # The standard error of the difference -StatsBase.stderror(x::TTest) = x.stderr +StatsBase.stderror(x::ZTest) = x.stderr ## ONE SAMPLE Z-TEST diff --git a/test/binomial.jl b/test/binomial.jl index b0f5197d..902204ea 100644 --- a/test/binomial.jl +++ b/test/binomial.jl @@ -6,6 +6,7 @@ using HypothesisTests: default_tail @test pvalue(t) ≈ 0.004334880883507431 @test pvalue(t, tail=:left) ≈ 0.002167440441753716 @test pvalue(t, tail=:right) ≈ 0.9989844298129187 + @test stderror(t) ≈ 0.05337605126836238 @test default_tail(t) == :both @test_ci_approx confint(t) (0.23058523962930383, 0.4491666887959782) @test_ci_approx confint(t, tail=:left) (0.0, 0.4313047758370174) @@ -32,6 +33,7 @@ using HypothesisTests: default_tail @test pvalue(t) ≈ 0.5078125000000002 @test pvalue(t, tail=:left) ≈ 0.91015625 @test pvalue(t, tail=:right) ≈ 0.2539062500000001 + @test stderror(t) ≈ 0.15713484026367724 @test_ci_approx confint(t) (0.2992950562085405, 0.9251453685803082) @test_ci_approx confint(t, tail=:left) (0.0, 0.9022531865607242) @test_ci_approx confint(t, tail=:right) (0.3449413659437032, 1.0) diff --git a/test/correlation.jl b/test/correlation.jl index d2686378..d7111741 100644 --- a/test/correlation.jl +++ b/test/correlation.jl @@ -41,6 +41,7 @@ end @test nobs(w) == 37 @test dof(w) == 33 @test pvalue(w) < 0.00001 + @test stderror(w) ≈ 0.17407765595569785 X = [ 2 1 0 4 2 0 @@ -52,4 +53,5 @@ end @test nobs(x) == 4 @test dof(x) == 1 @test pvalue(x) ≈ 0.25776212 atol=1e-6 + @test stderror(x) ≈ 1.0 end diff --git a/test/t.jl b/test/t.jl index 1f0ccb4b..37fad3f3 100644 --- a/test/t.jl +++ b/test/t.jl @@ -11,6 +11,7 @@ using HypothesisTests: default_tail @test abs(pvalue(tst; tail=:left) - 0.9735) <= 1e-4 @test abs(pvalue(tst; tail=:right) - 0.0265) <= 1e-4 @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleTTest(mean(-5:10), std(-5:10), 16) @@ -24,6 +25,7 @@ using HypothesisTests: default_tail c = confint(tst; tail=:right) @test abs(c[1] - 0.4135) .<= 1e-4 @test c[2] == Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleTTest(-10:5) @@ -33,6 +35,7 @@ using HypothesisTests: default_tail @test all(abs.([confint(tst)...] 
- [-5.0369, 0.0369]) .<= 1e-4) @test abs.(confint(tst; tail=:left)[2] - (-0.4135)) .<= 1e-4 @test abs.(confint(tst; tail=:right)[1] - (-4.5865)) .<= 1e-4 + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) end @@ -52,6 +55,7 @@ end @test abs(pvalue(tst) - 0.078) <= 1e-3 @test all(abs.([confint(tst)...] - [-0.0131, 0.2031]) .<= 1e-4) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) tst = UnequalVarianceTTest(a1, a2) @@ -60,6 +64,7 @@ end @test abs(pvalue(tst) - 0.091) <= 1e-3 @test all(abs.([confint(tst)...] - [-0.0196, 0.2096]) .<= 1e-4) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) end end diff --git a/test/z.jl b/test/z.jl index faf6eaa8..cb872358 100644 --- a/test/z.jl +++ b/test/z.jl @@ -21,6 +21,7 @@ null = Normal(0.0, 1.0) @test pvalue(tst; tail=:left) ≈ cdf(null, z) @test pvalue(tst; tail=:right) ≈ ccdf(null, z) @test default_tail(tst) == :both + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleZTest(m, s, n) @@ -33,6 +34,7 @@ null = Normal(0.0, 1.0) @test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se @test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se @test confint(tst; tail=:right)[2] ≈ Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) x = -10:5 @@ -44,6 +46,7 @@ null = Normal(0.0, 1.0) @test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z)) @test pvalue(tst; tail=:left) ≈ cdf(null, z) @test pvalue(tst; tail=:right) ≈ ccdf(null, z) + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) tst = OneSampleZTest(m, s, n) @@ -56,6 +59,7 @@ null = Normal(0.0, 1.0) @test confint(tst; tail=:left)[2] ≈ m + cquantile(null, 0.05) * se @test confint(tst; tail=:right)[1] ≈ m + quantile(null, 0.05) * se @test confint(tst; tail=:right)[2] ≈ Inf + @test stderror(tst) ≈ tst.stderr ≈ 1.1902380714238083 show(IOBuffer(), tst) end @@ -66,6 +70,7 @@ end z = (m - 0) / se tst = OneSampleZTest(x, y) @test pvalue(tst) ≈ 2 * min(cdf(null, z), ccdf(null, z)) + @test stderror(tst) ≈ tst.stderr ≈ 0.24494897427831783 end @testset "Two sample" begin @@ -87,6 +92,7 @@ end @test default_tail(tst) == :both @test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se @test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) tst = UnequalVarianceZTest(a1, a2) @@ -98,6 +104,7 @@ end @test pvalue(tst; tail=:right) ≈ ccdf(null, z) @test confint(tst)[1] ≈ xbar + quantile(null, 0.05 / 2) * se @test confint(tst)[2] ≈ xbar + cquantile(null, 0.05 / 2) * se + @test stderror(tst) ≈ tst.stderr ≈ 0.048493985881413015 show(IOBuffer(), tst) end end From 7b29eb262f16257b1758b28b1f2aa365224070ac Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Sun, 29 Mar 2020 16:36:33 -0400 Subject: [PATCH 12/12] Add documentation --- src/HypothesisTests.jl | 7 +++++++ src/binomial.jl | 2 +- src/correlation.jl | 2 +- src/t.jl | 10 +++++----- src/z.jl | 10 +++++----- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 63696b86..5c0f6094 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -68,6 +68,13 @@ If `tail` is `:both` (default), then the p-value for the two-sided test is retur """ function pvalue end +""" + stderror(test::HypothesisTest) + +Compute the standard error for the point estimate of interest 
for a test. +""" +function stderror end + # Basic function for finding a p-value given a distribution and tail pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) = if tail == :both diff --git a/src/binomial.jl b/src/binomial.jl index 15f2befd..f1bcac6e 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -45,7 +45,7 @@ probability is not equal to `p`. Computed confidence intervals ([`confint`](@ref)) by default are Clopper-Pearson intervals. -Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref) +Implements: [`pvalue`](@ref), [`confint(::BinomialTest)`](@ref), [`stderror`](@ref) """ BinomialTest(x::AbstractVector{Bool}, p=0.5) = BinomialTest(sum(x), length(x), p) diff --git a/src/correlation.jl b/src/correlation.jl index d5f804c0..a1d68c9e 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -16,7 +16,7 @@ of vectors `x` and `y` is zero. Perform a t-test for the hypothesis that ``\\text{Cor}(x,y|Z=z) = 0``, i.e. the partial correlation of vectors `x` and `y` given the matrix `Z` is zero. -Implements `pvalue` for the t-test. +Implements `pvalue` for the t-test, as well as `stderror`. Implements `confint` using an approximate confidence interval based on Fisher's ``z``-transform. diff --git a/src/t.jl b/src/t.jl index 0da81d15..dc6fbc19 100644 --- a/src/t.jl +++ b/src/t.jl @@ -79,7 +79,7 @@ Perform a one sample t-test of the null hypothesis that `n` values with mean `xb sample standard deviation `stddev` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0) stderr = stddev/sqrt(n) @@ -95,7 +95,7 @@ Perform a one sample t-test of the null hypothesis that the data in vector `v` c a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ OneSampleTTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real} = OneSampleTTest(mean(v), std(v), length(v), μ0) @@ -106,7 +106,7 @@ Perform a paired sample t-test of the null hypothesis that the differences betwe values in vectors `x` and `y` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real} check_same_length(x, y) @@ -144,7 +144,7 @@ Perform a two-sample t-test of the null hypothesis that `x` and `y` come from di with equal means and variances against the alternative hypothesis that the distributions have different means but equal variances. 
-Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function EqualVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) @@ -186,7 +186,7 @@ equation: \\frac{(k_i s_i^2)^2}{ν_i}} ``` -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function UnequalVarianceTTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) diff --git a/src/z.jl b/src/z.jl index 567e4201..ad719b04 100644 --- a/src/z.jl +++ b/src/z.jl @@ -77,7 +77,7 @@ Perform a one sample z-test of the null hypothesis that `n` values with mean `xb population standard deviation `stddev` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleZTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0) stderr = stddev/sqrt(n) @@ -92,7 +92,7 @@ Perform a one sample z-test of the null hypothesis that the data in vector `v` c a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ OneSampleZTest(v::AbstractVector{T}, μ0::Real=0) where {T<:Real} = OneSampleZTest(mean(v), std(v), length(v), μ0) @@ -103,7 +103,7 @@ Perform a paired sample z-test of the null hypothesis that the differences betwe values in vectors `x` and `y` come from a distribution with mean `μ0` against the alternative hypothesis that the distribution does not have mean `μ0`. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function OneSampleZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real, S<:Real} check_same_length(x, y) @@ -139,7 +139,7 @@ Perform a two-sample z-test of the null hypothesis that `x` and `y` come from di with equal means and variances against the alternative hypothesis that the distributions have different means but equal variances. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function EqualVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y) @@ -171,7 +171,7 @@ Perform an unequal variance two-sample z-test of the null hypothesis that `x` an from distributions with equal means against the alternative hypothesis that the distributions have different means. -Implements: [`pvalue`](@ref), [`confint`](@ref) +Implements: [`pvalue`](@ref), [`confint`](@ref), [`stderror`](@ref) """ function UnequalVarianceZTest(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) where {T<:Real,S<:Real} nx, ny = length(x), length(y)