From 61300fce00cdb66821e57095e704353b23557014 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:25:04 -0600 Subject: [PATCH 01/23] Add stderror and meandiff functions for TTest --- src/t.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/t.jl b/src/t.jl index 2cb7c251..c10dac31 100644 --- a/src/t.jl +++ b/src/t.jl @@ -48,6 +48,11 @@ function StatsBase.confint(x::TTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar ## ONE SAMPLE T-TEST From 39056fe24b4419714e1d91f9c55e35bb9ed9572d Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:30:52 -0600 Subject: [PATCH 02/23] Same for ZTest --- src/z.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/z.jl b/src/z.jl index d30e2b64..18915a81 100644 --- a/src/z.jl +++ b/src/z.jl @@ -48,6 +48,12 @@ function StatsBase.confint(x::ZTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar + ## ONE SAMPLE Z-TEST From fdb3fe2097d2106b0bb9a1ca36d32236585d298c Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Thu, 21 Feb 2019 16:24:18 -0800 Subject: [PATCH 03/23] Add Barlett's test (#147) --- docs/src/multivariate.md | 9 ++++++ src/HypothesisTests.jl | 1 + src/bartlett.jl | 60 ++++++++++++++++++++++++++++++++++++++++ test/bartlett.jl | 18 ++++++++++++ test/runtests.jl | 1 + 5 files changed, 89 insertions(+) create mode 100644 src/bartlett.jl create mode 100644 test/bartlett.jl diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index ce7a0a25..cc8e670e 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -7,3 +7,12 @@ OneSampleHotellingT2 EqualCovHotellingT2 UnequalCovHotellingT2 ``` + +## Equality of covariance matrices + +Bartlett's test for equality of two covariance matrices is provided. +This is equivalent to Box's ``M``-test for two groups. + +```@docs +BartlettsTest +``` diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 1e1d4559..6dac75e1 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -178,4 +178,5 @@ include("jarque_bera.jl") include("durbin_watson.jl") include("permutation.jl") include("hotelling.jl") +include("bartlett.jl") end diff --git a/src/bartlett.jl b/src/bartlett.jl new file mode 100644 index 00000000..7c8a332b --- /dev/null +++ b/src/bartlett.jl @@ -0,0 +1,60 @@ +# Tests for equality of covariance matrices + +export BartlettsTest + +abstract type CovarianceEqualityTest <: HypothesisTest end + +population_param_of_interest(::CovarianceEqualityTest) = + ("Equality of covariance matrices", NaN, NaN) + +## Utility functions + +# Finite population correction factor +@inline _correction(p::Int, nxm1::Int, nym1::Int) = + 1 - ((2p^2 + 3p - 1) * (inv(nxm1) + inv(nym1) - inv(nxm1 + nym1)) / 6(p + 1)) + +## Bartlett's test + +struct BartlettsTest <: CovarianceEqualityTest + L′::Real + p::Int + nx::Int + ny::Int +end + +""" + BartlettsTest(X::AbstractMatrix, Y::AbstractMatrix) + +Perform Bartlett's test of the hypothesis that the covariance matrices of `X` and `Y` +are equal. + +!!! note + Bartlett's test is sensitive to departures from multivariate normality. +""" +function BartlettsTest(X::AbstractMatrix, Y::AbstractMatrix) + nx, p = size(X) + ny, q = size(Y) + p == q || throw(DimensionMismatch("Inconsistent number of variables")) + a = nx - 1 + b = ny - 1 + Sx = cov(X) + Sy = cov(Y) + L′ = -a * logdet(Sx) - b * logdet(Sy) + L′ += (a + b) * logdet(poolcov!(Sx, a, Sy, b)) + L′ *= _correction(p, a, b) + return BartlettsTest(L′, p, nx, ny) +end + +StatsBase.nobs(B::BartlettsTest) = (B.nx, B.ny) +StatsBase.dof(B::BartlettsTest) = div(B.p * (B.p + 1), 2) + +testname(::BartlettsTest) = "Bartlett's Test for Equality of Covariance Matrices" +default_tail(::BartlettsTest) = :right +pvalue(B::BartlettsTest; tail=:right) = pvalue(Chisq(dof(B)), B.L′, tail=tail) + +function show_params(io::IO, B::BartlettsTest, indent="") + println(io, indent, "number of observations: ", nobs(B)) + println(io, indent, "number of variables: ", B.p) + println(io, indent, "χ² statistic: ", B.L′) + println(io, indent, "degrees of freedom: ", dof(B)) +end diff --git a/test/bartlett.jl b/test/bartlett.jl new file mode 100644 index 00000000..396bae0e --- /dev/null +++ b/test/bartlett.jl @@ -0,0 +1,18 @@ +using HypothesisTests +using Test +using StatsBase +using DelimitedFiles + +@testset "Bartlett's test" begin + # Columns are type, length, left, right, bottom, top, diag + swiss = readdlm(joinpath(@__DIR__, "data", "swiss3.txt")) + genuine = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "real", 2:end]) + counterfeit = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "fake", 2:end]) + + b = BartlettsTest(genuine, counterfeit) + @test nobs(b) == (100, 100) + @test dof(b) == 21 + @test pvalue(b) ≈ 0.0 atol=1e-10 + @test b.L′ ≈ 121.8991235 atol=1e-6 + @test occursin("reject h_0", sprint(show, b)) +end diff --git a/test/runtests.jl b/test/runtests.jl index 2c6ecbd9..dd216d10 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -32,3 +32,4 @@ include("t.jl") include("wilcoxon.jl") include("z.jl") include("hotelling.jl") +include("bartlett.jl") From 3eecd2bbc06a54a47af2b9bc8df4c5868e4368bb Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 17 May 2019 10:11:14 +0200 Subject: [PATCH 04/23] Add sections for chi-squared and likelihood ratio tests in docs This makes them much easier to find in the homepage, as users do not necessarily know that these are special cases of the power divergence test. --- docs/src/parametric.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/parametric.md b/docs/src/parametric.md index 8ebb9a15..ed9593f4 100644 --- a/docs/src/parametric.md +++ b/docs/src/parametric.md @@ -5,12 +5,12 @@ PowerDivergenceTest ``` -### Pearson chi-squared test +## Pearson chi-squared test ```@docs ChisqTest ``` -### Multinomial likelihood ratio test +## Multinomial likelihood ratio test ```@docs MultinomialLRT ``` From e79096ba576e650f4c75ca5b088ccac111ec272a Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 17 May 2019 10:27:39 +0200 Subject: [PATCH 05/23] Rename some tests for consistency MultinomialLRT and the three *HotellingT2 were the only tests whose names did not end with "Test". --- docs/src/multivariate.md | 6 ++-- docs/src/parametric.md | 2 +- src/deprecated.jl | 5 ++++ src/hotelling.jl | 64 ++++++++++++++++++++-------------------- src/power_divergence.jl | 14 ++++----- test/hotelling.jl | 8 ++--- test/power_divergence.jl | 10 +++---- 7 files changed, 57 insertions(+), 52 deletions(-) diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index cc8e670e..7a8dd702 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -3,9 +3,9 @@ ## Hotelling's ``T^2`` test ```@docs -OneSampleHotellingT2 -EqualCovHotellingT2 -UnequalCovHotellingT2 +OneSampleHotellingT2Test +EqualCovHotellingT2Test +UnequalCovHotellingT2Test ``` ## Equality of covariance matrices diff --git a/docs/src/parametric.md b/docs/src/parametric.md index ed9593f4..10f1452f 100644 --- a/docs/src/parametric.md +++ b/docs/src/parametric.md @@ -12,7 +12,7 @@ ChisqTest ## Multinomial likelihood ratio test ```@docs -MultinomialLRT +MultinomialLRTest ``` ## t-test diff --git a/src/deprecated.jl b/src/deprecated.jl index a44bc037..adc0a975 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,3 +1,8 @@ using Base: @deprecate @deprecate ci(args...) confint(args...) + +@deprecate MultinomialLRT MultinomialLRTest +@deprecate OneSampleHotellingT2 OneSampleHotellingT2Test +@deprecate EqualCovHotellingT2 EqualCovHotellingT2Test +@deprecate UnequalCovHotellingT2 UnequalCovHotellingT2Test \ No newline at end of file diff --git a/src/hotelling.jl b/src/hotelling.jl index e2ea2f12..47635664 100644 --- a/src/hotelling.jl +++ b/src/hotelling.jl @@ -1,13 +1,13 @@ # Hotelling T² test -export OneSampleHotellingT2, EqualCovHotellingT2, UnequalCovHotellingT2 +export OneSampleHotellingT2Test, EqualCovHotellingT2Test, UnequalCovHotellingT2Test -abstract type HotellingT2Test <: HypothesisTest end +abstract type HotellingT2TestTest <: HypothesisTest end -default_tail(::HotellingT2Test) = :right -pvalue(T::HotellingT2Test; tail=:right) = pvalue(FDist(dof(T)...), T.F, tail=tail) +default_tail(::HotellingT2TestTest) = :right +pvalue(T::HotellingT2TestTest; tail=:right) = pvalue(FDist(dof(T)...), T.F, tail=tail) -function show_params(io::IO, T::HotellingT2Test, indent="") +function show_params(io::IO, T::HotellingT2TestTest, indent="") println(io, indent, "number of observations: ", nobs(T)) println(io, indent, "number of variables: ", T.p) println(io, indent, "T² statistic: ", T.T²) @@ -32,7 +32,7 @@ At_Binv_A(A::AbstractArray, B::AbstractArray) = A'*(B \ A) ## One sample -struct OneSampleHotellingT2 <: HotellingT2Test +struct OneSampleHotellingT2Test <: HotellingT2TestTest T²::Real F::Real n::Int @@ -42,16 +42,16 @@ struct OneSampleHotellingT2 <: HotellingT2Test S::Matrix end -StatsBase.nobs(T::OneSampleHotellingT2) = T.n -StatsBase.dof(T::OneSampleHotellingT2) = (T.p, T.n - T.p) +StatsBase.nobs(T::OneSampleHotellingT2Test) = T.n +StatsBase.dof(T::OneSampleHotellingT2Test) = (T.p, T.n - T.p) """ - OneSampleHotellingT2(X::AbstractMatrix, μ₀=) + OneSampleHotellingT2Test(X::AbstractMatrix, μ₀=) Perform a one sample Hotelling's ``T^2`` test of the hypothesis that the vector of column means of `X` is equal to `μ₀`. """ -function OneSampleHotellingT2(X::AbstractMatrix{T}, +function OneSampleHotellingT2Test(X::AbstractMatrix{T}, μ₀::AbstractVector=fill(middle(zero(T)), size(X, 2))) where T n, p = size(X) p == length(μ₀) || @@ -61,32 +61,32 @@ function OneSampleHotellingT2(X::AbstractMatrix{T}, S = cov(X) T² = n * At_Binv_A(x̄ .- μ₀, S) F = (n - p) * T² / (p * (n - 1)) - return OneSampleHotellingT2(T², F, n, p, μ₀, x̄, S) + return OneSampleHotellingT2Test(T², F, n, p, μ₀, x̄, S) end """ - OneSampleHotellingT2(X::AbstractMatrix, Y::AbstractMatrix, μ₀=) + OneSampleHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix, μ₀=) Perform a paired Hotelling's ``T^2`` test of the hypothesis that the vector of mean column differences between `X` and `Y` is equal to `μ₀`. """ -function OneSampleHotellingT2(X::AbstractMatrix{T}, Y::AbstractMatrix{S}, +function OneSampleHotellingT2Test(X::AbstractMatrix{T}, Y::AbstractMatrix{S}, μ₀::AbstractVector=fill(middle(zero(T), zero(S)), size(X, 2))) where {T,S} p, nx, ny = checkdims(X, Y) nx == ny || throw(DimensionMismatch("Inconsistent number of observations: $nx, $ny")) p == length(μ₀) || throw(DimensionMismatch("Number of variables does not match number of means")) - return OneSampleHotellingT2(X .- Y, μ₀) + return OneSampleHotellingT2Test(X .- Y, μ₀) end -testname(::OneSampleHotellingT2) = "One sample Hotelling's T² test" -population_param_of_interest(T::OneSampleHotellingT2) = ("Mean vector", T.μ₀, T.x̄) +testname(::OneSampleHotellingT2Test) = "One sample Hotelling's T² test" +population_param_of_interest(T::OneSampleHotellingT2Test) = ("Mean vector", T.μ₀, T.x̄) ## Two sample ## Two sample: equal covariance -struct EqualCovHotellingT2 <: HotellingT2Test +struct EqualCovHotellingT2Test <: HotellingT2TestTest T²::Real F::Real nx::Int @@ -97,34 +97,34 @@ struct EqualCovHotellingT2 <: HotellingT2Test end """ - EqualCovHotellingT2(X::AbstractMatrix, Y::AbstractMatrix) + EqualCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix) Perform a two sample Hotelling's ``T^2`` test of the hypothesis that the difference in the mean vectors of `X` and `Y` is zero, assuming that `X` and `Y` have equal covariance matrices. """ -function EqualCovHotellingT2(X::AbstractMatrix, Y::AbstractMatrix) +function EqualCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix) p, nx, ny = checkdims(X, Y) Δ = vec(mean(X, dims=1) .- mean(Y, dims=1)) S = poolcov!(cov(X), nx - 1, cov(Y), ny - 1) .* (inv(nx) .+ inv(ny)) T² = At_Binv_A(Δ, S) F = T² * (nx + ny - p - 1) / (p * (nx + ny - 2)) - return EqualCovHotellingT2(T², F, nx, ny, p, Δ, S) + return EqualCovHotellingT2Test(T², F, nx, ny, p, Δ, S) end -StatsBase.nobs(T::EqualCovHotellingT2) = (T.nx, T.ny) -StatsBase.dof(T::EqualCovHotellingT2) = (T.p, T.nx + T.ny - T.p - 1) +StatsBase.nobs(T::EqualCovHotellingT2Test) = (T.nx, T.ny) +StatsBase.dof(T::EqualCovHotellingT2Test) = (T.p, T.nx + T.ny - T.p - 1) -testname(::EqualCovHotellingT2) = +testname(::EqualCovHotellingT2Test) = "Two sample Hotelling's T² test (equal covariance matrices)" -population_param_of_interest(T::EqualCovHotellingT2) = +population_param_of_interest(T::EqualCovHotellingT2Test) = ("Difference in mean vectors", zeros(eltype(T.Δ), T.p), T.Δ) ## Two sample: unequal covariance # Store the denominator degrees of freedom in the type, since the computation # is expensive and we don't want to redo it every time the user calls dof -struct UnequalCovHotellingT2 <: HotellingT2Test +struct UnequalCovHotellingT2Test <: HotellingT2TestTest T²::Real F::Real nx::Int @@ -136,13 +136,13 @@ struct UnequalCovHotellingT2 <: HotellingT2Test end """ - UnequalCovHotellingT2(X::AbstractMatrix, Y::AbstractMatrix) + UnequalCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix) Perform a two sample Hotelling's ``T^2`` test of the hypothesis that the difference in the mean vectors of `X` and `Y` is zero, without assuming that `X` and `Y` have equal covariance matrices. """ -function UnequalCovHotellingT2(X::AbstractMatrix, Y::AbstractMatrix) +function UnequalCovHotellingT2Test(X::AbstractMatrix, Y::AbstractMatrix) p, nx, ny = checkdims(X, Y) Δ = vec(mean(X, dims=1) .- mean(Y, dims=1)) Sx = cov(X) ./ nx @@ -153,13 +153,13 @@ function UnequalCovHotellingT2(X::AbstractMatrix, Y::AbstractMatrix) tmp = Symmetric(ST) \ Δ iν = (dot(tmp, Sx * tmp) / T²)^2 / (nx - 1) + (dot(tmp, Sy * tmp) / T²)^2 / (ny - 1) ν = trunc(Int, inv(iν)) - return UnequalCovHotellingT2(T², F, nx, ny, p, ν, Δ, ST) + return UnequalCovHotellingT2Test(T², F, nx, ny, p, ν, Δ, ST) end -StatsBase.nobs(T::UnequalCovHotellingT2) = (T.nx, T.ny) -StatsBase.dof(T::UnequalCovHotellingT2) = (T.p, T.ν) +StatsBase.nobs(T::UnequalCovHotellingT2Test) = (T.nx, T.ny) +StatsBase.dof(T::UnequalCovHotellingT2Test) = (T.p, T.ν) -testname(::UnequalCovHotellingT2) = +testname(::UnequalCovHotellingT2Test) = "Two sample Hotelling's T² test (unequal covariance matrices)" -population_param_of_interest(T::UnequalCovHotellingT2) = +population_param_of_interest(T::UnequalCovHotellingT2Test) = ("Difference in mean vectors", zeros(eltype(T.Δ), T.p), T.Δ) diff --git a/src/power_divergence.jl b/src/power_divergence.jl index 94e8be11..030081f0 100644 --- a/src/power_divergence.jl +++ b/src/power_divergence.jl @@ -1,4 +1,4 @@ -export PowerDivergenceTest, ChisqTest, MultinomialLRT +export PowerDivergenceTest, ChisqTest, MultinomialLRTest const Levels{T} = Tuple{UnitRange{T},UnitRange{T}} @@ -380,9 +380,9 @@ end ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} = PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0) -#MultinomialLRT +#MultinomialLRTest """ - MultinomialLRT(x[, y][, theta0 = ones(length(x))/length(x)]) + MultinomialLRTest(x[, y][, theta0 = ones(length(x))/length(x)]) Perform a multinomial likelihood ratio test (equivalent to a [`PowerDivergenceTest`](@ref) with ``λ = 0``). @@ -403,21 +403,21 @@ Note that the entries of `x` (and `y` if provided) must be non-negative integers Implements: [`pvalue`](@ref), [`confint`](@ref) """ -function MultinomialLRT(x::AbstractMatrix{T}) where T<:Integer +function MultinomialLRTest(x::AbstractMatrix{T}) where T<:Integer PowerDivergenceTest(x, lambda=0.0) end -function MultinomialLRT(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer +function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer d = counts(x, y, levels) PowerDivergenceTest(d, lambda=0.0) end -function MultinomialLRT(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer +function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer d = counts(x, y, k) PowerDivergenceTest(d, lambda=0.0) end -MultinomialLRT(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} = +MultinomialLRTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} = PowerDivergenceTest(reshape(x, length(x), 1), lambda=0.0, theta0=theta0) function show_params(io::IO, x::PowerDivergenceTest, ident="") diff --git a/test/hotelling.jl b/test/hotelling.jl index 3878d586..b6f59660 100644 --- a/test/hotelling.jl +++ b/test/hotelling.jl @@ -40,7 +40,7 @@ end # Columns are calcium, iron, protein, vitamin A, vitamin C nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:,2:end] - t = OneSampleHotellingT2(nutrient, [1000, 15, 60, 800, 75]) + t = OneSampleHotellingT2Test(nutrient, [1000, 15, 60, 800, 75]) @test nobs(t) == 737 @test dof(t) == (5, 732) @test pvalue(t) ≈ 0.0 atol=eps() @@ -54,7 +54,7 @@ end spouse = readdlm(joinpath(@__DIR__, "data", "spouse.txt")) # Paired - p = OneSampleHotellingT2(spouse[:,1:4], spouse[:,5:end]) + p = OneSampleHotellingT2Test(spouse[:,1:4], spouse[:,5:end]) @test nobs(p) == 30 @test dof(p) == (4, 26) @test pvalue(p) ≈ 0.039369144 atol=1e-6 @@ -71,7 +71,7 @@ end genuine = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "real", 2:end]) counterfeit = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "fake", 2:end]) - eq = EqualCovHotellingT2(genuine, counterfeit) + eq = EqualCovHotellingT2Test(genuine, counterfeit) @test nobs(eq) == (100, 100) @test dof(eq) == (6, 193) @test pvalue(eq) ≈ 0.0 atol=eps() @@ -79,7 +79,7 @@ end @test eq.F ≈ 391.9217023 atol=1e-6 @test occursin("reject h_0", sprint(show, eq)) - un = UnequalCovHotellingT2(genuine, counterfeit) + un = UnequalCovHotellingT2Test(genuine, counterfeit) @test nobs(un) == (100, 100) @test dof(un) == (6, 193) @test pvalue(un) ≈ 0.0 atol=eps() diff --git a/test/power_divergence.jl b/test/power_divergence.jl index 83107a21..ae09f273 100644 --- a/test/power_divergence.jl +++ b/test/power_divergence.jl @@ -60,7 +60,7 @@ pvalue(m) show(IOBuffer(), m) m = ChisqTest(d) -m = MultinomialLRT(d) +m = MultinomialLRTest(d) confint(m, method = :bootstrap) confint(m, method = :bootstrap, tail=:left) @@ -136,7 +136,7 @@ pvalue(m) show(IOBuffer(), m) m = ChisqTest(d) -m = MultinomialLRT(d) +m = MultinomialLRTest(d) confint(m, method = :bootstrap) confint(m, method = :bootstrap, tail=:left) @@ -165,7 +165,7 @@ y=[1,1,1,2,2,3] d = counts(x,y,3) ChisqTest(d) -MultinomialLRT(d) +MultinomialLRTest(d) PowerDivergenceTest(x,y,3) PowerDivergenceTest(x,y,(1:3,1:3)) @@ -173,8 +173,8 @@ PowerDivergenceTest(x,y,(1:3,1:3)) ChisqTest(x,y,3) ChisqTest(x,y,(1:3,1:3)) -MultinomialLRT(x,y,3) -MultinomialLRT(x,y,(1:3,1:3)) +MultinomialLRTest(x,y,3) +MultinomialLRTest(x,y,(1:3,1:3)) # Test that large counts don't cause overflow (issue #43) d = [113997 1031298 From 093a7ebcd57e232a3bd85c678cfb4b4e8a4e01e0 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 27 May 2019 18:03:50 +0200 Subject: [PATCH 06/23] Allow Travis failures on Julia nightly (#159) The failure currently blocks deploying Documenter manual. --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.travis.yml b/.travis.yml index 18249494..1397cc88 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,30 @@ # Documentation: http://docs.travis-ci.com/user/languages/julia/ language: julia + os: - linux + julia: - 0.7 - 1.0 - 1.1 - nightly + +matrix: + allow_failures: + - julia: nightly + notifications: email: false + # uncomment the following lines to override the default test script #script: # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi # - julia -e 'Pkg.clone(pwd()); Pkg.build("HypothesisTests"); Pkg.test("HypothesisTests"; coverage=true)' + after_success: - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'; + jobs: include: - stage: "Documentation" From d32b38d664141ac5c00b6fed35da0799900b91cf Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Thu, 20 Jun 2019 19:18:52 +0200 Subject: [PATCH 07/23] Rename BartlettsTest to BartlettTest (#160) For consistency with other tests. --- docs/src/multivariate.md | 2 +- src/bartlett.jl | 22 +++++++++++----------- src/deprecated.jl | 4 +++- test/bartlett.jl | 2 +- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index 7a8dd702..4b587a56 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -14,5 +14,5 @@ Bartlett's test for equality of two covariance matrices is provided. This is equivalent to Box's ``M``-test for two groups. ```@docs -BartlettsTest +BartlettTest ``` diff --git a/src/bartlett.jl b/src/bartlett.jl index 7c8a332b..944f70d8 100644 --- a/src/bartlett.jl +++ b/src/bartlett.jl @@ -1,6 +1,6 @@ # Tests for equality of covariance matrices -export BartlettsTest +export BartlettTest abstract type CovarianceEqualityTest <: HypothesisTest end @@ -15,7 +15,7 @@ population_param_of_interest(::CovarianceEqualityTest) = ## Bartlett's test -struct BartlettsTest <: CovarianceEqualityTest +struct BartlettTest <: CovarianceEqualityTest L′::Real p::Int nx::Int @@ -23,7 +23,7 @@ struct BartlettsTest <: CovarianceEqualityTest end """ - BartlettsTest(X::AbstractMatrix, Y::AbstractMatrix) + BartlettTest(X::AbstractMatrix, Y::AbstractMatrix) Perform Bartlett's test of the hypothesis that the covariance matrices of `X` and `Y` are equal. @@ -31,7 +31,7 @@ are equal. !!! note Bartlett's test is sensitive to departures from multivariate normality. """ -function BartlettsTest(X::AbstractMatrix, Y::AbstractMatrix) +function BartlettTest(X::AbstractMatrix, Y::AbstractMatrix) nx, p = size(X) ny, q = size(Y) p == q || throw(DimensionMismatch("Inconsistent number of variables")) @@ -42,17 +42,17 @@ function BartlettsTest(X::AbstractMatrix, Y::AbstractMatrix) L′ = -a * logdet(Sx) - b * logdet(Sy) L′ += (a + b) * logdet(poolcov!(Sx, a, Sy, b)) L′ *= _correction(p, a, b) - return BartlettsTest(L′, p, nx, ny) + return BartlettTest(L′, p, nx, ny) end -StatsBase.nobs(B::BartlettsTest) = (B.nx, B.ny) -StatsBase.dof(B::BartlettsTest) = div(B.p * (B.p + 1), 2) +StatsBase.nobs(B::BartlettTest) = (B.nx, B.ny) +StatsBase.dof(B::BartlettTest) = div(B.p * (B.p + 1), 2) -testname(::BartlettsTest) = "Bartlett's Test for Equality of Covariance Matrices" -default_tail(::BartlettsTest) = :right -pvalue(B::BartlettsTest; tail=:right) = pvalue(Chisq(dof(B)), B.L′, tail=tail) +testname(::BartlettTest) = "Bartlett's Test for Equality of Covariance Matrices" +default_tail(::BartlettTest) = :right +pvalue(B::BartlettTest; tail=:right) = pvalue(Chisq(dof(B)), B.L′, tail=tail) -function show_params(io::IO, B::BartlettsTest, indent="") +function show_params(io::IO, B::BartlettTest, indent="") println(io, indent, "number of observations: ", nobs(B)) println(io, indent, "number of variables: ", B.p) println(io, indent, "χ² statistic: ", B.L′) diff --git a/src/deprecated.jl b/src/deprecated.jl index adc0a975..24a0934d 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -5,4 +5,6 @@ using Base: @deprecate @deprecate MultinomialLRT MultinomialLRTest @deprecate OneSampleHotellingT2 OneSampleHotellingT2Test @deprecate EqualCovHotellingT2 EqualCovHotellingT2Test -@deprecate UnequalCovHotellingT2 UnequalCovHotellingT2Test \ No newline at end of file +@deprecate UnequalCovHotellingT2 UnequalCovHotellingT2Test + +@deprecate BartlettsTest BartlettTest \ No newline at end of file diff --git a/test/bartlett.jl b/test/bartlett.jl index 396bae0e..9eaefd89 100644 --- a/test/bartlett.jl +++ b/test/bartlett.jl @@ -9,7 +9,7 @@ using DelimitedFiles genuine = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "real", 2:end]) counterfeit = convert(Matrix{Float64}, swiss[view(swiss, :, 1) .== "fake", 2:end]) - b = BartlettsTest(genuine, counterfeit) + b = BartlettTest(genuine, counterfeit) @test nobs(b) == (100, 100) @test dof(b) == 21 @test pvalue(b) ≈ 0.0 atol=1e-10 From e8760bb6bcf8adeffdae9120001494b4145f5a13 Mon Sep 17 00:00:00 2001 From: Rory Finnegan Date: Thu, 25 Jul 2019 16:15:03 -0400 Subject: [PATCH 08/23] Fix show methods to respect IOContext settings. (#161) --- src/HypothesisTests.jl | 20 ++++++++++++++------ src/augmented_dickey_fuller.jl | 4 +++- src/kruskal_wallis.jl | 8 ++++++-- src/mann_whitney.jl | 8 ++++++-- src/permutation.jl | 4 +++- src/power_divergence.jl | 8 ++++++-- src/wilcoxon.jl | 8 ++++++-- test/show.jl | 4 ++-- 8 files changed, 46 insertions(+), 18 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 6dac75e1..e83b5b33 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -107,11 +107,18 @@ function Base.show(io::IO, test::T) where T<:HypothesisTest (param_name, param_under_h0, param_estimate) = population_param_of_interest(test) println(io, "Population details:") println(io, " parameter of interest: $param_name") - println(io, " value under h_0: $param_under_h0") - println(io, " point estimate: $param_estimate") + print(io, " value under h_0: ") + show(io, param_under_h0) + println(io) + print(io, " point estimate: ") + show(io, param_estimate) + println(io) + if has_ci ci = map(x -> round.(x, digits=4, base=10), StatsBase.confint(test)) - println(io, " 95% confidence interval: $ci") + print(io, " 95% confidence interval: ") + show(io, ci) + println(io) end println(io) @@ -150,9 +157,10 @@ function show_params(io::IO, test::T, ident="") where T<:HypothesisTest for i = 1:length(fieldidx) name = T.names[fieldidx[i]] - println(io, ident, repeat(" ", maxlen-lengths[i]), - replace(string(name), "_", " "), - " = $(getfield(test, name))") + print(io, ident, repeat(" ", maxlen-lengths[i]), + replace(string(name), "_", " ", " = ")) + show(io, getfield(test, name)) + println(io) end end end diff --git a/src/augmented_dickey_fuller.jl b/src/augmented_dickey_fuller.jl index 15812487..61f04746 100644 --- a/src/augmented_dickey_fuller.jl +++ b/src/augmented_dickey_fuller.jl @@ -212,7 +212,9 @@ function show_params(io::IO, x::ADFTest, ident) println(io, ident, "sample size in regression: ", x.n) println(io, ident, "number of lags: ", x.lag) println(io, ident, "ADF statistic: ", x.stat) - println(io, ident, "Critical values at 1%, 5%, and 10%: ", x.cv') + print(io, ident, "Critical values at 1%, 5%, and 10%: ") + show(io, x.cv') + println(io) end pvalue(x::ADFTest) = HypothesisTests.pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=:left) diff --git a/src/kruskal_wallis.jl b/src/kruskal_wallis.jl index a98aff04..48e80a19 100644 --- a/src/kruskal_wallis.jl +++ b/src/kruskal_wallis.jl @@ -81,9 +81,13 @@ population_param_of_interest(x::KruskalWallisTest) = ("Location parameters", "al default_tail(test::KruskalWallisTest) = :right function show_params(io::IO, x::KruskalWallisTest, ident) - println(io, ident, "number of observation in each group: ", x.n_i) + print(io, ident, "number of observation in each group: ") + show(io, x.n_i) + println(io) println(io, ident, "χ²-statistic: ", x.H) - println(io, ident, "rank sums: ", x.R_i) + print(io, ident, "rank sums: ") + show(io, x.R_i) + println(io) println(io, ident, "degrees of freedom: ", x.df) println(io, ident, "adjustment for ties: ", x.tie_adjustment) end diff --git a/src/mann_whitney.jl b/src/mann_whitney.jl index eb7eb79d..90114a5e 100644 --- a/src/mann_whitney.jl +++ b/src/mann_whitney.jl @@ -105,9 +105,13 @@ population_param_of_interest(x::ExactMannWhitneyUTest) = ("Location parameter (p default_tail(test::ExactMannWhitneyUTest) = :both function show_params(io::IO, x::ExactMannWhitneyUTest, ident) - println(io, ident, "number of observations in each group: ", [x.nx, x.ny]) + print(io, ident, "number of observations in each group: ") + show(io, [x.nx, x.ny]) + println(io) println(io, ident, "Mann-Whitney-U statistic: ", x.U) - println(io, ident, "rank sums: ", [sum(x.ranks[1:x.nx]), sum(x.ranks[x.nx+1:end])]) + print(io, ident, "rank sums: ") + show(io, [sum(x.ranks[1:x.nx]), sum(x.ranks[x.nx+1:end])]) + println(io) println(io, ident, "adjustment for ties: ", x.tie_adjustment) end diff --git a/src/permutation.jl b/src/permutation.jl index 1c4dd8b4..edaea760 100644 --- a/src/permutation.jl +++ b/src/permutation.jl @@ -55,5 +55,7 @@ testname(::PermutationTest) = "Permutation Test" function show_params(io::IO, apt::PermutationTest, ident) println(io, ident, "observation: ", apt.observation) - println(io, ident, "samples: ", apt.samples) + print(io, ident, "samples: ") + show(io, apt.samples) + println(io) end diff --git a/src/power_divergence.jl b/src/power_divergence.jl index 030081f0..5ae92cd7 100644 --- a/src/power_divergence.jl +++ b/src/power_divergence.jl @@ -424,6 +424,10 @@ function show_params(io::IO, x::PowerDivergenceTest, ident="") println(io, ident, "Sample size: $(x.n)") println(io, ident, "statistic: $(x.stat)") println(io, ident, "degrees of freedom: $(x.df)") - println(io, ident, "residuals: $(vec(x.residuals))") - println(io, ident, "std. residuals: $(vec(x.stdresiduals))") + print(io, ident, "residuals: ") + show(io, vec(x.residuals)) + println(io) + print(io, ident, "std. residuals: ") + show(io, vec(x.stdresiduals)) + println(io) end diff --git a/src/wilcoxon.jl b/src/wilcoxon.jl index 220feaa1..a213f3a2 100644 --- a/src/wilcoxon.jl +++ b/src/wilcoxon.jl @@ -103,7 +103,9 @@ default_tail(test::ExactSignedRankTest) = :both function show_params(io::IO, x::ExactSignedRankTest, ident) println(io, ident, "number of observations: ", x.n) println(io, ident, "Wilcoxon rank-sum statistic: ", x.W) - println(io, ident, "rank sums: ", [sum(x.ranks[x.signs]), sum(x.ranks[map(!, x.signs)])]) + print(io, ident, "rank sums: ") + show(io, [sum(x.ranks[x.signs]), sum(x.ranks[map(!, x.signs)])]) + println(io) println(io, ident, "adjustment for ties: ", x.tie_adjustment) end @@ -213,7 +215,9 @@ default_tail(test::ApproximateSignedRankTest) = :both function show_params(io::IO, x::ApproximateSignedRankTest, ident) println(io, ident, "number of observations: ", x.n) println(io, ident, "Wilcoxon rank-sum statistic: ", x.W) - println(io, ident, "rank sums: ", [sum(x.ranks[x.signs]), sum(x.ranks[map(!, x.signs)])]) + print(io, ident, "rank sums: ") + show(io, [sum(x.ranks[x.signs]), sum(x.ranks[map(!, x.signs)])]) + println(io) println(io, ident, "adjustment for ties: ", x.tie_adjustment) println(io, ident, "normal approximation (μ, σ): ", (x.mu, x.sigma)) end diff --git a/test/show.jl b/test/show.jl index 6626c13d..957ece89 100644 --- a/test/show.jl +++ b/test/show.jl @@ -5,7 +5,7 @@ using HypothesisTests, Test d = [[762,484] [327,239] [468,477]] m = PowerDivergenceTest(d) -@test sprint(show, m) == +@test sprint(show, m, context=:compact => true) == """ Pearson's Chi-square Test ------------------------- @@ -30,7 +30,7 @@ m = PowerDivergenceTest(d) d = [ 20, 15, 25 ] m = PowerDivergenceTest(d) -@test sprint(show, m) == +@test sprint(show, m, context=:compact => true) == """ Pearson's Chi-square Test ------------------------- From 585e793f9c94b131f4a4f96c3c0c5ec0adacbf39 Mon Sep 17 00:00:00 2001 From: Miles Lucas Date: Wed, 9 Oct 2019 00:19:00 -1000 Subject: [PATCH 09/23] Add Waldwolf-Wolfowitz (Runs) Test (#165) --- docs/Project.toml | 1 - docs/src/nonparametric.md | 8 ++++- src/HypothesisTests.jl | 1 + src/wald_wolfowitz.jl | 55 ++++++++++++++++++++++++++++++ test/runtests.jl | 5 +-- test/wald_wolfowitz.jl | 70 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 src/wald_wolfowitz.jl create mode 100644 test/wald_wolfowitz.jl diff --git a/docs/Project.toml b/docs/Project.toml index d7a1eef2..c2785383 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,5 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" [compat] Documenter = "~0.20" diff --git a/docs/src/nonparametric.md b/docs/src/nonparametric.md index 7b31b225..816b069c 100644 --- a/docs/src/nonparametric.md +++ b/docs/src/nonparametric.md @@ -21,7 +21,7 @@ BinomialTest FisherExactTest ``` -## Kolmogorov–Smirnov test +## Kolmogorov-Smirnov test Available are an exact one-sample test and approximate (i.e. asymptotic) one- and two-sample tests. @@ -52,6 +52,12 @@ ApproximateMannWhitneyUTest SignTest ``` +## Wald-Wolfowitz independence test + +```@docs +WaldWolfowitzTest +``` + ## Wilcoxon signed rank test ```@docs diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index e83b5b33..0963fc03 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -187,4 +187,5 @@ include("durbin_watson.jl") include("permutation.jl") include("hotelling.jl") include("bartlett.jl") +include("wald_wolfowitz.jl") end diff --git a/src/wald_wolfowitz.jl b/src/wald_wolfowitz.jl new file mode 100644 index 00000000..c847f462 --- /dev/null +++ b/src/wald_wolfowitz.jl @@ -0,0 +1,55 @@ +export WaldWolfowitzTest + +struct WaldWolfowitzTest{T <: Real} <: HypothesisTest + nabove::Int # Number of points above median (or of value a) + nbelow::Int # Number of points below median (or of value b) + nruns::Int # Number of runs + μ::T # Expected mean + σ::T # Expected variance + z::T # test z-statistic from Normal distribution +end + +testname(::WaldWolfowitzTest) = "Wald-Wolfowitz Test" +population_param_of_interest(x::WaldWolfowitzTest) = ("Number of runs", x.μ, x.nruns) # parameter of interest: name, value under h0, point estimate +default_tail(::WaldWolfowitzTest) = :both +pvalue(test::WaldWolfowitzTest; tail=:both) = pvalue(Normal(0.0, 1.0), test.z; tail=tail) + + +function show_params(io::IO, x::WaldWolfowitzTest, ident="") + println(io, ident, "number of runs: $(x.nruns)") + println(io, ident, "z-statistic: $(x.z)") +end + +""" + WaldWolfowitzTest(x::AbstractVector{Bool}) + WaldWolfowitzTest(x::AbstractVector{<:Real}) + +Perform the Wald-Wolfowitz (or Runs) test of the null hypothesis that the given data is random, or independently sampled. +The data can come as many-valued or two-valued (Boolean). If many-valued, the sample is transformed by labelling each +element as above or below the median. + +Implements: [`pvalue`](@ref) +""" +function WaldWolfowitzTest(x::AbstractVector{Bool}) + n = length(x) + nabove = sum(x) + nbelow = n - nabove + + # Get the expected value and standard deviation + μ = 1 + 2 * nabove * (nbelow / n) + σ = sqrt((μ - 1) * (μ - 2) / (n - 1)) + + # Get the number of runs + nruns = 1 + for k in 1:(n - 1) + @inbounds if x[k] != x[k + 1] + nruns += 1 + end + end + + # calculate simple z-statistic + z = (nruns - μ) / σ + WaldWolfowitzTest(nabove, nbelow, nruns, μ, σ, z) +end + +WaldWolfowitzTest(x::AbstractVector{<:Real}) = WaldWolfowitzTest(x .>= median(x)) diff --git a/test/runtests.jl b/test/runtests.jl index dd216d10..7b2fe5cb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,7 @@ end include("anderson_darling.jl") include("augmented_dickey_fuller.jl") +include("bartlett.jl") include("binomial.jl") include("box_test.jl") include("breusch_godfrey.jl") @@ -22,6 +23,7 @@ include("common.jl") include("durbin_watson.jl") include("fisher.jl") include("jarque_bera.jl") +include("hotelling.jl") include("kolmogorov_smirnov.jl") include("kruskal_wallis.jl") include("mann_whitney.jl") @@ -29,7 +31,6 @@ include("permutation.jl") include("power_divergence.jl") include("show.jl") include("t.jl") +include("wald_wolfowitz.jl") include("wilcoxon.jl") include("z.jl") -include("hotelling.jl") -include("bartlett.jl") diff --git a/test/wald_wolfowitz.jl b/test/wald_wolfowitz.jl new file mode 100644 index 00000000..244c04a4 --- /dev/null +++ b/test/wald_wolfowitz.jl @@ -0,0 +1,70 @@ +using HypothesisTests, Test +using Distributions + +@testset "Wald-Wolfowitz test" begin + +@testset "Many-Valued Observations" begin + # Get set of dependent observations + x = 1:1000 + tst = WaldWolfowitzTest(x) + + @test tst.z ≈ -31.575 atol=1e-3 + @test pvalue(tst) ≈ 8.052e-219 atol=1e-222 + + # Test consistency of z-statistics + @test pvalue(tst) == pvalue(Normal(tst.μ, tst.σ), tst.nruns) + @test pvalue(tst, tail=:left) == pvalue(Normal(tst.μ, tst.σ), tst.nruns, tail=:left) + @test pvalue(tst, tail=:right) == pvalue(Normal(tst.μ, tst.σ), tst.nruns, tail=:right) + expected_output = """ + Wald-Wolfowitz Test + ------------------- + Population details: + parameter of interest: Number of runs + value under h_0: 501.0 + point estimate: 2 + + Test summary: + outcome with 95% confidence: reject h_0 + two-sided p-value: <1e-99 + + Details: + number of runs: 2 + z-statistic: -31.575338477995764 + """ + output = sprint(show, tst) + @test output == expected_output +end + +@testset "Two-Valued Observations" begin + # equivalent data as above (half under median half over) + x = [falses(500); trues(500)] + tst = WaldWolfowitzTest(x) + + @test tst.z ≈ -31.575 atol=1e-3 + @test pvalue(tst) ≈ 8.052e-219 atol=1e-222 + + # Test consistency of z-statistics + @test pvalue(tst) == pvalue(Normal(tst.μ, tst.σ), tst.nruns) + @test pvalue(tst, tail=:left) == pvalue(Normal(tst.μ, tst.σ), tst.nruns, tail=:left) + @test pvalue(tst, tail=:right) == pvalue(Normal(tst.μ, tst.σ), tst.nruns, tail=:right) + expected_output = """ + Wald-Wolfowitz Test + ------------------- + Population details: + parameter of interest: Number of runs + value under h_0: 501.0 + point estimate: 2 + + Test summary: + outcome with 95% confidence: reject h_0 + two-sided p-value: <1e-99 + + Details: + number of runs: 2 + z-statistic: -31.575338477995764 + """ + output = sprint(show, tst) + @test output == expected_output +end + +end From bd8ee6c51e41f74168cc06d36cf2ad463ab8a343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 21 Oct 2019 21:05:21 +0200 Subject: [PATCH 10/23] automatic selection of CI method for PowerDivergenceTest (#143) --- src/power_divergence.jl | 11 ++++++++--- test/power_divergence.jl | 22 ++++++++++++++++++++-- test/show.jl | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/power_divergence.jl b/src/power_divergence.jl index 5ae92cd7..39f1ca21 100644 --- a/src/power_divergence.jl +++ b/src/power_divergence.jl @@ -44,12 +44,14 @@ default_tail(test::PowerDivergenceTest) = :right pvalue(x::PowerDivergenceTest; tail=:right) = pvalue(Chisq(x.df),x.stat; tail=tail) """ - confint(test::PowerDivergenceTest, alpha = 0.05; tail = :both, method = :sison_glaz) + confint(test::PowerDivergenceTest, alpha = 0.05; tail = :both, method = :auto) Compute a confidence interval with coverage 1-`alpha` for multinomial proportions using one of the following methods. Possible values for `method` are: - - `:sison_glaz` (default): Sison-Glaz intervals + - `:auto` (default): If the minimum of the expected cell counts exceeds 100, Quesenberry-Hurst + intervals are used, otherwise Sison-Glaz. + - `:sison_glaz`: Sison-Glaz intervals - `:bootstrap`: Bootstrap intervals - `:quesenberry_hurst`: Quesenberry-Hurst intervals - `:gold`: Gold intervals (asymptotic simultaneous intervals) @@ -66,7 +68,7 @@ one of the following methods. Possible values for `method` are: Mathematical Statistics, 30:56-74, 1963. """ function StatsBase.confint(x::PowerDivergenceTest, alpha::Float64=0.05; - tail::Symbol=:both, method::Symbol=:sison_glaz, correct::Bool=true, + tail::Symbol=:both, method::Symbol=:auto, correct::Bool=true, bootstrap_iters::Int64=10000, GC::Bool=true) check_alpha(alpha) @@ -79,6 +81,9 @@ function StatsBase.confint(x::PowerDivergenceTest, alpha::Float64=0.05; i = StatsBase.confint(x, alpha*2,method=method, GC=GC) Tuple{Float64,Float64}[(i[j][1], 1.0) for j in 1:m] elseif tail == :both + if method == :auto + method = minimum(x.expected) > 100 ? :quesenberry_hurst : :sison_glaz + end if method == :gold ci_gold(x,alpha,correct=correct,GC=GC) elseif method == :sison_glaz diff --git a/test/power_divergence.jl b/test/power_divergence.jl index ae09f273..6c058b08 100644 --- a/test/power_divergence.jl +++ b/test/power_divergence.jl @@ -13,7 +13,7 @@ m = PowerDivergenceTest(d) @test m.theta0 ≈ [0.25523082406125785,0.19670969099133556,0.11593952361049113,0.08935608756107216,0.1935739395970214,0.1491899341788219] @test m.thetahat ≈ [0.2763873775843308,0.1755531374682626,0.11860718171926006,0.08668842945230323,0.16974972796517954,0.17301414581066377] -c = confint(m) +c = confint(m, method=:sison_glaz) c0 = [(.25788900979, .29554669435), (.15705476968, .19471245423), (.10010881393, .13776649848), @@ -62,6 +62,14 @@ show(IOBuffer(), m) m = ChisqTest(d) m = MultinomialLRTest(d) +confint(m) +confint(m, tail=:left) +confint(m, tail=:right) + +confint(m, method = :auto) +confint(m, method = :auto, tail=:left) +confint(m, method = :auto, tail=:right) + confint(m, method = :bootstrap) confint(m, method = :bootstrap, tail=:left) confint(m, method = :bootstrap, tail=:right) @@ -82,7 +90,7 @@ confint(m, method = :sison_glaz, tail=:right) @test_throws ArgumentError confint(m, method=:FOO) @test_throws ArgumentError confint(m, tail=:fox) - +@test confint(m, method = :quesenberry_hurst) == confint(m, method = :auto) == confint(m) #Example 3 in R @@ -138,6 +146,14 @@ show(IOBuffer(), m) m = ChisqTest(d) m = MultinomialLRTest(d) +confint(m) +confint(m, tail=:left) +confint(m, tail=:right) + +confint(m, method = :auto) +confint(m, method = :auto, tail=:left) +confint(m, method = :auto, tail=:right) + confint(m, method = :bootstrap) confint(m, method = :bootstrap, tail=:left) confint(m, method = :bootstrap, tail=:right) @@ -158,6 +174,8 @@ confint(m, method = :sison_glaz, tail=:right) @test_throws ArgumentError confint(m, method=:FOO) @test_throws ArgumentError confint(m, tail=:fox) +@test confint(m, method = :sison_glaz) == confint(m, method = :auto) == confint(m) + # x=[1,2,3,1,2,3] y=[1,1,1,2,2,3] diff --git a/test/show.jl b/test/show.jl index 957ece89..d07c35d9 100644 --- a/test/show.jl +++ b/test/show.jl @@ -13,7 +13,7 @@ m = PowerDivergenceTest(d) parameter of interest: Multinomial Probabilities value under h_0: [0.255231, 0.19671, 0.11594, 0.0893561, 0.193574, 0.14919] point estimate: [0.276387, 0.175553, 0.118607, 0.0866884, 0.16975, 0.173014] - 95% confidence interval: Tuple{Float64,Float64}[(0.2579, 0.2955), (0.1571, 0.1947), (0.1001, 0.1378), (0.0682, 0.1058), (0.1513, 0.1889), (0.1545, 0.1922)] + 95% confidence interval: Tuple{Float64,Float64}[(0.2545, 0.2994), (0.1573, 0.1955), (0.1033, 0.1358), (0.0736, 0.1019), (0.1517, 0.1894), (0.1548, 0.1928)] Test summary: outcome with 95% confidence: reject h_0 From 91893d1b8360c2ded08e0a3ea9cc077b35ca1ce1 Mon Sep 17 00:00:00 2001 From: Raphael Saavedra Date: Tue, 26 Nov 2019 06:11:16 -0300 Subject: [PATCH 11/23] Adding F-test (#167) including unit tests and docs --- docs/src/parametric.md | 5 +++ docs/src/time_series.md | 2 +- src/HypothesisTests.jl | 1 + src/f.jl | 77 +++++++++++++++++++++++++++++++++++++++++ test/f.jl | 49 ++++++++++++++++++++++++++ test/runtests.jl | 1 + 6 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 src/f.jl create mode 100644 test/f.jl diff --git a/docs/src/parametric.md b/docs/src/parametric.md index 10f1452f..2e463b26 100644 --- a/docs/src/parametric.md +++ b/docs/src/parametric.md @@ -28,3 +28,8 @@ OneSampleZTest EqualVarianceZTest UnequalVarianceZTest ``` + +## F-test +```@docs +VarianceFTest +``` \ No newline at end of file diff --git a/docs/src/time_series.md b/docs/src/time_series.md index ed102535..bfe77d73 100644 --- a/docs/src/time_series.md +++ b/docs/src/time_series.md @@ -22,4 +22,4 @@ JarqueBeraTest ## Augmented Dickey-Fuller test ```@docs ADFTest -``` +``` \ No newline at end of file diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 0963fc03..12b45a19 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -188,4 +188,5 @@ include("permutation.jl") include("hotelling.jl") include("bartlett.jl") include("wald_wolfowitz.jl") +include("f.jl") end diff --git a/src/f.jl b/src/f.jl new file mode 100644 index 00000000..2e1ed061 --- /dev/null +++ b/src/f.jl @@ -0,0 +1,77 @@ +# f.jl +# F-tests +# +# Copyright (C) 2019 Raphael Saavedra +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +export VarianceFTest + +struct VarianceFTest <: HypothesisTest + n_x::Int # size of sample 1 + n_y::Int # size of sample 2 + df_x::Int # degrees of freedom 1 + df_y::Int # degrees of freedom 2 + F::Real # test statistic +end + +""" + VarianceFTest(y1::AbstractVector{<: Real}, y2::AbstractVector{<: Real}) + +Perform an F-test of the null hypothesis that two real-valued vectors `y1` and `y2` have equal variances. + +Implements: [`pvalue`](@ref) + +# References + + * George E. P. Box, "Non-Normality and Tests on Variances", Biometrika 40 (3/4): 318–335, 1953. + +# External links + + * [F-test of equality of variances on Wikipedia](https://en.wikipedia.org/wiki/F-test_of_equality_of_variances) +""" +function VarianceFTest(y1::AbstractVector{<: Real}, y2::AbstractVector{<: Real}) + n1, n2 = length(y1), length(y2) + F = var(y1) / var(y2) + return VarianceFTest(n1, n2, n1-1, n2-1, F) +end + +testname(::VarianceFTest) = "Variance F-test" +population_param_of_interest(x::VarianceFTest) = ("variance ratio", 1.0, x.F) +default_tail(test::VarianceFTest) = :both + +function show_params(io::IO, x::VarianceFTest, ident) + println(io, ident, "number of observations: [$(x.n_x), $(x.n_y)]") + println(io, ident, "F statistic: $(x.F)") + println(io, ident, "degrees of freedom: [$(x.df_x), $(x.df_y)]") +end + +function pvalue(x::VarianceFTest; tail=:both) + dist = FDist(x.df_x, x.df_y) + if tail == :both + return 1 - 2*abs(cdf(dist, x.F) - 0.5) + elseif tail == :right + return 1 - cdf(dist, x.F) + elseif tail == :left + return cdf(dist, x.F) + else + throw(ArgumentError("tail=$(tail) is invalid")) + end +end \ No newline at end of file diff --git a/test/f.jl b/test/f.jl new file mode 100644 index 00000000..a0ab58f4 --- /dev/null +++ b/test/f.jl @@ -0,0 +1,49 @@ +using HypothesisTests, Test +using HypothesisTests: default_tail + +@testset "F-tests" begin + @testset "Basic variance F-test" begin + Random.seed!(12) + y1_h0 = 4 .+ randn(500) + y2_h0 = 4 .+ randn(400) + + t = VarianceFTest(y1_h0, y2_h0) + + @test t.n_x == 500 + @test t.n_y == 400 + @test t.df_x == 499 + @test t.df_y == 399 + @test t.F ≈ 0.974693 rtol=1e-5 + @test pvalue(t) ≈ 0.784563 rtol=1e-5 + @test pvalue(t, tail=:left) ≈ 0.392281 rtol=1e-5 + @test pvalue(t, tail=:right) ≈ 0.607718 rtol=1e-5 + @test default_tail(t) == :both + + t = VarianceFTest(y2_h0, y1_h0) + + @test t.n_x == 400 + @test t.n_y == 500 + @test t.df_x == 399 + @test t.df_y == 499 + @test t.F ≈ 1.025963 rtol=1e-5 + @test pvalue(t) ≈ 0.784563 rtol=1e-5 + @test pvalue(t, tail=:right) ≈ 0.392281 rtol=1e-5 + @test pvalue(t, tail=:left) ≈ 0.607718 rtol=1e-5 + @test default_tail(t) == :both + + y1_h1 = 0.7*randn(200) + y2_h1 = 1.3*randn(120) + + t = VarianceFTest(y1_h1, y2_h1) + + @test t.n_x == 200 + @test t.n_y == 120 + @test t.df_x == 199 + @test t.df_y == 119 + @test t.F ≈ 0.367547 rtol=1e-5 + @test pvalue(t) < 1e-8 + @test default_tail(t) == :both + @test pvalue(t, tail=:left) < 1e-8 + @test pvalue(t, tail=:right) > 1.0 - 1e-8 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 7b2fe5cb..ade795d8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,3 +34,4 @@ include("t.jl") include("wald_wolfowitz.jl") include("wilcoxon.jl") include("z.jl") +include("f.jl") \ No newline at end of file From b8b22c5ada020102f0a90b250940dd8a4f7ef616 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Fri, 29 Nov 2019 13:40:48 -0800 Subject: [PATCH 12/23] Add partial correlation test (#146) Implemented as a three-argument constructor for `CorrelationTest`, which is added in this PR. The two-argument constructor will come later. --- Project.toml | 3 ++ REQUIRE | 2 +- docs/src/multivariate.md | 6 ++++ src/HypothesisTests.jl | 1 + src/correlation.jl | 78 ++++++++++++++++++++++++++++++++++++++++ test/correlation.jl | 31 ++++++++++++++++ test/data/wechsler.txt | 37 +++++++++++++++++++ test/runtests.jl | 3 +- 8 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 src/correlation.jl create mode 100644 test/correlation.jl create mode 100644 test/data/wechsler.txt diff --git a/Project.toml b/Project.toml index 9ca78631..94b96994 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,9 @@ Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +[compat] +StatsBase = ">=0.27.0" + [extras] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/REQUIRE b/REQUIRE index 00891c56..2a4362df 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,6 @@ julia 0.7 Distributions 0.16.3 Roots -StatsBase 0.25.0 +StatsBase 0.27.0 Rmath 0.5.0 Combinatorics 0.7.0 diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index 4b587a56..c136da71 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -16,3 +16,9 @@ This is equivalent to Box's ``M``-test for two groups. ```@docs BartlettTest ``` + +## Partial correlation test + +```@docs +CorrelationTest +``` diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index 12b45a19..fdfcab70 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -189,4 +189,5 @@ include("hotelling.jl") include("bartlett.jl") include("wald_wolfowitz.jl") include("f.jl") +include("correlation.jl") end diff --git a/src/correlation.jl b/src/correlation.jl new file mode 100644 index 00000000..d40b1b66 --- /dev/null +++ b/src/correlation.jl @@ -0,0 +1,78 @@ +# Correlation +# TODO: Implement `CorrelationTest` for two arguments + +using Statistics: clampcor + +export CorrelationTest + +""" + CorrelationTest(x, y, Z) + +Perform a t-test for the hypothesis that ``\\text{Cor}(x,y|Z=z) = 0``, i.e. the partial +correlation of vectors `x` and `y` given the matrix `Z` is zero. + +Implements `pvalue` for the t-test. +Implements `confint` using an approximate confidence interval based on Fisher's +``z``-transform. + +See also `partialcor` from StatsBase. + +# External resources + +* [Partial correlation on Wikipedia](https://en.wikipedia.org/wiki/Partial_correlation#As_conditional_independence_test) + (for the construction of the confidence interval) +* [Section testing using Student's t-distribution from Pearson correlation coefficient on Wikipedia](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Testing_using_Student's_t-distribution) +* [K.J. Levy and S.C. Narula (1978): Testing Hypotheses concerning Partial Correlations: Some Methods and Discussion. International Statistical Review 46(2).](https://www.jstor.org/stable/1402814) +""" +struct CorrelationTest{T<:Real} <: HypothesisTest + r::T + n::Int + k::Int + t::T + + # Error checking is done in `partialcor` + function CorrelationTest(x::AbstractVector, y::AbstractVector, Z::AbstractMatrix) + r = partialcor(x, y, Z) + n, k = size(Z) + t = r * sqrt((n - 2 - k) / (1 - r^2)) + return new{typeof(r)}(r, n, k, t) + end + + function CorrelationTest(x::AbstractVector, y::AbstractVector, z::AbstractVector) + r = partialcor(x, y, z) + n = length(z) + t = r * sqrt((n - 3) / (1 - r^2)) + return new{typeof(r)}(r, n, 1, t) + end +end + +testname(p::CorrelationTest) = + string("Test for nonzero ", p.k == 0 ? "partial " : "", " correlation") + +function population_param_of_interest(p::CorrelationTest) + param = p.k == 0 ? "Partial correlation" : "Correlation" + (param, zero(p.r), p.r) +end + +StatsBase.nobs(p::CorrelationTest) = p.n +StatsBase.dof(p::CorrelationTest) = p.n - 2 - p.k + +function StatsBase.confint(test::CorrelationTest{T}, alpha::Float64=0.05) where T + dof(test) > 1 || return (-one(T), one(T)) # Otherwise we can get NaNs + q = quantile(Normal(), 1 - alpha / 2) + fisher = atanh(test.r) + bound = q / sqrt(dof(test) - 1) + elo = clampcor(tanh(fisher - bound)) + ehi = clampcor(tanh(fisher + bound)) + return (elo, ehi) +end + +default_tail(::CorrelationTest) = :both +pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) + +function show_params(io::IO, test::CorrelationTest, indent="") + println(io, indent, "number of observations: ", nobs(test)) + println(io, indent, "number of conditional variables: ", test.k) + println(io, indent, "t-statistic: ", test.t) + println(io, indent, "degrees of freedom: ", dof(test)) +end diff --git a/test/correlation.jl b/test/correlation.jl new file mode 100644 index 00000000..4c27ed2b --- /dev/null +++ b/test/correlation.jl @@ -0,0 +1,31 @@ +using HypothesisTests +using Test +using DelimitedFiles +using StatsBase + +@testset "Partial correlation" begin + # Columns are information, similarities, arithmetic, picture completion + wechsler = readdlm(joinpath(@__DIR__, "data", "wechsler.txt"))[:,2:end] + w = CorrelationTest(wechsler[:,1], wechsler[:,2], wechsler[:,3:4]) + let out = sprint(show, w) + @test occursin("reject h_0", out) && !occursin("fail to", out) + end + let ci = confint(w) + @test first(ci) ≈ 0.4963917 atol=1e-6 + @test last(ci) ≈ 0.8447292 atol=1e-6 + end + @test nobs(w) == 37 + @test dof(w) == 33 + @test pvalue(w) < 0.00001 + + X = [ 2 1 0 + 4 2 0 + 15 3 1 + 20 4 1] + x = CorrelationTest(view(X,:,1), view(X,:,2), view(X,:,3)) + @test occursin("fail to reject", sprint(show, x)) + @test confint(x) == (-1.0, 1.0) + @test nobs(x) == 4 + @test dof(x) == 1 + @test pvalue(x) ≈ 0.25776212 atol=1e-6 +end diff --git a/test/data/wechsler.txt b/test/data/wechsler.txt new file mode 100644 index 00000000..913211f5 --- /dev/null +++ b/test/data/wechsler.txt @@ -0,0 +1,37 @@ + 1 7 5 9 8 + 2 8 8 5 6 + 3 16 18 11 9 + 4 8 3 7 9 + 5 6 3 13 9 + 6 11 8 10 10 + 7 12 7 9 8 + 8 8 11 9 3 + 9 14 12 11 4 +10 13 13 13 6 +11 13 9 9 9 +12 13 10 15 7 +13 14 11 12 8 +14 15 11 11 10 +15 13 10 15 9 +16 10 5 8 6 +17 10 3 7 7 +18 17 13 13 7 +19 10 6 10 7 +20 10 10 15 8 +21 14 7 11 5 +22 16 11 12 11 +23 10 7 14 6 +24 10 10 9 6 +25 10 7 10 10 +26 7 6 5 9 +27 15 12 10 6 +28 17 15 15 8 +29 16 13 16 9 +30 13 10 17 8 +31 13 10 17 10 +32 19 12 16 10 +33 19 15 17 11 +34 13 10 7 8 +35 15 11 12 8 +36 16 9 11 11 +37 14 13 14 9 \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index ade795d8..629a0981 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ include("hotelling.jl") include("kolmogorov_smirnov.jl") include("kruskal_wallis.jl") include("mann_whitney.jl") +include("correlation.jl") include("permutation.jl") include("power_divergence.jl") include("show.jl") @@ -34,4 +35,4 @@ include("t.jl") include("wald_wolfowitz.jl") include("wilcoxon.jl") include("z.jl") -include("f.jl") \ No newline at end of file +include("f.jl") From e8f9fe4b49930b20f4e7a2e0cce3eb4d2fff5c86 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 1 Dec 2019 11:00:15 +0100 Subject: [PATCH 13/23] Call arguments x and y in VarianceFTest --- src/f.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/f.jl b/src/f.jl index 2e1ed061..d4f4e367 100644 --- a/src/f.jl +++ b/src/f.jl @@ -33,9 +33,9 @@ struct VarianceFTest <: HypothesisTest end """ - VarianceFTest(y1::AbstractVector{<: Real}, y2::AbstractVector{<: Real}) + VarianceFTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) -Perform an F-test of the null hypothesis that two real-valued vectors `y1` and `y2` have equal variances. +Perform an F-test of the null hypothesis that two real-valued vectors `x` and `y` have equal variances. Implements: [`pvalue`](@ref) @@ -47,9 +47,9 @@ Implements: [`pvalue`](@ref) * [F-test of equality of variances on Wikipedia](https://en.wikipedia.org/wiki/F-test_of_equality_of_variances) """ -function VarianceFTest(y1::AbstractVector{<: Real}, y2::AbstractVector{<: Real}) - n1, n2 = length(y1), length(y2) - F = var(y1) / var(y2) +function VarianceFTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) + n1, n2 = length(x), length(y) + F = var(x) / var(y) return VarianceFTest(n1, n2, n1-1, n2-1, F) end @@ -74,4 +74,4 @@ function pvalue(x::VarianceFTest; tail=:both) else throw(ArgumentError("tail=$(tail) is invalid")) end -end \ No newline at end of file +end From c0b8eaca9bae410f97146e7b44df7d5f571342b9 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 1 Dec 2019 15:46:08 +0100 Subject: [PATCH 14/23] Remove unnecessary T<: (#171) --- src/wilcoxon.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wilcoxon.jl b/src/wilcoxon.jl index a213f3a2..2f5727aa 100644 --- a/src/wilcoxon.jl +++ b/src/wilcoxon.jl @@ -29,7 +29,7 @@ export SignedRankTest, ExactSignedRankTest, ApproximateSignedRankTest # Automatic exact/normal selection """ SignedRankTest(x::AbstractVector{<:Real}) - SignedRankTest(x::AbstractVector{<:Real}, y::AbstractVector{T<:Real}) + SignedRankTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) Perform a Wilcoxon signed rank test of the null hypothesis that the distribution of `x` (or the difference `x - y` if `y` is provided) has zero median against the alternative From 3a9e0c7d03559a84fda5a3944917f8005f0c0dae Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Mon, 23 Dec 2019 10:52:48 -0500 Subject: [PATCH 15/23] Simple additions --- src/HypothesisTests.jl | 4 ++-- src/correlation.jl | 1 + src/t.jl | 3 --- src/z.jl | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index fdfcab70..4fe8695f 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase using Combinatorics: combinations, permutations using Rmath: pwilcox, psignrank -import StatsBase.confint +import StatsBase.confint, StatsBase.stderror -export testname, pvalue, confint +export testname, pvalue, confint, stderror abstract type HypothesisTest end check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y) diff --git a/src/correlation.jl b/src/correlation.jl index d40b1b66..468963df 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -69,6 +69,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) +stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test)) diff --git a/src/t.jl b/src/t.jl index c10dac31..909be189 100644 --- a/src/t.jl +++ b/src/t.jl @@ -51,9 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - ## ONE SAMPLE T-TEST struct OneSampleTTest <: TTest diff --git a/src/z.jl b/src/z.jl index 18915a81..ec3c8ca8 100644 --- a/src/z.jl +++ b/src/z.jl @@ -51,10 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - - ## ONE SAMPLE Z-TEST struct OneSampleZTest <: ZTest From bb9e733b29c413410f2a846ddcc469da5be95b45 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:25:04 -0600 Subject: [PATCH 16/23] Add stderror and meandiff functions for TTest --- src/t.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/t.jl b/src/t.jl index 2cb7c251..c10dac31 100644 --- a/src/t.jl +++ b/src/t.jl @@ -48,6 +48,11 @@ function StatsBase.confint(x::TTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar ## ONE SAMPLE T-TEST From cca9afa0cdb7ca67cc466928123c613910ca4729 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:30:52 -0600 Subject: [PATCH 17/23] Same for ZTest --- src/z.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/z.jl b/src/z.jl index d30e2b64..18915a81 100644 --- a/src/z.jl +++ b/src/z.jl @@ -48,6 +48,12 @@ function StatsBase.confint(x::ZTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar + ## ONE SAMPLE Z-TEST From 20c520d3f94b3d45cb18fdd4a3264d6fc8e28fcb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Mon, 23 Dec 2019 10:52:48 -0500 Subject: [PATCH 18/23] Simple additions --- src/HypothesisTests.jl | 4 ++-- src/correlation.jl | 1 + src/t.jl | 3 --- src/z.jl | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index fdfcab70..4fe8695f 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase using Combinatorics: combinations, permutations using Rmath: pwilcox, psignrank -import StatsBase.confint +import StatsBase.confint, StatsBase.stderror -export testname, pvalue, confint +export testname, pvalue, confint, stderror abstract type HypothesisTest end check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y) diff --git a/src/correlation.jl b/src/correlation.jl index d40b1b66..468963df 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -69,6 +69,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) +stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test)) diff --git a/src/t.jl b/src/t.jl index c10dac31..909be189 100644 --- a/src/t.jl +++ b/src/t.jl @@ -51,9 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - ## ONE SAMPLE T-TEST struct OneSampleTTest <: TTest diff --git a/src/z.jl b/src/z.jl index 18915a81..ec3c8ca8 100644 --- a/src/z.jl +++ b/src/z.jl @@ -51,10 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - - ## ONE SAMPLE Z-TEST struct OneSampleZTest <: ZTest From ce6edab226a8685d3d2d0f86e750b59e2ed336ae Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:25:04 -0600 Subject: [PATCH 19/23] Add stderror and meandiff functions for TTest --- src/t.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/t.jl b/src/t.jl index 2cb7c251..c10dac31 100644 --- a/src/t.jl +++ b/src/t.jl @@ -48,6 +48,11 @@ function StatsBase.confint(x::TTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar ## ONE SAMPLE T-TEST From 13a0db4f0f295403b570332df14bbf63cff89cdb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 21 Feb 2019 10:30:52 -0600 Subject: [PATCH 20/23] Same for ZTest --- src/z.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/z.jl b/src/z.jl index d30e2b64..18915a81 100644 --- a/src/z.jl +++ b/src/z.jl @@ -48,6 +48,12 @@ function StatsBase.confint(x::ZTest, alpha::Float64=0.05; tail=:both) end end +# The standard error of the difference +StatsBase.stderror(x::TTest) = x.stderr + +# The magnitude of the difference +meandiff(x::TTest) = x.xbar + ## ONE SAMPLE Z-TEST From e73bd4e5691e2ab8e6e9cebdea23e088fe7a384a Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Thu, 21 Feb 2019 16:24:18 -0800 Subject: [PATCH 21/23] Add Barlett's test (#147) --- docs/src/multivariate.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md index c136da71..973b381b 100644 --- a/docs/src/multivariate.md +++ b/docs/src/multivariate.md @@ -22,3 +22,12 @@ BartlettTest ```@docs CorrelationTest ``` + +## Equality of covariance matrices + +Bartlett's test for equality of two covariance matrices is provided. +This is equivalent to Box's ``M``-test for two groups. + +```@docs +BartlettsTest +``` From bc9cb6489bd2ac3a98de56f678377a0e241038a0 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Mon, 23 Dec 2019 10:52:48 -0500 Subject: [PATCH 22/23] Simple additions --- src/HypothesisTests.jl | 4 ++-- src/correlation.jl | 1 + src/t.jl | 3 --- src/z.jl | 4 ---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl index fdfcab70..4fe8695f 100644 --- a/src/HypothesisTests.jl +++ b/src/HypothesisTests.jl @@ -29,9 +29,9 @@ using Distributions, Roots, StatsBase using Combinatorics: combinations, permutations using Rmath: pwilcox, psignrank -import StatsBase.confint +import StatsBase.confint, StatsBase.stderror -export testname, pvalue, confint +export testname, pvalue, confint, stderror abstract type HypothesisTest end check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length(y) diff --git a/src/correlation.jl b/src/correlation.jl index d40b1b66..468963df 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -69,6 +69,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) +stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test)) diff --git a/src/t.jl b/src/t.jl index c10dac31..909be189 100644 --- a/src/t.jl +++ b/src/t.jl @@ -51,9 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - ## ONE SAMPLE T-TEST struct OneSampleTTest <: TTest diff --git a/src/z.jl b/src/z.jl index 18915a81..ec3c8ca8 100644 --- a/src/z.jl +++ b/src/z.jl @@ -51,10 +51,6 @@ end # The standard error of the difference StatsBase.stderror(x::TTest) = x.stderr -# The magnitude of the difference -meandiff(x::TTest) = x.xbar - - ## ONE SAMPLE Z-TEST struct OneSampleZTest <: ZTest From af003b0243017af448317ba0dbd052b17cc2e6f2 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Fri, 3 Jan 2020 18:42:43 -0500 Subject: [PATCH 23/23] StatsBase --- src/correlation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.jl b/src/correlation.jl index 468963df..3d8a6a84 100644 --- a/src/correlation.jl +++ b/src/correlation.jl @@ -69,7 +69,7 @@ end default_tail(::CorrelationTest) = :both pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail) -stderror(test::CorrelationTest) = sqrt(1/dof(test)) +StatsBase.stderror(test::CorrelationTest) = sqrt(1/dof(test)) function show_params(io::IO, test::CorrelationTest, indent="") println(io, indent, "number of observations: ", nobs(test))