Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hypothesis testing #46

Draft
wants to merge 4 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.5.6"
[deps]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DirectionalStatistics = "e814f24e-44b0-11e9-2fd5-aba2b6113d95"
EcoBase = "a58aae7d-b440-5a11-b283-399458f99aac"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -24,6 +25,7 @@ RCall = "0.13"
RecipesBase = "0.6, 0.7, 0.8, 1"
Requires = "^1"
StatsBase = "0.32, 0.33"
DirectionalStatistics = "0.1.0 - 1.0"
julia = "^1"

[extras]
Expand Down
11 changes: 11 additions & 0 deletions src/Diversity.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,17 @@ export hillnumber

end # sub-module Hill

"""
Diversity.Testing

Hypothesis tests for distance matrices.
"""
module Testing

include("Testing.jl")
export dispersion, permutest, mantel
end

# Path into package
path(path...; dir::String = "test") = joinpath(@__DIR__, "..", dir, path...)

Expand Down
6 changes: 6 additions & 0 deletions src/Testing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

using LinearAlgebra, Random, DirectionalStatistics,AxisArrays
import Statistics: mean
include("Testing/ANOVA.jl")
include("Testing/dispersion.jl")
include("Testing/permutation_tests.jl")
67 changes: 67 additions & 0 deletions src/Testing/ANOVA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

# Compute F-statistic, given vector of vectors
#ANOVA
function f(R)
X = vcat(R...)
N = length(X)
nj = length.(R)
X̅ = mean(X)
X̅j = mean.(R)
k = length(R)
top = sum(nj .*(X̅j .- X̅) .^2) /(k-1)
bottom = sum([sum((R[i] .- X̅j[i]) .^2) for i in 1:k])/(N-k)
return top/bottom
end

function f(R,N,nj,X̅,k)
X̅j = mean.(R)
top = 0.0
bottom = 0.0
@inbounds for i in 1:k
top += nj[i] * (X̅j[i] - X̅)^2
for j in 1:nj[i]
bottom += (R[i][j] - X̅j[i])^2
end
end
top/= (k-1)
bottom /= (N-k)
return top/bottom
end

#Pairwise ANOVA
function f_pairs(R)
n = length(R)
F = zeros(n,n)
for j in 1:n-1
for i in j+1:n
F[i,j] = f([R[i],R[j]])
end
end
return F
end
function f_pairs(R ::Vector,N ::Array{Int},nj ::Matrix{Tuple},X̅ ::Matrix{Float64})
n = length(R)
F = zeros(n,n)
for j in 1:n-1
for i in j+1:n
F[i,j] = f([R[i],R[j]],N[i,j],nj[i,j],X̅[i,j],2)
end
end
return F
end
# get parameter values corresponding to each pairwise combination of groups
function get_pars(R)
n = length(R)
N_p = Array{Int}(undef,n,n)
nj_p= Array{Tuple}(undef,n,n)
X̅_p = Array{Float64}(undef,n,n)

for j in 1:n-1
for i in j+1:n
N_p[i,j] = length(R[i]) + length(R[j])
nj_p[i,j] = (length(R[i]) , length(R[j]))
X̅_p[i,j] = mean(vcat(R[i]...,R[j]...))
end
end
return N_p,nj_p,X̅_p
end
98 changes: 98 additions & 0 deletions src/Testing/dispersion.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# structs

struct Disp
F ::Float64
pairwise_F ::Array{Float64}
medians ::Union{Tuple,Vector}
residuals ::Vector{Matrix{Float64}}
means ::Vector{Float64}
group ::Vector
levels ::Vector
group_residuals ::Vector
end
#Helper functions

# calculate distances from medians
function get_residuals(x,c)
d = x .-c
sum(d .^2,dims =2)
end

# doube center matrix
function double_center(D)
A = D .- mean(D,dims = 1)
return A .- mean(A,dims = 2)
end

# calculate medians
# Get spatial median as a row vector
geo_median(X) = transpose(DirectionalStatistics.geometric_median([row for row in eachrow(X)]))
#apply `geo_median` function to get positive_inds and negative_inds median for each group
function geo_median(pco_space, group, positive_inds)

spMedPos = [geo_median(pco_space[group .== g,positive_inds]) for g in unique(group)]
spMedNeg = [geo_median(pco_space[group .== g, .!positive_inds]) for g in unique(group)]

return (spMedPos,spMedNeg )
end
function geo_median(pco_space, group)

return [geo_median(pco_space[group .== g,:]) for g in unique(group)]
end

function dispersion(D,group;metric = false)
"""
Quantify the dispersion around the group spatial median for each group in D, according to the given distance metric.
Once X is converted to a distance matrix, it is then transformed by principal coordinate analysis before medians and
distances are calculated. The algorithm is based on Anderson (2006).

The function accepts:
D: A dissimiarity or distance matrix
group: A vector containing group labels
metric: (key word) boolean value specifying whether or not the distance used used metric

This function returns a named tuple containing:
F = Global F-Statistic
pairwise_F = pairwise F-statistics
medians = spatial (aka geometric) median of each group in the transformed coordinates
residuals = vector containing each observations distance from its group median
means = vector containing the means distance to median for each group
group = the original grouping vector
levels = unique items from 'group'

Anderson, M.J. (2006) Distance-based tests for homogeneity of multivariate dispersions. Biometrics 62(1), 245--253.
"""
levels = sort(unique(group))

if metric == false
# Double center the matrix.
A = double_center(D .^2 )
vals, vecs = eigen(Hermitian(-A/2))
pco_space = vecs * diagm(sqrt.(abs.(vals)))
# record indices corresponding to postitive eigenvalues
positive_inds = real.(vals) .> 0.0
med = geo_median(pco_space,ones(size(pco_space)[1]),positive_inds)
medians = geo_median(pco_space, group, positive_inds)
# calculate distances (to median) for "positive_inds" and "negative_inds" vectors separately. in orer to
# subtract "negative_inds" from "positive_inds". See Anderson (2006) for details.
dis_pos = [get_residuals(pco_space[group .== levels[i],positive_inds], medians[1][i]) for i in 1:length(levels)]
dis_neg = [get_residuals(pco_space[group .== levels[i], .!positive_inds], medians[2][i]) for i in 1:length(levels)]
# Where dis_neg > dis_pos, set residual = 0 by taking only the real part
# of the square root of a negative.
residuals = [(real.(sqrt.(Complex.(dis_pos[i] .- dis_neg[i]))) ) for i in 1:length(dis_pos)]
group_dis_pos = get_residuals(vcat(medians[1]...), med[1][1])
group_dis_neg = get_residuals(vcat(medians[2]...), med[2][1])
group_residuals = [(real.(sqrt.(Complex.(group_dis_pos[i] .- group_dis_neg[i]))) ) for i in 1:length(group_dis_pos)]

else
medians = geo_median(D, group)
med = geo_median(D,ones(size(D)[1]))
residuals = [get_residuals(D[group .== levels[i],:], medians[i]) for i in 1:length(levels)]
group_residuals = vec(get_residuals(vcat(medians...), med[1]))
end
F = f(residuals)
F_pairs = f_pairs(residuals)
means = AxisArray(mean.(residuals),string.(levels))
return Disp(F, F_pairs, medians, residuals, means,group, levels,group_residuals)
end

107 changes: 107 additions & 0 deletions src/Testing/permutation_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@

function permutest(disp ::Disp,n_perm = 10000)
"""
This function permutes the residuals returned by 'dispersion' to provide global and pairwise
P-values. Currently, no corrections are made for multiple testing. MultipleTesting.jl provides
this functionality, if desired

The function accepts:
disp: The named tuple returned by 'dispersion'
n_perm: the number of permutations

This function returns a named tuple containing:
P = Global P-value
pairwise_P = pairwise P-value
F = Global F-Statistic
pairwise_F = pairwise F-statistics

https://github.com/juliangehring/MultipleTesting.jl
"""
# read in values from `disp`
F = disp.F
R = disp.residuals
F_pairs = disp.pairwise_F
group = disp.group
levels = disp.levels
level_names = string.(levels)
inds = [group .== level for level in levels]
n = length(levels)

# pre-calculate values for ANOVA function
r = vcat(R...)
X = vcat(R...)
N = length(X)
nj = Tuple(length.(R))
X̅ = mean(X)
k = length(R)
# generate empty containers for F values
perm = Vector{Float64}(undef,n_perm)
#rs = copy(R)#Vector{Vector}(undef,n)
# run permutation
@inbounds for p in 1:n_perm
shuffle!(r)
rs = [view(r,inds[i]) for i in 1:n]

perm[p] =f(rs,N,nj,X̅,k)
end
ppairs =zeros(k,k)

@inbounds for i in 1:k-1
for j in i+1:k
r_1 = r[group .== levels[i]]
r_2 = r[group .== levels[j]]
pstat =permutest(r_1,r_2, F_pairs[j,i],n_perm)
ppairs[j,i] = pstat
end
end
# calculate P-values and return P and F values
P = sum(perm .> F)/n_perm
P_pairs = AxisArray(ppairs,level_names,level_names)
F_pairs = AxisArray(LowerTriangular(F_pairs), level_names,level_names)

return (P =P,pairwise_P = P_pairs,F = F, pairwise_F =F_pairs)
end

function permutest(r_1,r_2 ,F,n_perm = 1000)
r = vcat(r_1,r_2)
nj = (length(r_1),length(r_2))
n = 2
N = sum(nj)
# pre-calculate values for ANOVA function
X̅ = mean(r)
k = 2
inds = (1:nj[1],1:nj[2])
# generate empty containers for F values
perm = Vector{Float64}(undef,n_perm)
# run permutation
@inbounds for p in 1:n_perm
shuffle!(r)
rs = (view(r,inds[1]) ,view(r,inds[2]) )
perm[p] =f(rs,N,nj,X̅,k)
end
P = sum(perm .> F)/n_perm
return P
end


get_tri(x) = [x[i,j] for i in 2:size(x)[1] for j in 1:i]
function mantel(x_1,x_2, n_perm = 1000)
"""
A permutation based test of the correlation between two matrices. Given two dissimilarity/distance matrices of equal size, calculates the P-value as
the probability of obtaining the same or greater correlation coefficient after permuting one of the matrices.
"""
x = get_tri(x_1)
y = get_tri(x_2)
perm_c = Vector{Float64}(undef,n_perm)

c = cor(x,y)
@inbounds for i in 1:n_perm
shuffle!(y)
perm_c[i] = cor(x,y)
end

sum(perm_c .> c)/n_perm
end