EcoJulia · EvoArt · Apr 25, 2022 · Apr 25, 2022 · Apr 25, 2022 · Apr 25, 2022
diff --git a/Project.toml b/Project.toml
@@ -6,6 +6,7 @@ version = "0.5.6"
 [deps]
 AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+DirectionalStatistics = "e814f24e-44b0-11e9-2fd5-aba2b6113d95"
 EcoBase = "a58aae7d-b440-5a11-b283-399458f99aac"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -24,6 +25,7 @@ RCall = "0.13"
 RecipesBase = "0.6, 0.7, 0.8, 1"
 Requires = "^1"
 StatsBase = "0.32, 0.33"
+DirectionalStatistics = "0.1.0 - 1.0"
 julia = "^1"
 
 [extras]

diff --git a/src/Diversity.jl b/src/Diversity.jl
@@ -190,6 +190,17 @@ export hillnumber
 
 end # sub-module Hill
 
+"""
+    Diversity.Testing
+
+Hypothesis tests for distance matrices.
+"""
+module Testing
+
+include("Testing.jl")
+export dispersion, permutest, mantel
+end
+
 # Path into package
 path(path...; dir::String = "test") = joinpath(@__DIR__, "..", dir, path...)
 

diff --git a/src/Testing.jl b/src/Testing.jl
@@ -0,0 +1,6 @@
+
+using LinearAlgebra,  Random, DirectionalStatistics,AxisArrays
+import Statistics: mean
+include("Testing/ANOVA.jl")
+include("Testing/dispersion.jl")
+include("Testing/permutation_tests.jl")
diff --git a/src/Testing/ANOVA.jl b/src/Testing/ANOVA.jl
@@ -0,0 +1,67 @@
+
+# Compute F-statistic, given vector of vectors
+#ANOVA
+function f(R)
+    X = vcat(R...)
+    N = length(X)
+    nj = length.(R)
+    X̅ = mean(X)
+    X̅j = mean.(R)
+    k = length(R)
+    top = sum(nj .*(X̅j .- X̅) .^2) /(k-1)
+    bottom = sum([sum((R[i] .- X̅j[i]) .^2) for i in 1:k])/(N-k)
+    return top/bottom
+end
+
+function f(R,N,nj,X̅,k)
+    X̅j = mean.(R)
+    top = 0.0
+    bottom = 0.0
+    @inbounds for i in 1:k
+        top += nj[i] * (X̅j[i] - X̅)^2
+        for j in 1:nj[i]
+            bottom += (R[i][j] - X̅j[i])^2
+        end
+    end
+    top/= (k-1)
+    bottom /= (N-k)
+    return top/bottom
+end
+
+#Pairwise ANOVA
+function f_pairs(R) 
+    n = length(R)
+    F = zeros(n,n)
+    for j in 1:n-1
+        for i in j+1:n
+            F[i,j] = f([R[i],R[j]])
+        end
+    end
+    return F
+end
+function f_pairs(R ::Vector,N ::Array{Int},nj ::Matrix{Tuple},X̅ ::Matrix{Float64}) 
+    n = length(R)
+    F = zeros(n,n)
+    for j in 1:n-1
+        for i in j+1:n
+            F[i,j] = f([R[i],R[j]],N[i,j],nj[i,j],X̅[i,j],2)
+        end
+    end
+    return F
+end
+# get parameter values corresponding to each pairwise combination of groups
+function get_pars(R)
+    n = length(R)
+    N_p = Array{Int}(undef,n,n)
+    nj_p= Array{Tuple}(undef,n,n)
+    X̅_p = Array{Float64}(undef,n,n)
+
+    for j in 1:n-1
+        for i in j+1:n
+            N_p[i,j] = length(R[i]) + length(R[j])
+            nj_p[i,j] = (length(R[i]) , length(R[j]))
+            X̅_p[i,j] = mean(vcat(R[i]...,R[j]...))
+        end
+    end
+    return N_p,nj_p,X̅_p
+end
diff --git a/src/Testing/dispersion.jl b/src/Testing/dispersion.jl
@@ -0,0 +1,98 @@
+# structs
+
+struct Disp
+    F ::Float64
+    pairwise_F ::Array{Float64} 
+    medians ::Union{Tuple,Vector}
+    residuals ::Vector{Matrix{Float64}} 
+    means ::Vector{Float64}
+    group  ::Vector
+    levels  ::Vector
+    group_residuals ::Vector
+end
+#Helper functions
+
+# calculate distances from medians
+function get_residuals(x,c)
+    d = x .-c
+    sum(d .^2,dims =2) 
+end
+
+# doube center matrix
+function double_center(D)
+    A = D .- mean(D,dims = 1)
+    return A .- mean(A,dims = 2) 
+end
+
+# calculate medians 
+# Get spatial median as a row vector
+geo_median(X) = transpose(DirectionalStatistics.geometric_median([row for row in eachrow(X)]))
+#apply `geo_median` function to get positive_inds and negative_inds median for each group
+function geo_median(pco_space, group, positive_inds)
+
+    spMedPos = [geo_median(pco_space[group .== g,positive_inds]) for g in unique(group)]
+    spMedNeg = [geo_median(pco_space[group .== g, .!positive_inds]) for g in unique(group)]
+
+    return (spMedPos,spMedNeg )
+end
+function geo_median(pco_space, group)
+
+    return [geo_median(pco_space[group .== g,:]) for g in unique(group)]
+end
+
+function dispersion(D,group;metric = false)
+    """
+    Quantify the dispersion around the group spatial median for each group in D, according to the given distance metric.
+    Once X is converted to a distance matrix, it is then transformed by principal coordinate analysis before medians and 
+    distances are calculated. The algorithm is based on Anderson (2006). 
+
+    The function accepts:
+        D: A dissimiarity or distance matrix
+        group: A vector containing group labels
+        metric: (key word) boolean value specifying whether or not the distance used used metric
+
+    This function returns a named tuple containing:
+        F = Global F-Statistic 
+        pairwise_F = pairwise F-statistics
+        medians = spatial (aka geometric) median of each group in the transformed coordinates
+        residuals = vector containing each observations distance from its group median
+        means = vector containing the means distance to median for each group
+        group = the original grouping vector
+        levels = unique items from 'group'
+
+    Anderson, M.J. (2006) Distance-based tests for homogeneity of multivariate dispersions. Biometrics 62(1), 245--253.
+    """
+    levels = sort(unique(group))
+
+    if metric == false
+        # Double center the matrix.
+        A = double_center(D .^2 ) 
+        vals, vecs = eigen(Hermitian(-A/2))
+        pco_space = vecs * diagm(sqrt.(abs.(vals))) 
+        # record indices corresponding to postitive eigenvalues
+        positive_inds = real.(vals) .> 0.0 
+        med = geo_median(pco_space,ones(size(pco_space)[1]),positive_inds)
+        medians = geo_median(pco_space, group, positive_inds)
+        # calculate distances (to median) for "positive_inds" and "negative_inds" vectors separately. in orer to 
+        # subtract "negative_inds" from "positive_inds". See Anderson (2006) for details.
+        dis_pos = [get_residuals(pco_space[group .== levels[i],positive_inds], medians[1][i]) for i in 1:length(levels)]
+        dis_neg = [get_residuals(pco_space[group .== levels[i], .!positive_inds], medians[2][i]) for i in 1:length(levels)]
+        # Where dis_neg > dis_pos, set residual = 0 by taking only the real part 
+        # of the square root of a negative. 
+        residuals = [(real.(sqrt.(Complex.(dis_pos[i] .- dis_neg[i]))) )   for i in 1:length(dis_pos)] 
+        group_dis_pos = get_residuals(vcat(medians[1]...), med[1][1]) 
+        group_dis_neg = get_residuals(vcat(medians[2]...), med[2][1])
+        group_residuals = [(real.(sqrt.(Complex.(group_dis_pos[i] .- group_dis_neg[i]))) )   for i in 1:length(group_dis_pos)] 
+
+    else
+        medians = geo_median(D, group)
+        med = geo_median(D,ones(size(D)[1]))
+        residuals = [get_residuals(D[group .== levels[i],:], medians[i]) for i in 1:length(levels)]
+        group_residuals = vec(get_residuals(vcat(medians...), med[1])) 
+    end
+    F = f(residuals)
+    F_pairs = f_pairs(residuals)
+    means = AxisArray(mean.(residuals),string.(levels))
+    return Disp(F, F_pairs, medians, residuals, means,group, levels,group_residuals)
+end
+
diff --git a/src/Testing/permutation_tests.jl b/src/Testing/permutation_tests.jl
@@ -0,0 +1,107 @@
+
+function permutest(disp ::Disp,n_perm = 10000)
+    """
+    This function permutes the residuals returned by 'dispersion' to provide global and pairwise
+    P-values. Currently, no corrections are made for multiple testing. MultipleTesting.jl provides
+    this functionality, if desired
+
+    The function accepts:
+        disp: The named tuple returned by 'dispersion'
+        n_perm: the number of permutations
+
+    This function returns a named tuple containing:
+        P = Global P-value
+        pairwise_P = pairwise P-value
+        F = Global F-Statistic 
+        pairwise_F = pairwise F-statistics
+
+    https://github.com/juliangehring/MultipleTesting.jl
+    """
+    # read in values from `disp`
+    F = disp.F
+    R = disp.residuals
+    F_pairs = disp.pairwise_F
+    group = disp.group
+    levels = disp.levels
+    level_names = string.(levels)
+    inds = [group .== level for level in levels]
+    n = length(levels)
+
+    # pre-calculate values for ANOVA function
+    r = vcat(R...)
+    X = vcat(R...)
+    N = length(X)
+    nj = Tuple(length.(R))
+    X̅ = mean(X)
+    k = length(R)
+    # generate empty containers for F values
+    perm = Vector{Float64}(undef,n_perm)  
+    #rs = copy(R)#Vector{Vector}(undef,n)  
+    # run permutation
+    @inbounds for p in 1:n_perm
+        shuffle!(r)
+        rs = [view(r,inds[i]) for i in 1:n]
+
+        perm[p] =f(rs,N,nj,X̅,k)
+    end
+    ppairs =zeros(k,k)
+
+    @inbounds for i in 1:k-1
+        for j in i+1:k
+            r_1 = r[group .== levels[i]]
+            r_2 = r[group .== levels[j]]
+            pstat =permutest(r_1,r_2, F_pairs[j,i],n_perm)
+            ppairs[j,i] = pstat
+        end
+    end
+    # calculate P-values and return P and F values
+    P = sum(perm .> F)/n_perm
+    P_pairs = AxisArray(ppairs,level_names,level_names)
+    F_pairs = AxisArray(LowerTriangular(F_pairs), level_names,level_names)
+
+    return (P =P,pairwise_P = P_pairs,F = F, pairwise_F =F_pairs)
+end
+
+function permutest(r_1,r_2 ,F,n_perm = 1000)
+    r = vcat(r_1,r_2)  
+    nj = (length(r_1),length(r_2))
+    n = 2
+    N = sum(nj)
+    # pre-calculate values for ANOVA function
+    X̅ = mean(r)
+    k = 2
+    inds = (1:nj[1],1:nj[2])
+    # generate empty containers for F values
+    perm = Vector{Float64}(undef,n_perm) 
+    # run permutation
+    @inbounds for p in 1:n_perm
+        shuffle!(r)
+        rs = (view(r,inds[1]) ,view(r,inds[2]) )
+        perm[p] =f(rs,N,nj,X̅,k)
+    end  
+    P = sum(perm .> F)/n_perm
+    return P
+end
+
+
+get_tri(x) = [x[i,j] for i in 2:size(x)[1] for j in 1:i]
+function mantel(x_1,x_2, n_perm = 1000)
+    """
+    A permutation based test of the correlation between two matrices. Given two dissimilarity/distance matrices of equal size, calculates the P-value as
+    the probability of obtaining the same or greater correlation coefficient after permuting one of the matrices.
+    """
+    x = get_tri(x_1)
+    y = get_tri(x_2)
+    perm_c = Vector{Float64}(undef,n_perm)
+
+    c = cor(x,y)
+    @inbounds for i in 1:n_perm
+        shuffle!(y)
+        perm_c[i] = cor(x,y)
+    end
+
+    sum(perm_c .> c)/n_perm
+end
+
+
+