Skip to content

Commit

Permalink
Merge pull request #17 from kescobo/testcov
Browse files Browse the repository at this point in the history
Improve Tests
  • Loading branch information
kescobo authored Apr 2, 2018
2 parents 93ce37f + f440526 commit c89b250
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 28 deletions.
1 change: 1 addition & 0 deletions src/Microbiome.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export
hclustplot,
annotationbar,
## utils
metaphlan_import,
panphlan_calcs,
bysample,
taxfilter,
Expand Down
1 change: 1 addition & 0 deletions src/abundances.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ abundancetable(table::AbstractArray{T,2},
species = ["feature_$x" for x in indices(table, 1)]) where T<:Real =
ComMatrix(Float64.(table), species, site)


"""
Filter an abundance table to the top `n` features across all samples
Expand Down
45 changes: 28 additions & 17 deletions src/biobakery_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,30 @@ end
#==============
MetaPhlAn Utils
==============#
# Lookup from taxonomic rank name to the integer level used when filtering
# MetaPhlAn tables (e.g. `:species => 7`).
const taxlevels = Dict(
    :kingdom    => 1,
    # Historical misspelling; kept as an alias so existing callers that pass
    # `:kingom` keep working.
    :kingom     => 1,
    :phylum     => 2,
    :class      => 3,
    :order      => 4,
    :family     => 5,
    :genus      => 6,
    :species    => 7,
    :subspecies => 8)

"""
    metaphlan_import(path::String; level=0, shortnames::Bool=true)

Read the table at `path` with `readtable`, replace missing entries in every
column with `0`, and return the result of calling `abundancetable` on it.

`level` may be an integer or a key of `taxlevels` (e.g. `:species`); a symbol
that is not a key of `taxlevels` raises an error. When the resulting level is
greater than 0 the table is first filtered in place with `taxfilter!`,
forwarding `shortnames`. With the default `level=0` no filtering happens.
"""
function metaphlan_import(path::String; level=0, shortnames::Bool=true)
    table = readtable(path)
    for col in names(table)
        table[col] = coalesce.(table[col], 0)
    end

    # Translate a symbolic rank into its integer level.
    if isa(level, Symbol)
        if !haskey(taxlevels, level)
            error("$level not a valid taxonomic level")
        end
        level = taxlevels[level]
    end

    if level > 0
        taxfilter!(table, level, shortnames=shortnames)
    end
    return abundancetable(table)
end

"""
taxfilter!(df::DataFrame, level::Int=7; shortnames::Bool=true)
Expand All @@ -57,40 +81,27 @@ Filter a MetaPhlAn table (df) to a particular taxon level.
If shortnames is true (default), also changes names in the first column to
remove higher order taxa
"""
function taxfilter!(taxonomic_profile::DataFrames.DataFrame, level::Int; shortnames::Bool=true)
taxonomic_profile = taxonomic_profile[length.(
split.(taxonomic_profile[1], '|')) .== level, :]
function taxfilter!(taxonomic_profile::DataFrames.DataFrame, level::Int=7; shortnames::Bool=true)
filter!(row->length(split(row[1], '|')) == level, taxonomic_profile)
if shortnames
matches = collect.(eachmatch.(r"[kpcofgs]__(\w+)", taxonomic_profile[1]))
taxonomic_profile[1] = String.([m[level].captures[1] for m in matches])
end
return taxonomic_profile
end

taxfilter!(tp::DataFrames.DataFrame) = taxfilter(tp, 7)

function taxfilter!(taxonomic_profile::DataFrames.DataFrame, level::Symbol; shortnames::Bool=true)
taxlevels = Dict([
:kingom => 1,
:phylum => 2,
:class => 3,
:order => 4,
:family => 5,
:genus => 6,
:species => 7,
:subspecies => 8])
in(level, keys(taxlevels)) || error("$level not a valid taxonomic level")
taxfilter!(taxonomic_profile, taxlevels[level], shortnames=shortnames)
end


function taxfilter(taxonomic_profile::DataFrames.DataFrame, level::Int; shortnames::Bool=true)
function taxfilter(taxonomic_profile::DataFrames.DataFrame, level::Int=7; shortnames::Bool=true)
filt = deepcopy(taxonomic_profile)
taxfilter!(filt, level, shortnames=shortnames)
return filt
end

taxfilter(tp::DataFrames.DataFrame) = taxfilter(tp, 7)

function taxfilter(taxonomic_profile::DataFrames.DataFrame, level::Symbol; shortnames::Bool=true)
filt = deepcopy(taxonomic_profile)
taxfilter!(filt, level, shortnames=shortnames)
Expand Down
6 changes: 3 additions & 3 deletions src/distances.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,6 @@ function getdelta(A::AbstractArray{T,2}) where T <: AbstractFloat
end


eigenvalue(p::PCoA, n::Int) = p.eigenvalues[n]
principalcoord(p::PCoA, n::Int) = [p[i,n] for i in 1:size(p,1)]
variance(p::PCoA, n::Int) = p.variance_explained[n]
# Accessors for a `PCoA` result. `inds...` is forwarded unchanged to the
# underlying indexing operation.

# Index into the `eigenvalues` field of `p`.
eigenvalue(p::PCoA, inds...) = p.eigenvalues[inds...]
# Index into the `variance_explained` field of `p`.
variance(p::PCoA, inds...) = p.variance_explained[inds...]
# Index `p` itself: everything along the first dimension (`:`), then `inds`.
principalcoord(p::PCoA, inds...) = p[:,inds...]
4 changes: 2 additions & 2 deletions src/leafordering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ Based on:
[Bar-Joseph et. al. "Fast optimal leaf ordering for hierarchical clustering." _Bioinformatics_. (2001)](https://doi.org/10.1093/bioinformatics/17.suppl_1.S22)
"""
function optimalorder(hc::Hclust, dm::Array{Float64,2})
ord = copy(hc.order)
orderleaves!(ord, hc, dm)
ord = deepcopy(hc)
optimalorder!(ord, dm)
return ord
end

Expand Down
8 changes: 4 additions & 4 deletions src/plotting.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
@recipe function f(pc::PCoA)
xticks := false
yticks := false
xlabel --> "PCo1 ($(round(pc.variance_explained[1] * 100, 2))%)"
ylabel --> "PCo2 ($(round(pc.variance_explained[2] * 100, 2))%)"
xlabel --> "PCo1 ($(round(variance(pc, 1) * 100, 2))%)"
ylabel --> "PCo2 ($(round(variance(pc, 2) * 100, 2))%)"
seriestype := :scatter
principalcoord(pc, 1), principalcoord(pc,2)
end
Expand All @@ -13,7 +13,7 @@ end
typeof(abun) <: AbstractComMatrix || error("AbundancePlot not defined for $(typeof(abun))")

topabund = min(topabund, nfeatures(abun))
in(sorton, [:top, :hclust, Symbol.(samplenames(abun))...]) || error("invalid sorton option") #replace `, abun.samples...` in the Array, but the code only handles :top and :hclust below anyway
in(sorton, [:top, :hclust, Symbol.(samplenames(abun))...]) || error("invalid sorton option")
2 <= topabund < 12 || error("n must be between 2 and 12")

top = filterabund(abun, topabund)
Expand All @@ -27,7 +27,7 @@ end
hc = hclust(DM, :single)
srt = hc.order
else
error("invalid sorton option")
error("invalid sorton option") # TODO: add feature-specific sorting
end

bar_position := :stack
Expand Down
43 changes: 43 additions & 0 deletions test/metaphlan_test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#SampleID sample1_taxonomic_profile sample2_taxonomic_profile sample3_taxonomic_profile sample4_taxonomic_profile sample5_taxonomic_profile sample6_taxonomic_profile sample7_taxonomic_profile
k__Archaea 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii|t__Methanobrevibacter_smithii_unclassified 0 0 0 0 0 0 14.13558
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_unclassified 0 0 0 0 0 0 0
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera 0 0 0 0 0 0 0
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera|s__Methanosphaera_stadtmanae 0 0 0 0 0 0 0
k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera|s__Methanosphaera_stadtmanae|t__GCF_000012545 0 0 0 0 0 0 0
k__Bacteria 100 100 99.47661 95.82479 100 99.22118 85.40265
k__Bacteria|p__Actinobacteria 4.0393 11.0838 19.29022 5.54708 32.80592 2.53519 1.72918
k__Bacteria|p__Actinobacteria|c__Actinobacteria 4.0393 11.0838 19.29022 5.54708 32.80592 2.53519 1.72918
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales 0.01631 0.28592 0.08487 0.03942 0.03655 0 0.18847
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae 0.00884 0.24289 0.05324 0.01759 0.02901 0 0.03716
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces 0.00884 0.24289 0.05324 0.01759 0.02901 0 0.03716
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_cardiffensis 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_cardiffensis|t__GCF_000364865 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_graevenitzii 0 0.00491 0.02464 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_graevenitzii|t__Actinomyces_graevenitzii_unclassified 0 0.00491 0.02464 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_johnsonii 0 0.02467 0.00706 0.00329 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_johnsonii|t__Actinomyces_johnsonii_unclassified 0 0.02467 0.00706 0.00329 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_massiliensis 0 0.04506 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_massiliensis|t__Actinomyces_massiliensis_unclassified 0 0.04506 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_naeslundii 0 0.00712 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_naeslundii|t__GCF_000285995 0 0.00712 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_odontolyticus 0.00796 0.08499 0.01431 0.0143 0.02901 0 0.0004
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_odontolyticus|t__Actinomyces_odontolyticus_unclassified 0.00796 0.08499 0.01431 0.0143 0.02901 0 0.0004
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_oris 0 0.04157 0 0 0 0 0.00967
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_oris|t__GCF_000180155 0 0.04157 0 0 0 0 0.00967
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_HPA0247 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_HPA0247|t__GCF_000411415 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_ICM39 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_ICM39|t__GCF_000282935 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_ICM47 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_sp_ICM47|t__GCF_000278725 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_turicensis 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_turicensis|t__GCF_000296505 0 0 0 0 0 0 0
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_viscosus 0.00088 0.03457 0.00722 0 0 0 0.02709
k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinomyces|s__Actinomyces_viscosus|t__GCF_000175315 0.00088 0.03457 0.00722 0 0 0 0.02709
53 changes: 51 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
using Microbiome
using Distances
using DataFrames
using Clustering
using Colors
using StatPlots
using Base.Test

@testset "Abundances" begin
Expand Down Expand Up @@ -57,10 +60,20 @@ using Base.Test
end

@test featurenames(filt)[end] == "other"

# Plotting

@test typeof(abundanceplot(abund, topabund=5)) <: Plots.Plot
@test_skip typeof(abundanceplot(abund, sorton=:hclust)) <: Plots.Plot # Needs BrayCurtis()
@test_skip typeof(abundanceplot(abund, sorton=:x1)) <: Plots.Plot # Needs method feature sorting

@test typeof(annotationbar(parse.(Color, ["red", "white", "blue"]))) <: Plots.Plot

end

@testset "Distances" begin
# Constructors
srand(1)
M = rand(100, 10)
df = hcat(DataFrame(x=collect(1:100)), DataFrame(M))
abund = abundancetable(
Expand All @@ -87,8 +100,44 @@ end
# PCoA
p = pcoa(dm, correct_neg=true)
# Floating-point sum, so compare approximately rather than with `==`.
@test sum(p.variance_explained) ≈ 1
for i in p.eigenvalues; @test i > 0; end
for i in 1:size(p, 2)
@test eigenvalue(p, i) > 0
@test typeof(eigenvalue(p, i)) <: Real
end

# Same check through the `variance` accessor; approximate comparison for floats.
@test sum([variance(p, i) for i in 1:size(p,2)]) ≈ 1
@test sort(variance(p, 1:size(p,2)), rev=true) == variance(p, 1:size(p,2))

@test length(principalcoord(p, 1)) == size(dm, 1)
for i in 1:9; @test typeof(eigenvalue(p, i)) <: Real; end
@test principalcoord(p, 1:size(p,2)) == p.eigenvectors

# Plotting
@test typeof(plot(p)) <: Plots.Plot
@test typeof(plot(p)) <: Plots.Plot

end

# Exercises `optimalorder` on a single-linkage clustering of random data.
@testset "Leaf Ordering" begin
# Fixed seed: the expected leaf order asserted below is hard-coded.
srand(42)
m = rand(100, 10)

dm = pairwise(Jaccard(), m)
h = hclust(dm, :single);

ordered = optimalorder(h, dm)

@test ordered.order == [7, 3, 1, 9, 2, 6, 10, 4, 5, 8]
# The `merge` field is asserted to be equal to that of the input clustering.
@test ordered.merge == h.merge

# Plotting

@test typeof(hclustplot(ordered)) <: Plots.Plot

end

# Imports the MetaPhlAn fixture at species level and checks the result.
# NOTE(review): the fixture path is relative, so this presumably expects to be
# run from the test directory — confirm.
@testset "Biobakery Utilities" begin
abund = metaphlan_import("metaphlan_test.tsv", level=:species, shortnames=true)

@test typeof(abund) <: ComMatrix
# Expected size after the species-level import is (15, 7).
@test size(abund) == (15, 7)
end

0 comments on commit c89b250

Please sign in to comment.