Skip to content

Commit

Permalink
reslove conflict in pc methods #140
Browse files Browse the repository at this point in the history
Merge branch 'master' into aqpdf

# Conflicts:
#	man/profile_compare-methods.Rd
  • Loading branch information
brownag committed Jun 19, 2020
2 parents ce41cab + 0a7c354 commit 7c3fe66
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 114 deletions.
2 changes: 2 additions & 0 deletions R/profile_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@ pc.SPC <- function(s, vars, rescale.result=FALSE, ...){
## this is only a problem when using profile IDs that are numeric and not stable when alpha-sorted
##
## However, this will result in changes to sorting of profile_id(), @site, @horizon
##
## Also, this will break with merge of aqpdf branch were sorting by ID is not performed at init time
s <- rebuildSPC(s)


Expand Down
2 changes: 2 additions & 0 deletions man/ROSETTA.centroids.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Theoretical water-retention parameters for uniform soil material of each texture

\references{
van Genuchten, M.Th. (1980). "A closed-form equation for predicting the hydraulic conductivity of unsaturated soils". Soil Science Society of America Journal. 44 (5): 892-898.

Schaap, M.G., F.J. Leij, and M.Th. van Genuchten. 2001. ROSETTA: A computer program for estimating soil hydraulic parameters with hierarchical pedotransfer functions. Journal of Hydrology 251(34): 163-176.
}

\source{
Expand Down
103 changes: 43 additions & 60 deletions man/profile_compare-methods.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -73,117 +73,98 @@ Our approach builds on the work of (Moore, 1972) and the previously mentioned de

\seealso{\code{\link{slice}}, \code{\link{daisy}}}
\examples{
\donttest{
## 1. check out the influence depth-weight coef:
require(lattice)
library(lattice)

z <- rep(1:100,4)
k <- rep(c(0,0.1,0.05,0.01), each=100)
w <- 100*exp(-k*z)

xyplot(z ~ w, groups=k, ylim=c(105,-5), xlim=c(-5,105), type='l',
ylab='Depth', xlab='Weighting Factor',
ylab='Depth', xlab='Weighting Factor', asp=1.5,
auto.key=list(columns=4, lines=TRUE, points=FALSE, title="k", cex=0.8, size=3),
panel=function(...) {
panel.grid(h=-1,v=-1)
panel.superpose(...)
}
)

## 2. basic implementation, requires at least two properties
# implementation for a data.frame class object
data(sp1)
d <- profile_compare(sp1, vars=c('prop','group'), max_d=100, k=0.01,
plot.depth.matrix=TRUE)

# upgrade to SoilProfileCollection
depths(sp1) <- id ~ top + bottom
op <- par(mfrow=c(1,2))
# perform comparison on SoilProfileCollection object
# compare soil/non-soil matrix plot
d <- profile_compare(sp1, vars=c('prop','group'), max_d=100, k=0.01,
plot.depth.matrix=TRUE)

# plot profile collection
plot(sp1)
# annotate max depth of profile comparison
abline(h=100, col='red', lty=2)
par(op)


# more soil properties
data(sp2)
depths(sp2) <- id ~ top + bottom

d.1 <- profile_compare(sp2, vars=c('prop','field_ph','hue','value'),
max_d=100, k=0.01, plot.depth.matrix=TRUE)
max_d=100, k=0.01, plot.depth.matrix=TRUE)

# add some missing data:
sp2$prop[1:2] <- NA
d.2 <- profile_compare(sp2, vars=c('prop','field_ph','hue','value'),
max_d=100, k=0.01, plot.depth.matrix=TRUE)
max_d=100, k=0.01, plot.depth.matrix=TRUE)

# note small changes in D:
cor(d.1, d.2)

## 3. identify profiles within a collection that contain all NAs
require(plyr)
s <- ldply(1:10, random_profile)
depths(s) <- id ~ top + bottom
set.seed(1010101)
s <- union(lapply(letters[1:10], random_profile, SPC=TRUE))

# replace first profile's data with NA
na.required <- nrow(s[1, ])
s$p1[1:na.required] <- NA
s$p2[1:na.required] <- NA

# attempt profile comparison: this won't work, throws an error
# d <- profile_compare(s, vars=c('p1','p2'), max_d=100, k=0)
d <- profile_compare(s, vars=c('p1','p2'), max_d=100, k=0)

# check for soils that are missing all clay / total RF data
f.check.NA <- function(i) length(which(is.na(i$p1) | is.na(i$p2))) / nrow(i) == 1
missing.too.much.data.idx <- which(profileApply(s, f.check.NA))



# remove bad profiles and try again: works

## AGB: this says the IDs are out of order... but I think it is probably
## mis-calculating... See:
##
## s[-missing.too.much.data.idx, ]$id
## site(s[-missing.too.much.data.idx, ])$id
## spc_in_sync(s)
## spc_in_sync(s[-missing.too.much.data.idx,])

# s.no.na <- profile_compare(s[-missing.too.much.data.idx, ], vars=c('p1','p2'),
# s.no.na <- profile_compare(s[-missing.too.much.data.idx, ],
# vars=c('p1','p2'),
# max_d=100, k=0, plot.depth.matrix=TRUE)

## 4. better plotting of dendrograms with ape package:
if(require(ape) & require(cluster) & require(MASS)) {
h <- diana(d)
p <- as.phylo(as.hclust(h))
plot(ladderize(p), cex=0.75, label.offset=1)
tiplabels(col=cutree(h, 3), pch=15)

## 5. other uses of the dissimilarity matrix
# Sammon Mapping: doesn't like '0' values in dissimilarity matrix
d.sam <- sammon(d)

# simple plot
dev.off() ; dev.new()
plot(d.sam$points, type = "n", xlim=range(d.sam$points[,1] * 1.5))
text(d.sam$points, labels=row.names(as.data.frame(d.sam$points)),
cex=0.75, col=cutree(h, 3))


data(sp2)
depths(sp2) <- id ~ top + bottom
site(sp2) <- ~ surface

d <- profile_compare(sp2, vars=c('prop','field_ph','hue','value'),
max_d=100, k=0)

h <- diana(d)
p <- as.phylo(as.hclust(h))
plot(p, show.tip.label=FALSE)
tiplabels(sp2$surface, col=cutree(h, 3), bg=NA, cex=0.75)

## 5. other uses of the dissimilarity matrix
# Sammon Mapping: doesn't like '0' values in dissimilarity matrix
d.sam <- sammon(d)

# simple plot
dev.off() ; dev.new()
plot(d.sam$points, type = "n", xlim=range(d.sam$points[,1] * 1.5))
text(d.sam$points, labels=row.names(as.data.frame(d.sam$points)),
cex=0.75, col=cutree(h, 3))

}


## 6. try out the 'sample_interval' argument
# compute using sucessively larger sampling intervals
data(sp3)
d <- profile_compare(sp3, vars=c('clay','cec','ph'),
max_d=100, k=0.01)
max_d=100, k=0.01)
d.2 <- profile_compare(sp3, vars=c('clay','cec','ph'),
max_d=100, k=0.01, sample_interval=2)
max_d=100, k=0.01, sample_interval=2)
d.10 <- profile_compare(sp3, vars=c('clay','cec','ph'),
max_d=100, k=0.01, sample_interval=10)
max_d=100, k=0.01, sample_interval=10)
d.20 <- profile_compare(sp3, vars=c('clay','cec','ph'),
max_d=100, k=0.01, sample_interval=20)
max_d=100, k=0.01, sample_interval=20)

# check the results via hclust / dendrograms
oldpar <- par(mfcol=c(1,4), mar=c(2,1,2,2))
Expand All @@ -192,6 +173,8 @@ plot(as.dendrogram(hclust(d.2)), horiz=TRUE, main='Every 2nd Depth Slice')
plot(as.dendrogram(hclust(d.10)), horiz=TRUE, main='Every 10th Depth Slice')
plot(as.dendrogram(hclust(d.20)), horiz=TRUE, main='Every 20th Depth Slice')
par(oldpar)

}
}


Expand Down
43 changes: 14 additions & 29 deletions misc/test_suite/check_profile_compare.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,18 @@
# check results via MDS
library(MASS)
s <- sammon(d)
s.2 <- sammon(d.2)
s.10 <- sammon(d.10)
s.20 <- sammon(d.20)
library(aqp)
library(sharpshootR)
library(cluster)

# check
plot(s$points, axes=FALSE, xlab='', ylab='') ; box()
points(s.2$points, col=2, cex=0.75)
points(s.10$points, col=4, cex=0.5)
points(s.20$points, col=5, cex=0.25)

# convert to mostly consistent scale
sp <- scale(s$points)
sp.2 <- scale(s.2$points)
sp.10 <- scale(s.10$points)
sp.20 <- scale(s.20$points)
# this has been a problem for years, alpha-sorting changes ordering
set.seed(1010101)
s <- union(lapply(letters[1:10], random_profile, n_prop=3, method='LPP', SPC=TRUE))

# label mid-points
label.pos <- rbind(
(sp.2 + sp) / 2,
(sp.10 + sp.2) / 2,
(sp.20 + sp.10) / 2
)
pr <- princomp(horizons(s)[, c('p1', 'p2', 'p3')], cor = TRUE)
s$pc.1 <- pr$scores[, 1]


d <- profile_compare(s, vars=c('p1','p2', 'p3'), max_d=100, k=0, rescale.result=TRUE)
hc <- diana(d)

(plotProfileDendrogram(s, hc, color='pc.1', debug = TRUE, width=0.25))

# check
plot(rbind(sp,sp.2,sp.10,sp.20), type='n', axes=FALSE, xlab='', ylab='') ; box()
points(rbind(sp,sp.2,sp.10,sp.20), pch=15, col=rep(1:4, each=9), cex=0.75)
text(label.pos, label=rep(1:3, each=9), cex=0.75, pos=1)
arrows(sp[,1], sp[,2], sp.2[,1], sp.2[,2], len=0.08, col='grey')
arrows(sp.2[,1], sp.2[,2], sp.10[,1], sp.10[,2], len=0.08, col='grey')
arrows(sp.10[,1], sp.10[,2], sp.20[,1], sp.20[,2], len=0.08, col='grey')
52 changes: 27 additions & 25 deletions tests/testthat/test-profile_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ site(d) <- s

## tests

test_that("profile_compare basics", {
test_that("profile_compare works as expected", {

# compute betwee-profile dissimilarity, no depth weighting
d.dis <- suppressMessages(profile_compare(d, vars=c('clay', 'ph', 'frags'), k=0,
Expand All @@ -54,30 +54,32 @@ test_that("profile_compare basics", {
})


##
## TODO: this test no longer makes sense after aqpdf merge, standby...
##


# ensure that tapply-based re sort of profile IDs doesn't throw an error
# this will not be a problem after the re-write
# https://github.com/ncss-tech/aqp/issues/7
test_that("profile_compare gracefully handles numeric IDs (#7)", {

# this has been a problem for years, alpha-sorting changes ordering
set.seed(1010101)
s <- union(lapply(1:10, random_profile, SPC=TRUE))

# expected ID orering after union() of random profiles
expect_equal(profile_id(s), c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))

# expected ID ordering after rebuildSPC
rebuilt.ids <- profile_id(rebuildSPC(s))
expect_equal(rebuilt.ids, c("1", "10", "2", "3", "4", "5", "6", "7", "8", "9"))

# this works as of 2020-06-09
# rebuildSPC() called by profile_compare, on input SPC re-orders via alpha-sort
d <- profile_compare(s, vars=c('p1','p2'), max_d=100, k=0)

# new sorting by rebuildSPC / tapply in profile_compare
expect_equal(attr(d, 'Labels'), rebuilt.ids)
})
# # ensure that tapply-based re sort of profile IDs doesn't throw an error
# # this will not be a problem after the re-write
# # https://github.com/ncss-tech/aqp/issues/7
# test_that("profile_compare gracefully handles numeric IDs (#7)", {
#
# # this has been a problem for years, alpha-sorting changes ordering
# set.seed(1010101)
# s <- union(lapply(1:10, random_profile, SPC=TRUE))
#
# # expected ID orering after union() of random profiles
# expect_equal(profile_id(s), c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))
#
# # expected ID ordering after rebuildSPC
# rebuilt.ids <- profile_id(rebuildSPC(s))
# expect_equal(rebuilt.ids, c("1", "10", "2", "3", "4", "5", "6", "7", "8", "9"))
#
# # this works as of 2020-06-09, but will not work with aqpdf branch
# # rebuildSPC() called by profile_compare, on input SPC re-orders via alpha-sort
# d <- profile_compare(s, vars=c('p1','p2'), max_d=100, k=0)
#
# # new sorting by rebuildSPC / tapply in profile_compare
# expect_equal(attr(d, 'Labels'), rebuilt.ids)
# })


0 comments on commit 7c3fe66

Please sign in to comment.