From eca49eeded74f7815a85347467115a5888d468f9 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Fri, 17 May 2024 16:38:43 -0700 Subject: [PATCH 1/4] mutate_profile: add `mutate_profile_raw()` for building sets of dynamic mutate expressions - `mutate_profile()` gains `col_names` argument for dynamic naming of columns, and --- R/mutate_profile.R | 170 ++++++++++++++++++++++++++++-------------- man/mutate_profile.Rd | 43 ++++++++++- 2 files changed, 157 insertions(+), 56 deletions(-) diff --git a/R/mutate_profile.R b/R/mutate_profile.R index 7152fa2be..e9410120e 100644 --- a/R/mutate_profile.R +++ b/R/mutate_profile.R @@ -4,6 +4,7 @@ #' @description \code{mutate_profile()} is a function used for transforming SoilProfileCollections. Each expression is applied to site or horizon level attributes of individual profiles. This distinguishes this function from \code{transform}, which is applied to all values in a collection, regardless of which profile they came from. #' @param object A SoilProfileCollection #' @param ... A set of comma-delimited R expressions that resolve to a transformation to be applied to a single profile e.g \code{mutate_profile(hzdept = max(hzdept) - hzdept)} +#' @param col_names character. Optional column names. Should match the number of expressions in `...`. #' @param horizon_level logical. If `TRUE` results of expressions are added to the SoilProfileCollection's horizon slot, if `FALSE` the results are added to the site slot. If `NULL` (default) the results are stored in the site or horizon slot based on the number of rows in each slot compared to the length of the result calculated from the _first_ and _last_ profile in the collection. #' #' @details If the length an expression's result matches the number of horizons, the result is stored as a horizon-level variable. If the result has length 1, it is stored as a site-level variable. In the ambiguous case where the first and last profile have only _one_ horizon, the results are stored in the horizon slot by default. To force results into site slot use `horizon_level = FALSE`. @@ -12,74 +13,133 @@ #' #' @rdname mutate_profile #' @export mutate_profile +#' @examples +#' +#' data(sp4) +#' depths(sp4) <- id ~ top + bottom +#' +#' mutate_profile(sp4, clay_wtd_average = weighted.mean(clay, bottom - top)) +#' # if (!isGeneric("mutate_profile")) -setGeneric("mutate_profile", function(object, ..., horizon_level = NULL) standardGeneric("mutate_profile")) +setGeneric("mutate_profile", function(object, ..., col_names = NULL, horizon_level = NULL) standardGeneric("mutate_profile")) -setMethod("mutate_profile", signature(object = "SoilProfileCollection"), function(object, ..., horizon_level = NULL) { +setMethod("mutate_profile", signature(object = "SoilProfileCollection"), + function(object, ..., col_names = NULL, horizon_level = NULL) { # capture expression(s) at function .dots <- substitute(list(...)) .dots <- .dots[2:length(.dots)] .names <- names(.dots) - - idn <- idname(object) - hzidn <- hzidname(object) if (is.null(.names)) { - .names <- as.character(.dots) + if (!is.null(col_names) && length(col_names) == length(.dots)) { + .names <- col_names + } else { + .names <- as.character(.dots) + } } - # cleaner to have horizon_level be applied to each expression independently - hzin <- horizon_level - if (is.null(horizon_level) || !is.logical(horizon_level)) { - horizon_level <- rep(FALSE, length(.dots)) - } + mutate_profile_raw(object, .dots, .names, horizon_level = horizon_level) +}) + +#' @param expr A list of expressions in terms of column names in site or horizon table of `object` +#' @rdname mutate_profile +#' @export mutate_profile +#' @examples +#' data(jacobs2000) +#' +#' set.seed(123) +#' +#' ## col_names allows for column names to be calculated +#' x <- mutate_profile(jacobs2000, bottom - top / sum(bottom - top), +#' col_names = paste0("relthk", floor(runif(1, 0, 100)))) +#' x$relthk28 +#' +#' # mutate_profile_raw allows for lists of expressions to be evaluated +#' master_desgn <- c("O", "A", "E", "B", "C", "R", "L", "M") +#' thk_names <- paste0("thk_", master_desgn) +#' +#' # calculate thickness for each horizon +#' x$thk <- x$bottom - x$top +#' +#' ## construct an arbitrary number of expressions using variable inputs +#' ops <- lapply(master_desgn, function(x) { +#' substitute(sum(thk[grepl(PATTERN, name)], na.rm = TRUE), list(PATTERN = x)) +#' }) +#' names(ops) <- thk_names +#' +#' # do mutation +#' y <- mutate_profile_raw(x, ops) +#' +#' site(y)[c(idname(y), thk_names)] +mutate_profile_raw <- function(object, expr, col_names = NULL, horizon_level = NULL) { + + idn <- idname(object) + hzidn <- hzidname(object) + + # preference is use col_names if specified + + # if no col_names, use the names of expr list + if (is.null(col_names)) { + col_names <- names(expr) + } + + # if expr is unnamed, use character conversion of expression + if (is.null(col_names)) { + col_names <- as.character(expr) + } + + # cleaner to have horizon_level be applied to each expression independently + hzin <- horizon_level + if (is.null(horizon_level) || !is.logical(horizon_level)) { + horizon_level <- rep(FALSE, length(expr)) + } + + x <- data.table::data.table(object@site)[object@horizons, on = idn] + o1 <- object[1, ] + o2 <- object[nrow(object), ] + o1c <- compositeSPC(o1) + o2c <- compositeSPC(o2) + + # iterate over expressions left to right + for (i in 1:length(expr)) { - x <- data.table::data.table(object@site)[object@horizons, on = idn] - o1 <- object[1, ] - o2 <- object[nrow(object), ] - o1c <- compositeSPC(o1) - o2c <- compositeSPC(o2) + # default is to create site-level properties unless result matches number of horizons + # decide whether we are adding/modifying a site or horizon level variable so + # that degenerate cases do not create identical columns in site and horizon table or get put in unexpected slot + # 2021-10-29: updated to use first and last profile, and allowing user override via argument + di <- expr[[i]] + res_eval1 <- .data_dots(o1c, eval(di))[[1]] + res_eval2 <- .data_dots(o2c, eval(di))[[1]] - # iterate over expressions left to right - for (i in 1:length(.dots)) { - - # default is to create site-level properties unless result matches number of horizons - # decide whether we are adding/modifying a site or horizon level variable so - # that degenerate cases do not create identical columns in site and horizon table or get put in unexpected slot - # 2021-10-29: updated to use first and last profile, and allowing user override via argument - di <- .dots[[i]] - res_eval1 <- .data_dots(o1c, eval(di))[[1]] - res_eval2 <- .data_dots(o2c, eval(di))[[1]] - - # allow user to override the determination - # check length of first/last profile result against number of horizons - if (length(res_eval1) == nrow(o1) && length(res_eval2) == nrow(o2)) { - horizon_level[i] <- TRUE - } - .SD <- NULL - - # remove any existing columnnames before joining in result - if (any(.names[i] %in% names(object))) { - for (n in .names[i]) { - object[[n]] <- NULL - } + # allow user to override the determination + # check length of first/last profile result against number of horizons + if (length(res_eval1) == nrow(o1) && length(res_eval2) == nrow(o2)) { + horizon_level[i] <- TRUE + } + .SD <- NULL + + # remove any existing columnnames before joining in result + if (any(col_names[i] %in% names(object))) { + for (n in col_names[i]) { + object[[n]] <- NULL } - - if (isFALSE(hzin) || !horizon_level[i]) { - res <- x[, list(eval(.dots[[i]], envir = .SD)), by = c(idn)] - if (length(res[[2]]) > length(object)) { - stop("mutate_profile: some profiles returned more than one result and `horizon_level=FALSE`", call. = FALSE) - } - colnames(res) <- c(idn, .names[i]) - site(object) <- res - } else { - res <- x[, list(.hzidname = .SD[[hzidn]], eval(.dots[[i]], envir = .SD)), by = c(idn)] - colnames(res) <- c(idn, hzidn, .names[i]) - horizons(object) <- res + } + + if (isFALSE(hzin) || !horizon_level[i]) { + res <- x[, list(eval(expr[[i]], envir = .SD)), by = c(idn)] + if (length(res[[2]]) > length(object)) { + stop("mutate_profile: some profiles returned more than one result and `horizon_level=FALSE`", call. = FALSE) } - + colnames(res) <- c(idn, col_names[i]) + site(object) <- res + } else { + res <- x[, list(.hzidname = .SD[[hzidn]], eval(expr[[i]], envir = .SD)), by = c(idn)] + colnames(res) <- c(idn, hzidn, col_names[i]) + horizons(object) <- res } - - return(object) -}) + + } + + return(object) +} diff --git a/man/mutate_profile.Rd b/man/mutate_profile.Rd index 42a46fd67..047deb373 100644 --- a/man/mutate_profile.Rd +++ b/man/mutate_profile.Rd @@ -3,16 +3,23 @@ \name{mutate_profile} \alias{mutate_profile} \alias{mutate_profile,SoilProfileCollection-method} +\alias{mutate_profile_raw} \title{Transform a SPC (by profile) with a set of expressions} \usage{ -mutate_profile(object, ..., horizon_level = NULL) +mutate_profile(object, ..., col_names = NULL, horizon_level = NULL) + +mutate_profile_raw(object, expr, col_names = NULL, horizon_level = NULL) } \arguments{ \item{object}{A SoilProfileCollection} \item{...}{A set of comma-delimited R expressions that resolve to a transformation to be applied to a single profile e.g \code{mutate_profile(hzdept = max(hzdept) - hzdept)}} +\item{col_names}{character. Optional column names. Should match the number of expressions in \code{...}.} + \item{horizon_level}{logical. If \code{TRUE} results of expressions are added to the SoilProfileCollection's horizon slot, if \code{FALSE} the results are added to the site slot. If \code{NULL} (default) the results are stored in the site or horizon slot based on the number of rows in each slot compared to the length of the result calculated from the \emph{first} and \emph{last} profile in the collection.} + +\item{expr}{A list of expressions in terms of column names in site or horizon table of \code{object}} } \value{ A SoilProfileCollection. @@ -23,6 +30,40 @@ A SoilProfileCollection. \details{ If the length an expression's result matches the number of horizons, the result is stored as a horizon-level variable. If the result has length 1, it is stored as a site-level variable. In the ambiguous case where the first and last profile have only \emph{one} horizon, the results are stored in the horizon slot by default. To force results into site slot use \code{horizon_level = FALSE}. } +\examples{ + +data(sp4) +depths(sp4) <- id ~ top + bottom + +mutate_profile(sp4, clay_wtd_average = weighted.mean(clay, bottom - top)) + +data(jacobs2000) + +set.seed(123) + +## col_names allows for column names to be calculated +x <- mutate_profile(jacobs2000, bottom - top / sum(bottom - top), + col_names = paste0("relthk", floor(runif(1, 0, 100)))) +x$relthk28 + +# mutate_profile_raw allows for lists of expressions to be evaluated +master_desgn <- c("O", "A", "E", "B", "C", "R", "L", "M") +thk_names <- paste0("thk_", master_desgn) + +# calculate thickness for each horizon +x$thk <- x$bottom - x$top + +## construct an arbitrary number of expressions using variable inputs +ops <- lapply(master_desgn, function(x) { + substitute(sum(thk[grepl(PATTERN, name)], na.rm = TRUE), list(PATTERN = x)) +}) +names(ops) <- thk_names + +# do mutation +y <- mutate_profile_raw(x, ops) + +site(y)[c(idname(y), thk_names)] +} \author{ Andrew G. Brown. } From dfc34f4d782b55974f3112d7306bcfbb28825d29 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Fri, 17 May 2024 16:57:40 -0700 Subject: [PATCH 2/4] NAMESPACE --- NAMESPACE | 1 + R/mutate_profile.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index ea7f94412..fb440674e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,6 +105,7 @@ export(mollic.thickness.requirement) export(mostLikelyHzSequence) export(munsell2rgb) export(mutate_profile) +export(mutate_profile_raw) export(overlapMetrics) export(panel.depth_function) export(parseMunsell) diff --git a/R/mutate_profile.R b/R/mutate_profile.R index e9410120e..1b9f86830 100644 --- a/R/mutate_profile.R +++ b/R/mutate_profile.R @@ -44,7 +44,7 @@ setMethod("mutate_profile", signature(object = "SoilProfileCollection"), #' @param expr A list of expressions in terms of column names in site or horizon table of `object` #' @rdname mutate_profile -#' @export mutate_profile +#' @export #' @examples #' data(jacobs2000) #' From ad2b142664c3b0fe1ddf820d69dc41926a80c8cb Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Tue, 15 Oct 2024 09:04:34 -0700 Subject: [PATCH 3/4] Update NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index e1f4c4f93..905d0139d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * Any SoilProfileCollection objects previously written to file (.rda, .rds) with aqp <2.1.x will need to be rebuilt using `rebuildSPC()` due to changes to S4 object structure * `estimatePSCS()` gains argument `"lieutex"` for in lieu textures which are used in the new routine for identification of the particle size control section of organic soils * new function `collapseHz()` combines and aggregates data for adjacent horizons matching a pattern or sharing a common ID + * new function `mutate_profile_raw()` for building sets of dynamic mutate expressions. Also `mutate_profile()` gains col_names argument for dynamic naming of columns. # aqp 2.0.4 (2024-07-30) * CRAN release From 5d38871c01c8eef06d7b4740d468c5d9b0e0de9f Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Tue, 15 Oct 2024 09:17:14 -0700 Subject: [PATCH 4/4] Add tests --- tests/testthat/test-mutate_profile.R | 29 +++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-mutate_profile.R b/tests/testthat/test-mutate_profile.R index 7c46d9fee..66c0741f6 100644 --- a/tests/testthat/test-mutate_profile.R +++ b/tests/testthat/test-mutate_profile.R @@ -35,7 +35,30 @@ test_that("transform & mutate_profile", { # forcing horizon level result into site produces an error expect_error({res3 <- mutate_profile(trunc(res, 0, 5), rt2 = (bottom - top) / sum(thickness), horizon_level = FALSE)}) - # however forcing a site-level result into horizon works - res4 <- mutate_profile(trunc(res, 0, 5), rt3 = sum((bottom - top) / sum(thickness)), horizon_level = TRUE) - expect_equal(length(res4$rt3), nrow(res4)) + # however forcing a site-level result into horizon works (using custom column name) + res4 <- mutate_profile(trunc(res, 0, 5), sum((bottom - top) / sum(thickness)), col_names = "foo", horizon_level = TRUE) + expect_equal(length(res4$foo), nrow(res4)) +}) + +test_that("mutate_profile_raw", { + data(jacobs2000) + set.seed(123) + x <- mutate_profile(jacobs2000, bottom - top, + col_names = paste0("thk", floor(runif(1, 0, 100)))) + expect_false(is.null(x$thk28)) + + # example with dynamic number of columns and names + master_desgn <- c("O", "A", "E", "B", "C", "R", "L", "M") + thk_names <- paste0("thk_", master_desgn) + + x$thk <- x$bottom - x$top + + ## construct an arbitrary number of expressions using variable inputs + ops <- lapply(master_desgn, function(x) substitute(sum(thk[grepl(VAR, name)], na.rm = TRUE), list(VAR = x))) + names(ops) <- thk_names + + # do mutation + y <- mutate_profile_raw(x, ops) + + expect_true(all(c(idname(y), thk_names) %in% siteNames(y))) })