Merge branch 'PecanProject:develop' into develop
DongchenZ authored Aug 28, 2023
2 parents 957d72c + ce327b9 commit 289f59a
Showing 34 changed files with 838 additions and 26 deletions.
7 changes: 5 additions & 2 deletions base/db/DESCRIPTION
@@ -66,21 +66,24 @@ Suggests:
data.table,
here,
knitr,
+ mockery,
RPostgreSQL,
RPostgres,
RSQLite,
rcrossref,
rmarkdown (>= 2.19),
testthat (>= 2.0.0),
- tidyverse
+ tidyverse,
+ withr
X-Comment-Remotes:
Installing markdown from GitHub because as of 2023-02-05, this is the
easiest way to get version >= 2.19 onto Docker images that use older
Rstudio Package Manager snapshots.
When building on a system that finds a new enough version on CRAN,
OK to remove the Remotes line and this comment.
Remotes:
- github::rstudio/[email protected]
+ github::rstudio/[email protected],
+ github::r-lib/[email protected]
License: BSD_3_clause + file LICENSE
VignetteBuilder: knitr
Copyright: Authors
2 changes: 1 addition & 1 deletion base/db/R/assign.treatments.R
@@ -13,7 +13,7 @@
##' Assigns all control treatments the same value, then assigns unique treatments
##' within each site. Each site is required to have a control treatment.
##' The algorithm (incorrectly) assumes that each site has a unique set of experimental
- ##' treatments. This assumption is required by the data in BETTdb that does not always consistently name treatments or quantity them in the managements table. Also it avoids having the need to estimate treatment by site interactions in the meta analysis model. This model uses data in the control treatment to estimate model parameters so the impact of the assumption is minimal.
+ ##' treatments. This assumption is required by the data in BETYdb that does not always consistently name treatments or quantity them in the managements table. Also it avoids having the need to estimate treatment by site interactions in the meta analysis model. This model uses data in the control treatment to estimate model parameters so the impact of the assumption is minimal.
##' @name assign.treatments
##' @title assign.treatments
##' @param data input data
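For illustration, a minimal sketch of the behaviour these docs describe (it mirrors the new unit test added later in this commit, and assumes assign.treatments is available from PEcAn.DB):

library(PEcAn.DB)
# every site must contain a control row; rows flagged control = 1 come back with trt_id "control",
# and the remaining rows are assigned treatment ids within their site
dat <- data.frame(
  site_id     = c(1, 1, 2, 2),
  citation_id = c(101, 101, 201, 201),
  control     = c(1, 0, 0, 1),
  trt_id      = NA
)
assign.treatments(dat)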
12 changes: 6 additions & 6 deletions base/db/R/dbfiles.R
@@ -71,7 +71,7 @@ dbfile.input.insert <- function(in.path, in.prefix, siteid, startdate, enddate,
"SELECT * FROM inputs WHERE site_id=", siteid,
" AND name= '", name,
"' AND format_id=", formatid,
- parent
+ parent, ";"
),
con = con
)
@@ -120,26 +120,26 @@ dbfile.input.insert <- function(in.path, in.prefix, siteid, startdate, enddate,
"INSERT INTO inputs ",
"(site_id, format_id, name) VALUES (",
siteid, ", ", formatid, ", '", name,
- "'", ") RETURNING id"
+ "'", ") RETURNING id;"
)
} else if (parent == "" && !is.null(startdate)) {
cmd <- paste0(
"INSERT INTO inputs ",
"(site_id, format_id, start_date, end_date, name) VALUES (",
siteid, ", ", formatid, ", '", startdate, "', '", enddate, "','", name,
- "') RETURNING id"
+ "') RETURNING id;"
)
} else if (is.null(startdate)) {
cmd <- paste0(
"INSERT INTO inputs ",
"(site_id, format_id, name, parent_id) VALUES (",
- siteid, ", ", formatid, ", '", name, "',", parentid, ") RETURNING id"
+ siteid, ", ", formatid, ", '", name, "',", parentid, ") RETURNING id;"
)
} else {
cmd <- paste0(
"INSERT INTO inputs ",
"(site_id, format_id, start_date, end_date, name, parent_id) VALUES (",
- siteid, ", ", formatid, ", '", startdate, "', '", enddate, "','", name, "',", parentid, ") RETURNING id"
+ siteid, ", ", formatid, ", '", startdate, "', '", enddate, "','", name, "',", parentid, ") RETURNING id;"
)
}
# This is the id that we just registered
@@ -150,7 +150,7 @@ dbfile.input.insert <- function(in.path, in.prefix, siteid, startdate, enddate,
inputid <- db.query(
query = paste0(
"SELECT id FROM inputs WHERE site_id=", siteid,
- " AND format_id=", formatid
+ " AND format_id=", formatid, ";"
),
con = con
)$id
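As a quick illustration of the statement the first INSERT branch now produces (the ids and name are hypothetical, not taken from a real database):

siteid <- 1; formatid <- 2; name <- "ERA5 met"
paste0(
  "INSERT INTO inputs ",
  "(site_id, format_id, name) VALUES (",
  siteid, ", ", formatid, ", '", name,
  "'", ") RETURNING id;"
)
# [1] "INSERT INTO inputs (site_id, format_id, name) VALUES (1, 2, 'ERA5 met') RETURNING id;"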
4 changes: 2 additions & 2 deletions base/db/R/query.dplyr.R
@@ -61,7 +61,7 @@ dplyr.count <- function(df) {
#' @param unit string containing CF-style time unit including origin (e.g. "days since 2010-01-01")
#' @export
ncdays2date <- function(time, unit) {
- date <- lubridate::parse_date_time(unit, c("ymd_hms", "ymd_h", "ymd"))
+ date <- lubridate::parse_date_time(unit, c("ymd_HMS", "ymd_H", "ymd"))
days <- PEcAn.utils::ud_convert(time, unit, paste("days since ", date))
seconds <- PEcAn.utils::ud_convert(days, "days", "seconds")
return(as.POSIXct.numeric(seconds, origin = date, tz = "UTC"))
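A hedged usage sketch of ncdays2date (assumes lubridate and the udunits machinery behind PEcAn.utils::ud_convert are installed; values are illustrative):

ncdays2date(c(0, 10), "days since 2010-01-01")
# expected: POSIXct values "2010-01-01 UTC" and "2010-01-11 UTC"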
@@ -124,7 +124,7 @@ workflows <- function(bety, ensemble = FALSE) {
#' @export
workflow <- function(bety, workflow_id) {
workflows(bety) %>%
- dplyr::filter(.data$workflow_id == !!.data$workflow_id)
+ dplyr::filter(.data$workflow_id == !!workflow_id)
} # workflow
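The change matters because the old expression unquoted .data$workflow_id rather than the workflow_id argument, so the filter never narrowed to the requested workflow. A small sketch with a hypothetical table:

library(dplyr)
wf <- data.frame(workflow_id = c(1, 2, 99))
workflow_id <- 99
filter(wf, .data$workflow_id == !!workflow_id)  # keeps only the row where the column equals 99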


4 changes: 2 additions & 2 deletions base/db/R/query.file.path.R
@@ -8,11 +8,11 @@
##' @author Betsy Cowdery
query.file.path <- function(input.id, host_name, con){
machine.host <- PEcAn.DB::default_hostname(host_name)
- machine <- db.query(query = paste0("SELECT * from machines where hostname = '",machine.host,"'"), con = con)
+ machine <- db.query(query = paste0("SELECT * from machines where hostname = '",machine.host,"';"), con = con)
dbfile <- db.query(
query = paste(
"SELECT file_name,file_path from dbfiles where container_id =", input.id,
- " and container_type = 'Input' and machine_id =", machine$id
+ " and container_type = 'Input' and machine_id =", machine$id, ";"
),
con = con
)
11 changes: 9 additions & 2 deletions base/db/R/query.yields.R
@@ -24,21 +24,28 @@ query.yields <- function(trait = 'yield', spstr, extra.columns = '', con = NULL,
ids_are_cultivars = FALSE, ...){

member_column <- if (ids_are_cultivars) {"cultivar_id"} else {"specie_id"}
+
+ if(!is.null(extra.columns)) {
+ if(!is.character(extra.columns) || length(extra.columns) != 1) {
+ PEcAn.logger::logger.severe("`extra.columns` must be a string")
+ }
+ }
+
query <- paste("select
yields.id, yields.citation_id, yields.site_id, treatments.name,
yields.date, yields.time, yields.cultivar_id, yields.specie_id,
yields.mean, yields.statname, yields.stat, yields.n,
variables.name as vname,
month(yields.date) as month,",
- extra.columns,
+ if(extra.columns != '') { paste(extra.columns, ",", sep = "") } else {""},
"treatments.control, sites.greenhouse
from yields
left join treatments on (yields.treatment_id = treatments.id)
left join sites on (yields.site_id = sites.id)
left join variables on (yields.variable_id = variables.id)
where ", member_column, " in (", spstr,");", sep = "")
if(!trait == 'yield'){
- query <- gsub(");", paste(" and variables.name in ('", trait,"');", sep = ""), query)
+ query <- gsub(";", paste(" and variables.name in ('", trait,"');", sep = ""), query)
}

return(fetch.stats2se(connection = con, query = query))
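A sketch of the new argument handling (con, the species id string, and the extra column name are placeholders; not run against a live BETYdb):

query.yields(spstr = "938", extra.columns = c("sites.lat", "sites.lon"), con = con)
# -> stops via PEcAn.logger::logger.severe("`extra.columns` must be a string")

query.yields(spstr = "938", extra.columns = "yields.notes", con = con)
# the generated SELECT list now includes yields.notes between month(yields.date) and treatments.control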
2 changes: 1 addition & 1 deletion base/db/man/assign.treatments.Rd

Some generated files are not rendered by default.

33 changes: 33 additions & 0 deletions base/db/tests/testthat/test.assign.treatments.R
@@ -0,0 +1,33 @@
test_that("`assign.treatments` correctly assigns control treatment", {
data <- data.frame(
site_id = c(1, 1, 2, 2, 3, 3),
citation_id = c(101, 101, 201, 201, 301, 301),
control = c(1, 0, 0, 1, 0, 0),
trt_id = NA
)

updated_data <- assign.treatments(data)
expect_equal(updated_data$trt_id, c("control", NA, NA, "control", "control", "control"))
})

test_that("`assign.treatments` gives an error if no control treatment is set for a site", {
data <- data.frame(
site_id = c(1, 1, 2, 2, 3, 3),
citation_id = c(101, 101, 201, 201, 301, 301),
control = c(0, 0, 0, 1, 0, 0),
trt_id = c(NA, NA, NA, NA, "not_control", NA)
)

expect_error(assign.treatments(data), "No control treatment set")
})

test_that("`drop.columns` able to drop specified columns from data", {
data <- data.frame(
id = c(1, 2, 3),
name = c("a", "b", "c"),
value = c(1.2, 4.5, 6.7)
)

updated_data <- drop.columns(data, c("name", "not_a_column"))
expect_equal(colnames(updated_data), c("id", "value"))
})
23 changes: 23 additions & 0 deletions base/db/tests/testthat/test.check.lists.R
@@ -0,0 +1,23 @@
test_that("`check.lists` returns false for appropriate cases", {
x <- data.frame(id = c(1, 2, 3))
y <- data.frame(id = c(1, 2, 3, 4))

# for unequal number of rows
expect_false(check.lists(x, y))

# for wrong filename passed
expect_false(check.lists(x, y, filename = "wrong.csv"))

# if x and y are actually unequal
y <- data.frame(id = c(1, 2, 4))
expect_false(check.lists(x, y, filename = "species.csv"))
})

test_that("`check.lists` able to correctly work for matching data frames to lists read from csv files", {
withr::with_tempfile("tf", fileext = ".csv",{
x <- data.frame(id = c(1, 2, 3))
y <- data.frame(id = c(1, 2, 3))
write.csv(y, file = tf)
expect_true(check.lists(x, read.csv(tf), filename = "species.csv"))
})
})
39 changes: 39 additions & 0 deletions base/db/tests/testthat/test.convert_input.R
@@ -0,0 +1,39 @@
test_that("`convert_input()` able to call the respective download function for a data item with the correct arguments", {
mocked_res <- mockery::mock(list(c("A", "B")))

mockery::stub(convert_input, 'dbfile.input.check', data.frame())
mockery::stub(convert_input, 'db.query', data.frame(id = 1))
mockery::stub(convert_input, 'PEcAn.remote::remote.execute.R', mocked_res)
mockery::stub(convert_input, 'purrr::map_dfr', data.frame(missing = c(FALSE), empty = c(FALSE)))

convert_input(
input.id = NA,
outfolder = "test",
formatname = NULL,
mimetype = NULL,
site.id = 1,
start_date = "2011-01-01",
end_date = "2011-12-31",
pkg = 'PEcAn.data.atmosphere',
fcn = 'download.AmerifluxLBL',
con = NULL,
host = data.frame(name = "localhost"),
browndog = NULL,
write = FALSE,
lat.in = 40,
lon.in = -88
)

args <- mockery::mock_args(mocked_res)
expect_equal(
args[[1]]$script,
"PEcAn.data.atmosphere::download.AmerifluxLBL(lat.in=40, lon.in=-88, overwrite=FALSE, outfolder='test/', start_date='2011-01-01', end_date='2011-12-31')"
)
})

test_that("`.get.file.deletion.commands()` able to return correct file deletion commands", {
res <- .get.file.deletion.commands(c("test"))
expect_equal(res$move.to.tmp, "dir.create(c('./tmp'), recursive=TRUE, showWarnings=FALSE); file.rename(from=c('test'), to=c('./tmp/test'))")
expect_equal(res$delete.tmp, "unlink(c('./tmp'), recursive=TRUE)")
expect_equal(res$replace.from.tmp, "file.rename(from=c('./tmp/test'), to=c('test'));unlink(c('./tmp'), recursive=TRUE)")
})
32 changes: 32 additions & 0 deletions base/db/tests/testthat/test.covariate.functions.R
@@ -0,0 +1,32 @@
test_that("`append.covariate` able to append new column for covariates in given data based on id", {
data <- data.frame(
id = c(1, 2, 3, 4),
name = c("a", "b", "c", "d")
)
covariates.data <- data.frame(
trait_id = c( 1, 2, 3, 4, 4),
level = c("A", "B", "C", "D", "E"),
name = c("a", "b", "c", "d", "e")
)
updated_data <- append.covariate(data, "new_covariates_col", covariates.data)
expect_equal(updated_data$new_covariates_col, c("A", "B", "C", "D"))
expect_equal(colnames(updated_data), c("id", "new_covariates_col", "name"))
})

test_that("`filter_sunleaf_traits` able to filter out upper canopy leaves", {
data <- data.frame(
id = c(1, 2, 3, 4),
name = c("a", "b", "c", "d")
)
covariates <- data.frame(
trait_id = c(1, 2, 3, 4),
name = c("leaf", "canopy_layer", "canopy_layer", "sunlight"),
level = c(1.2, 0.5, 0.7, 0.67)
)

updated_data <- filter_sunleaf_traits(data, covariates)
expect_equal(updated_data$name, c("a", "c", "d"))

# temporary column gets removed
expect_equal(colnames(updated_data), c("id", "name"))
})