From f9a0cff6a1b9d8f3bba85d7bd3a2d853c57b8342 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 27 Jun 2024 05:07:49 +0530 Subject: [PATCH 01/12] Refactor met.process.R to correctly handle standerdized_result Signed-off-by: Abhinav Pandey --- modules/data.atmosphere/R/met.process.R | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index 8e1408b3887..dc15d0395fa 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -282,7 +282,9 @@ met.process <- function(site, input_met, start_date, end_date, model, # Change to Site Level - Standardized Met (i.e. ready for conversion to model specific format) if (stage$standardize) { standardize_result <- list() - + is_standardized <- FALSE + ready.id <- list(input.id = NULL, dbfile.id = NULL) + for (i in seq_along(cf.id[[1]])) { if (register$scale == "regional") { @@ -301,6 +303,7 @@ met.process <- function(site, input_met, start_date, end_date, model, host = host, overwrite = overwrite$standardize) # Expand to support ensemble names in the future + is_standardized <- TRUE } else if (register$scale == "site") { ##### Site Level Processing standardize_result[[i]] <- .metgapfill.module(cf.id = list(input.id = cf.id$input.id[i], dbfile.id = cf.id$dbfile.id[i]), @@ -314,15 +317,23 @@ met.process <- function(site, input_met, start_date, end_date, model, host = host, overwrite = overwrite$standardize, ensemble_name = i) + is_standardized <- TRUE + } else { + is_standardized <- FALSE + } + + if(is_standardized) { + ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) + ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) } } # End for loop - ready.id <- list(input.id = NULL, dbfile.id = NULL) + # ready.id <- list(input.id = NULL, dbfile.id = NULL) - for (i in seq_along(standardize_result)) { - ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) - ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) - } + # for (i in seq_along(standardize_result)) { + # ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) + # ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) + # } } else { ready.id <- input_met$id From dd53e34c0c226569bfa5b56728b160b2488d70af Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 28 Jun 2024 00:08:12 +0530 Subject: [PATCH 02/12] Remove ucommented sections and replace NULL declarations with c() Signed-off-by: Abhinav Pandey --- modules/data.atmosphere/R/met.process.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index dc15d0395fa..cef7eca1619 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -283,7 +283,7 @@ met.process <- function(site, input_met, start_date, end_date, model, if (stage$standardize) { standardize_result <- list() is_standardized <- FALSE - ready.id <- list(input.id = NULL, dbfile.id = NULL) + ready.id <- list(input.id = c(), dbfile.id = c()) for (i in seq_along(cf.id[[1]])) { @@ -328,12 +328,6 @@ met.process <- function(site, input_met, start_date, end_date, model, } } # End for loop - # ready.id <- list(input.id = NULL, dbfile.id = NULL) - - # for (i in seq_along(standardize_result)) { - # ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) - # ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) - # } } else { ready.id <- input_met$id From 2a9e286a08e9466db359c3181e5313ab71a9684d Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 28 Jun 2024 16:22:54 +0530 Subject: [PATCH 03/12] Revert to default NULL instead of c() Signed-off-by: Abhinav Pandey --- modules/data.atmosphere/R/met.process.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index cef7eca1619..6d14a51d3ed 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -283,7 +283,7 @@ met.process <- function(site, input_met, start_date, end_date, model, if (stage$standardize) { standardize_result <- list() is_standardized <- FALSE - ready.id <- list(input.id = c(), dbfile.id = c()) + ready.id <- list(input.id = NULL, dbfile.id = NULL) for (i in seq_along(cf.id[[1]])) { From 72144036a676895b79cd93ccb22d0dc84c9ab8f4 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 28 Jun 2024 19:44:33 +0530 Subject: [PATCH 04/12] Add small modification to dbfiles.R Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index e0f9556779b..55cc17c5059 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -255,9 +255,9 @@ dbfile.input.check <- function(siteid, startdate = NULL, enddate = NULL, mimetyp } # setup parent part of query if specified - if (is.na(parentid)) { - parent <- "" - } else { + parent <- "" + + if (!is.na(parentid)) { parent <- paste0(" AND parent_id=", parentid) } From a61d38e5c995aa798f1b9e9dc74be8e263bfe03b Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 28 Jun 2024 20:47:25 +0530 Subject: [PATCH 05/12] Fix 'return' mistake in dbfiles.R Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index 55cc17c5059..95cd71873c7 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -251,7 +251,7 @@ dbfile.input.check <- function(siteid, startdate = NULL, enddate = NULL, mimetyp formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con) if (is.null(formatid)) { - invisible(data.frame()) + return(invisible(data.frame())) } # setup parent part of query if specified From da6955a7edd8148fb38e52157841498c87d06b90 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 10 Jul 2024 21:51:25 +0530 Subject: [PATCH 06/12] Omit return Type Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index 95cd71873c7..4a7f82ff90f 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -251,7 +251,7 @@ dbfile.input.check <- function(siteid, startdate = NULL, enddate = NULL, mimetyp formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con) if (is.null(formatid)) { - return(invisible(data.frame())) + (invisible(data.frame())) } # setup parent part of query if specified From 3eddf585ebf4b1052820369529daa380b64f17ab Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 11 Jul 2024 23:56:02 +0530 Subject: [PATCH 07/12] Update return statement in dbfiles.R Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index 4a7f82ff90f..784905feca4 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -251,7 +251,7 @@ dbfile.input.check <- function(siteid, startdate = NULL, enddate = NULL, mimetyp formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con) if (is.null(formatid)) { - (invisible(data.frame())) + return (invisible(data.frame())) } # setup parent part of query if specified From afaf73f50cc69bcff093c274f0f89b817a32b8b3 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Sat, 13 Jul 2024 23:49:37 +0530 Subject: [PATCH 08/12] Update invisible statements to `return` after execution Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 26 ++++++++++++------------- modules/data.atmosphere/R/met.process.R | 3 +++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index 784905feca4..ce337e6d243 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -59,9 +59,8 @@ dbfile.input.insert <- function(in.path, in.prefix, siteid, startdate, enddate, # setup parent part of query if specified - if (is.na(parentid)) { - parent <- "" - } else { + parent <- "" + if (!is.na(parentid)) { parent <- paste0(" AND parent_id=", parentid) } @@ -459,7 +458,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE # find appropriate pft pftid <- get.id(table = "pfts", values = "name", colnames = pft, con = con) if (is.null(pftid)) { - invisible(data.frame()) + return invisible(data.frame()) } # find appropriate format @@ -470,7 +469,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con) if (is.null(formatid)) { - invisible(data.frame()) + return invisible(data.frame()) } # find appropriate posterior @@ -482,7 +481,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE con = con )[["id"]] if (is.null(posteriorid)) { - invisible(data.frame()) + return invisible(data.frame()) } invisible(dbfile.check(type = "Posterior", container.id = posteriorid, con = con, hostname = hostname)) @@ -648,12 +647,12 @@ dbfile.file <- function(type, id, con, hostname = PEcAn.remote::fqdn()) { if (nrow(files) > 1) { PEcAn.logger::logger.warn("multiple files found for", id, "returned; using the first one found") - invisible(file.path(files[1, "file_path"], files[1, "file_name"])) + return invisible(file.path(files[1, "file_path"], files[1, "file_name"])) } else if (nrow(files) == 1) { - invisible(file.path(files[1, "file_path"], files[1, "file_name"])) + return invisible(file.path(files[1, "file_path"], files[1, "file_name"])) } else { PEcAn.logger::logger.warn("no files found for ", id, "in database") - invisible(NA) + return invisible(NA) } } @@ -671,7 +670,8 @@ dbfile.id <- function(type, file, con, hostname = PEcAn.remote::fqdn()) { # find appropriate host hostid <- db.query(query = paste0("SELECT id FROM machines WHERE hostname='", hostname, "'"), con = con)[["id"]] if (is.null(hostid)) { - invisible(NA) + PEcAn.logger::logger.warn("hostid not found in database") + return invisible(NA) } # find file @@ -689,12 +689,12 @@ dbfile.id <- function(type, file, con, hostname = PEcAn.remote::fqdn()) { if (nrow(ids) > 1) { PEcAn.logger::logger.warn("multiple ids found for", file, "returned; using the first one found") - invisible(ids[1, "container_id"]) + return invisible(ids[1, "container_id"]) } else if (nrow(ids) == 1) { - invisible(ids[1, "container_id"]) + return invisible(ids[1, "container_id"]) } else { PEcAn.logger::logger.warn("no id found for", file, "in database") - invisible(NA) + return invisible(NA) } } diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index 6d14a51d3ed..753c5a2dee3 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -325,6 +325,9 @@ met.process <- function(site, input_met, start_date, end_date, model, if(is_standardized) { ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) + } else { + ready.id$input.id <- c(ready.id$input.id, NA) + ready.id$dbfile.id <- c(ready.id$dbfile.id, NA) } } # End for loop From 0b3d0c4894d738dca09f8f63dc82e1a6f1e50066 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Sat, 13 Jul 2024 23:57:04 +0530 Subject: [PATCH 09/12] refactor return statements Signed-off-by: Abhinav Pandey --- base/db/R/dbfiles.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index ce337e6d243..498c1ed5702 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -458,7 +458,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE # find appropriate pft pftid <- get.id(table = "pfts", values = "name", colnames = pft, con = con) if (is.null(pftid)) { - return invisible(data.frame()) + return (invisible(data.frame())) } # find appropriate format @@ -469,7 +469,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con) if (is.null(formatid)) { - return invisible(data.frame()) + return (invisible(data.frame())) } # find appropriate posterior @@ -481,7 +481,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE con = con )[["id"]] if (is.null(posteriorid)) { - return invisible(data.frame()) + return (invisible(data.frame())) } invisible(dbfile.check(type = "Posterior", container.id = posteriorid, con = con, hostname = hostname)) @@ -647,12 +647,12 @@ dbfile.file <- function(type, id, con, hostname = PEcAn.remote::fqdn()) { if (nrow(files) > 1) { PEcAn.logger::logger.warn("multiple files found for", id, "returned; using the first one found") - return invisible(file.path(files[1, "file_path"], files[1, "file_name"])) + (invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else if (nrow(files) == 1) { - return invisible(file.path(files[1, "file_path"], files[1, "file_name"])) + (invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else { PEcAn.logger::logger.warn("no files found for ", id, "in database") - return invisible(NA) + invisible(NA) } } @@ -671,7 +671,7 @@ dbfile.id <- function(type, file, con, hostname = PEcAn.remote::fqdn()) { hostid <- db.query(query = paste0("SELECT id FROM machines WHERE hostname='", hostname, "'"), con = con)[["id"]] if (is.null(hostid)) { PEcAn.logger::logger.warn("hostid not found in database") - return invisible(NA) + return (invisible(NA)) } # find file @@ -689,12 +689,12 @@ dbfile.id <- function(type, file, con, hostname = PEcAn.remote::fqdn()) { if (nrow(ids) > 1) { PEcAn.logger::logger.warn("multiple ids found for", file, "returned; using the first one found") - return invisible(ids[1, "container_id"]) + invisible(ids[1, "container_id"]) } else if (nrow(ids) == 1) { - return invisible(ids[1, "container_id"]) + invisible(ids[1, "container_id"]) } else { PEcAn.logger::logger.warn("no id found for", file, "in database") - return invisible(NA) + invisible(NA) } } From 4f9320ecbf459abdcbf42af57dda0acfae4d427f Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 30 Aug 2024 06:28:11 +0530 Subject: [PATCH 10/12] Update base/db/R/dbfiles.R Co-authored-by: Chris Black --- base/db/R/dbfiles.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index 498c1ed5702..ed3398d2e3a 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -647,7 +647,7 @@ dbfile.file <- function(type, id, con, hostname = PEcAn.remote::fqdn()) { if (nrow(files) > 1) { PEcAn.logger::logger.warn("multiple files found for", id, "returned; using the first one found") - (invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) + return(invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else if (nrow(files) == 1) { (invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else { From e9367b50d542eae88b46d5442e83ca1bcb63dbb4 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 30 Aug 2024 06:29:15 +0530 Subject: [PATCH 11/12] Apply suggestions from code review Co-authored-by: Chris Black --- base/db/R/dbfiles.R | 4 ++-- modules/data.atmosphere/R/met.process.R | 17 +++++------------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R index ed3398d2e3a..90ffedf0115 100644 --- a/base/db/R/dbfiles.R +++ b/base/db/R/dbfiles.R @@ -649,10 +649,10 @@ dbfile.file <- function(type, id, con, hostname = PEcAn.remote::fqdn()) { PEcAn.logger::logger.warn("multiple files found for", id, "returned; using the first one found") return(invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else if (nrow(files) == 1) { - (invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) + return(invisible(file.path(files[1, "file_path"], files[1, "file_name"]))) } else { PEcAn.logger::logger.warn("no files found for ", id, "in database") - invisible(NA) + return(invisible(NA)) } } diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index 4cf37348e55..b0b5d781ec6 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -284,7 +284,6 @@ met.process <- function(site, input_met, start_date, end_date, model, # Change to Site Level - Standardized Met (i.e. ready for conversion to model specific format) if (stage$standardize) { standardize_result <- list() - is_standardized <- FALSE ready.id <- list(input.id = NULL, dbfile.id = NULL) for (i in seq_along(cf.id[[1]])) { @@ -305,10 +304,9 @@ met.process <- function(site, input_met, start_date, end_date, model, host = host, overwrite = overwrite$standardize) # Expand to support ensemble names in the future - is_standardized <- TRUE } else if (register$scale == "site") { ##### Site Level Processing - standardize_result[[i]] <- .metgapfill.module(cf.id = list(input.id = cf.id$input.id[i], dbfile.id = cf.id$dbfile.id[i]), + id_stdized <- .metgapfill.module(cf.id = list(input.id = cf.id$input.id[i], dbfile.id = cf.id$dbfile.id[i]), register = register, dir = dir, met = met, @@ -319,18 +317,13 @@ met.process <- function(site, input_met, start_date, end_date, model, host = host, overwrite = overwrite$standardize, ensemble_name = i) - is_standardized <- TRUE } else { - is_standardized <- FALSE + # No action taken. These ids will be dropped from ready.id + id_stdized <- NULL } - if(is_standardized) { - ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id) - ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id) - } else { - ready.id$input.id <- c(ready.id$input.id, NA) - ready.id$dbfile.id <- c(ready.id$dbfile.id, NA) - } + ready.id$input.id <- c(ready.id$input.id, id_stdized$input.id) + ready.id$dbfile.id <- c(ready.id$dbfile.id, id_stdized$dbfile.id) } # End for loop From 3ff0eb3a1b5462443a500b9ab2a78d84268ba38b Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Fri, 30 Aug 2024 08:14:36 +0530 Subject: [PATCH 12/12] Apply standardization changes Signed-off-by: Abhinav Pandey --- modules/data.atmosphere/R/met.process.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R index b0b5d781ec6..4b87668226c 100644 --- a/modules/data.atmosphere/R/met.process.R +++ b/modules/data.atmosphere/R/met.process.R @@ -283,14 +283,14 @@ met.process <- function(site, input_met, start_date, end_date, model, #--------------------------------------------------------------------------------------------------# # Change to Site Level - Standardized Met (i.e. ready for conversion to model specific format) if (stage$standardize) { - standardize_result <- list() + id_stdized <- list() ready.id <- list(input.id = NULL, dbfile.id = NULL) for (i in seq_along(cf.id[[1]])) { if (register$scale == "regional") { #### Site extraction - standardize_result[[i]] <- .extract.nc.module(cf.id = list(input.id = cf.id$container_id[i], + id_stdized <- .extract.nc.module(cf.id = list(input.id = cf.id$container_id[i], dbfile.id = cf.id$id[i]), register = register, dir = dir,