From 50cca2cea06cfa1c7ddc7a1956accad0ccf243bb Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Tue, 2 Jul 2024 16:46:00 -0400 Subject: [PATCH 01/12] v1 hourly downscale --- .../R/downscale_function_hrly.R | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 modules/assim.sequential/R/downscale_function_hrly.R diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R new file mode 100644 index 00000000000..ff340e16b3c --- /dev/null +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -0,0 +1,96 @@ +##' @title North America Downscale Function +##' @name NA_downscale_hrly +##' @author Harunobu Ishii +##' +##' @param nc_data In quotes, file path for .nc containing ensemble data. +##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". +##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00). +##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder +##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations +##' +##' @description This function uses the randomForest model. +##' +##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. 
+ + +NA_downscale_hrly <- function(nc_data, coords, date, covariates){ + + # Read the input data and site coordinates + input_data <- ncvar_get(nc_data, "NEE") + weights_rrel <- ncvar_get(nc_data, "weights_rrel") + + # Timereadable + time <- nc_data$dim$time$vals + time_units <- nc_data$dim$time$units + time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M") + time_readable <- time_origin + time * 3600 # Convert hours to seconds + + # Extract predictors from covariates raster using site coordinates + site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326") + index <- which(time_readable == date) + data <- input_data[index, , ] + carbon_data <- as.data.frame(data) + predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) + + # Arrange relative weights of each ensemble member over time and space/site + curr_weights_rrel <- weights_rrel[, , index] + names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data))) + colnames(curr_weights_rrel) <- paste0("ensemble",seq(1:ncol(curr_weights_rrel))) + + # Combine each ensemble member with all predictors + ensembles <- list() + for (i in seq_along(carbon_data)) { + ensembles[[i]] <- cbind(carbon_data[[i]], predictors) + } + + # Rename the carbon_data column for each ensemble member + for (i in 1:length(ensembles)) { + ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]") + } + + # Split the observations in each data frame into two data frames based on the proportion of 3/4 + ensembles <- lapply(ensembles, function(df) { + sample <- sample(1:nrow(df), size = round(0.75*nrow(df))) + train <- df[sample, ] + test <- df[-sample, ] + split_list <- list(train, test) + return(split_list) + }) + + # Rename the training and testing data frames for each ensemble member + for (i in 1:length(ensembles)) { + # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles))) + names(ensembles[[i]]) <- c("training", 
"testing") + } + + # Train a random forest model for each ensemble member using the training data + rf_output <- list() + for (i in 1:length(ensembles)) { + rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand, + data = ensembles[[i]][[1]], + ntree = 1000, + na.action = stats::na.omit, + keep.forest = T, + importance = T) + } + + # Generate predictions (maps) for each ensemble member using the trained models + maps <- list(ncol(rf_output)) + for (i in 1:length(rf_output)) { + maps[[i]] <- terra::predict(object = covariates, + model = rf_output[[i]],na.rm = T) + } + + # Organize the results into a single output list + downscale_output <- list(ensembles, rf_output, maps, curr_weights_rrel) + + # Rename each element of the output list with appropriate ensemble numbers + for (i in 1:(length(downscale_output)-1)) { + names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]]))) + } + + # Rename the main components of the output list + names(downscale_output) <- c("data", "models", "maps", "weights_rrel") + + return(downscale_output) +} From 95e9691b52dfa0249e0cf8401b07419d508141b7 Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 15 Jul 2024 16:19:42 -0400 Subject: [PATCH 02/12] changelog updated --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e69c58ee70..01f0e45d9b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha - Added new feature of preparing initial conditions for MODIS LAI, AGB, ISCN SOC, and soil moisture across NA anchor sites. - Added GEDI AGB preparation workflow. - Added new feature of downloading datasets from the NASA DAAC ORNL database. 
+- Extended downscale function and created 'downscale_hrly' so that it handles more frequent data. ### Fixed From a214027f721f4b5d7ddade2a5c6f6890ef71261a Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 15 Jul 2024 16:30:29 -0400 Subject: [PATCH 03/12] man file created --- modules/assim.sequential/NAMESPACE | 3 ++ .../R/downscale_function_hrly.R | 29 ++++++++++--------- .../assim.sequential/man/NA_downscale_hrly.Rd | 29 +++++++++++++++++++ 3 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 modules/assim.sequential/man/NA_downscale_hrly.Rd diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 8157eadc616..9a06bb92560 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -13,6 +13,7 @@ export(GEF.MultiSite) export(GEF.MultiSite.Nimble) export(GrabFillMatrix) export(Local.support) +export(NA_downscale_hrly) export(Obs.data.prepare.MultiSite) export(Prep_OBS_SDA) export(Remote_Sync_launcher) @@ -59,7 +60,9 @@ export(tobit_model_censored) export(y_star_create) import(furrr) import(lubridate) +import(ncdf4) import(nimble) +import(terra) importFrom(dplyr,"%>%") importFrom(lubridate,"%m+%") importFrom(magrittr,"%>%") diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index ff340e16b3c..3d6a10041ce 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -1,17 +1,18 @@ -##' @title North America Downscale Function -##' @name NA_downscale_hrly -##' @author Harunobu Ishii -##' -##' @param nc_data In quotes, file path for .nc containing ensemble data. -##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". -##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00). 
-##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder -##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations -##' -##' @description This function uses the randomForest model. -##' -##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. - +#' @title North America Downscale Function +#' @name NA_downscale_hrly +#' @author Harunobu Ishii +#' +#' @param nc_data In quotes, file path for .nc containing ensemble data. +#' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". +#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00). +#' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder +#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations +#' +#' @description This function uses the randomForest model. +#' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. 
+#' @import terra +#' @import ncdf4 +#' @export NA_downscale_hrly <- function(nc_data, coords, date, covariates){ diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd b/modules/assim.sequential/man/NA_downscale_hrly.Rd new file mode 100644 index 00000000000..1a8984575c7 --- /dev/null +++ b/modules/assim.sequential/man/NA_downscale_hrly.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/downscale_function_hrly.R +\name{NA_downscale_hrly} +\alias{NA_downscale_hrly} +\title{North America Downscale Function} +\usage{ +NA_downscale_hrly(nc_data, coords, date, covariates) +} +\arguments{ +\item{nc_data}{In quotes, file path for .nc containing ensemble data.} + +\item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".} + +\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).} + +\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder} +} +\value{ +It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. +} +\description{ +This function uses the randomForest model. 
+} +\details{ +This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations +} +\author{ +Harunobu Ishii +} From 502bf89310b624dc7bf471e0b39c09b74fe7c509 Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 15 Jul 2024 16:36:49 -0400 Subject: [PATCH 04/12] import package list updated --- modules/assim.sequential/NAMESPACE | 1 - modules/assim.sequential/R/downscale_function_hrly.R | 1 - 2 files changed, 2 deletions(-) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 9a06bb92560..1abf9dea8b9 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -62,7 +62,6 @@ import(furrr) import(lubridate) import(ncdf4) import(nimble) -import(terra) importFrom(dplyr,"%>%") importFrom(lubridate,"%m+%") importFrom(magrittr,"%>%") diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index 3d6a10041ce..758ea6af7fb 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -10,7 +10,6 @@ #' #' @description This function uses the randomForest model. #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. -#' @import terra #' @import ncdf4 #' @export From e143b38942f21ef13b7fc1ba2310cfd3f3b000dc Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Wed, 24 Jul 2024 14:31:46 -0400 Subject: [PATCH 05/12] Name added to CITATION --- CITATION.cff | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index e00ef7b29f6..7af92146298 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -120,7 +120,9 @@ authors: - given-names: Eric R. 
Scott affiliation: University of Arizona orcid: 'https://orcid.org/0000-0002-7430-7879' - + - given-names: Harunobu Ishii + affiliation: Boston University Software & Application Innovation Lab (SAIL) + preferred-citation: type: article title: Facilitating feedbacks between field measurements and ecosystem models From 85ed698cdc196b9aac130174c6ab8bea3716e64e Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 29 Jul 2024 09:22:43 -0400 Subject: [PATCH 06/12] Suggested change in namespace and file input style modified --- .../assim.sequential/R/downscale_function_hrly.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index 758ea6af7fb..3950d46830a 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -1,20 +1,18 @@ -#' @title North America Downscale Function -#' @name NA_downscale_hrly +#' SDA Downscale Function for Hourly Data +#' +#' This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations +#' #' @author Harunobu Ishii -#' -#' @param nc_data In quotes, file path for .nc containing ensemble data. +#' @param nc_file In quotes, file path for .nc containing ensemble data. #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". #' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00). #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. 
Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder -#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations -#' -#' @description This function uses the randomForest model. #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. #' @import ncdf4 #' @export -NA_downscale_hrly <- function(nc_data, coords, date, covariates){ - +SDA_downscale_hrly <- function(nc_file, coords, date, covariates){ + nc_data <- nc_open(nc_file) # Read the input data and site coordinates input_data <- ncvar_get(nc_data, "NEE") weights_rrel <- ncvar_get(nc_data, "weights_rrel") From c6bb0d022fb9fe68d3d77c20074154823c3c6b3c Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 29 Jul 2024 10:10:33 -0400 Subject: [PATCH 07/12] Time units uses lubridate --- .../assim.sequential/R/downscale_function_hrly.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index 3950d46830a..d05c31075ea 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -9,6 +9,7 @@ #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. 
#' @import ncdf4 +#' @import lubridate #' @export SDA_downscale_hrly <- function(nc_file, coords, date, covariates){ @@ -20,8 +21,16 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){ # Timereadable time <- nc_data$dim$time$vals time_units <- nc_data$dim$time$units - time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M") - time_readable <- time_origin + time * 3600 # Convert hours to seconds + time_origin_str <- substr(time_units, 12, 31) + time_origin <- ymd_hm(time_origin_str, tz="EST") + # Check if time units are in hours and convert appropriately + if (grepl("hours", time_units)) { + time_readable <- time_origin + dhours(time) + } else if (grepl("seconds", time_units)) { + time_readable <- time_origin + dseconds(time) + } else { + stop("Unsupported time units") + } # Extract predictors from covariates raster using site coordinates site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326") From 900acee8f5f368be27620963718aef2d075c9b0b Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 29 Jul 2024 14:28:10 -0400 Subject: [PATCH 08/12] downscale func takes time series --- modules/assim.sequential/NAMESPACE | 2 +- .../R/downscale_function_hrly.R | 122 +++++++++--------- ...ownscale_hrly.Rd => SDA_downscale_hrly.Rd} | 19 ++- 3 files changed, 67 insertions(+), 76 deletions(-) rename modules/assim.sequential/man/{NA_downscale_hrly.Rd => SDA_downscale_hrly.Rd} (68%) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 1abf9dea8b9..5984425da4e 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -13,12 +13,12 @@ export(GEF.MultiSite) export(GEF.MultiSite.Nimble) export(GrabFillMatrix) export(Local.support) -export(NA_downscale_hrly) export(Obs.data.prepare.MultiSite) export(Prep_OBS_SDA) export(Remote_Sync_launcher) export(SDA_OBS_Assembler) export(SDA_control) +export(SDA_downscale_hrly) 
export(SDA_remote_launcher) export(SDA_timeseries_plot) export(adj.ens) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index d05c31075ea..c89475e0c5f 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -12,18 +12,18 @@ #' @import lubridate #' @export -SDA_downscale_hrly <- function(nc_file, coords, date, covariates){ - nc_data <- nc_open(nc_file) +SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ + # Read the input data and site coordinates + nc_data <- nc_open(nc_file) input_data <- ncvar_get(nc_data, "NEE") - weights_rrel <- ncvar_get(nc_data, "weights_rrel") + covariate_names <- names(covariates) # Timereadable time <- nc_data$dim$time$vals time_units <- nc_data$dim$time$units time_origin_str <- substr(time_units, 12, 31) time_origin <- ymd_hm(time_origin_str, tz="EST") - # Check if time units are in hours and convert appropriately if (grepl("hours", time_units)) { time_readable <- time_origin + dhours(time) } else if (grepl("seconds", time_units)) { @@ -34,70 +34,64 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){ # Extract predictors from covariates raster using site coordinates site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326") - index <- which(time_readable == date) - data <- input_data[index, , ] - carbon_data <- as.data.frame(data) predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) - # Arrange relative weights of each ensemble member over time and space/site - curr_weights_rrel <- weights_rrel[, , index] - names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data))) - colnames(curr_weights_rrel) <- paste0("ensemble",seq(1:ncol(curr_weights_rrel))) - - # Combine each ensemble member with all predictors - ensembles <- list() - for (i in seq_along(carbon_data)) { - ensembles[[i]] <- 
cbind(carbon_data[[i]], predictors) - } - - # Rename the carbon_data column for each ensemble member - for (i in 1:length(ensembles)) { - ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]") - } - - # Split the observations in each data frame into two data frames based on the proportion of 3/4 - ensembles <- lapply(ensembles, function(df) { - sample <- sample(1:nrow(df), size = round(0.75*nrow(df))) - train <- df[sample, ] - test <- df[-sample, ] - split_list <- list(train, test) - return(split_list) - }) - - # Rename the training and testing data frames for each ensemble member - for (i in 1:length(ensembles)) { - # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles))) - names(ensembles[[i]]) <- c("training", "testing") - } - - # Train a random forest model for each ensemble member using the training data - rf_output <- list() - for (i in 1:length(ensembles)) { - rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand, - data = ensembles[[i]][[1]], - ntree = 1000, - na.action = stats::na.omit, - keep.forest = T, - importance = T) - } - - # Generate predictions (maps) for each ensemble member using the trained models - maps <- list(ncol(rf_output)) - for (i in 1:length(rf_output)) { - maps[[i]] <- terra::predict(object = covariates, - model = rf_output[[i]],na.rm = T) - } + downscale_output<- list() - # Organize the results into a single output list - downscale_output <- list(ensembles, rf_output, maps, curr_weights_rrel) + # Train & Test split + sample <- sample(1:nrow(predictors), size = round(0.75*nrow(predictors))) - # Rename each element of the output list with appropriate ensemble numbers - for (i in 1:(length(downscale_output)-1)) { - names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]]))) + # Predict for each time stamp of the year selected + time_indices <- which(year(time_readable) == yyyy) + for 
(index in time_indices) { + if(index == 37986){ + break + } + data <- input_data[index, , ] + carbon_data <- as.data.frame(data) + names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data))) + + # Combine carbon data and covariates/predictors and split into training/test + full_data <- cbind(carbon_data, predictors) + train_data <- full_data[sample, ] + test_data <- full_data[-sample, ] + + # Combine each ensemble member with all predictors + models <- list() + maps <- list() + predictions <- list() + ensembles <- list() + for (i in seq_along(carbon_data)) { + ensemble_col <- paste0("ensemble", i) + formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + "))) + models[[i]] <- randomForest::randomForest(formula, + data = train_data, + ntree = 1000, + na.action = stats::na.omit, + keep.forest = TRUE, + importance = TRUE) + + maps[[i]] <- terra::predict(covariates, model = models[[i]], na.rm = TRUE) + predictions[[i]] <- stats::predict(models[[i]], test_data) + } + + # Organize the results into a single output list + curr_downscaled <- list( data = list(training = train_data, testing = test_data), + models = models, + maps = maps, + predictions = predictions + ) + + # Rename each element of the output list with appropriate ensemble numbers + for (i in 1:length(curr_downscaled$data)) { + names(curr_downscaled$data[[i]]) <- paste0("ensemble", seq(1:ncol(carbon_data))) + } + names(curr_downscaled$models) <- paste0("ensemble", seq(1:ncol(carbon_data))) + names(curr_downscaled$maps) <- paste0("ensemble", seq(1:ncol(carbon_data))) + names(curr_downscaled$predictions) <- paste0("ensemble", seq(1:ncol(carbon_data))) + + downscale_output[[as.character(time_readable[index])]]<-curr_downscaled } - - # Rename the main components of the output list - names(downscale_output) <- c("data", "models", "maps", "weights_rrel") - + nc_close(nc_data) return(downscale_output) } diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd 
b/modules/assim.sequential/man/SDA_downscale_hrly.Rd similarity index 68% rename from modules/assim.sequential/man/NA_downscale_hrly.Rd rename to modules/assim.sequential/man/SDA_downscale_hrly.Rd index 1a8984575c7..1b9b66212ed 100644 --- a/modules/assim.sequential/man/NA_downscale_hrly.Rd +++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd @@ -1,28 +1,25 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/downscale_function_hrly.R -\name{NA_downscale_hrly} -\alias{NA_downscale_hrly} -\title{North America Downscale Function} +\name{SDA_downscale_hrly} +\alias{SDA_downscale_hrly} +\title{SDA Downscale Function for Hourly Data} \usage{ -NA_downscale_hrly(nc_data, coords, date, covariates) +SDA_downscale_hrly(nc_file, coords, yyyy, covariates) } \arguments{ -\item{nc_data}{In quotes, file path for .nc containing ensemble data.} +\item{nc_file}{In quotes, file path for .nc containing ensemble data.} \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".} -\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).} - \item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder} + +\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).} } \value{ It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. } \description{ -This function uses the randomForest model. 
-} -\details{ -This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations +This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations } \author{ Harunobu Ishii From 8c4234ad7b4b496ad2d16bad8f4f7931e6a7ed7c Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 29 Jul 2024 14:29:32 -0400 Subject: [PATCH 09/12] downscale func takes time series --- modules/assim.sequential/R/downscale_function_hrly.R | 2 +- modules/assim.sequential/man/SDA_downscale_hrly.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index c89475e0c5f..b76caac31f3 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -5,7 +5,7 @@ #' @author Harunobu Ishii #' @param nc_file In quotes, file path for .nc containing ensemble data. #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat". -#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00). +#' @param yyyy In string, format is yyyy(year of interest) #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. 
#' @import ncdf4 diff --git a/modules/assim.sequential/man/SDA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd index 1b9b66212ed..6cfe7eba902 100644 --- a/modules/assim.sequential/man/SDA_downscale_hrly.Rd +++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd @@ -11,9 +11,9 @@ SDA_downscale_hrly(nc_file, coords, yyyy, covariates) \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".} -\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder} +\item{yyyy}{In string, format is yyyy(year of interest)} -\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).} +\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder} } \value{ It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. 
From bfeb8e577033a038481a3d92d80ff50e848c7da5 Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Mon, 29 Jul 2024 14:32:48 -0400 Subject: [PATCH 10/12] Time Zone Checked --- .../assim.sequential/R/downscale_function_hrly.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index b76caac31f3..01f18904f00 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -19,11 +19,22 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ input_data <- ncvar_get(nc_data, "NEE") covariate_names <- names(covariates) - # Timereadable + + # Extract time and units time <- nc_data$dim$time$vals time_units <- nc_data$dim$time$units time_origin_str <- substr(time_units, 12, 31) - time_origin <- ymd_hm(time_origin_str, tz="EST") + + # Check if timezone is specified in the time units string + if (grepl("UTC|GMT", time_units)) { + time_origin <- ymd_hm(time_origin_str, tz = "UTC") + } else if (grepl("EST", time_units)) { + time_origin <- ymd_hm(time_origin_str, tz = "EST") + } else { + time_origin <- ymd_hm(time_origin_str, tz = "UTC") # Default to UTC if not specified + } + + # Timereadable if (grepl("hours", time_units)) { time_readable <- time_origin + dhours(time) } else if (grepl("seconds", time_units)) { From f56fd9c6a4ca9dd56700f65ac25744a638ed09bf Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Wed, 31 Jul 2024 14:14:58 -0400 Subject: [PATCH 11/12] Updated downscale based on comment --- .../assim.sequential/R/downscale_function_hrly.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index 01f18904f00..70b09e6fc7d 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ 
b/modules/assim.sequential/R/downscale_function_hrly.R @@ -8,14 +8,13 @@ #' @param yyyy In string, format is yyyy(year of interest) #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member. -#' @import ncdf4 -#' @import lubridate #' @export SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ # Read the input data and site coordinates - nc_data <- nc_open(nc_file) + nc_data <- ncdf4::nc_open(nc_file) + on.exit(ncdf4::nc_close(nc_data)) input_data <- ncvar_get(nc_data, "NEE") covariate_names <- names(covariates) @@ -27,11 +26,11 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ # Check if timezone is specified in the time units string if (grepl("UTC|GMT", time_units)) { - time_origin <- ymd_hm(time_origin_str, tz = "UTC") + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC") } else if (grepl("EST", time_units)) { - time_origin <- ymd_hm(time_origin_str, tz = "EST") + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "EST") } else { - time_origin <- ymd_hm(time_origin_str, tz = "UTC") # Default to UTC if not specified + time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC") # Default to UTC if not specified } # Timereadable @@ -55,9 +54,6 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ # Predict for each time stamp of the year selected time_indices <- which(year(time_readable) == yyyy) for (index in time_indices) { - if(index == 37986){ - break - } data <- input_data[index, , ] carbon_data <- as.data.frame(data) names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data))) @@ -103,6 +99,5 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ 
downscale_output[[as.character(time_readable[index])]]<-curr_downscaled } - nc_close(nc_data) return(downscale_output) } From 6ca6b651cc2cd90dc78af01865738581edeb1014 Mon Sep 17 00:00:00 2001 From: Harunobu Ishii Date: Wed, 31 Jul 2024 17:04:42 -0400 Subject: [PATCH 12/12] name space added --- modules/assim.sequential/R/downscale_function_hrly.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R index 70b09e6fc7d..25da4c62150 100644 --- a/modules/assim.sequential/R/downscale_function_hrly.R +++ b/modules/assim.sequential/R/downscale_function_hrly.R @@ -15,7 +15,7 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ # Read the input data and site coordinates nc_data <- ncdf4::nc_open(nc_file) on.exit(ncdf4::nc_close(nc_data)) - input_data <- ncvar_get(nc_data, "NEE") + input_data <- ncdf4::ncvar_get(nc_data, "NEE") covariate_names <- names(covariates) @@ -35,9 +35,9 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){ # Timereadable if (grepl("hours", time_units)) { - time_readable <- time_origin + dhours(time) + time_readable <- time_origin + lubridate::dhours(time) } else if (grepl("seconds", time_units)) { - time_readable <- time_origin + dseconds(time) + time_readable <- time_origin + lubridate::dseconds(time) } else { stop("Unsupported time units") }