From 50cca2cea06cfa1c7ddc7a1956accad0ccf243bb Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Tue, 2 Jul 2024 16:46:00 -0400
Subject: [PATCH 01/12] v1 hourly downscale

---
 .../R/downscale_function_hrly.R               | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 modules/assim.sequential/R/downscale_function_hrly.R

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
new file mode 100644
index 00000000000..ff340e16b3c
--- /dev/null
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -0,0 +1,96 @@
+##' @title North America Downscale Function
+##' @name NA_downscale_hrly
+##' @author Harunobu Ishii
+##'
+##' @param nc_data  In quotes, file path for .nc containing ensemble data.
+##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
+##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+##'
+##' @description This function uses the randomForest model.
+##'
+##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+
+
+NA_downscale_hrly <- function(nc_data, coords, date, covariates){
+  
+  # Read the input data and site coordinates
+  input_data <- ncvar_get(nc_data, "NEE")
+  weights_rrel <- ncvar_get(nc_data, "weights_rrel")
+  
+  # Timereadable
+  time <- nc_data$dim$time$vals
+  time_units <- nc_data$dim$time$units
+  time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M")
+  time_readable <- time_origin + time * 3600  # Convert hours to seconds
+  
+  # Extract predictors from covariates raster using site coordinates
+  site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
+  index <- which(time_readable == date)
+  data <- input_data[index, , ]
+  carbon_data <- as.data.frame(data)
+  predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) 
+
+  # Arrange relative weights of each ensemble member over time and space/site
+  curr_weights_rrel <- weights_rrel[, , index]
+  names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
+  colnames(curr_weights_rrel) <- paste0("ensemble",seq(1:ncol(curr_weights_rrel)))
+  
+  # Combine each ensemble member with all predictors
+  ensembles <- list()
+  for (i in seq_along(carbon_data)) {
+    ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
+  }
+  
+  # Rename the carbon_data column for each ensemble member
+  for (i in 1:length(ensembles)) {
+    ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
+  }
+  
+  # Split the observations in each data frame into two data frames based on the proportion of 3/4
+  ensembles <- lapply(ensembles, function(df) {
+    sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
+    train  <- df[sample, ]
+    test   <- df[-sample, ]
+    split_list <- list(train, test)
+    return(split_list)
+  })
+  
+  # Rename the training and testing data frames for each ensemble member
+  for (i in 1:length(ensembles)) {
+    # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
+    names(ensembles[[i]]) <- c("training", "testing")
+  }
+  
+  # Train a random forest model for each ensemble member using the training data
+  rf_output <- list()
+  for (i in 1:length(ensembles)) {
+    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
+                                                 data = ensembles[[i]][[1]],
+                                                 ntree = 1000,
+                                                 na.action = stats::na.omit,
+                                                 keep.forest = T,
+                                                 importance = T)
+  }
+  
+  # Generate predictions (maps) for each ensemble member using the trained models
+  maps <- list(ncol(rf_output))
+  for (i in 1:length(rf_output)) {
+    maps[[i]] <- terra::predict(object = covariates,
+                                model = rf_output[[i]],na.rm = T)
+  }
+  
+  # Organize the results into a single output list
+  downscale_output <- list(ensembles, rf_output, maps, curr_weights_rrel)
+  
+  # Rename each element of the output list with appropriate ensemble numbers
+  for (i in 1:(length(downscale_output)-1)) {
+    names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
+  }
+  
+  # Rename the main components of the output list
+  names(downscale_output) <- c("data", "models", "maps", "weights_rrel")
+  
+  return(downscale_output)
+}

From 95e9691b52dfa0249e0cf8401b07419d508141b7 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:19:42 -0400
Subject: [PATCH 02/12] changelog updated

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e69c58ee70..01f0e45d9b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 - Added new feature of preparing initial conditions for MODIS LAI, AGB, ISCN SOC, and soil moisture across NA anchor sites.
 - Added GEDI AGB preparation workflow.
 - Added new feature of downloading datasets from the NASA DAAC ORNL database.
+- Extended downscale function and created 'downscale_hrly' so that it handles more frequent data
 
 ### Fixed
 

From a214027f721f4b5d7ddade2a5c6f6890ef71261a Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:30:29 -0400
Subject: [PATCH 03/12] man file created

---
 modules/assim.sequential/NAMESPACE            |  3 ++
 .../R/downscale_function_hrly.R               | 29 ++++++++++---------
 .../assim.sequential/man/NA_downscale_hrly.Rd | 29 +++++++++++++++++++
 3 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 modules/assim.sequential/man/NA_downscale_hrly.Rd

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 8157eadc616..9a06bb92560 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -13,6 +13,7 @@ export(GEF.MultiSite)
 export(GEF.MultiSite.Nimble)
 export(GrabFillMatrix)
 export(Local.support)
+export(NA_downscale_hrly)
 export(Obs.data.prepare.MultiSite)
 export(Prep_OBS_SDA)
 export(Remote_Sync_launcher)
@@ -59,7 +60,9 @@ export(tobit_model_censored)
 export(y_star_create)
 import(furrr)
 import(lubridate)
+import(ncdf4)
 import(nimble)
+import(terra)
 importFrom(dplyr,"%>%")
 importFrom(lubridate,"%m+%")
 importFrom(magrittr,"%>%")
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index ff340e16b3c..3d6a10041ce 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -1,17 +1,18 @@
-##' @title North America Downscale Function
-##' @name NA_downscale_hrly
-##' @author Harunobu Ishii
-##'
-##' @param nc_data  In quotes, file path for .nc containing ensemble data.
-##' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
-##' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
-##' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-##' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
-##'
-##' @description This function uses the randomForest model.
-##'
-##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-
+#' @title North America Downscale Function
+#' @name NA_downscale_hrly
+#' @author Harunobu Ishii
+#'
+#' @param nc_data  In quotes, file path for .nc containing ensemble data.
+#' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
+#' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
+#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+#'
+#' @description This function uses the randomForest model.
+#' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+#' @import terra
+#' @import ncdf4
+#' @export
 
 NA_downscale_hrly <- function(nc_data, coords, date, covariates){
   
diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd b/modules/assim.sequential/man/NA_downscale_hrly.Rd
new file mode 100644
index 00000000000..1a8984575c7
--- /dev/null
+++ b/modules/assim.sequential/man/NA_downscale_hrly.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downscale_function_hrly.R
+\name{NA_downscale_hrly}
+\alias{NA_downscale_hrly}
+\title{North America Downscale Function}
+\usage{
+NA_downscale_hrly(nc_data, coords, date, covariates)
+}
+\arguments{
+\item{nc_data}{In quotes, file path for .nc containing ensemble data.}
+
+\item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
+
+\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
+
+\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+}
+\value{
+It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+}
+\description{
+This function uses the randomForest model.
+}
+\details{
+This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+}
+\author{
+Harunobu Ishii
+}

From 502bf89310b624dc7bf471e0b39c09b74fe7c509 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 15 Jul 2024 16:36:49 -0400
Subject: [PATCH 04/12] import package list updated

---
 modules/assim.sequential/NAMESPACE                   | 1 -
 modules/assim.sequential/R/downscale_function_hrly.R | 1 -
 2 files changed, 2 deletions(-)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 9a06bb92560..1abf9dea8b9 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -62,7 +62,6 @@ import(furrr)
 import(lubridate)
 import(ncdf4)
 import(nimble)
-import(terra)
 importFrom(dplyr,"%>%")
 importFrom(lubridate,"%m+%")
 importFrom(magrittr,"%>%")
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 3d6a10041ce..758ea6af7fb 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -10,7 +10,6 @@
 #'
 #' @description This function uses the randomForest model.
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-#' @import terra
 #' @import ncdf4
 #' @export
 

From e143b38942f21ef13b7fc1ba2310cfd3f3b000dc Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 24 Jul 2024 14:31:46 -0400
Subject: [PATCH 05/12] Name added to CITATION

---
 CITATION.cff | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CITATION.cff b/CITATION.cff
index e00ef7b29f6..7af92146298 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -120,7 +120,9 @@ authors:
   - given-names: Eric R. Scott
     affiliation: University of Arizona
     orcid: 'https://orcid.org/0000-0002-7430-7879'
-  
+  - given-names: Harunobu Ishii
+    affiliation: Boston University Software & Application Innovation Lab(SAIL)
+
 preferred-citation:
   type: article
   title: Facilitating feedbacks between field measurements and ecosystem models

From 85ed698cdc196b9aac130174c6ab8bea3716e64e Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 09:22:43 -0400
Subject: [PATCH 06/12] Suggested change in namespace and file input style
 modified

---
 .../assim.sequential/R/downscale_function_hrly.R | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 758ea6af7fb..3950d46830a 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -1,20 +1,18 @@
-#' @title North America Downscale Function
-#' @name NA_downscale_hrly
+#' SDA Downscale Function for Hourly Data
+#' 
+#' This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+#' 
 #' @author Harunobu Ishii
-#'
-#' @param nc_data  In quotes, file path for .nc containing ensemble data.
+#' @param nc_file  In quotes, file path for .nc containing ensemble data.
 #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
 #' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
-#' @details This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
-#'
-#' @description This function uses the randomForest model.
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
 #' @export
 
-NA_downscale_hrly <- function(nc_data, coords, date, covariates){
-  
+SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
+  nc_data <- nc_open(nc_file)
   # Read the input data and site coordinates
   input_data <- ncvar_get(nc_data, "NEE")
   weights_rrel <- ncvar_get(nc_data, "weights_rrel")

From c6bb0d022fb9fe68d3d77c20074154823c3c6b3c Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 10:10:33 -0400
Subject: [PATCH 07/12] Time units uses lubridate

---
 .../assim.sequential/R/downscale_function_hrly.R    | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 3950d46830a..d05c31075ea 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -9,6 +9,7 @@
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
+#' @import lubridate
 #' @export
 
 SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
@@ -20,8 +21,16 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
   # Timereadable
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
-  time_origin <- as.POSIXct(substr(time_units, 12, 31), format="%Y-%m-%dT%H:%M")
-  time_readable <- time_origin + time * 3600  # Convert hours to seconds
+  time_origin_str <- substr(time_units, 12, 31)
+  time_origin <- ymd_hm(time_origin_str, tz="EST")
+  # Check if time units are in hours and convert appropriately
+  if (grepl("hours", time_units)) {
+    time_readable <- time_origin + dhours(time)
+  } else if (grepl("seconds", time_units)) {
+    time_readable <- time_origin + dseconds(time)
+  } else {
+    stop("Unsupported time units")
+  }
   
   # Extract predictors from covariates raster using site coordinates
   site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")

From 900acee8f5f368be27620963718aef2d075c9b0b Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:28:10 -0400
Subject: [PATCH 08/12] downscale func takes time series

---
 modules/assim.sequential/NAMESPACE            |   2 +-
 .../R/downscale_function_hrly.R               | 122 +++++++++---------
 ...ownscale_hrly.Rd => SDA_downscale_hrly.Rd} |  19 ++-
 3 files changed, 67 insertions(+), 76 deletions(-)
 rename modules/assim.sequential/man/{NA_downscale_hrly.Rd => SDA_downscale_hrly.Rd} (68%)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 1abf9dea8b9..5984425da4e 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -13,12 +13,12 @@ export(GEF.MultiSite)
 export(GEF.MultiSite.Nimble)
 export(GrabFillMatrix)
 export(Local.support)
-export(NA_downscale_hrly)
 export(Obs.data.prepare.MultiSite)
 export(Prep_OBS_SDA)
 export(Remote_Sync_launcher)
 export(SDA_OBS_Assembler)
 export(SDA_control)
+export(SDA_downscale_hrly)
 export(SDA_remote_launcher)
 export(SDA_timeseries_plot)
 export(adj.ens)
diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index d05c31075ea..c89475e0c5f 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -12,18 +12,18 @@
 #' @import lubridate
 #' @export
 
-SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
-  nc_data <- nc_open(nc_file)
+SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
+  
   # Read the input data and site coordinates
+  nc_data <- nc_open(nc_file)
   input_data <- ncvar_get(nc_data, "NEE")
-  weights_rrel <- ncvar_get(nc_data, "weights_rrel")
+  covariate_names <- names(covariates)
   
   # Timereadable
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
   time_origin_str <- substr(time_units, 12, 31)
   time_origin <- ymd_hm(time_origin_str, tz="EST")
-  # Check if time units are in hours and convert appropriately
   if (grepl("hours", time_units)) {
     time_readable <- time_origin + dhours(time)
   } else if (grepl("seconds", time_units)) {
@@ -34,70 +34,64 @@ SDA_downscale_hrly <- function(nc_file, coords, date, covariates){
   
   # Extract predictors from covariates raster using site coordinates
   site_coordinates <- terra::vect(readr::read_csv(coords), geom=c("lon", "lat"), crs="EPSG:4326")
-  index <- which(time_readable == date)
-  data <- input_data[index, , ]
-  carbon_data <- as.data.frame(data)
   predictors <- as.data.frame(terra::extract(covariates, site_coordinates,ID = FALSE)) 
 
-  # Arrange relative weights of each ensemble member over time and space/site
-  curr_weights_rrel <- weights_rrel[, , index]
-  names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
-  colnames(curr_weights_rrel) <- paste0("ensemble",seq(1:ncol(curr_weights_rrel)))
-  
-  # Combine each ensemble member with all predictors
-  ensembles <- list()
-  for (i in seq_along(carbon_data)) {
-    ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
-  }
-  
-  # Rename the carbon_data column for each ensemble member
-  for (i in 1:length(ensembles)) {
-    ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
-  }
-  
-  # Split the observations in each data frame into two data frames based on the proportion of 3/4
-  ensembles <- lapply(ensembles, function(df) {
-    sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
-    train  <- df[sample, ]
-    test   <- df[-sample, ]
-    split_list <- list(train, test)
-    return(split_list)
-  })
-  
-  # Rename the training and testing data frames for each ensemble member
-  for (i in 1:length(ensembles)) {
-    # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
-    names(ensembles[[i]]) <- c("training", "testing")
-  }
-  
-  # Train a random forest model for each ensemble member using the training data
-  rf_output <- list()
-  for (i in 1:length(ensembles)) {
-    rf_output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
-                                                 data = ensembles[[i]][[1]],
-                                                 ntree = 1000,
-                                                 na.action = stats::na.omit,
-                                                 keep.forest = T,
-                                                 importance = T)
-  }
-  
-  # Generate predictions (maps) for each ensemble member using the trained models
-  maps <- list(ncol(rf_output))
-  for (i in 1:length(rf_output)) {
-    maps[[i]] <- terra::predict(object = covariates,
-                                model = rf_output[[i]],na.rm = T)
-  }
+  downscale_output<- list()
   
-  # Organize the results into a single output list
-  downscale_output <- list(ensembles, rf_output, maps, curr_weights_rrel)
+  # Train & Test split
+  sample <- sample(1:nrow(predictors), size = round(0.75*nrow(predictors)))
   
-  # Rename each element of the output list with appropriate ensemble numbers
-  for (i in 1:(length(downscale_output)-1)) {
-    names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
+  # Predict for each time stamp of the year selected
+  time_indices <- which(year(time_readable) == yyyy)
+  for (index in time_indices) {
+    if(index == 37986){
+      break
+    }
+    data <- input_data[index, , ]
+    carbon_data <- as.data.frame(data)
+    names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
+
+    # Combine carbon data and covariates/predictors and split into training/test
+    full_data <- cbind(carbon_data, predictors)
+    train_data <- full_data[sample, ]
+    test_data <- full_data[-sample, ]
+    
+    # Combine each ensemble member with all predictors
+    models <- list()
+    maps <- list()
+    predictions <- list()
+    ensembles <- list()
+    for (i in seq_along(carbon_data)) {
+      ensemble_col <- paste0("ensemble", i)
+      formula <- stats::as.formula(paste(ensemble_col, "~", paste(covariate_names, collapse = " + ")))
+      models[[i]] <- randomForest::randomForest(formula,
+                                                data = train_data,
+                                                ntree = 1000,
+                                                na.action = stats::na.omit,
+                                                keep.forest = TRUE,
+                                                importance = TRUE)
+      
+      maps[[i]] <- terra::predict(covariates, model = models[[i]], na.rm = TRUE)
+      predictions[[i]] <- stats::predict(models[[i]], test_data)
+    }
+
+    # Organize the results into a single output list
+    curr_downscaled <- list( data = list(training = train_data, testing = test_data),
+                             models = models,
+                             maps = maps,
+                             predictions = predictions
+                            )
+    
+    # Rename each element of the output list with appropriate ensemble numbers
+    for (i in 1:length(curr_downscaled$data)) {
+      names(curr_downscaled$data[[i]]) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    }
+    names(curr_downscaled$models) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    names(curr_downscaled$maps) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    names(curr_downscaled$predictions) <- paste0("ensemble", seq(1:ncol(carbon_data)))
+    
+    downscale_output[[as.character(time_readable[index])]]<-curr_downscaled
   }
-  
-  # Rename the main components of the output list
-  names(downscale_output) <- c("data", "models", "maps", "weights_rrel")
-  
+  nc_close(nc_data)
   return(downscale_output)
 }
diff --git a/modules/assim.sequential/man/NA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
similarity index 68%
rename from modules/assim.sequential/man/NA_downscale_hrly.Rd
rename to modules/assim.sequential/man/SDA_downscale_hrly.Rd
index 1a8984575c7..1b9b66212ed 100644
--- a/modules/assim.sequential/man/NA_downscale_hrly.Rd
+++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
@@ -1,28 +1,25 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/downscale_function_hrly.R
-\name{NA_downscale_hrly}
-\alias{NA_downscale_hrly}
-\title{North America Downscale Function}
+\name{SDA_downscale_hrly}
+\alias{SDA_downscale_hrly}
+\title{SDA　Downscale Function for Hourly Data}
 \usage{
-NA_downscale_hrly(nc_data, coords, date, covariates)
+SDA_downscale_hrly(nc_file, coords, yyyy, covariates)
 }
 \arguments{
-\item{nc_data}{In quotes, file path for .nc containing ensemble data.}
+\item{nc_file}{In quotes, file path for .nc containing ensemble data.}
 
 \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
 
-\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
-
 \item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+
+\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
 }
 \value{
 It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 }
 \description{
-This function uses the randomForest model.
-}
-\details{
-This function will downscale forecast data (hourly) to unmodeled locations using covariates and site locations
+This function uses the randomForest model to downscale forecast data (hourly) to unmodeled locations using covariates and site locations
 }
 \author{
 Harunobu Ishii

From 8c4234ad7b4b496ad2d16bad8f4f7931e6a7ed7c Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:29:32 -0400
Subject: [PATCH 09/12] document yyyy param replacing date in downscale func

---
 modules/assim.sequential/R/downscale_function_hrly.R | 2 +-
 modules/assim.sequential/man/SDA_downscale_hrly.Rd   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index c89475e0c5f..b76caac31f3 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -5,7 +5,7 @@
 #' @author Harunobu Ishii
 #' @param nc_file  In quotes, file path for .nc containing ensemble data.
 #' @param coords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
-#' @param date In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).
+#' @param yyyy In string, format is yyyy(year of interest)
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
 #' @import ncdf4
diff --git a/modules/assim.sequential/man/SDA_downscale_hrly.Rd b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
index 1b9b66212ed..6cfe7eba902 100644
--- a/modules/assim.sequential/man/SDA_downscale_hrly.Rd
+++ b/modules/assim.sequential/man/SDA_downscale_hrly.Rd
@@ -11,9 +11,9 @@ SDA_downscale_hrly(nc_file, coords, yyyy, covariates)
 
 \item{coords}{In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".}
 
-\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
+\item{yyyy}{In string, format is yyyy(year of interest)}
 
-\item{date}{In quotes, format is yyyy-mm-dd hh:mm:ss EST. Restricted to time within file supplied to 'data' (hours since 1986-01-01T00:00).}
+\item{covariates}{SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder}
 }
 \value{
 It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.

From bfeb8e577033a038481a3d92d80ff50e848c7da5 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Mon, 29 Jul 2024 14:32:48 -0400
Subject: [PATCH 10/12] Time Zone Checked

---
 .../assim.sequential/R/downscale_function_hrly.R  | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index b76caac31f3..01f18904f00 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -19,11 +19,22 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   input_data <- ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
-  # Timereadable
+  
+  # Extract time and units
   time <- nc_data$dim$time$vals
   time_units <- nc_data$dim$time$units
   time_origin_str <- substr(time_units, 12, 31)
-  time_origin <- ymd_hm(time_origin_str, tz="EST")
+  
+  # Check if timezone is specified in the time units string
+  if (grepl("UTC|GMT", time_units)) {
+    time_origin <- ymd_hm(time_origin_str, tz = "UTC")
+  } else if (grepl("EST", time_units)) {
+    time_origin <- ymd_hm(time_origin_str, tz = "EST")
+  } else {
+    time_origin <- ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
+  }
+  
+  # Timereadable
   if (grepl("hours", time_units)) {
     time_readable <- time_origin + dhours(time)
   } else if (grepl("seconds", time_units)) {

From f56fd9c6a4ca9dd56700f65ac25744a638ed09bf Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 31 Jul 2024 14:14:58 -0400
Subject: [PATCH 11/12] Updated downscale based on comment

---
 .../assim.sequential/R/downscale_function_hrly.R  | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 01f18904f00..70b09e6fc7d 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -8,14 +8,13 @@
 #' @param yyyy In string, format is yyyy(year of interest)
 #' @param covariates SpatRaster stack, used as predictors in randomForest. Layers within stack should be named. Recommended that this stack be generated using 'covariates' instructions in assim.sequential/inst folder
 #' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
-#' @import ncdf4
-#' @import lubridate
 #' @export
 
 SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Read the input data and site coordinates
-  nc_data <- nc_open(nc_file)
+  nc_data <- ncdf4::nc_open(nc_file)
+  on.exit(ncdf4::nc_close(nc_data))
   input_data <- ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
@@ -27,11 +26,11 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Check if timezone is specified in the time units string
   if (grepl("UTC|GMT", time_units)) {
-    time_origin <- ymd_hm(time_origin_str, tz = "UTC")
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC")
   } else if (grepl("EST", time_units)) {
-    time_origin <- ymd_hm(time_origin_str, tz = "EST")
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "EST")
   } else {
-    time_origin <- ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
+    time_origin <- lubridate::ymd_hm(time_origin_str, tz = "UTC")  # Default to UTC if not specified
   }
   
   # Timereadable
@@ -55,9 +54,6 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   # Predict for each time stamp of the year selected
   time_indices <- which(year(time_readable) == yyyy)
   for (index in time_indices) {
-    if(index == 37986){
-      break
-    }
     data <- input_data[index, , ]
     carbon_data <- as.data.frame(data)
     names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
@@ -103,6 +99,5 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
     
     downscale_output[[as.character(time_readable[index])]]<-curr_downscaled
   }
-  nc_close(nc_data)
   return(downscale_output)
 }

From 6ca6b651cc2cd90dc78af01865738581edeb1014 Mon Sep 17 00:00:00 2001
From: Harunobu Ishii <harunobuishii547@gmail.com>
Date: Wed, 31 Jul 2024 17:04:42 -0400
Subject: [PATCH 12/12] Add ncdf4:: and lubridate:: namespace prefixes

---
 modules/assim.sequential/R/downscale_function_hrly.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function_hrly.R b/modules/assim.sequential/R/downscale_function_hrly.R
index 70b09e6fc7d..25da4c62150 100644
--- a/modules/assim.sequential/R/downscale_function_hrly.R
+++ b/modules/assim.sequential/R/downscale_function_hrly.R
@@ -15,7 +15,7 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   # Read the input data and site coordinates
   nc_data <- ncdf4::nc_open(nc_file)
   on.exit(ncdf4::nc_close(nc_data))
-  input_data <- ncvar_get(nc_data, "NEE")
+  input_data <- ncdf4::ncvar_get(nc_data, "NEE")
   covariate_names <- names(covariates)
   
   
@@ -35,9 +35,9 @@ SDA_downscale_hrly <- function(nc_file, coords, yyyy, covariates){
   
   # Timereadable
   if (grepl("hours", time_units)) {
-    time_readable <- time_origin + dhours(time)
+    time_readable <- time_origin + lubridate::dhours(time)
   } else if (grepl("seconds", time_units)) {
-    time_readable <- time_origin + dseconds(time)
+    time_readable <- time_origin + lubridate::dseconds(time)
   } else {
     stop("Unsupported time units")
   }