diff --git a/R/get_ensemble_climate_data.R b/R/get_ensemble_climate_data.R index 2759691..30d6fac 100644 --- a/R/get_ensemble_climate_data.R +++ b/R/get_ensemble_climate_data.R @@ -1,26 +1,29 @@ #' Download ensemble climate data -#' -#' @description Download ensemble data for all models, returns the 10th, 50th and 90th percentile -#' of all models (15 for A1, 13 for B2). Ensemble requets can be for countries -#' or basins. -#' +#' +#' @description Download ensemble data for all models, returns the 10th, 50th and 90th +#' percentile of all models (15 for A1, 13 for B2). Ensemble requets can be for +#' countries or basins. +#' #' @import httr plyr reshape2 jsonlite -#' @param locator The ISO3 country code that you want data about. (http://unstats.un.org/unsd/methods/m49/m49alpha.htm) or the basin ID [1-468] +#' @param locator The ISO3 country code that you want data about. +#' (http://unstats.un.org/unsd/methods/m49/m49alpha.htm) or the basin ID [1-468] #' @param geo_type basin or country depending on the locator type #' @param type the type of data you want "mavg" for monthly averages, "annualavg" -#' @param cvar The variable you're interested in. "pr" for precipitation, "tas" for temperature in celcius. -#' @param start The starting year you want data for, can be in the past or the future. Must conform to the periods outlined in the world bank API. If given values don't conform to dates, the fuction will automatically round them. -#' @param end The ending year you want data for, can be in the past or the future. Similar to the start date, dates will be rounded to the nearest end dat. - - -get_ensemble_climate_data <- function(locator,geo_type,type, cvar, start, end){ +#' @param cvar The variable you're interested in. "pr" for precipitation, "tas" for +#' temperature in celcius. +#' @param start The starting year you want data for, can be in the past or the future. +#' Must conform to the periods outlined in the world bank API. If given values don't +#' conform to dates, the fuction will automatically round them. +#' @param end The ending year you want data for, can be in the past or the future. +#' Similar to the start date, dates will be rounded to the nearest end dat. +get_ensemble_climate_data <- function(locator, geo_type, type, cvar, start, end){ base_url <- "http://api.worldbank.org/climateweb/rest/v1/" - + ### Error handling if(geo_type == "country"){ check_ISO_code(locator) } - + if(geo_type == "basin"){ if(is.na(as.numeric(locator))){ stop("You must enter a valid Basin number between 1 and 468") @@ -28,22 +31,29 @@ get_ensemble_climate_data <- function(locator,geo_type,type, cvar, start, end){ if(as.numeric(locator) < 1 || as.numeric(locator) > 468){ as.numeric(locator) < 1 || as.numeric(locator) > 468 } - - } - - - data_url <- paste(geo_type,type,"ensemble",cvar,start,end,locator,sep="/") + + data_url <- paste(geo_type, type, "ensemble", cvar, start, end, locator, sep="/") extension <- ".json" - full_url <- paste(base_url,data_url,extension,sep="") - raw_data <- try(content(GET(full_url),as="text"),silent=T) + full_url <- paste(base_url, data_url, extension, sep="") + raw_data <- try(content(GET(full_url), as="text"), silent=TRUE) data_out <- jsonlite::fromJSON(raw_data) - - - if(sum(grep("unexpected",data_out)) > 0){ +# json <- jsonlite::fromJSON(raw_data, FALSE) +# data_out <- plyr::rbind.fill(lapply(json, function(z) { +# z <- lapply(z, function(w) { +# if (is(w, "list")) { +# unlist(w) +# } else { +# w +# } +# }) +# data.frame(z, stringsAsFactors = FALSE) +# })) + + if(sum(grep("unexpected", data_out)) > 0){ stop(paste("You entered a country for which there is no data. ",locator," is not a country with any data")) } - #data_out <- ldply(parsed_data,data.frame) + # data_out <- ldply(parsed_data,data.frame) if(type == "mavg" && start < 2010){ ### Unpack the lists tmp <- data.frame(sapply(data_out$monthV,unlist)) @@ -53,33 +63,41 @@ get_ensemble_climate_data <- function(locator,geo_type,type, cvar, start, end){ data_out <- melt(tmp,id.vars =c("fromYear","toYear"), variable.name = "percentile", value.name = "data") data_out$month <- rep(1:12,dim(data_out)[1]/12) } - - if(type == "mavg" && start > 2010){ + + if (type == "mavg" && start > 2010) { do_list <- list() - for( i in 1:length(unique(data_out$scenario))){ + for(i in 1:length(unique(data_out$scenario))) { ### Unpack the lists - split_do <- subset(data_out,data_out$scenario == unique(data_out$scenario)[i]) + split_do <- subset(data_out,data_out$scenario == unique(data_out$scenario)[i]) tmp <- data.frame(sapply(split_do$monthV,unlist)) colnames(tmp) <- split_do$percentile tmp$fromYear <- rep(start,12) tmp$toYear <- rep(end,12) - do_list[[i]] <- melt(tmp,id.vars =c("fromYear","toYear"), variable.name = c("percentile"), value.name = "data") - do_list[[i]]$scenario <- rep(split_do$scenario[1],dim(do_list[[i]])[1]) - do_list[[i]]$month <- rep(1:12,dim(do_list[[i]])[1]/12) - } - data_out <- do.call(rbind,do_list) + do_list[[i]] <- melt(tmp,id.vars =c("fromYear", "toYear"), variable.name = c("percentile"), value.name = "data") + do_list[[i]]$scenario <- rep(split_do$scenario[1], dim(do_list[[i]])[1]) + do_list[[i]]$month <- rep(1:12, dim(do_list[[i]])[1]/12) + } + data_out <- do.call(rbind, do_list) } - - - if(start < 2010){ - tmp_names <- c("scenario",colnames(data_out)) - data_out <- cbind(rep("past",dim(data_out)[1]),data_out) - colnames(data_out)<- tmp_names - + + if (start < 2010) { + tmp_names <- c("scenario", colnames(data_out)) + data_out <- cbind(rep("past", dim(data_out)[1]), data_out) + colnames(data_out) <- tmp_names } - - data_out$locator <- rep(locator,dim(data_out)[1]) - + + data_out$locator <- rep(locator, dim(data_out)[1]) + data_out <- fix_emblist(data_out) + return(data_out) } +fix_emblist <- function(vv) { + types <- vapply(vv, class, "") + for (i in seq_along(types)) { + if (types[[i]] == "list") { + vv[,i] <- unlist(vv[,i]) + } + } + vv +} diff --git a/R/get_ensemble_data_recursive.R b/R/get_ensemble_data_recursive.R index 330698c..dc4d3b3 100644 --- a/R/get_ensemble_data_recursive.R +++ b/R/get_ensemble_data_recursive.R @@ -1,7 +1,7 @@ #' Wrapper for get_ensemble_climate_data() #' -#'@description Function to recursively call the get_ensemble_climate_data(). Handles a vector of basins -#'or countries as well as multiple dates. +#' @description Function to recursively call the get_ensemble_climate_data(). Handles a vector of basins +#' or countries as well as multiple dates. #' #' @import plyr #' @param locator The ISO3 country code that you want data about. (http://unstats.un.org/unsd/methods/m49/m49alpha.htm) or the basin ID [1-468] @@ -12,29 +12,28 @@ #' @param end The ending year you want data for, can be in the past or the future. Similar to the start date, dates will be rounded to the nearest end dat. #' @examples \dontrun{ #' get_ensemble_data_recursive(c("1","2"),"basin","mavg","pr",1920,1940) -#'} - - - -get_ensemble_data_recursive <- function(locator,geo_type,type, cvar, start, end){ - dates <- date_correct(start,end) +#' } +get_ensemble_data_recursive <- function(locator, geo_type, type, cvar, start, end){ + dates <- date_correct(start, end) data_out <- list() counter <- 1 - for(i in 1:length(locator)){ - for(j in 1:length(dates[,1])){ - data_out[[counter]] <- get_ensemble_climate_data(locator[i],geo_type,type,cvar,dates[j,1],dates[j,2]) + for (i in 1:length(locator)) { + for (j in 1:length(dates[, 1])) { + data_out[[counter]] <- get_ensemble_climate_data(locator[i], geo_type, type, cvar, dates[j, 1], dates[j, 2]) counter <- counter + 1 } } - - dat_out <- ldply(data_out,data.frame) - if(grepl("ann",type)){ + + dat_out <- ldply(data_out, data.frame) + if (grepl("ann", type)) { to_rep <- "annualVal" ### Because of the new parsing methods data values come back as a list. This will unlist them and flatten them out dat_out$annualData <- unlist(dat_out$annualData) - } else { to_rep <- "monthVals"} - colnames(dat_out)[which(colnames(dat_out)==to_rep)] <- "data" - + } else { + to_rep <- "monthVals" + } + colnames(dat_out)[which(colnames(dat_out) == to_rep)] <- "data" + return(dat_out) - + } diff --git a/R/get_ensemble_temp.R b/R/get_ensemble_temp.R index c71a17f..ecc58ee 100644 --- a/R/get_ensemble_temp.R +++ b/R/get_ensemble_temp.R @@ -1,55 +1,57 @@ -#'Download ensemble temperature data -#'@description Function wraps get_ensemble_climate_data() and returns precipitation -#'by basin or country in mm. Output is the 10th 50th and 90th percentile for all -#'gcm's for the a1 and b2 scenarios. +#' Download ensemble temperature data #' -#'@param locator A vector of either watershed basin ID's from http://data.worldbank.org/sites/default/files/climate_data_api_basins.pdf -#' It can be just a single basin id, or a vector of ids. ids should be strings. -#'@param type the type of data to retrieve, must be "mavg" for monthly averages, -#' "annualavg" for annual averages, "manom" for monthly anomaly, and "annualanom" for -#' annual anomaly. -#'@param start the start year to gather data for. -#'@param end the end year to gather data to. -#'@return a dataframe with precipitation predictions in mm for all scenarios, gcms, for each time period. +#' @export +#' @description Function wraps \code{\link{get_ensemble_climate_data}} and returns +#' precipitation by basin or country in mm. Output is the 10th 50th and 90th percentile +#' for all gcm's for the a1 and b2 scenarios. #' -#'@details start and end year can be any years, but all years will be coerced -#' into periods outlined by the API (http://data.worldbank.org/developers/climate -data-api) -#' anomaly periods are only valid for future scenarios and based on a -#' reference period of 1969 - 1999, see API for full details. -#'@examples \dontrun{ -#'# Get data for 2 basins, annual average precipitation for all valid time periods -#'# then subset them, and plot -#' temp_dat <- get_ensemble_temp(c("2","231"),"annualavg",1900,3000) +#' @param locator A vector of either watershed basin ID's from +#' http://data.worldbank.org/sites/default/files/climate_data_api_basins.pdf +#' It can be just a single basin id, or a vector of ids. ids should be strings. +#' @param type the type of data to retrieve, must be "mavg" for monthly averages, +#' "annualavg" for annual averages, "manom" for monthly anomaly, and "annualanom" for +#' annual anomaly. +#' @param start the start year to gather data for. +#' @param end the end year to gather data to. +#' @return a dataframe with precipitation predictions in mm for all scenarios, gcms, +#' for each time period. +#' +#' @details start and end year can be any years, but all years will be coerced +#' into periods outlined by the API (http://data.worldbank.org/developers/climate-data-api) +#' anomaly periods are only valid for future scenarios and based on a +#' reference period of 1969 - 1999, see API for full details. +#' @examples \dontrun{ +#' # Get data for 2 basins, annual average precipitation for all valid time periods +#' # then subset them, and plot +#' temp_dat <- get_ensemble_temp(locator=c(2,231), type="annualavg", start=1900, end=3000) #' temp_dat <- subset(temp_dat,temp_dat$scenario!="b1") #' temp_dat$uniqueGroup <- paste(temp_dat$percentile,temp_dat$locator,sep="-") -#' ggplot(temp_dat,aes(x=fromYear,y=annualVal,group=uniqueGroup,colour=as.factor(locator), -#' linetype=as.factor(percentile)))+geom_path() -#' +#' ggplot(temp_dat, aes(x=fromYear, y=data, group=uniqueGroup, +#' colour=as.factor(locator), linetype=as.factor(percentile))) + +#' geom_path() +#' #' ### Get data for 2 countries with monthly precipitation values -#' temp_dat <- get_ensemble_temp(c("USA","BRA"),"mavg",2020,2030) +#' temp_dat <- get_ensemble_temp(locator = c("USA","BRA"), type = "mavg", start = 2020, end = 2030) #' temp_dat <- subset(temp_dat,temp_dat$scenario!="b1") -#' temp_dat$uniqueGroup <- paste(temp_dat$percentile,temp_dat$locator,sep="-") -#' ggplot(temp_dat,aes(x=as.factor(month),y=monthVals,group=uniqueGroup, -#' colour=locator))+geom_path() -#'} -#'@export - - -get_ensemble_temp <- function(locator,type, start, end){ +#' temp_dat$uniqueGroup <- paste(temp_dat$percentile, temp_dat$locator,sep="-") +#' ggplot(temp_dat, aes(x=as.factor(month), y=data, group=uniqueGroup, colour=locator)) + +#' geom_path() +#' } +get_ensemble_temp <- function(locator, type, start, end){ ### check type is valid - typevec <- c("mavg","annualavg","manom","annualanom") - if(!type%in%typevec){ + typevec <- c("mavg", "annualavg", "manom", "annualanom") + if (!type %in% typevec) { stop("Please enter a valid data type to retrieve, see help for details") } - - if(start < 2000 && type%in%typevec[3:4]){ + + if (start < 2000 && type %in% typevec[3:4]) { stop("Anomaly requests are only valid for future scenarios") } #Convert numeric basin numbers to strings if they were entered incorrectly locator <- as.character(locator) geo_ref <- check_locator(locator) - - output <- get_ensemble_data_recursive(locator,geo_ref,type, "tas", start, end) + + output <- get_ensemble_data_recursive(locator, geo_type = geo_ref, type, cvar = "tas", start, end) return(output) } diff --git a/man/get_ensemble_climate_data.Rd b/man/get_ensemble_climate_data.Rd index c0c1eab..ccd08e8 100644 --- a/man/get_ensemble_climate_data.Rd +++ b/man/get_ensemble_climate_data.Rd @@ -7,21 +7,26 @@ get_ensemble_climate_data(locator, geo_type, type, cvar, start, end) } \arguments{ -\item{locator}{The ISO3 country code that you want data about. (http://unstats.un.org/unsd/methods/m49/m49alpha.htm) or the basin ID [1-468]} +\item{locator}{The ISO3 country code that you want data about. +(http://unstats.un.org/unsd/methods/m49/m49alpha.htm) or the basin ID [1-468]} \item{geo_type}{basin or country depending on the locator type} \item{type}{the type of data you want "mavg" for monthly averages, "annualavg"} -\item{cvar}{The variable you're interested in. "pr" for precipitation, "tas" for temperature in celcius.} +\item{cvar}{The variable you're interested in. "pr" for precipitation, "tas" for +temperature in celcius.} -\item{start}{The starting year you want data for, can be in the past or the future. Must conform to the periods outlined in the world bank API. If given values don't conform to dates, the fuction will automatically round them.} +\item{start}{The starting year you want data for, can be in the past or the future. +Must conform to the periods outlined in the world bank API. If given values don't +conform to dates, the fuction will automatically round them.} -\item{end}{The ending year you want data for, can be in the past or the future. Similar to the start date, dates will be rounded to the nearest end dat.} +\item{end}{The ending year you want data for, can be in the past or the future. +Similar to the start date, dates will be rounded to the nearest end dat.} } \description{ -Download ensemble data for all models, returns the 10th, 50th and 90th percentile -of all models (15 for A1, 13 for B2). Ensemble requets can be for countries -or basins. +Download ensemble data for all models, returns the 10th, 50th and 90th +percentile of all models (15 for A1, 13 for B2). Ensemble requets can be for +countries or basins. } diff --git a/man/get_ensemble_temp.Rd b/man/get_ensemble_temp.Rd index ec15353..a5cff8b 100644 --- a/man/get_ensemble_temp.Rd +++ b/man/get_ensemble_temp.Rd @@ -7,7 +7,8 @@ get_ensemble_temp(locator, type, start, end) } \arguments{ -\item{locator}{A vector of either watershed basin ID's from http://data.worldbank.org/sites/default/files/climate_data_api_basins.pdf +\item{locator}{A vector of either watershed basin ID's from +http://data.worldbank.org/sites/default/files/climate_data_api_basins.pdf It can be just a single basin id, or a vector of ids. ids should be strings.} \item{type}{the type of data to retrieve, must be "mavg" for monthly averages, @@ -19,35 +20,37 @@ annual anomaly.} \item{end}{the end year to gather data to.} } \value{ -a dataframe with precipitation predictions in mm for all scenarios, gcms, for each time period. +a dataframe with precipitation predictions in mm for all scenarios, gcms, +for each time period. } \description{ -Function wraps get_ensemble_climate_data() and returns precipitation -by basin or country in mm. Output is the 10th 50th and 90th percentile for all -gcm's for the a1 and b2 scenarios. +Function wraps \code{\link{get_ensemble_climate_data}} and returns +precipitation by basin or country in mm. Output is the 10th 50th and 90th percentile +for all gcm's for the a1 and b2 scenarios. } \details{ start and end year can be any years, but all years will be coerced - into periods outlined by the API (http://data.worldbank.org/developers/climate -data-api) - anomaly periods are only valid for future scenarios and based on a - reference period of 1969 - 1999, see API for full details. +into periods outlined by the API (http://data.worldbank.org/developers/climate-data-api) +anomaly periods are only valid for future scenarios and based on a +reference period of 1969 - 1999, see API for full details. } \examples{ \dontrun{ # Get data for 2 basins, annual average precipitation for all valid time periods # then subset them, and plot -temp_dat <- get_ensemble_temp(c("2","231"),"annualavg",1900,3000) +temp_dat <- get_ensemble_temp(locator=c(2,231), type="annualavg", start=1900, end=3000) temp_dat <- subset(temp_dat,temp_dat$scenario!="b1") temp_dat$uniqueGroup <- paste(temp_dat$percentile,temp_dat$locator,sep="-") -ggplot(temp_dat,aes(x=fromYear,y=annualVal,group=uniqueGroup,colour=as.factor(locator), -linetype=as.factor(percentile)))+geom_path() +ggplot(temp_dat, aes(x=fromYear, y=data, group=uniqueGroup, + colour=as.factor(locator), linetype=as.factor(percentile))) + + geom_path() ### Get data for 2 countries with monthly precipitation values -temp_dat <- get_ensemble_temp(c("USA","BRA"),"mavg",2020,2030) +temp_dat <- get_ensemble_temp(locator = c("USA","BRA"), type = "mavg", start = 2020, end = 2030) temp_dat <- subset(temp_dat,temp_dat$scenario!="b1") -temp_dat$uniqueGroup <- paste(temp_dat$percentile,temp_dat$locator,sep="-") -ggplot(temp_dat,aes(x=as.factor(month),y=monthVals,group=uniqueGroup, -colour=locator))+geom_path() +temp_dat$uniqueGroup <- paste(temp_dat$percentile, temp_dat$locator,sep="-") +ggplot(temp_dat, aes(x=as.factor(month), y=data, group=uniqueGroup, colour=locator)) + + geom_path() } }