diff --git a/NAMESPACE b/NAMESPACE index 8b305db..34dd49b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,8 @@ export(GetModuleList) export(LoadModule) export(ModuleHelp) export(RerunWorkflow) +export(ZoonModel) +export(ZoonPredict) export(workflow) import(RCurl) import(assertthat) diff --git a/R/GetPackage.R b/R/GetPackage.R index 41089b4..fb77792 100644 --- a/R/GetPackage.R +++ b/R/GetPackage.R @@ -13,7 +13,7 @@ GetPackage <- function (package) { # otherwise use install.packages install.packages(package, - repos = "http://cran.ma.imperial.ac.uk/") + repos = "https://cran.rstudio.com") # now load the package library(package, diff --git a/R/zoonHelpers.R b/R/zoonHelpers.R index 471a711..5122037 100644 --- a/R/zoonHelpers.R +++ b/R/zoonHelpers.R @@ -17,28 +17,28 @@ LoadModule <- function(module){ # Sub and deparse argument, then remove extra quote marks. module <- deparse(substitute(module)) module <- gsub('"', '', module) - + # Create url that matches zoon repo # .sha will vary based on whether this is pegged to a specific version of # modules zoonURL <- sprintf('https://raw.githubusercontent.com/zoonproject/modules/%s/R/%s.R', .sha, module) - + # If module is a path, load module if (file.exists(module)){ txt <- parse(text = paste(readLines(module), collapse="\n")) - # If zoonURL is a zoon repo url, load module - # Could probably do same thing as GetModule here to avoid repeated web call + # If zoonURL is a zoon repo url, load module + # Could probably do same thing as GetModule here to avoid repeated web call } else if (url.exists(zoonURL, .opts=list(ssl.verifypeer=FALSE))){ txt <- parse(text = getURL(zoonURL, ssl.verifypeer=FALSE)) - # If module on its own is a url, load module + # If module on its own is a url, load module } else if (url.exists(module, .opts=list(ssl.verifypeer=FALSE))){ txt <- parse( text = getURL(module, ssl.verifypeer=FALSE)) - # Otherwise throw error. + # Otherwise throw error. } else { stop(paste('Cannot find "', module, - '". Check the URL or check that the module is at github.com/zoonproject')) + '". Check the URL or check that the module is at github.com/zoonproject')) } # Load to global environment eval(txt, envir = globalenv()) @@ -74,7 +74,7 @@ GetModule <- function(module, forceReproducible){ zoonURL <- paste0('https://raw.githubusercontent.com/zoonproject/modules/master/R/', module, '.R') - + # If the module is in global namespace, use that function # unless forceReproduce is TRUE, in which case we want to get from repo. # @@ -85,20 +85,20 @@ GetModule <- function(module, forceReproducible){ } else { rawText <- getURL(zoonURL, ssl.verifypeer=FALSE) } - + # getURL returns "Not Found" if no webpage found. # Use this to avoid two web call.s if(rawText == "Not Found") { stop(paste('Cannot find "', module, - '". Check that the module is on the zoon repository or in the global namespace.')) + '". Check that the module is on the zoon repository or in the global namespace.')) } - + # Parse text from webpage. txt <- parse(text = rawText) # Evaluate text in the workflow call environment eval(txt, envir = parent.frame(4)) - + return(module) } @@ -159,18 +159,12 @@ RunModels <- function(df, modelFunction, paras, workEnv){ modelFold <- do.call(modelFunction, c(.df = list(df[df$fold != i, ]), paras), envir = workEnv) - - pred <- predict(modelFold, - newdata = df[df$fold == i, 6:NCOL(df), drop = FALSE], - type = 'response') - - # if pred is a matrix/dataframe, take only the first column - if(!is.null(dim(pred))) { - pred <- pred[, 1] - } - + + pred <- ZoonPredict(.model$model, + newdata = df[df$fold == i, 6:NCOL(df), drop = FALSE]) + dfOut$predictions[df$fold == i] <- pred - + } } @@ -180,19 +174,12 @@ RunModels <- function(df, modelFunction, paras, workEnv){ # If external validation dataset exists, predict that;. if(0 %in% df$fold){ - pred <- predict(m, - newdata = df[df$fold == 0, 6:NCOL(df), drop = FALSE], - type = 'response') - - # if pred is a matrix/dataframe, take only the first column - if(!is.null(dim(pred))) { - pred <- pred[, 1] - } - - dfOut$predictions[df$fold == 0] <- pred - - - + + pred <- ZoonPredict(.model$model, + newdata = df[df$fold == 0, 6:NCOL(df), drop = FALSE]) + + dfOut$predictions[df$fold == 0] <- pred + } # Return list of crossvalid and external validation predictions @@ -219,50 +206,50 @@ RunModels <- function(df, modelFunction, paras, workEnv){ # Also occurrence = "list(mod1, mod2)" is probably bad. CheckModList <- function(x){ - + # Should accept occurrence = 'module1', but NOT # occurrence = 'module1(k=2)', or occurrence = 'list(mod1, mod1)' if (inherits(x, 'character')){ if (grepl('[!#$%&*+,-/:;<>?@[\ ]^_`| ]', x)){ stop(paste('If specifying module arguments please use the form', - 'Module(para = 2), without quotes. No special characters should exist', - 'in module names.')) + 'Module(para = 2), without quotes. No special characters should exist', + 'in module names.')) } } # If argument is passed as unquoted moduleName: occurrence = ModuleName, if (class(x) == 'name'){ ModuleList <- list(list(module = as.character(x), paras = list())) - - # If list of modules given: occurrence = list(Mod1, Mod2), - # If list(Mod1(k=2), Mod2(p = 3)), parameters sorted in - # FormatModuleList + + # If list of modules given: occurrence = list(Mod1, Mod2), + # If list(Mod1(k=2), Mod2(p = 3)), parameters sorted in + # FormatModuleList } else if (x[[1]] == 'list') { listCall <- as.list(x) listCall[[1]] <- NULL ModuleList <- lapply(listCall, FormatModuleList) - - # If Chained modules given: occurrence = Chain(Mod1, Mod2), + + # If Chained modules given: occurrence = Chain(Mod1, Mod2), } else if (x[[1]] == 'Chain'){ listCall <- as.list(x) listCall[[1]] <- NULL ModuleList <- lapply(listCall, FormatModuleList) attr(ModuleList, 'chain') <- TRUE - - # If unquoted module w/ paras given: occurrence = Module1(k=2) + + # If unquoted module w/ paras given: occurrence = Module1(k=2) } else if (identical(class(x[[1]]), 'name')){ # Parameters paras <- as.list(x) paras[[1]] <- NULL ModuleList <- list(list(module = as.character(x[[1]]), paras = paras)) - # Deal with all quoted forms - # Can include 'module1', 'module(para = 2)', 'module(p = 2, q = 'wer')' + # Deal with all quoted forms + # Can include 'module1', 'module(para = 2)', 'module(p = 2, q = 'wer')' } else if(inherits(x, 'character')){ ModuleList <- list(SplitArgs(x)) } else { stop(paste('Please check the format of argument', as.character(x))) } - + return(ModuleList) } @@ -287,9 +274,9 @@ SplitArgs <- function(string){ } sepArgs <- (strsplit(args, ','))[[1]] arguments <- lapply(strsplit(sepArgs, '='), - function(x) gsub(' ', '', x[2])) + function(x) gsub(' ', '', x[2])) names(arguments) <- unlist(lapply(strsplit(sepArgs, '='), - function(x) gsub(' ', '', x[1]))) + function(x) gsub(' ', '', x[1]))) return(list(module = module, paras = arguments)) } @@ -304,7 +291,7 @@ SplitArgs <- function(string){ FormatModuleList <- function(x){ # Turn 'call' or 'name' into list. listx <- as.list(x) - + # Empty list to populate. newList <- list() newList$module <- listx[[1]] @@ -331,14 +318,14 @@ ExtractAndCombData <- function(occurrence, ras){ occurrence <- occurrence[!bad.coords, ] warning ('Some occurrence points are outside the raster extent and have been removed before modelling') } - + # extract covariates from lat long values in df. occurrenceCovariates <- as.matrix(raster::extract(ras, occurrence[, c('longitude', 'latitude')])) names(occurrenceCovariates) <- names(ras) - + # combine with the occurrence data df <- cbind(occurrence, occurrenceCovariates) - + # Return as list of df and ras as required by process modules return(list(df=df, ras=ras)) @@ -381,11 +368,11 @@ Chain <- function(...){ SortArgs <- function(occSub, covSub, proSub, modSub, outSub, forceReproducible){ call <- paste0("workflow(", "occurrence = ", occSub, - ", covariate = ", covSub, - ", process = ", proSub, - ", model = ", modSub, - ", output = ", outSub, - ", forceReproducible = ", as.character(forceReproducible), ")") + ", covariate = ", covSub, + ", process = ", proSub, + ", model = ", modSub, + ", output = ", outSub, + ", forceReproducible = ", as.character(forceReproducible), ")") } @@ -398,7 +385,7 @@ SortArgs <- function(occSub, covSub, proSub, modSub, outSub, forceReproducible){ #@name SplitCall SplitCall <- function(call){ - + # Regex to find each argument within call. # Find 3 patterns and sub whole string with just middle pattern # Middle pattern is the argument name. @@ -408,14 +395,14 @@ SplitCall <- function(call){ model <- gsub('(.*model = )(.*)(, output.*$)', '\\2', call) output <- gsub('(.*output = )(.*)(, forceReproducible.*$)', '\\2', call) forceReproducible <- gsub('(.*forceReproducible = )(.*)())', '\\2', call) - + # Make vector and add names. split <- c(occurrence, covariate, process, model, output, forceReproducible) names(split) <- c('occurrence', 'covariate', 'process', - 'model', 'output', 'forceReproducible') + 'model', 'output', 'forceReproducible') return(split) - + } @@ -434,15 +421,15 @@ SplitCall <- function(call){ #@name ErrorAndSave ErrorAndSave <- function(cond, mod, e){ - + # Create list to be populated # Include the call from the workflow environment e w <- list(occurrence.output = NULL, - covariate.output = NULL, - process.output = NULL, - model.output = NULL, - report = NULL, - call = e$call) + covariate.output = NULL, + process.output = NULL, + model.output = NULL, + report = NULL, + call = e$call) # Depending on mod argument, replace NULLS in w with the value of module # output. To get the module output we have to reference the workflow @@ -460,13 +447,13 @@ ErrorAndSave <- function(cond, mod, e){ w$model.output <- e$model.output } class(w) <- 'zoonWorkflow' - + # Select the module type using numeric mod argument module <- c('occurrence', 'covariate', 'process', 'model', 'output')[mod] - + # R CMD check apparently dislikes this assignment to the global environemtn assign('tmpZoonWorkflow', w, envir = .GlobalEnv) - + # Give useful messages. # What were the errors that were caught be tryCatch. message('Caught errors:\n', cond) diff --git a/R/zoonPredict.R b/R/zoonPredict.R new file mode 100644 index 0000000..e616805 --- /dev/null +++ b/R/zoonPredict.R @@ -0,0 +1,91 @@ +# functions for handling prediction from model module outputs + +#' ZoonPredict +#' +#' \strong{module developer tool:} Predict from a ZoonModel object +#' +#' @details \strong{This function is only intended to be used when developing +#' new modules, not for running zoon workflows}. +#' Given a \code{zoonModel} object returned by a model +#' module using the function \code{ZoonModel}, make a prediction to +#' a new dataframe. +#' For an example, see the source code for the module \code{mgcv}. +#' +#' +#' +#' @param zoonModel a \code{zoonModel} object +#' +#' @param newdata a dataframe containing data to predict to. +#' @name ZoonPredict +#' @export +#' +# @family module developer tools +ZoonPredict <- function(zoonModel, newdata) { + + # check the model + if (!inherits(zoonModel, 'zoonModel')) { + stop ('zoonPredict can only be used with zoonModel objects') + } + + # get required packages + require (zoonModel$packages, + character.only = TRUE) + + # define prediction function using module code + fun_text <- sprintf('fun <- function (model, newdata) {%s}', + zoonModel$code) + fun <- eval(parse(text = fun_text)) + + # run the predictor and return result + ans <- fun(zoonModel$model, newdata = newdata) + return (ans) +} + + +#' ZoonModel +#' +#' \strong{module developer tool:} Create a Zoon model object +#' +#' @details \strong{This function is only intended to be used when developing +#' new modules, not for running zoon workflows}. +#' Given a \code{zoonModel} object returned by a model +#' module using the function \code{ZoonModel}, make a prediction to +#' a new dataframe. +#' For an example, see the source code for the module \code{InteractiveMap}. +#' +#' @param model a fitted model object to be used for making predictions +#' +#' @param code code to make predictions from \code{model} object to +#' a dataframe \code{newdata} containing new covaraite observations. +#' The code must use the objects named \code{model} and \code{newdata} and +#' no other objects and must return a numeric vector, witht he same length +#' as the number of rows in \code{newdata} giving predictions on the response +#' scale (e.g. probabilities of presence). +#' +#' @param packages a character vector giving the names of packages +#' needed to run the code zoonModel a \code{zoonModel} object +#' +#' @return an object of class \code{zoonModel} containing all of the +#' information and code required to make predictions, using the function +#' \code{\link{ZoonPredict}} +#' +#' @name ZoonModel +#' @export +# @family module developer tools +ZoonModel <- function(model, + code, + packages) { + + # catch the code as text + code <- deparse(substitute(code)) + code <- paste(code, collapse = '\n') + + # create a list of these elements + ans <- list(model = model, + code = code, + packages = packages) + + class(ans) <- 'zoonModel' + + return (ans) +} \ No newline at end of file diff --git a/man/ZoonModel.Rd b/man/ZoonModel.Rd new file mode 100644 index 0000000..8f5b262 --- /dev/null +++ b/man/ZoonModel.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/zoonPredict.R +\name{ZoonModel} +\alias{ZoonModel} +\title{ZoonModel} +\usage{ +ZoonModel(model, code, packages) +} +\arguments{ +\item{model}{a fitted model object to be used for making predictions} + +\item{code}{code to make predictions from \code{model} object to +a dataframe \code{newdata} containing new covaraite observations. +The code must use the objects named \code{model} and \code{newdata} and +no other objects and must return a numeric vector, witht he same length +as the number of rows in \code{newdata} giving predictions on the response +scale (e.g. probabilities of presence).} + +\item{packages}{a character vector giving the names of packages +needed to run the code zoonModel a \code{zoonModel} object} +} +\value{ +an object of class \code{zoonModel} containing all of the + information and code required to make predictions, using the function + \code{\link{ZoonPredict}} +} +\description{ +\strong{module developer tool:} Create a Zoon model object +} +\details{ +\strong{This function is only intended to be used when developing +new modules, not for running zoon workflows}. +Given a \code{zoonModel} object returned by a model +module using the function \code{ZoonModel}, make a prediction to +a new dataframe. +For an example, see the source code for the module \code{InteractiveMap}. +} + diff --git a/man/ZoonPredict.Rd b/man/ZoonPredict.Rd new file mode 100644 index 0000000..1eb845a --- /dev/null +++ b/man/ZoonPredict.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/zoonPredict.R +\name{ZoonPredict} +\alias{ZoonPredict} +\title{ZoonPredict} +\usage{ +ZoonPredict(zoonModel, newdata) +} +\arguments{ +\item{zoonModel}{a \code{zoonModel} object} + +\item{newdata}{a dataframe containing data to predict to.} +} +\description{ +\strong{module developer tool:} Predict from a ZoonModel object +} +\details{ +\strong{This function is only intended to be used when developing +new modules, not for running zoon workflows}. +Given a \code{zoonModel} object returned by a model +module using the function \code{ZoonModel}, make a prediction to +a new dataframe. +For an example, see the source code for the module \code{mgcv}. +} +