Skip to content

Commit

Permalink
Merge pull request #126 from goldingn/master
Browse files Browse the repository at this point in the history
Implementing safer prediction mechanism
  • Loading branch information
goldingn committed Sep 9, 2015
2 parents a6ded78 + 0181375 commit 0e6a27e
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 75 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export(GetModuleList)
export(LoadModule)
export(ModuleHelp)
export(RerunWorkflow)
export(ZoonModel)
export(ZoonPredict)
export(workflow)
import(RCurl)
import(assertthat)
Expand Down
2 changes: 1 addition & 1 deletion R/GetPackage.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ GetPackage <- function (package) {

# otherwise use install.packages
install.packages(package,
repos = "http://cran.ma.imperial.ac.uk/")
repos = "https://cran.rstudio.com")

# now load the package
library(package,
Expand Down
135 changes: 61 additions & 74 deletions R/zoonHelpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,28 @@ LoadModule <- function(module){
# Sub and deparse argument, then remove extra quote marks.
module <- deparse(substitute(module))
module <- gsub('"', '', module)

# Create url that matches zoon repo
# .sha will vary based on whether this is pegged to a specific version of
# modules
zoonURL <-
sprintf('https://raw.githubusercontent.com/zoonproject/modules/%s/R/%s.R',
.sha, module)

# If module is a path, load module
if (file.exists(module)){
txt <- parse(text = paste(readLines(module), collapse="\n"))
# If zoonURL is a zoon repo url, load module
# Could probably do same thing as GetModule here to avoid repeated web call
# If zoonURL is a zoon repo url, load module
# Could probably do same thing as GetModule here to avoid repeated web call
} else if (url.exists(zoonURL, .opts=list(ssl.verifypeer=FALSE))){
txt <- parse(text = getURL(zoonURL, ssl.verifypeer=FALSE))
# If module on its own is a url, load module
# If module on its own is a url, load module
} else if (url.exists(module, .opts=list(ssl.verifypeer=FALSE))){
txt <- parse( text = getURL(module, ssl.verifypeer=FALSE))
# Otherwise throw error.
# Otherwise throw error.
} else {
stop(paste('Cannot find "', module,
'". Check the URL or check that the module is at github.com/zoonproject'))
'". Check the URL or check that the module is at github.com/zoonproject'))
}
# Load to global environment
eval(txt, envir = globalenv())
Expand Down Expand Up @@ -74,7 +74,7 @@ GetModule <- function(module, forceReproducible){
zoonURL <-
paste0('https://raw.githubusercontent.com/zoonproject/modules/master/R/',
module, '.R')

# If the module is in global namespace, use that function
# unless forceReproduce is TRUE, in which case we want to get from repo.
#
Expand All @@ -85,20 +85,20 @@ GetModule <- function(module, forceReproducible){
} else {
rawText <- getURL(zoonURL, ssl.verifypeer=FALSE)
}

# getURL returns "Not Found" if no webpage found.
# Use this to avoid two web call.s
if(rawText == "Not Found") {
stop(paste('Cannot find "', module,
'". Check that the module is on the zoon repository or in the global namespace.'))
'". Check that the module is on the zoon repository or in the global namespace.'))
}

# Parse text from webpage.
txt <- parse(text = rawText)

# Evaluate text in the workflow call environment
eval(txt, envir = parent.frame(4))

return(module)
}

Expand Down Expand Up @@ -159,18 +159,12 @@ RunModels <- function(df, modelFunction, paras, workEnv){
modelFold <- do.call(modelFunction, c(.df = list(df[df$fold != i, ]),
paras),
envir = workEnv)

pred <- predict(modelFold,
newdata = df[df$fold == i, 6:NCOL(df), drop = FALSE],
type = 'response')

# if pred is a matrix/dataframe, take only the first column
if(!is.null(dim(pred))) {
pred <- pred[, 1]
}


pred <- ZoonPredict(.model$model,
newdata = df[df$fold == i, 6:NCOL(df), drop = FALSE])

dfOut$predictions[df$fold == i] <- pred

}
}

Expand All @@ -180,19 +174,12 @@ RunModels <- function(df, modelFunction, paras, workEnv){

# If external validation dataset exists, predict that;.
if(0 %in% df$fold){
pred <- predict(m,
newdata = df[df$fold == 0, 6:NCOL(df), drop = FALSE],
type = 'response')

# if pred is a matrix/dataframe, take only the first column
if(!is.null(dim(pred))) {
pred <- pred[, 1]
}

dfOut$predictions[df$fold == 0] <- pred




pred <- ZoonPredict(.model$model,
newdata = df[df$fold == 0, 6:NCOL(df), drop = FALSE])

dfOut$predictions[df$fold == 0] <- pred

}

# Return list of crossvalid and external validation predictions
Expand All @@ -219,50 +206,50 @@ RunModels <- function(df, modelFunction, paras, workEnv){
# Also occurrence = "list(mod1, mod2)" is probably bad.

CheckModList <- function(x){

# Should accept occurrence = 'module1', but NOT
# occurrence = 'module1(k=2)', or occurrence = 'list(mod1, mod1)'
if (inherits(x, 'character')){
if (grepl('[!#$%&*+,-/:;<>?@[\ ]^_`| ]', x)){
stop(paste('If specifying module arguments please use the form',
'Module(para = 2), without quotes. No special characters should exist',
'in module names.'))
'Module(para = 2), without quotes. No special characters should exist',
'in module names.'))
}
}

# If argument is passed as unquoted moduleName: occurrence = ModuleName,
if (class(x) == 'name'){
ModuleList <- list(list(module = as.character(x), paras = list()))

# If list of modules given: occurrence = list(Mod1, Mod2),
# If list(Mod1(k=2), Mod2(p = 3)), parameters sorted in
# FormatModuleList
# If list of modules given: occurrence = list(Mod1, Mod2),
# If list(Mod1(k=2), Mod2(p = 3)), parameters sorted in
# FormatModuleList
} else if (x[[1]] == 'list') {
listCall <- as.list(x)
listCall[[1]] <- NULL
ModuleList <- lapply(listCall, FormatModuleList)

# If Chained modules given: occurrence = Chain(Mod1, Mod2),
# If Chained modules given: occurrence = Chain(Mod1, Mod2),
} else if (x[[1]] == 'Chain'){
listCall <- as.list(x)
listCall[[1]] <- NULL
ModuleList <- lapply(listCall, FormatModuleList)
attr(ModuleList, 'chain') <- TRUE

# If unquoted module w/ paras given: occurrence = Module1(k=2)
# If unquoted module w/ paras given: occurrence = Module1(k=2)
} else if (identical(class(x[[1]]), 'name')){
# Parameters
paras <- as.list(x)
paras[[1]] <- NULL
ModuleList <- list(list(module = as.character(x[[1]]), paras = paras))
# Deal with all quoted forms
# Can include 'module1', 'module(para = 2)', 'module(p = 2, q = 'wer')'
# Deal with all quoted forms
# Can include 'module1', 'module(para = 2)', 'module(p = 2, q = 'wer')'
} else if(inherits(x, 'character')){
ModuleList <- list(SplitArgs(x))
} else {
stop(paste('Please check the format of argument', as.character(x)))
}

return(ModuleList)
}

Expand All @@ -287,9 +274,9 @@ SplitArgs <- function(string){
}
sepArgs <- (strsplit(args, ','))[[1]]
arguments <- lapply(strsplit(sepArgs, '='),
function(x) gsub(' ', '', x[2]))
function(x) gsub(' ', '', x[2]))
names(arguments) <- unlist(lapply(strsplit(sepArgs, '='),
function(x) gsub(' ', '', x[1])))
function(x) gsub(' ', '', x[1])))
return(list(module = module, paras = arguments))
}

Expand All @@ -304,7 +291,7 @@ SplitArgs <- function(string){
FormatModuleList <- function(x){
# Turn 'call' or 'name' into list.
listx <- as.list(x)

# Empty list to populate.
newList <- list()
newList$module <- listx[[1]]
Expand All @@ -331,14 +318,14 @@ ExtractAndCombData <- function(occurrence, ras){
occurrence <- occurrence[!bad.coords, ]
warning ('Some occurrence points are outside the raster extent and have been removed before modelling')
}

# extract covariates from lat long values in df.
occurrenceCovariates <- as.matrix(raster::extract(ras, occurrence[, c('longitude', 'latitude')]))
names(occurrenceCovariates) <- names(ras)

# combine with the occurrence data
df <- cbind(occurrence, occurrenceCovariates)


# Return as list of df and ras as required by process modules
return(list(df=df, ras=ras))
Expand Down Expand Up @@ -381,11 +368,11 @@ Chain <- function(...){
SortArgs <- function(occSub, covSub, proSub, modSub, outSub, forceReproducible){
call <- paste0("workflow(",
"occurrence = ", occSub,
", covariate = ", covSub,
", process = ", proSub,
", model = ", modSub,
", output = ", outSub,
", forceReproducible = ", as.character(forceReproducible), ")")
", covariate = ", covSub,
", process = ", proSub,
", model = ", modSub,
", output = ", outSub,
", forceReproducible = ", as.character(forceReproducible), ")")
}


Expand All @@ -398,7 +385,7 @@ SortArgs <- function(occSub, covSub, proSub, modSub, outSub, forceReproducible){
#@name SplitCall

SplitCall <- function(call){

# Regex to find each argument within call.
# Find 3 patterns and sub whole string with just middle pattern
# Middle pattern is the argument name.
Expand All @@ -408,14 +395,14 @@ SplitCall <- function(call){
model <- gsub('(.*model = )(.*)(, output.*$)', '\\2', call)
output <- gsub('(.*output = )(.*)(, forceReproducible.*$)', '\\2', call)
forceReproducible <- gsub('(.*forceReproducible = )(.*)())', '\\2', call)

# Make vector and add names.
split <- c(occurrence, covariate, process, model, output, forceReproducible)
names(split) <- c('occurrence', 'covariate', 'process',
'model', 'output', 'forceReproducible')
'model', 'output', 'forceReproducible')

return(split)

}


Expand All @@ -434,15 +421,15 @@ SplitCall <- function(call){
#@name ErrorAndSave

ErrorAndSave <- function(cond, mod, e){

# Create list to be populated
# Include the call from the workflow environment e
w <- list(occurrence.output = NULL,
covariate.output = NULL,
process.output = NULL,
model.output = NULL,
report = NULL,
call = e$call)
covariate.output = NULL,
process.output = NULL,
model.output = NULL,
report = NULL,
call = e$call)

# Depending on mod argument, replace NULLS in w with the value of module
# output. To get the module output we have to reference the workflow
Expand All @@ -460,13 +447,13 @@ ErrorAndSave <- function(cond, mod, e){
w$model.output <- e$model.output
}
class(w) <- 'zoonWorkflow'

# Select the module type using numeric mod argument
module <- c('occurrence', 'covariate', 'process', 'model', 'output')[mod]

# R CMD check apparently dislikes this assignment to the global environemtn
assign('tmpZoonWorkflow', w, envir = .GlobalEnv)

# Give useful messages.
# What were the errors that were caught be tryCatch.
message('Caught errors:\n', cond)
Expand Down
Loading

0 comments on commit 0e6a27e

Please sign in to comment.