diff --git a/.Rbuildignore b/.Rbuildignore index ea23d1f..0e925a0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,9 +3,20 @@ ^.*\.Rproj$ ^\.Rproj\.user$ vignettes/cache +vignettes/figure Notes_for_zoon_developers.md zoonDemo.R zoonQuickStart.R .travis.yml code_of_conduct.md vignettes/figure +tests/testthat/*.pdf +tests/testthat/testChangeWorkflow.R +tests/testthat/testGetMaxEnt.R +tests/testthat/testplot.zoonWorkflow.R +tests/testthat/testprint.zoonSummary.R +tests/testthat/testprint.zoonWorkflow.R +tests/testthat/testRerunWorkflow.R +tests/testthat/testRunModules.R +tests/testthat/testsummary.zoonWorkflow.R +tests/testthat/testWholeWorkflows.R \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3324a64..eb05fb2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ vignettes/*.md vignettes/*.R vignettes/figure vignettes/cache + +# outputs from testing +tests/testthat/*.pdf \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 6e70dcb..0e43f2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: zoon Type: Package Title: Reproducible, Accessible & Shareable Species Distribution Modelling -Version: 0.3.3 +Version: 0.4.21 Author: Tim Lucas, Nick Golding, Tom August, Greg McInerny, Emiel van Loon -Maintainer: Nick Golding +Maintainer: Tom August Description: Reproducible and remixable species distribution modelling. The package reads user submitted modules from an online repository, runs full SDM workflows and returns output that is fully reproducible. 
@@ -11,14 +11,16 @@ License: BSD_3_clause + file LICENSE Imports: RCurl, dismo, - rfigshare + rfigshare, + methods Depends: raster (>= 2.4-20), R (>= 3.2.0) Suggests: knitr, - testthat + testthat, + gam VignetteBuilder: knitr LazyData: TRUE URL: https://github.com/zoonproject/zoon -RoxygenNote: 5.0.0 +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 4c58402..b7b9e03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,5 +19,16 @@ export(ZoonPredict) export(workflow) import(RCurl) import(dismo) +import(methods) import(raster) +importFrom(graphics,par) +importFrom(graphics,plot.new) +importFrom(graphics,rect) +importFrom(graphics,segments) +importFrom(graphics,strwidth) importFrom(rfigshare,fs_new_article) +importFrom(utils,browseURL) +importFrom(utils,capture.output) +importFrom(utils,download.file) +importFrom(utils,install.packages) +importFrom(utils,sessionInfo) diff --git a/R/BuildModule.R b/R/BuildModule.R index 1d0851c..cbcbace 100644 --- a/R/BuildModule.R +++ b/R/BuildModule.R @@ -28,6 +28,8 @@ #' #' @return Name of the module. 
Outputs a file #' @name BuildModule +#' @import methods +#' @importFrom utils capture.output #' @export BuildModule <- function(object, type, dir='.', title = '', description = '', diff --git a/R/ChangeWorkflow.R b/R/ChangeWorkflow.R index 1192311..0c0f928 100644 --- a/R/ChangeWorkflow.R +++ b/R/ChangeWorkflow.R @@ -12,6 +12,7 @@ #' #'@export #'@name ChangeWorkflow +#'@importFrom utils sessionInfo #'@examples \dontrun{ #' w <- workflow(UKAnophelesPlumbeus, #' UKAir, diff --git a/R/GetPackage.R b/R/GetPackage.R index 105a5cb..ddcf9db 100644 --- a/R/GetPackage.R +++ b/R/GetPackage.R @@ -7,10 +7,8 @@ #' @param package The name of the package with or without #' quotes #' @return NULL -#' @examples -#' \dontrun{ -#' GetPackage('gam') -#' } +#' @importFrom utils install.packages +#' @examples GetPackage('gam') #' @export GetPackage <- function (package) { diff --git a/R/HelpFunctions.R b/R/HelpFunctions.R index e28effd..897c5d3 100644 --- a/R/HelpFunctions.R +++ b/R/HelpFunctions.R @@ -39,6 +39,7 @@ ModuleHelp <- function(module){ # Download a file with libcurl and no messages to the console +#'@importFrom utils download.file DownloadQuietly <- function(url, file) { download.file(url, @@ -52,6 +53,7 @@ DownloadQuietly <- function(url, file) { # Display a module helpfile in accordance with the 'help_type' option # help is either displayed as an HTML or text, pdf is not supported and # an error is returned +#'@importFrom utils browseURL DisplayModuleHelp <- function (url) { diff --git a/R/RerunWorkflow.R b/R/RerunWorkflow.R index 010ade3..57f6e6b 100644 --- a/R/RerunWorkflow.R +++ b/R/RerunWorkflow.R @@ -2,18 +2,20 @@ #' #' Takes a workflow object and reruns it. #' -#'@param workflow A zoonWorkflow object from a previous zoon analysis -#'@param from Which modules should be run. If NULL (default), run from the +#' @param workflow A zoonWorkflow object from a previous zoon analysis +#' @param from Which modules should be run. 
If NULL (default), run from the #' first NULL output (i.e. where the workflow broke). Otherwise takes an #' integer and runs from that module. #' -#'@return A list with the results of each module and a copy of the +#' @return A list with the results of each module and a copy of the #' call used to execute the workflow. #' -#'@export -#'@name RerunWorkflow -#'@examples \dontrun{ -#' w <- workflow(UKAnophelesPlumbeus, UKAir, +#' @export +#' @name RerunWorkflow +#' @importFrom utils sessionInfo +#' @examples \dontrun{ +#' w <- workflow(UKAnophelesPlumbeus, +#' UKAir, #' OneHundredBackground, #' LogisticRegression, #' SameTimePlaceMap) diff --git a/R/SearchFunctions.R b/R/SearchFunctions.R index 0be5bfe..367333c 100644 --- a/R/SearchFunctions.R +++ b/R/SearchFunctions.R @@ -5,9 +5,14 @@ #'@return A list with all module names. #'@name GetModuleList #'@param renew Download from github even if we already have a module list. +#' @details This function will only work on a platform that supports the +#' method 'libcurl' in the function url. This can be tested using the function +#' \code{capabilities} (see example). #' #'@export -#'@examples \dontrun{GetModuleList()} +#'@examples +#'# GetModuleList requires libcurl to be supported +#'if(capabilities('libcurl')) GetModuleList() GetModuleList <- function(renew = FALSE){ diff --git a/R/ZoonFigshare.R b/R/ZoonFigshare.R index bcfe0d9..5353cca 100644 --- a/R/ZoonFigshare.R +++ b/R/ZoonFigshare.R @@ -18,6 +18,7 @@ #' @param tags Character vector of searchable tags. 
#' #' @importFrom rfigshare fs_new_article +#' @importFrom utils browseURL #' @export ZoonFigshare <- function(zoonWorkflow, title = 'My Zoon Workflow', diff --git a/R/plot.zoonWorkflow.R b/R/plot.zoonWorkflow.R index 9ab869f..ed04ea1 100644 --- a/R/plot.zoonWorkflow.R +++ b/R/plot.zoonWorkflow.R @@ -1,3 +1,5 @@ +#' @importFrom graphics strwidth + GetCex <- function(string, width = 9) { # given a string (character vector of length one), # and a target width on screen, in user coordinates @@ -7,6 +9,8 @@ GetCex <- function(string, width = 9) { return (cex) } +#' @importFrom graphics strwidth segments + ModuleLabels <- function(colr, IsChain, IsList){ ###___ function for writing the module labels @@ -43,6 +47,7 @@ ModuleLabels <- function(colr, IsChain, IsList){ } #END ModuleLabels +#' @importFrom graphics rect segments strwidth Boxed2 <- function (NoOfModules, InModuleList, IsList, IsChain, ModuleNames) { ###___ function for writing the boxes @@ -226,6 +231,7 @@ CallLister <- function( callList ){ #' @param \dots currently ignored #' #' @method plot zoonWorkflow +#' @importFrom graphics plot.new par rect #' @export plot.zoonWorkflow <- function(x, ...) { @@ -233,11 +239,11 @@ plot.zoonWorkflow <- function(x, ...) { plot.new() par(mar = c(0, 0, 0, 0)) - plot(-99, -99, - xlim = c(0, 110), - ylim = c(-100, 125), - xlab = "", ylab = "", - axes = FALSE) + graphics::plot(-99, -99, + xlim = c(0, 110), + ylim = c(-100, 125), + xlab = "", ylab = "", + axes = FALSE) rect( -200, -200, 200, 200, col = "cornsilk1", diff --git a/R/zoon.R b/R/zoon.R index c7849c0..5bc7788 100644 --- a/R/zoon.R +++ b/R/zoon.R @@ -39,21 +39,22 @@ NULL #' give the modules in the form of a function #' e.g. occurrence = AModule(para1 = 2, para2 = 'detail') #' -#'@param occurrence Occurrence module to be used. -#'@param covariate Covariate module to be used. -#'@param process Process module to be used. -#'@param model SDM model module to be used. -#'@param output Output module to be used. 
-#'@param forceReproducible Logical whether to force zoon to collect modules +#' @param occurrence Occurrence module to be used. +#' @param covariate Covariate module to be used. +#' @param process Process module to be used. +#' @param model SDM model module to be used. +#' @param output Output module to be used. +#' @param forceReproducible Logical whether to force zoon to collect modules #' from the online repo. This ensure the analysis is reproducible. #' -#'@return A list with the results of each module and a copy of the +#' @return A list with the results of each module and a copy of the #' code used to execute the workflow (what's there now should be source-able #' though I'm sure there is a much neater approach than the one I took - the #' ultimate aim would be a much nicer way of enhancing reproducibility). -#'@export -#'@name workflow -#'@examples +#' @export +#' @name workflow +#' @importFrom utils sessionInfo +#' @examples #'# run a workflow, using the logistic regression model #'\dontrun{ #' diff --git a/R/zoonHelpers.R b/R/zoonHelpers.R index de200b4..7397fcc 100644 --- a/R/zoonHelpers.R +++ b/R/zoonHelpers.R @@ -440,7 +440,7 @@ ErrorModule <- function(cond, mod, e){ message() # Where did workflow break and where is the progress stored? x <- paste("Stopping workflow due to error in", module, "module.\n", - "Workflow progress stored in object 'tmpZoonWorkflow'.") + "Workflow progress will be returned.") # Throw error. The call for this error is meaningless so don't print it. stop(x, call. 
= FALSE) } @@ -485,6 +485,7 @@ Writeable <- function (dir) { #' #' @export #' @name GetMaxEnt +#' @importFrom utils browseURL GetMaxEnt <- function () { # Send the user to download the MaxEnt executable, # then find and upload it diff --git a/inst/doc/Building_a_module.R b/inst/doc/Building_a_module.R deleted file mode 100644 index fd345be..0000000 --- a/inst/doc/Building_a_module.R +++ /dev/null @@ -1,123 +0,0 @@ -## ---- eval = FALSE------------------------------------------------------- -# NewModule <- function(.df){ - -## ---- eval = FALSE------------------------------------------------------- -# # Specify the packages we need using the function -# # GetPackage -# zoon:::GetPackage("gam") - -## ---- eval = FALSE------------------------------------------------------- -# # Create a data.frame of covariate data -# covs <- as.data.frame(.df[, 6:ncol(.df)]) -# names(covs) <- names(.df)[6:ncol(.df)] -# -# # Run our gam model -# m <- gam::gam(formula = .df$value ~ ., -# data = covs, -# family = binomial) - -## ---- eval = FALSE------------------------------------------------------- -# # Create a ZoonModel object to return. -# # this includes our model, predict method -# # and the packages we need. 
-# ZoonModel(model = m, -# code = { -# -# # create empty vector of predictions -# p <- rep(NA, nrow(newdata)) -# -# # omit NAs in new data -# newdata_clean <- na.omit(newdata) -# -# # get NA indices -# na_idx <- attr(newdata_clean, 'na.action') -# -# # if there are no NAs then the index should -# # include all rows, else it should name the -# # rows to ignore -# if (is.null(na_idx)){ -# na_idx <- 1:nrow(newdata) -# } else { -# idx <- -na_idx -# } -# -# # Use the predict function in gam to predict -# # our new values -# p[idx] <- gam::predict.gam(model, -# newdata_clean, -# type = 'response') -# return (p) -# }, -# packages = 'gam') - -## ---- eval = FALSE------------------------------------------------------- -# NewModule <- function(.df){ -# -# # Specify the packages we need using the function -# # GetPackage -# zoon:::GetPackage("gam") -# -# # Create a data.frame of covariate data -# covs <- as.data.frame(.df[, 6:ncol(.df)]) -# names(covs) <- names(.df)[6:ncol(.df)] -# -# # Run our gam model -# m <- gam::gam(formula = .df$value ~ ., -# data = covs, -# family = binomial) -# -# # Create a ZoonModel object to return. -# # this includes our model, predict method -# # and the packages we need. 
-# ZoonModel(model = m, -# code = { -# -# # create empty vector of predictions -# p <- rep(NA, nrow(newdata)) -# -# # omit NAs in new data -# newdata_clean <- na.omit(newdata) -# -# # get their indices -# na_idx <- attr(newdata_clean, 'na.action') -# -# # if there are no NAs then the index should -# # include all rows, else it should name the -# # rows to ignore -# if (is.null(na_idx)){ -# na_idx <- 1:nrow(newdata) -# } else { -# idx <- -na_idx -# } -# -# # Use the predict function in gam to predict -# # our new values -# p[idx] <- gam::predict.gam(model, -# newdata_clean, -# type = 'response') -# return (p) -# }, -# packages = 'gam') -# -# } - -## ----BuildMod, eval = FALSE---------------------------------------------- -# BuildModule(object = NewModule, -# type = 'model', -# dir = '.', -# title = 'GAM sdm model', -# description = 'This is my mega cool new model.', -# details = 'This module performs GAMs (Generalised Additive Models) using the \\code{gam} function from the package \\code{gam}.', -# paras = NULL, -# author = 'Z. Oon', -# email = 'zoon@zoon.com') - -## ----newmodworkflow, eval = FALSE---------------------------------------- -# rm(NewModule) -# LoadModule('NewModule.R') -# work1 <- workflow(occurrence = UKAnophelesPlumbeus, -# covariate = UKAir, -# process = OneHundredBackground, -# model = NewModule, -# output = PrintMap) - diff --git a/inst/doc/Building_a_module.Rmd b/inst/doc/Building_a_module.Rmd deleted file mode 100644 index fd1900f..0000000 --- a/inst/doc/Building_a_module.Rmd +++ /dev/null @@ -1,229 +0,0 @@ ---- -title: "Building modules" -author: "Tim Lucas & Tom August" -date: "`r Sys.Date()`" -output: html_vignette ---- - - - -# Building a module - -The process of making a module is essentially - -1. Write an R function -2. Run `BuildModule` with the function and metadata -3. *Optional* -- Upload to the zoon modules repository - -## An example - -Here is a simple function that will become our module. 
It is a model module that uses general additive models. We will work through it one element at a time - -First we start our function by declaring all the parameters we need, including all the defaults - -```{r, eval = FALSE} -NewModule <- function(.df){ -``` - -Since this is a model module the only default is `.df`. To find out more about defaults see the section [Module IO definitions for module developers](#tag1). - -Next we specify the packages our function needs. These should be specified by using GetPackage function in the zoon package. This function will load the package if the user of your module already has it or will install it from CRAN if they don't. For this reason make sure your package only uses packages that are on CRAN. - -```{r, eval = FALSE} -# Specify the packages we need using the function -# GetPackage -zoon:::GetPackage("gam") -``` - -Next we can add the code that does our modelling, here we create a simple GAM (Generalised Additive Model) using the package [gam](https://cran.r-project.org/web/packages/gam/index.html) - -```{r, eval = FALSE} -# Create a data.frame of covariate data -covs <- as.data.frame(.df[, 6:ncol(.df)]) -names(covs) <- names(.df)[6:ncol(.df)] - -# Run our gam model -m <- gam::gam(formula = .df$value ~ ., - data = covs, - family = binomial) -``` - -The final stage of building a model module is to create a ZoonModel object. This is important as it ensures that all model module output are the same and specifically that zoon can predict from them easily. - -We build a ZoonModel object by using the function `ZoonModel`. This takes three parameters - -1. *model* - Your model object -2. *code* - A section of code that will use `model` [your model] and `newdata` [a new set of covariate data], to return a vector of predicted values, one for each row of `newdata` -3. *packages* - A vector of characters naming the packages needed to run *code* - -```{r, eval = FALSE} -# Create a ZoonModel object to return. 
-# this includes our model, predict method -# and the packages we need. -ZoonModel(model = m, - code = { - - # create empty vector of predictions - p <- rep(NA, nrow(newdata)) - - # omit NAs in new data - newdata_clean <- na.omit(newdata) - - # get NA indices - na_idx <- attr(newdata_clean, 'na.action') - - # if there are no NAs then the index should - # include all rows, else it should name the - # rows to ignore - if (is.null(na_idx)){ - na_idx <- 1:nrow(newdata) - } else { - idx <- -na_idx - } - - # Use the predict function in gam to predict - # our new values - p[idx] <- gam::predict.gam(model, - newdata_clean, - type = 'response') - return (p) - }, - packages = 'gam') -``` - -With all these elements in place we now have our module complete. All together it looks like this. - -```{r, eval = FALSE} -NewModule <- function(.df){ - - # Specify the packages we need using the function - # GetPackage - zoon:::GetPackage("gam") - - # Create a data.frame of covariate data - covs <- as.data.frame(.df[, 6:ncol(.df)]) - names(covs) <- names(.df)[6:ncol(.df)] - - # Run our gam model - m <- gam::gam(formula = .df$value ~ ., - data = covs, - family = binomial) - - # Create a ZoonModel object to return. - # this includes our model, predict method - # and the packages we need. 
- ZoonModel(model = m, - code = { - - # create empty vector of predictions - p <- rep(NA, nrow(newdata)) - - # omit NAs in new data - newdata_clean <- na.omit(newdata) - - # get their indices - na_idx <- attr(newdata_clean, 'na.action') - - # if there are no NAs then the index should - # include all rows, else it should name the - # rows to ignore - if (is.null(na_idx)){ - na_idx <- 1:nrow(newdata) - } else { - idx <- -na_idx - } - - # Use the predict function in gam to predict - # our new values - p[idx] <- gam::predict.gam(model, - newdata_clean, - type = 'response') - return (p) - }, - packages = 'gam') - -} -``` - -We then run `BuildModule` adding fairly extensive meta data and directing BuildModule to save the file in the working directory '.'. As this module has no parameters other than `.df` which is not user specified, set `paras` to list(). Default parameters, like `.df`, all start with with a `.` and will be written into the module documentation automatically. - -```{r BuildMod, eval = FALSE} -BuildModule(object = NewModule, - type = 'model', - dir = '.', - title = 'GAM sdm model', - description = 'This is my mega cool new model.', - details = 'This module performs GAMs (Generalised Additive Models) using the \\code{gam} function from the package \\code{gam}.', - paras = NULL, - author = 'Z. Oon', - email = 'zoon@zoon.com') -``` - -This is now a run-able module. - -```{r newmodworkflow, eval = FALSE} -rm(NewModule) -LoadModule('NewModule.R') -work1 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = OneHundredBackground, - model = NewModule, - output = PrintMap) -``` - -Once we're happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit [the development pages](https://zoonproject.wordpress.com/) for more information. - - -# Module IO definitions for module developers - -The input arguments and return values of modules are strict. 
However, any module type can have additional input arguments but these must be named. A lot of the data frames include '+ covariates'. This indicates that the number of covariate columns is flexible. - -### Occurrence -In: No default inputs - -Out: data.frame with column names: longitude, latitude, value, type, fold - -### Covariate -In: No default inputs - -Out: raster layer or raster stack - -### Process -In: list called **.data** with 2 elements - -- *df* - A data.frame with columns: values, type, fold, longitude, latitude + covariates -- *ras* - A covariate rasterstack/layer - -Out: list with 2 elements - -- *df* - A data.frame with columns: values, type, fold, longitude, latitude + covariates -- *ras* - A covariate rasterstack/layer - -### Model -In: data.frame from process called **.df** - -Out: A ZoonModel object (see the example above) - -### Output -In: list called **.model** with 2 elements - -- *model* - A ZoonModel object from a model module -- *data* - A data.frame from a process module with the added column `predictions` - -Also a Rasterlayer called **.ras** from the covariate module - -Out: Anything - -# Pictoral description of inputs and outputs -![OccurrenceModule](occurrenceInOut.svg) -![CovariateModule](covariateInOut.svg) -![ProcessModule](processInOut.svg) -![ModelModule](modelInOut.svg) -![OuputModule](outputInOut.svg) - - - - diff --git a/inst/doc/Building_a_module.html b/inst/doc/Building_a_module.html index 2bed0ac..1a46200 100644 --- a/inst/doc/Building_a_module.html +++ b/inst/doc/Building_a_module.html @@ -1,433 +1,763 @@ - - - - -Building a module - - - - - - - - + + - + + + + -h4 { - font-size:1.0em; -} -h5 { - font-size:0.9em; -} -h6 { - font-size:0.8em; -} + -a:visited { - color: rgb(50%, 0%, 50%); -} + -pre, img { - max-width: 100%; -} -pre { - overflow-x: auto; -} -pre code { - display: block; padding: 0.5em; + +
+ - - - - - -

Building a module

+ +
+

The “1, 2, 3” of module building

The process of making a module is essentially

- -
    +
    1. Write an R function
    2. Run BuildModule with the function and metadata
    3. -
    4. Optional – Upload to the zoon modules repository
    5. +
    6. Optional – Upload to the zoon modules repository
    +

    Each module type is slightly different to write though the same three basic steps apply. Below we show an example of how to write each of the module types. We also link to pre-existing modules that you can use as templates.

    +
+
+

How to build an occurrence module

+

The aim of an occurrence module is to return a data.frame of occurrence data which can be used for modelling a species distribution. The example I’m going to show gets data from a fictional survey we have undertaken. The data was saved as a .csv and to share it we have placed it on Figshare.

+
# Load zoon
+library(zoon)
+
+# Start building our function
+Lorem_ipsum_UK <- function(){
+

In this case we have not given our function any arguments as we simply want to return the online dataset. However you could add arguments here to modify what your function returns (for an example see the SpOcc module).

+
# I'm going to use the package 'RCurl' so first I get that
+# using the zoon 'GetPackage' function
+GetPackage('RCurl')
+

It is important that you use the GetPackage function rather than library or require as it will also install the package if the user does not already have it installed.

+
# Next I retrieve the data from figshare
+URL <- "http://files.figshare.com/2519918/Lorem_ipsum_data.csv"
+x <- getURL(URL)
+out <- read.csv(textConnection(x))
+

Now it is time to think about how we return our data. The output format for occurrence modules is very important. If you do not ensure that the format is correct then your module will not work properly when entered into a workflow. An occurrence module must return a data.frame with the columns longitude, latitude, value, type and fold; the details are given at the end of this document.

+

Our occurrence data does not have all of these columns so we need to add them. Here is what our data currently look like.

+
##    startDate latitude longitude
+## 1 2014-06-25 51.98917 0.8917427
+## 2 2014-06-25 51.98917 0.8917427
+## 3 2007-08-28 52.21136 0.6602159
+## 4       <NA> 51.97564 0.9833449
+## 5 1973-01-01 52.34187 0.7142953
+## 6 2013-04-12 52.23719 0.7877316
+

So we need to do a little reformatting

+
# Keep only Lat Long columns
+out <- out[, c("latitude", "longitude")]
+
+# Add in the columns we dont have
+out$value <- 1 # all our data are presences
+out$type <- 'presence'
+out$fold <- 1 # we don't add any folds
+
+# Now the data is in the correct format we can return it
+return(out)
+

We have now written the R code for our occurrence module, this is what it looks like when you put it all together.

+
Lorem_ipsum_UK <- function(){
+  
+  GetPackage('RCurl')
+  
+  # Get data
+  URL <- "http://files.figshare.com/2519918/Lorem_ipsum_data.csv"
+  x <- getURL(URL)
+  out <- read.csv(textConnection(x))
+  out <- out[, c("latitude", "longitude")]
+  
+  # Add in the columns we dont have
+  out$value <- 1 # all our data are presences
+  out$type <- 'presence'
+  out$fold <- 1 # we wont add any folds
+  
+  return(out)
+}
+

Now that we have our function written we can test it very simply in a workflow like this.

+
workl1 <- workflow(occurrence = Lorem_ipsum_UK,
+                   covariate = UKBioclim,
+                   process = OneHundredBackground,
+                   model = LogisticRegression,
+                   output = PrintMap)
+

+

This is a nice way to debug your function and ensure you are getting the results you expect.

+

Once you are happy that your function is working as you expect it to, you can build your code into a module using the BuildModule function in zoon. This script adds in metadata including the type of module, authors’ names, a brief description and documentation for the arguments it accepts (though this one doesn’t accept any arguments).

+
# Let's build our module
+BuildModule(Lorem_ipsum_UK,
+            type = 'occurrence',
+            title = 'A dataset of Lorem ipsum occurrences',
+            description = paste0('The module retrieves a dataset of',
+            'Lorem ipsum records from figshare. This dataset contains',
+            'precence only data and was collected between 1990 and',
+            '2000 by members of to Lorem ipsum appreciation society'),
+            details = 'This dataset is fake, Lorem ipsum does not exist',
+            author = 'A.B. Ceidi',
+            email = 'ABCD@anemail.com',
+            dataType = 'presence-only')
+
## [1] "Lorem_ipsum_UK"
+

This function is fairly self-explanatory; however, it is worth noting the dataType field. This must be one of ‘presence-only’, ‘presence/absence’, ‘abundance’ or ‘proportion’. This is important so that people using your module in the future will know what it is going to output.

+

BuildModule has now written an R file in our working directory containing the function and metadata, so that it can be shared with others.

+
# First we remove the function from our workspace
+rm(list = 'Lorem_ipsum_UK')
+
+# This is how you would use a module that a colleague has sent you
+LoadModule(module = 'Lorem_ipsum_UK.R')
+
+work2 <- workflow(occurrence = Lorem_ipsum_UK,
+                  covariate = UKBioclim,
+                  process = OneHundredBackground,
+                  model = LogisticRegression,
+                  output = PrintMap)
+

Once we’re happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

+
+
+

How to write a covariate module

+

The aim of a covariate module is to provide spatial information that will help to explain the distribution of a species. For example, these data could be climate data, habitat data or topography.

+

A covariate module, like an occurrence module, does not have to take any arguments but must return a raster layer, brick or stack.

+

In this example we will create a covariate module that can provide a number of different climate layers for the area covering Australia.

+
# Our function will take an argument to set the variable
+# the user wants returned
+AustraliaAir <- function(variable = 'rhum'){
+

When your module has arguments, as here, it is important to include defaults for all arguments. This makes it easier for other users to use your modules and allows your module to be tested effectively when you upload it to the zoon repository.

+

The first step is to load the R packages that your code is going to need. It is important that you use the GetPackage function rather than library or require as it will also install the package if the user does not already have it installed.

+

In this example we do not need any external packages as the data we are downloading is a RasterStack object, and zoon already loads the raster package to deal with RasterStacks.

+
## class       : RasterStack 
+## dimensions  : 18, 20, 360, 7  (nrow, ncol, ncell, nlayers)
+## resolution  : 2.3, 2.222222  (x, y)
+## extent      : 111, 157, -46, -6  (xmin, xmax, ymin, ymax)
+## coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
+## names       :           air,           hgt,          rhum,          shum,         omega,          uwnd,          vwnd 
+## min values  :  2.740523e+02,  1.362261e+03,  3.268068e+01,  3.416514e-03, -5.856714e-02, -6.532974e+00, -3.080663e+00 
+## max values  :  2.964156e+02,  1.523139e+03,  8.030254e+01,  1.252498e-02,  5.942655e-02,  1.489644e+01,  4.789505e+00
+

To share this we have saved the object as an R data file and placed it on Figshare - attributing those that created the data.

+

In our function we download this data into R

+
# Load in the data
+URL <- "http://files.figshare.com/2527274/aus_air.rdata"
+load(url(URL)) # The object is called 'ras'
+
+# Subset the data according to the variables parameter
+ras <- subset(ras, variables)
+
+return(ras)
+

We can test our function works by running it in a workflow with other modules

+
AustraliaAir <- function(variables = 'rhum'){
+
+  URL <- "http://files.figshare.com/2527274/aus_air.rdata"
+  load(url(URL)) # The object is called 'ras'
+  ras <- subset(ras, variables)
+  return(ras)
+  
+}
 
-

An example

- +# Select the variables we want +myVariables <- c('air','hgt','rhum','shum','omega','uwnd','vwnd') + +work3 <- workflow(occurrence = SpOcc(extent = c(111, 157, -46, -6), + species = 'Varanus varius', + limit = 500), + covariate = AustraliaAir(variables = myVariables), + process = OneHundredBackground, + model = LogisticRegression, + output = PrintMap)
+

+

Once we are happy with the function we have written we need to use the BuildModule function to convert our function into a module by adding in the necessary metadata

+
# Build our module
+BuildModule(AustraliaAir,
+            type = 'covariate',
+            title = 'Australia Air data from NCEP',
+            description = paste('This modules provides access to the',
+                                'NCEP air data for austrlia provided by',
+                                'NCEP and should be attributed to Climatic',
+                                'Research Unit, University of East Anglia'),
+            details = paste('These data are redistributed under the terms of',
+                            'the Open Database License',
+                            'http://opendatacommons.org/licenses/odbl/1.0/'),
+            author = 'Z.O. Onn',
+            email = 'zoon@zoon-zoon.com',
+            paras = list(variables = paste('A character vector of air variables',
+                         'you wish to return. This can include any number of',
+                         "the following: 'air','hgt','rhum','shum','omega',",
+                         "'uwnd','vwnd'")))
+
## [1] "AustraliaAir"
+

BuildModule is fairly self-explanatory, but it is worth noting the paras argument. This takes a named list of the parameters the module takes. The list should have the following structure: list(parameterName = ‘Parameter description.’, anotherParameter = ‘Another description.’)

+

Once BuildModule has been run there will be an R file in our working directory that represents our module and can be shared with others. This R script can be used as follows.

+
# remove the original function from our environment
+rm(list = 'AustraliaAir')
+
+# Load the module script
+LoadModule('AustraliaAir.R')
+
+work4 <- workflow(occurrence = SpOcc(extent = c(111, 157, -46, -6),
+                                     species = 'Varanus varius',
+                                     limit = 500),
+                  covariate = AustraliaAir,
+                  process = OneHundredBackground,
+                  model = LogisticRegression,
+                  output = PrintMap)
+

Once we’re happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

+
+
+

How to write a process module

+

The aim of a process module is to modify the occurrence data and/or the covariate data prior to modelling. Examples include adding background points, or adding folds for cross-validation.

+

A process module returns data in exactly the same format that it accepts data. It takes and returns a list of two elements. The first element is a data.frame with the columns value, type, fold, longitude, latitude (see Occurrence module output), and additional covariate columns. The covariate columns are added internally in the zoon workflow by combining the output of the covariate module. The second element of the list is a RasterBrick, RasterLayer, or RasterStack as output by a covariate module.

+

In this example we are going to create a process module that cuts down our occurrence data to a user supplied extent.

+

When writing a module it is useful to have example input to test with. One way to do this is to run a similar workflow and use the outputs of that workflow to test yours. Here is an example:

+
# We run a very simple workflow so that we can get example input
+# for our module
+work5 <- workflow(occurrence = UKAnophelesPlumbeus,
+                  covariate  = UKAir,
+                  process    = NoProcess,
+                  model      = LogisticRegression,
+                  output     = PrintMap)
+

+
# The output from a process module is in the same format as the 
+# input, so we can use the output of NoProcess as the testing
+# input for our module. Note that this object should be called
+# .data
+.data <- work5$process.output[[1]]
+
+str(.data, 2)
+
## List of 2
+##  $ df :'data.frame': 188 obs. of  6 variables:
+##   ..$ longitude: num [1:188] 1.01 -0.16 -2.83 -0.63 -3.53 ...
+##   ..$ latitude : num [1:188] 52.4 51.6 53.4 51.6 56 ...
+##   ..$ value    : num [1:188] 1 1 1 1 1 1 1 1 1 1 ...
+##   ..$ type     : chr [1:188] "presence" "presence" "presence" "presence" ...
+##   ..$ fold     : num [1:188] 1 1 1 1 1 1 1 1 1 1 ...
+##   ..$ layer    : num [1:188] 271 272 272 272 271 ...
+##  $ ras:Formal class 'RasterLayer' [package "raster"] with 12 slots
+

It is important to note that the list object that is passed into a process module is named .data, and so when writing our module we need to adhere to this convention.

+
# Start writing our module
+ClipOccurrence <- function(.data, extent = c(-180, 180, -180, 180)){
+

Here we have remembered to give .data as an argument as this is a default for process modules. In addition we have supplied an argument for the extent and set the default to the entire globe (i.e. no clipping). It is important that all of your arguments have defaults (even if the default might not be a good idea in practice), as this allows the zoon system to perform automatic testing on your modules when you share them online.

+
# Write the body of our function
+# extract the occurrence data from the .data object
+occDF <- .data$df
+
+# Subset by longitude
+occSub <- occDF[occDF$longitude >= extent[1] &
+                occDF$longitude <= extent[2], ]
+
+# Subset by latitude
+occSub <- occSub[occSub$latitude >= extent[3] &
+                 occSub$latitude <= extent[4], ]
+
+# assign this data.frame back to the .data object
+.data$df <- occSub
+

So our simple process function looks like this:

+
ClipOccurrence <- function(.data, extent = c(-180, 180, -180, 180)){
+  
+  # Write the body of our function
+  # extract the occurrence data from the .data object
+  occDF <- .data$df
+  
+  occSub <- occDF[occDF$longitude >= extent[1] &
+                  occDF$longitude <= extent[2], ]
+ 
+  occSub <- occSub[occSub$latitude >= extent[3] &
+                   occSub$latitude <= extent[4], ]
+  
+  .data$df <- occSub
+  
+  return(.data)
+  
+}
+

Our next step is to test that this function will work in a workflow. Once we have read in our function so that it is available in our working environment we can then include it in a workflow as we would a normal module.

+
# Run a workflow with our new process
+# In this example we first add background points, then clip the data
+work6 <- workflow(occurrence = UKAnophelesPlumbeus,
+                  covariate  = UKAir,
+                  process    = Chain(OneHundredBackground,
+                                     ClipOccurrence(extent = c(-3, 2, 50, 53))),
+                  model      = LogisticRegression,
+                  output     = PrintMap)
+

+

We can see that the data has been clipped to the extent we specified in the map printed by the output module.

+

The next stage is to turn this function into a module which is shareable. To do this we need to add metadata to our function using the BuildModule function

+
# Build our module
+BuildModule(ClipOccurrence,
+            type = 'process',
+            title = 'Clip occurrence data to extent',
+            description = paste('This process module clips the occurrence',
+                                'data that is returned from the occurrence',
+                                'module to a user defined extent'),
+            details = paste('The extent is a square region which denotes the',
+                            'area within which observations will be kept.',
+                            'All data that falls outside of the extent will',
+                            'be removed and will be not be used in the',
+                            'modelling process'),
+            author = 'Z.O. Onn',
+            email = 'zoon@zoon-zoon.com',
+            paras = list(extent = paste('A numeric vector of length for',
+                                        'giving (in this order) the minimum',
+                                        'longitude, maximum longitude, minimum',
+                                        'latitude, maximum latitude.')),
+            dataType = c('presence-only', 'presence/absence', 'abundance',
+                         'proportion'))
+
## [1] "ClipOccurrence"
+

Much of how to use BuildModule is self-explanatory but two parameters are worth mentioning here. The paras argument takes a named list of the parameters the module takes. This should follow the following structure; list(parameterName = ‘Parameter description.’, anotherParameter = ‘Another description.’), but should not include the defaults (i.e. we do not include .data). dataType describes the types of occurrence data that this module will work with. Certain modules might only work with presence-only data for example. In our case, our module will work with any type of data and so we list all the data types in the dataType field.

+

Once BuildModule has been run there will be an R file in our working directory that represents our module and can be shared with others. This R script can be used as follows.

+
# remove the original function from our environment
+rm(list = 'ClipOccurrence')
+
+# Load the module script
+LoadModule('ClipOccurrence.R')
+
## [1] "ClipOccurrence"
+
work7 <- workflow(occurrence = CWBZimbabwe,
+                  covariate = Bioclim(extent = c(31, 34, -22, -18)),
+                  process = ClipOccurrence(extent = c(32, 33, -21, -19)),
+                  model = LogisticRegression,
+                  output = PrintMap)
+

+

Once we’re happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

+
+
+

How to write a model module

Here is a simple function that will become our module. It is a model module that uses generalised additive models. We will work through it one element at a time.

-

First we start our function by declaring all the parameters we need, including all the defaults

- -
NewModule <- function(.df){
-
- -

Since this is a model module the only default is .df. To find out more about defaults see the section Module IO definitions for module developers.

- -

Next we specify the packages our function needs. These should be specified by using GetPackage function in the zoon package. This function will load the package if the user of your module already has it or will install it from CRAN if they don't. For this reason make sure your package only uses packages that are on CRAN.

- -
# Specify the packages we need using the function
+
GamGam <- function(.df){
+

Since this is a model module the only default is .df. To find out more about defaults see the section Module IO definitions for module developers.

+

Next we specify the packages our function needs. These should be specified using the GetPackage function in the zoon package. This function will load the package if the user of your module already has it or will install it from CRAN if they don’t. For this reason make sure your module only uses packages that are on CRAN.

+
# Specify the packages we need using the function
 # GetPackage
-zoon:::GetPackage("gam")
-
- -

Next we can add the code that does our modelling, here we create a simple GAM (Generalised Additive Model) using the package gam

- -
# Create a data.frame of covariate data
+zoon::GetPackage("gam")
+

Next we can add the code that does our modelling, here we create a simple GAM (Generalised Additive Model) using the package gam

+
# Create a data.frame of covariate data
 covs <- as.data.frame(.df[, 6:ncol(.df)])
 names(covs) <- names(.df)[6:ncol(.df)]
 
+# do a bit of copy-pasting to define smooth terms for each covariate
+f <- sprintf('.df$value ~ s(%s)',
+                    paste(colnames(covs),
+                          collapse = ') + s('))
+
 # Run our gam model
-m <- gam::gam(formula = .df$value ~ .,
+m <- gam::gam(formula = formula(f),
               data = covs,
-              family = binomial)
-
- -

The final stage of building a model module is to create a ZoonModel object. This is important as it ensures that all model module output are the same and specifically that zoon can predict from them easily.

- -

We build a ZoonModel object by using the function ZoonModel. This takes three parameters

- -
    -
  1. model - Your model object
  2. -
  3. code - A section of code that will use model [your model] and newdata [a new set of covariate data], to return a vector of predicted values, one for each row of newdata
  4. -
  5. packages - A vector of characters naming the packages needed to run code
  6. -
- -
# Create a ZoonModel object to return.
+              family = binomial)
+

The final stage of building a model module is to write some code within the function to create a ZoonModel object. This is important as it standardises all outputs from model modules and crucially enables zoon to make predictions from them in a predictable and standard way.

+

We build a ZoonModel object by using the function ZoonModel. This takes three parameters

+
    +
  • model: Your model object
  • +
  • code: A section of code that will use model [your model] and newdata [a new set of covariate data], to return a vector of predicted values, one for each row of newdata
  • +
  • packages: A vector of characters naming the packages needed to run code
  • +
+
# Create a ZoonModel object to return.
 # this includes our model, predict method
 # and the packages we need.
 ZoonModel(model = m,
           code = {
-
+          
           # create empty vector of predictions
           p <- rep(NA, nrow(newdata))
-
+          
           # omit NAs in new data
           newdata_clean <- na.omit(newdata)
-
+          
           # get NA indices
-          na_idx <- attr(newdata_clean, 'na.action')
-
+          na_idx <- attr(newdata_clean, 'na.action')
+          
           # if there are no NAs then the index should 
           # include all rows, else it should name the 
           # rows to ignore
           if (is.null(na_idx)){
-            na_idx <- 1:nrow(newdata)
+            idx <- 1:nrow(newdata)
           } else {
             idx <- -na_idx
           }
-
+          
           # Use the predict function in gam to predict
           # our new values
           p[idx] <- gam::predict.gam(model,
                                      newdata_clean,
-                                     type = 'response')
+                                     type = 'response')
           return (p)
         },
-        packages = 'gam')
-
- + packages = 'gam')

With all these elements in place we now have our module complete. All together it looks like this.

- -
NewModule <- function(.df){
+
GamGam <- function(.df){
 
   # Specify the packages we need using the function
   # GetPackage
-  zoon:::GetPackage("gam")
-
+  zoon::GetPackage("gam")
+  
   # Create a data.frame of covariate data
   covs <- as.data.frame(.df[, 6:ncol(.df)])
   names(covs) <- names(.df)[6:ncol(.df)]
-
+  
+  # do a bit of copy-pasting to define smooth terms for each covariate
+  f <- sprintf('.df$value ~ s(%s)',
+                      paste(colnames(covs),
+                            collapse = ') + s('))
+  
   # Run our gam model
-  m <- gam::gam(formula = .df$value ~ .,
-         data = covs,
-         family = binomial)
-
+  m <- gam::gam(formula = formula(f),
+                data = covs,
+                family = binomial)
+  
   # Create a ZoonModel object to return.
   # this includes our model, predict method
   # and the packages we need.
   ZoonModel(model = m,
             code = {
-
+            
             # create empty vector of predictions
             p <- rep(NA, nrow(newdata))
-
+            
             # omit NAs in new data
             newdata_clean <- na.omit(newdata)
-
+            
             # get their indices
-            na_idx <- attr(newdata_clean, 'na.action')
-
+            na_idx <- attr(newdata_clean, 'na.action')
+            
             # if there are no NAs then the index should 
             # include all rows, else it should name the 
             # rows to ignore
             if (is.null(na_idx)){
-              na_idx <- 1:nrow(newdata)
+              idx <- 1:nrow(newdata)
             } else {
               idx <- -na_idx
             }
-
+            
             # Use the predict function in gam to predict
             # our new values
             p[idx] <- gam::predict.gam(model,
                                        newdata_clean,
-                                       type = 'response')
+                                       type = 'response')
             return (p)
           },
-          packages = 'gam')
-
-}
-
- -

We then run BuildModule adding fairly extensive meta data and directing BuildModule to save the file in the working directory '.'. As this module has no parameters other than .df which is not user specified, set paras to list(). Default parameters, like .df, all start with with a . and will be written into the module documentation automatically.

- -
BuildModule(object = NewModule,
-            type = 'model',
-            dir = '.',
-            title = 'GAM sdm model',
-            description = 'This is my mega cool new model.',
-            details = 'This module performs GAMs (Generalised Additive Models) using the \\code{gam} function from the package \\code{gam}.',
-            paras = NULL,
-            author = 'Z. Oon',
-            email = 'zoon@zoon.com')
-
- + packages = 'gam') + +}
+

We then run BuildModule on our function, adding the required metadata. As this module has no parameters other than .df which is not user specified, we don’t need to set the paras argument, which would normally be used to document arguments. Default arguments, like .df are all signified by starting with a . and don’t need to be documented as this will be written into the module documentation automatically.

+
BuildModule(object = GamGam,
+            type = 'model',
+            title = 'GAM sdm model',
+            description = 'This is my mega cool new model.',
+            details = paste('This module performs GAMs (Generalised Additive',
+                            'Models) using the gam function from the package gam.'),
+            author = 'Z. Oon',
+            email = 'zoon@zoon.com',
+            dataType = c('presence-only', 'presence/absence'))
+
## [1] "GamGam"

This is now a run-able module.

- -
rm(NewModule)
-LoadModule('NewModule.R')
-work1 <- workflow(occurrence = UKAnophelesPlumbeus,
+
# remove the function in our workspace else
+# this will cause problems
+rm(GamGam)
+
+# Load in the module we just built
+LoadModule('GamGam.R')
+
## [1] "GamGam"
+
# Run a workflow using our module
+work8 <- workflow(occurrence = UKAnophelesPlumbeus,
                   covariate = UKAir,
                   process  = OneHundredBackground,
-                  model = NewModule,
-                  output   = PrintMap)
-
- -

Once we're happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

- -

Module IO definitions for module developers

- -

The input arguments and return values of modules are strict. However, any module type can have additional input arguments but these must be named. A lot of the data frames include '+ covariates'. This indicates that the number of covariate columns is flexible.

- + model = GamGam, + output = PrintMap)
+

+

Once we’re happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

+
+
+

How to write an output module

+

An output module is the last module in a zoon workflow and is an opportunity to summarise the model results, make predictions, or otherwise visualise the data or results. The input to output modules is a combination of the outputs of occurrence, covariate, process and model modules providing many possible output types.

+

In this example we will create an output module that uses the model output to predict the species occurrence in a new location given by a user-provided raster.

+

When writing a module it is useful to have example input to test with. One way to do this is to run a similar workflow and use the outputs of that workflow to test yours. Here is an example:

+
# We run a very simple workflow so that we can get example input
+# for our module
+work9 <- workflow(occurrence = UKAnophelesPlumbeus,
+                  covariate  = UKAir,
+                  process    = OneHundredBackground,
+                  model      = LogisticRegression,
+                  output     = PrintMap)
+
+# The input to an output module is a combination of the output
+# from the model module and the covariate module. We can recreate
+# it for this work flow like this
+.model <- work9$model.output[[1]]
+.ras <- work9$covariate.output[[1]]
+

Both .model and .ras are default arguments for an output module so it is important that you have them as arguments for your module, even if you don’t use them both. It is also important that you stick to the same naming conventions.

+
# Our output module takes the default parameters and a user-defined
+# Raster* object that has the same structure as the raster layer output
+# by the covariate module
+PredictNewRasterMap <- function(.model, .ras, raster = .ras){
+

It is important to have default values for all user defined parameters so that your module can be tested when you upload it to the zoon website. Here we set our default ‘new area’ raster to be the same as the raster used to create the model. Clearly this is not how we envisage the module being used in a real application (unless they genuinely wanted to predict back to the same area), however this ensures that this module will always work with its default arguments, no matter what workflow it is placed in.

+
# The first step is to load in the packages we need
+zoon::GetPackage(raster) 
+  
+# Then extract the covariate values
+# from the user provided raster
+vals <- data.frame(getValues(raster))
+colnames(vals) <- names(raster)
+

Once we have these new values we can predict using the ZoonPredict function. This function is very useful as it simplifies the process of making predictions from the output of a model module. See the InteractiveMap module for an innovative visualisation using predicted values.

+
# Make predictions to the new values
+pred <- ZoonPredict(.model$model,
+                    newdata = vals)
+
+# Create a copy of the users' raster...
+# (just a single layer)
+pred_ras <- raster[[1]]
+    
+# ... and assign the predicted values to it
+pred_ras <- setValues(pred_ras, pred)
+

Once we have the raster of predicted values we can plot it and return the results to the user.

+
# Plot the predictions as a map
+plot(pred_ras)
+
+# Return the raster of predictions
+return (pred_ras)
+

Our function now looks like this:

+
PredictNewRasterMap <- function(.model, .ras, raster = .ras){
+  
+  zoon::GetPackage(raster)
+  
+  # Extract the values from the user provided raster
+  vals <- data.frame(getValues(raster))
+  colnames(vals) <- names(raster)
+  
+  # Make predictions to the new values
+  pred <- ZoonPredict(.model$model,
+                      newdata = vals)
+  
+  pred_ras <- raster[[1]]
+  pred_ras <- setValues(pred_ras, pred)
+  
+  # Print the predictions as a map
+  plot(pred_ras)
+  
+  return(pred_ras)
+}
+

Our next step is to test that this function will work in a workflow. Once we have read in our function so that it is available in our working environment we can then include it in a workflow as we would a normal module.

+
# Run it with the defaults
+work10 <- workflow(occurrence = UKAnophelesPlumbeus,
+                   covariate  = UKBioclim,
+                   process    = OneHundredBackground,
+                   model      = LogisticRegression,
+                   output     = PredictNewRasterMap)
+

+
# Now I'm going to run it with a different raster
+library(raster)
+
+# Get Bioclim data (using the getData function in the raster package,
+# which zoon loads) ...
+BioclimData <- getData('worldclim', var = 'bio', res = 5)
+BioclimData <- BioclimData[[1:19]]
+
+# ... and crop to Australia
+cropped <- crop(BioclimData,
+                c(109,155,-46,-7))
+
+# Run it with my new raster
+work11 <- workflow(occurrence = UKAnophelesPlumbeus,
+                   covariate  = UKBioclim,
+                   process    = OneHundredBackground,
+                   model      = LogisticRegression,
+                   output     = PredictNewRasterMap(raster = cropped))
+

+
# The prediction map should also be returned as a raster
+str(work11$report, 2)
+
## List of 1
+##  $ :Formal class 'RasterLayer' [package "raster"] with 12 slots
+

The next stage is to turn this function into a module which is shareable. To do this we need to add metadata to our function using the BuildModule function

+
# Build our module
+BuildModule(PredictNewRasterMap,
+            type = 'output',
+            title = 'Predict to a new raster and map',
+            description = paste('This output module predicts the species',
+                                'distribution in a new area given a new',
+                                'raster'),
+            details = paste('The results are printed as a map and a raster is',
+                            'returned with the predicted values. It is important',
+                            'that the new raster has the same structure as the',
+                            'raster provided by the covariate module.',
+                            'It must have the same covariate columns in the',
+                            'same order.'),
+            author = 'Z.O. On',
+            email = 'zoon@zoon-zoon.com',
+            paras = list(raster = paste('A RasterBrick, RasterLayer or RasterStack in',
+                                        'the same format as the raster provided',
+                                        'by the covariate module. Predicted values',
+                                        'will be estimated for this raster using',
+                                        'the results from the model module')),
+            dataType = c('presence-only', 'presence/absence', 'abundance',
+                         'proportion'))
+
## [1] "PredictNewRasterMap"
+

Much of how to use BuildModule is self-explanatory but two parameters are worth mentioning here. The paras argument takes a named list of the parameters the module takes in the following structure: list(parameterName = 'Parameter description.', anotherParameter = 'Another description.'), but should not include the defaults (i.e. we do not include .model or .ras). dataType describes the types of occurrence data that this module will work with. Certain modules might only work with presence-only data for example. In our case, our module will work with any type of data and so we list all the data types in the dataType field.

+

Once BuildModule has been run there will be an R file in our working directory that represents our module and can be shared with others. This R script can be used as follows.

+
# remove the original function from our environment
+rm(list = 'PredictNewRasterMap')
+
+# Load the module script
+LoadModule('PredictNewRasterMap.R')
+
## [1] "PredictNewRasterMap"
+
# Now I model a crop pest from Zimbabwe in its home
+# range and in Australia by chaining together
+# output modules
+work12 <- workflow(occurrence = CWBZimbabwe,
+                   covariate = Bioclim(extent = c(28, 38, -24, -16)),
+                   process = NoProcess,
+                   model = RandomForest,
+                   output = Chain(PrintMap,
+                                  PredictNewRasterMap(raster = cropped)))
+
## Loading required package: randomForest
+## randomForest 4.6-12
+## Type rfNews() to see new features/changes/bug fixes.
+

+

Once we’re happy with the module, we will hopefully upload it to the zoon repository. The repository is currently under development. Visit the development pages for more information.

+
+
+

Module IO definitions for module developers

+

The default input arguments and return values of modules are strict. However, any module type can have additional named input arguments, provided they have default values. A lot of the data frames include ‘+ covariates’. This indicates that the number of covariate columns is flexible.

+

Occurrence

- -

In: No default inputs

- -

Out: data.frame with column names: longitude, latitude, value, type, fold

- +

In: No default inputs

+

Out: data.frame with column names:

+
    +
  • longitude: The longitude of the observation.
  • +
  • latitude: The latitude of the observation.
  • +
  • value: The response value for the observation when used in a model. This can be 1 or 0 for presence/absence, an integer for abundance (e.g. 1, 3, 67), or a decimal number between 0 and 1 for proportions (e.g. 0.12, 0.5, 0.98).
  • +
  • type: This is linked to value and dictates for each row of the data.frame the type of value given. This can be one of the following; 'presence', 'absence', 'background', 'abundance', 'proportion'.
  • +
  • fold: Folds are used to test your model. If we have, for example, 3 folds (1, 2, 3) then we can use the PerformanceMeasures output module to test the performance of the model. A common method, implemented by PerformanceMeasures, is to build the model using all but one fold, and then test the model’s ability to predict the fold that was held back.
  • +
+
+

Covariate

- -

In: No default inputs

- -

Out: raster layer or raster stack

- +

In: No default inputs

+

Out: RasterLayer, RasterBrick or RasterStack object

+
+

Process

- -

In: list called .data with 2 elements

- +

In: list named .data with 2 named elements:

    -
  • df - A data.frame with columns: values, type, fold, longitude, latitude + covariates
  • -
  • ras - A covariate rasterstack/layer
  • +
  • df: A data.frame with columns: 'value', 'type', 'fold', 'longitude', 'latitude' plus additional named columns giving associated covariate values. See occurrence module for details on these columns.
  • +
  • ras: A RasterLayer, RasterBrick or RasterStack object of covariate rasters
- -

Out: list with 2 elements

- +

Out: list with 2 elements

    -
  • df - A data.frame with columns: values, type, fold, longitude, latitude + covariates
  • -
  • ras - A covariate rasterstack/layer
  • +
  • df: A data.frame with columns: value, type, fold, longitude, latitude plus additional named columns giving associated covariate values
  • +
  • ras: A RasterLayer, RasterBrick or RasterStack object of covariate rasters
- +
+

Model

-

In: data.frame from process called .df

- -

Out: A ZoonModel object (see the example above)

- +

Out: A ZoonModel object (see the example above)

+
+

Output

- -

In: list called .model with 2 elements

- +

In:

    -
  • model - A ZoonModel object from a model module
  • -
  • data - A data.frame from a process module with the added column predictions
  • +
  • A list named .model with 2 named elements:

  • +
  • model: A ZoonModel object from a model module
  • +
  • data: A data.frame from a process module with the added column predictions

  • +
  • A RasterLayer, RasterBrick or RasterStack object named .ras, provided by the covariate module

+

Out: Anything!

+
+
+
+

Pictorial description of inputs and outputs

+

OccurrenceModule CovariateModule ProcessModule ModelModule OuputModule

+
-

Also a Rasterlayer called .ras from the covariate module

-

Out: Anything

+
-

Pictoral description of inputs and outputs

+ + + + + diff --git a/inst/doc/Module_IO_for_devs.R b/inst/doc/Module_IO_for_devs.R deleted file mode 100644 index 8b13789..0000000 --- a/inst/doc/Module_IO_for_devs.R +++ /dev/null @@ -1 +0,0 @@ - diff --git a/inst/doc/Module_IO_for_devs.Rmd b/inst/doc/Module_IO_for_devs.Rmd deleted file mode 100644 index 7a277f4..0000000 --- a/inst/doc/Module_IO_for_devs.Rmd +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: "Module IO definitions for module developers" -author: "Tim Lucas & Nick Golding" -date: "`r Sys.Date()`" -output: - html_vignette: - pdf_document: - theme: peaksea - highlight: zenburn ---- - - - -# Module IO definitions for module developers - -### Occurrence -Out: `data.frame` with columns named: - -* `longitude` -* `latitude` -* `value` -* `type` -* `fold` - -### Covariate -out: `RasterLayer` or `RasterStack` object as defined in the `raster` package - -### Process -in: `list` with elements: - -1. `data.frame` with columns named: - * `longitude` - * `latitude` - * `value` - * `type` - * `fold` - * and additional columns giving covariate values -2. `RasterLayer` or `RasterStack` object as defined in the `raster` package - -out: list with elements: - -1. a `data.frame` with columns named: - * `longitude` - * `latitude` - * `value` - * `type` - * `fold` - * and additional columns giving covariate values -2. `RasterLayer` or `RasterStack` object as defined in the `raster` package - - -### Model -in: `.df` - a `data.frame` with columns named: - -* `longitude` -* `latitude` -* `value` -* `type` -* `fold` -* and additional columns giving covariate values - -out: a `ZoonModel` object defined using the `ZoonModel` function - -### Output -in: - -1. `.ras` - `RasterLayer` or `RasterStack` object as defined in the `raster` package -2. 
a list with named elements: - * `model` - a `ZoonModel` object defined using the `ZoonModel` function - * `data` - a `data.frame` with columns named: - * `longitude` - * `latitude` - * `value` - * `type` - * `fold` - * `predictions` - * and additional columns giving covariate values - -out: Anything - - - - diff --git a/inst/doc/Module_IO_for_devs.html b/inst/doc/Module_IO_for_devs.html deleted file mode 100644 index 9294300..0000000 --- a/inst/doc/Module_IO_for_devs.html +++ /dev/null @@ -1,137 +0,0 @@ - - - - - - - - - - - - - - -Module IO definitions for module developers - - - - - - - - - - - - - - - - -
-

Module IO definitions for module developers

-
-

Occurrence

-

Out: data.frame with columns named:

-
    -
  • longitude
  • -
  • latitude
  • -
  • value
  • -
  • type
  • -
  • fold
  • -
-
-
-

Covariate

-

out: RasterLayer or RasterStack object as defined in the raster package

-
-
-

Process

-

in: list with elements:

-
    -
  1. data.frame with columns named:
  2. -
-
    -
  • longitude
  • -
  • latitude
  • -
  • value
  • -
  • type
  • -
  • fold
  • -
  • and additional columns giving covariate values
  • -
-
    -
  1. RasterLayer or RasterStack object as defined in the raster package
  2. -
-

out: list with elements:

-
    -
  1. a data.frame with columns named:
  2. -
-
    -
  • longitude
  • -
  • latitude
  • -
  • value
  • -
  • type
  • -
  • fold
  • -
  • and additional columns giving covariate values
  • -
-
    -
  1. RasterLayer or RasterStack object as defined in the raster package
  2. -
-
-
-

Model

-

in: .df - a data.frame with columns named:

-
    -
  • longitude
  • -
  • latitude
  • -
  • value
  • -
  • type
  • -
  • fold
  • -
  • and additional columns giving covariate values
  • -
-

out: a ZoonModel object defined using the ZoonModel function

-
-
-

Output

-

in:

-
    -
  1. .ras - RasterLayer or RasterStack object as defined in the raster package
  2. -
  3. a list with named elements:
  4. -
-
    -
  • model - a ZoonModel object defined using the ZoonModel function
  • -
  • data - a data.frame with columns named:
  • -
  • longitude
  • -
  • latitude
  • -
  • value
  • -
  • type
  • -
  • fold
  • -
  • predictions
  • -
  • and additional columns giving covariate values
  • -
-

out: Anything

-
-
- - - - - - - - diff --git a/inst/doc/basic-zoon-usage.R b/inst/doc/basic-zoon-usage.R deleted file mode 100644 index f35d498..0000000 --- a/inst/doc/basic-zoon-usage.R +++ /dev/null @@ -1,50 +0,0 @@ -## ----setup, include=FALSE------------------------------------------------ -library(knitr) -opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") - -## ----methods, eval = FALSE----------------------------------------------- -# library(devtools) -# install_github('zoonproject/zoon') - -## ----load---------------------------------------------------------------- -library(zoon) - -## ----basic, warning = FALSE---------------------------------------------- -work1 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = OneHundredBackground, - model = RandomForest, - output = PrintMap) - -class(work1) -str(work1, 1) - -## ----getmodlist, eval = FALSE-------------------------------------------- -# GetModuleList() - -## ----help, eval = FALSE-------------------------------------------------- -# ModuleHelp(LogisticRegression) - -## ----args, warning = FALSE----------------------------------------------- -work2 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = BackgroundAndCrossvalid(k = 2), - model = LogisticRegression, - output = PerformanceMeasures) - -## ----chain, warnings = FALSE--------------------------------------------- -work3 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = Chain(OneHundredBackground, Crossvalidate), - model = LogisticRegression, - output = PerformanceMeasures) - -## ----list, warning = FALSE----------------------------------------------- -work4 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = OneHundredBackground, - model = list(LogisticRegression, RandomForest), - output = SameTimePlaceMap) - -str(work4, 1) - diff --git a/inst/doc/basic-zoon-usage.Rmd b/inst/doc/basic-zoon-usage.Rmd deleted file mode 100644 
index a62b478..0000000 --- a/inst/doc/basic-zoon-usage.Rmd +++ /dev/null @@ -1,193 +0,0 @@ ---- -title: "Basic zoon usage" -author: "Tim Lucas" -date: "`r Sys.Date()`" -output: - html_vignette: - theme: peaksea - highlight: zenburn - pdf_document: - theme: peaksea - highlight: zenburn ---- - - - - -```{r setup, include=FALSE} -library(knitr) -opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") -``` - -An Introduction to the **zoon** package -======================================= - - -**Zoon** is a package to aid reproducibility and between-model comparisons in species distribution modelling. Each step in an analysis is a 'module'. These modules will include: -+ Data collection of **occurrence** and environmental **covariate** data from online databases. -+ **Process** steps such as removal of spatial autocorrelation in the data or generation of background pseudoabsences. -+ The fitting of **models**. -+ Model **output** including diagnostics, reports and vizualisation. - - - -Getting set up ----------------------------- - -First install from github -```{r methods, eval = FALSE} -library(devtools) -install_github('zoonproject/zoon') -``` - -and load - -```{r load} -library(zoon) -``` - -Basic usage ----------------------------- - -A basic worklow is run using the `workflow` function. We must chose a module for each type: occurrence, covariate, process, model and output. 
- -```{r basic, warning = FALSE} -work1 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = OneHundredBackground, - model = RandomForest, - output = PrintMap) - -class(work1) -str(work1, 1) -``` - -In this case we are using the following modules which do the following things: -+ `UKAnophelesPlumbeus`: Uses occurrence points of _Anopheles plumbeus_ in the UK collected from GBIF -+ `UKAir`: Uses NCEP air temperature data for the UK -+ `OneHundredBackground`: Randomly creates 100 pseudoabsence or background datapoints -+ `LogisticRegression`: Run a random forest to model the relationship between _A. plumbeus_ and air temperature -+ `PrintMap`: Predicts the model across the whole of the UK and prints to graphics device. - -For output we get an object of class "zoonWorkflow". This object is basically a big list with all the data, models and output we collected and created in our analysis. - -Getting Help --------------- - -To find a list of modules available on the online repository use - -```{r getmodlist, eval = FALSE} -GetModuleList() -``` - -To find help on a specific module use - -```{r help, eval = FALSE} -ModuleHelp(LogisticRegression) -``` -Note that you can't use `?` as the modules are held on a repository. Therefore the module documentation files are not included with the basic zoon install. - - - -More complex analyses ------------------------ - -The syntax for including arguments to modules is simply `ModuleName(parameter = 'value')`. For example, to do two fold crossvalidation we do - -```{r args, warning = FALSE} -work2 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = BackgroundAndCrossvalid(k = 2), - model = LogisticRegression, - output = PerformanceMeasures) -``` - -Here we are providing an argument to the module `BackgroundAndCrossvalid`. We are setting `k` (the number of cross validation folds) to 2. 
- -We are using an output module `PerformanceMeasures` which calculates a number of measures of the effectiveness of our model: AUC, kappa, sensitivity, specificity etc. - - -### Multiple modules with Chain - -We might want to combine multiple modules in our analysis. For this we use the function Chain. - -```{r chain, warnings = FALSE} -work3 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = Chain(OneHundredBackground, Crossvalidate), - model = LogisticRegression, - output = PerformanceMeasures) -``` -Here we drawing some pseudoabsence background points, and doing crossvalidation (which is the same as `work2`, but explicitely using the separate modules.) - -The effect of `Chain` depends on the module type: -+`occurrence`: All data from chained modules are combined. -+`covariate`: All raster data from chained modules are stacked. -+`process`: The processes are run sequentially, the output of one going into the next. -+`model`: Model modules cannot be chained. -+`output`: Each output module that is chained is run separately on the output from other modules. - -`Chain` can be used on as many module type as is required. - -### Multiple modules with list - -If you want to run separate analyses that can then be compared for example, specifiy a list of modules. - -```{r list, warning = FALSE} -work4 <- workflow(occurrence = UKAnophelesPlumbeus, - covariate = UKAir, - process = OneHundredBackground, - model = list(LogisticRegression, RandomForest), - output = SameTimePlaceMap) - -str(work4, 1) -``` -Here, the analysis is split into two and both logistic regression and random forest (a machine learning algorithm) are used to model the data. Looking at the structure of the output we can see that the output from the first three modules are a list of length one. When the analysis splits into two, the output of the modules (in `work4$model.output` and `work4$report`) is then a list of length two. One for each branch of the split analysis. 
- - - -### A larger example - -Here is an example of a larger analysis. - -```{r largeAnalysis, cache = TRUE, warning = FALSE} -work5 <- workflow(occurrence = Chain(SpOcc(species = 'Eresus kollari', - extent = c(-10, 10, 45, 65)), - SpOcc(species = 'Eresus sandaliatus', - extent = c(-10, 10, 45, 65))), - - covariate = UKAir, - - process = BackgroundAndCrossvalid(k = 2), - - model = list(LogisticRegression, RandomForest), - - output = Chain(SameTimePlaceMap, PerformanceMeasures) - ) - -str(work5, 1) - - -par(mfrow=c(1,2)) -plot(work5$report[[1]][[1]], - main = paste('Logistic Regression: AUC = ', - round(work5$report[[1]][[2]]$auc, 2))) -plot(work5$report[[2]][[1]], - main = paste('Random forest: AUC = ', - round(work5$report[[2]][[2]]$auc, 2))) -``` - -Here we are collecting occurrence data for two species, _Eresus kollari_ and _E. sandaliatus_ and combining them (having presumably decided that this is ecologically appropriate.) We are using the air temperature data from NCEP again. We are sampling 100 pseudo absence points and running two fold crossvalidation. - -We run logistic regression and random forest on the data separately. We then predict the model back over the extent of our environmental data and calculate some measures of how good the models are. Collating the output into one plot we can see the very different forms of the models and can see that the random forest has a higher AUC (implying it predicts the data better.) - - - - - - - - diff --git a/inst/doc/basic-zoon-usage.html b/inst/doc/basic-zoon-usage.html index 8998e6e..5d8aee0 100644 --- a/inst/doc/basic-zoon-usage.html +++ b/inst/doc/basic-zoon-usage.html @@ -1,268 +1,111 @@ - - - - -An Introduction to the <strong>zoon</strong> package - + - - + + + - - - - - - +
+ - - - - - -

An Introduction to the zoon package

- -

Zoon is a package to aid reproducibility and between-model comparisons in species distribution modelling. Each step in an analysis is a 'module'. These modules will include:

- +
+
+
+

An Introduction to the zoon package

+

Zoon is a package to aid reproducibility and between-model comparisons in species distribution modelling. Each step in an analysis is a ‘module’. These modules will include: + Data collection of occurrence and environmental covariate data from online databases. + Process steps such as removal of spatial autocorrelation in the data or generation of background pseudoabsences. + The fitting of models. + Model output including diagnostics, reports and vizualisation.

+

Getting set up

-

First install from github

- -
library(devtools)
-install_github('zoonproject/zoon')
-
- +
library(devtools)
+install_github('zoonproject/zoon')

and load

- -
library(zoon)
-
- -
## Loading required package: raster
-## Loading required package: sp
-
- +
library(zoon)
+
+

Basic usage

-

A basic worklow is run using the workflow function. We must chose a module for each type: occurrence, covariate, process, model and output.

- -
work1 <- workflow(occurrence = UKAnophelesPlumbeus,
+
work1 <- workflow(occurrence = UKAnophelesPlumbeus,
                   covariate  = UKAir,
                   process    = OneHundredBackground,
                   model      = RandomForest,
-                  output     = PrintMap)
-
- -
## Loading required package: dismo
-## Loading required package: randomForest
-## randomForest 4.6-10
-## Type rfNews() to see new features/changes/bug fixes.
-
- -

plot of chunk basic

- -
class(work1)
-
- -
## [1] "zoonWorkflow"
-
- -
str(work1, 1)
-
- -
## List of 7
+                  output     = PrintMap)
+

+
class(work1)
+
## [1] "zoonWorkflow"
+
str(work1, 1)
+
## List of 9
 ##  $ occurrence.output:List of 1
 ##  $ covariate.output :List of 1
 ##  $ process.output   :List of 1
@@ -270,118 +113,80 @@ 

Basic usage

## $ report :List of 1 ## $ call : chr "workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = OneHundredBackground, model = RandomForest, output = Pr"| __truncated__ ## $ call.list :List of 5 -## - attr(*, "class")= chr "zoonWorkflow" -
- -

In this case we are using the following modules which do the following things:

- -
    -
  • UKAnophelesPlumbeus: Uses occurrence points of Anopheles plumbeus in the UK collected from GBIF
  • -
  • UKAir: Uses NCEP air temperature data for the UK
  • -
  • OneHundredBackground: Randomly creates 100 pseudoabsence or background datapoints
  • -
  • LogisticRegression: Run a random forest to model the relationship between A. plumbeus and air temperature
  • -
  • PrintMap: Predicts the model across the whole of the UK and prints to graphics device.
  • -
- -

For output we get an object of class “zoonWorkflow”. This object is basically a big list with all the data, models and output we collected and created in our analysis.

- +## $ session.info :List of 7 +## ..- attr(*, "class")= chr "sessionInfo" +## $ module.versions :List of 5 +## - attr(*, "class")= chr "zoonWorkflow"
+

In this case we are using the following modules which do the following things: + UKAnophelesPlumbeus: Uses occurrence points of Anopheles plumbeus in the UK collected from GBIF + UKAir: Uses NCEP air temperature data for the UK + OneHundredBackground: Randomly creates 100 pseudoabsence or background datapoints + LogisticRegression: Run a random forest to model the relationship between A. plumbeus and air temperature + PrintMap: Predicts the model across the whole of the UK and prints to graphics device.

+

For output we get an object of class “zoonWorkflow”. This object is basically a big list with all the data, models and output we collected and created in our analysis.

+
+

Getting Help

-

To find a list of modules available on the online repository use

- -
GetModuleList()
-
- +
GetModuleList()

To find help on a specific module use

- -
ModuleHelp(LogisticRegression)
-
- -

Note that you can't use ? as the modules are held on a repository. Therefore the module documentation files are not included with the basic zoon install.

- +
ModuleHelp(LogisticRegression)
+

Note that you can’t use ? as the modules are held on a repository. Therefore the module documentation files are not included with the basic zoon install.

+
+

More complex analyses

- -

The syntax for including arguments to modules is simply ModuleName(parameter = 'value'). For example, to do two fold crossvalidation we do

- -
work2 <- workflow(occurrence = UKAnophelesPlumbeus,
+

The syntax for including arguments to modules is simply ModuleName(parameter = 'value'). For example, to do two fold crossvalidation we do

+
work2 <- workflow(occurrence = UKAnophelesPlumbeus,
                   covariate  = UKAir,
                   process    = BackgroundAndCrossvalid(k = 2),
                   model      = LogisticRegression,
-                  output     = PerformanceMeasures)
-
- + output = PerformanceMeasures)
## Loading required package: SDMTools
 ## 
-## Attaching package: 'SDMTools'
+## Attaching package: 'SDMTools'
 ## 
-## The following object is masked from 'package:raster':
+## The following object is masked from 'package:raster':
 ## 
 ##     distance
 ## 
 ## Model performance measures:
-## auc :  0.6699829261886
-## kappa :  0.411378555798687
+## auc :  0.679209351195167
+## kappa :  0.464271488887118
 ## omissions :  0
 ## sensitivity :  1
-## specificity :  0.333333333333333
-## proportionCorrect :  0.799256505576208
-## 
-
- +## specificity : 0.382716049382716 +## proportionCorrect : 0.814126394052045 +##

Here we are providing an argument to the module BackgroundAndCrossvalid. We are setting k (the number of cross validation folds) to 2.

-

We are using an output module PerformanceMeasures which calculates a number of measures of the effectiveness of our model: AUC, kappa, sensitivity, specificity etc.

- +

Multiple modules with Chain

-

We might want to combine multiple modules in our analysis. For this we use the function Chain.

- -
work3 <- workflow(occurrence = UKAnophelesPlumbeus,
+
work3 <- workflow(occurrence = UKAnophelesPlumbeus,
                   covariate  = UKAir,
                   process    = Chain(OneHundredBackground, Crossvalidate),
                   model      = LogisticRegression,
-                  output     = PerformanceMeasures)
-
- + output = PerformanceMeasures)
## Warning in OneHundredBackground(.data = structure(list(df = structure(list(: There are fewer than 100 cells in the environmental raster.
-## Using all available cells (81) instead
-
- +## Using all available cells (81) instead
## Model performance measures:
-## auc :  0.665977147360126
-## kappa :  0.464271488887118
+## auc :  0.671854478592067
+## kappa :  0.397867404664035
 ## omissions :  0
 ## sensitivity :  1
-## specificity :  0.382716049382716
-## proportionCorrect :  0.814126394052045
-## 
-
- +## specificity : 0.320987654320988 +## proportionCorrect : 0.795539033457249 +##

Here we drawing some pseudoabsence background points, and doing crossvalidation (which is the same as work2, but explicitely using the separate modules.)

- -

The effect of Chain depends on the module type: -+occurrence: All data from chained modules are combined. -+covariate: All raster data from chained modules are stacked. -+process: The processes are run sequentially, the output of one going into the next. -+model: Model modules cannot be chained. -+output: Each output module that is chained is run separately on the output from other modules.

- +

The effect of Chain depends on the module type: +occurrence: All data from chained modules are combined. +covariate: All raster data from chained modules are stacked. +process: The processes are run sequentially, the output of one going into the next. +model: Model modules cannot be chained. +output: Each output module that is chained is run separately on the output from other modules.

Chain can be used on as many module type as is required.

- +
+

Multiple modules with list

-

If you want to run separate analyses that can then be compared for example, specifiy a list of modules.

- -
work4 <- workflow(occurrence = UKAnophelesPlumbeus,
+
work4 <- workflow(occurrence = UKAnophelesPlumbeus,
                   covariate  = UKAir,
                   process    = OneHundredBackground,
                   model      = list(LogisticRegression, RandomForest),
                   output     = SameTimePlaceMap)
 
-str(work4, 1)
-
- -
## List of 7
+str(work4, 1)
+
## List of 9
 ##  $ occurrence.output:List of 1
 ##  $ covariate.output :List of 1
 ##  $ process.output   :List of 1
@@ -389,20 +194,20 @@ 

Multiple modules with list

## $ report :List of 2 ## $ call : chr "workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = OneHundredBackground, model = list(LogisticRegression, "| __truncated__ ## $ call.list :List of 5 -## - attr(*, "class")= chr "zoonWorkflow" -
- +## $ session.info :List of 7 +## ..- attr(*, "class")= chr "sessionInfo" +## $ module.versions :List of 5 +## - attr(*, "class")= chr "zoonWorkflow"

Here, the analysis is split into two and both logistic regression and random forest (a machine learning algorithm) are used to model the data. Looking at the structure of the output we can see that the output from the first three modules are a list of length one. When the analysis splits into two, the output of the modules (in work4$model.output and work4$report) is then a list of length two. One for each branch of the split analysis.

- +
+

A larger example

-

Here is an example of a larger analysis.

- -
work5 <- workflow(occurrence = Chain(SpOcc(species = 'Eresus kollari', 
+
work5 <- workflow(occurrence = Chain(SpOcc(species = 'Eresus kollari', 
                                        extent = c(-10, 10, 45, 65)),
-                                     SpOcc(species = 'Eresus sandaliatus', 
+                                     SpOcc(species = 'Eresus sandaliatus', 
                                        extent = c(-10, 10, 45, 65))),
-
+ 
                   covariate  = UKAir,
 
                   process    = BackgroundAndCrossvalid(k = 2),
@@ -410,12 +215,25 @@ 

A larger example

model = list(LogisticRegression, RandomForest), output = Chain(SameTimePlaceMap, PerformanceMeasures) - ) - -str(work5, 1) -
- -
## List of 7
+         )
+
## Model performance measures:
+## auc :  0.588449179773381
+## kappa :  -0.00740230676536421
+## omissions :  0.698630136986301
+## sensitivity :  0.301369863013699
+## specificity :  0.691358024691358
+## proportionCorrect :  0.506493506493506
+##  
+## Model performance measures:
+## auc :  0.896922036191443
+## kappa :  0.688731682668014
+## omissions :  0.123287671232877
+## sensitivity :  0.876712328767123
+## specificity :  0.814814814814815
+## proportionCorrect :  0.844155844155844
+## 
+
str(work5, 1)
+
## List of 9
 ##  $ occurrence.output:List of 1
 ##  $ covariate.output :List of 1
 ##  $ process.output   :List of 1
@@ -423,24 +241,45 @@ 

A larger example

## $ report :List of 2 ## $ call : chr "workflow(occurrence = Chain(SpOcc(species = \"Eresus kollari\", extent = c(-10, 10, 45, 65)), SpOcc(species = \"Eresus san"| __truncated__ ## $ call.list :List of 5 -## - attr(*, "class")= chr "zoonWorkflow" -
- -
par(mfrow=c(1,2))
+##  $ session.info     :List of 7
+##   ..- attr(*, "class")= chr "sessionInfo"
+##  $ module.versions  :List of 5
+##  - attr(*, "class")= chr "zoonWorkflow"
+
par(mfrow=c(1,2))
 plot(work5$report[[1]][[1]], 
-  main = paste('Logistic Regression: AUC = ', 
+  main = paste('Logistic Regression: AUC = ', 
              round(work5$report[[1]][[2]]$auc, 2)))
 plot(work5$report[[2]][[1]],
-  main = paste('Random forest: AUC = ', 
-             round(work5$report[[2]][[2]]$auc, 2)))
-
+ main = paste('Random forest: AUC = ', + round(work5$report[[2]][[2]]$auc, 2)))
+

+

Here we are collecting occurrence data for two species, Eresus kollari and E. sandaliatus and combining them (having presumably decided that this is ecologically appropriate.) We are using the air temperature data from NCEP again. We are sampling 100 pseudo absence points and running two fold crossvalidation.

+

We run logistic regression and random forest on the data separately. We then predict the model back over the extent of our environmental data and calculate some measures of how good the models are. Collating the output into one plot we can see the very different forms of the models and can see that the random forest has a higher AUC (implying it predicts the data better.)

+
+
+
-

plot of chunk largeAnalysis

-

Here we are collecting occurrence data for two species, Eresus kollari and E. sandaliatus and combining them (having presumably decided that this is ecologically appropriate.) We are using the air temperature data from NCEP again. We are sampling 100 pseudo absence points and running two fold crossvalidation.

+
-

We run logistic regression and random forest on the data separately. We then predict the model back over the extent of our environmental data and calculate some measures of how good the models are. Collating the output into one plot we can see the very different forms of the models and can see that the random forest has a higher AUC (implying it predicts the data better.)

+ + + + + diff --git a/inst/doc/interactive_zoon_usage.R b/inst/doc/interactive_zoon_usage.R deleted file mode 100644 index cd2f800..0000000 --- a/inst/doc/interactive_zoon_usage.R +++ /dev/null @@ -1,42 +0,0 @@ -## ----setup, include=FALSE------------------------------------------------ -library(knitr) -opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") - -## ----packages------------------------------------------------------------ -library(dismo) -library(zoon) - -## ----noninteractive------------------------------------------------------ -w <- workflow(UKAnophelesPlumbeus, UKAir, OneHundredBackground, LogisticRegression, PrintMap) - -## ----LoadModules--------------------------------------------------------- -LoadModule('UKAnophelesPlumbeus') -LoadModule('UKAir') -LoadModule('OneHundredBackground') -LoadModule('LogisticRegression') -LoadModule('PrintMap') - -## ----runDataMods--------------------------------------------------------- -oc <- UKAnophelesPlumbeus() -cov <- UKAir() - -## ----extract------------------------------------------------------------- -data <- zoon:::ExtractAndCombData(oc, cov) - -## ----procAndModel-------------------------------------------------------- -proc <- OneHundredBackground(data) - -mod <- LogisticRegression(proc$df) - -## ----output-------------------------------------------------------------- -model <- list(model = mod, data = proc$df) - -out <- PrintMap(model, cov) - -## ----cross validation---------------------------------------------------- -modCrossvalid <- zoon:::RunModels(proc$df, 'LogisticRegression', list(), environment()) - -modelCrossvalid <- list(model = modCrossvalid$model, data = proc$df) - -out <- PrintMap(modelCrossvalid, cov) - diff --git a/inst/doc/interactive_zoon_usage.Rmd b/inst/doc/interactive_zoon_usage.Rmd deleted file mode 100644 index 65a65b9..0000000 --- a/inst/doc/interactive_zoon_usage.Rmd +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: "Interactive zoon usage (for devs)" 
-author: "Tim Lucas" -date: "`r Sys.Date()`" -output: - html_vignette: - theme: peaksea - highlight: zenburn - pdf_document: - theme: peaksea - highlight: zenburn ---- - - - -```{r setup, include=FALSE} -library(knitr) -opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") -``` - - -# Using zoon modules interactively. - -While the point of zoon is to run full workflows which are then reproducible, during development of modules it can be useful to run individual modules in the same way you would run normal R functions. - -It is not entirely simple to do this, so this vignette just clarifies how. - -First load packages. You need to explicitely load Dismo as we are now going to use Dismo functions outside the zoon environment. -```{r packages} -library(dismo) -library(zoon) -``` - -This is the workflow we will run. It might be worth running it here to make sure there are no problems. - -```{r noninteractive} -w <- workflow(UKAnophelesPlumbeus, UKAir, OneHundredBackground, LogisticRegression, PrintMap) -``` - -It's worth noting that this is a simple workflow. Chaining modules will be fairly easy but depends on the module type. Workflows using list() are likely to not be easy. - -Get the modules from the zoon repository and load them into the working environment. -```{r LoadModules} -LoadModule('UKAnophelesPlumbeus') -LoadModule('UKAir') -LoadModule('OneHundredBackground') -LoadModule('LogisticRegression') -LoadModule('PrintMap') -``` - -Run the data modules. To chain occurrence modules, just `rbind` the resulting dataframes. To chain covariate modules, use `raster::stack` to combine the covariate data. -```{r runDataMods} -oc <- UKAnophelesPlumbeus() -cov <- UKAir() -``` - - -We have to run `zoon:::ExtractAndCombData`. This combines the occurrence and raster data. -```{r extract} -data <- zoon:::ExtractAndCombData(oc, cov) -``` - -Next run the process and model modules. 
To chain process models, simply run each in turn with the output of one going into the next. The simple way to run model modules is to use the module function as below. If crossvalidation is important then you need to run the modules slightly differently (see below). -```{r procAndModel} -proc <- OneHundredBackground(data) - -mod <- LogisticRegression(proc$df) -``` - -Finally, combine some output into a list and run the output modules. -```{r output} -model <- list(model = mod, data = proc$df) - -out <- PrintMap(model, cov) -``` - -## Cross and external validation - -Crossvalidation requires the modules to be run using the function `zoon:::RunModels` which runs the model on each fold of the crossvalidating data and predicts the remaining data. It also runs a model and predicts any external validation data. - -```{r cross validation} -modCrossvalid <- zoon:::RunModels(proc$df, 'LogisticRegression', list(), environment()) - -modelCrossvalid <- list(model = modCrossvalid$model, data = proc$df) - -out <- PrintMap(modelCrossvalid, cov) -``` - -## Running workflows with list. - -As mentioned above, workflows using list() are likely to not be easy, but then these aren't particularly required while developing a package. To run workflows using list, it would be best to use `LoadModule` as above and then run through the `workflow` source code interactively. diff --git a/inst/doc/interactive_zoon_usage.html b/inst/doc/interactive_zoon_usage.html index de337ae..4ae56cf 100644 --- a/inst/doc/interactive_zoon_usage.html +++ b/inst/doc/interactive_zoon_usage.html @@ -1,317 +1,157 @@ - - - -Using zoon modules interactively. + - - - - + + + - - - - - - +
+ - - - - + +

Using zoon modules interactively.

- -

While the point of zoon is to run full workflows which are then reproducible, during development of modules it can be useful to run individual modules in the same way you would run normal R functions.

- -

It is not entirely simple to do this, so this vignette just clarifies how.

- +

While the point of zoon is to run full workflows which are then reproducible, during development of modules it can be useful to run individual modules in the same way you would run normal R functions.

+

It is not entirely simple to do this, so this vignette just clarifies how.

First load packages. You need to explicitely load Dismo as we are now going to use Dismo functions outside the zoon environment.

- -
library(dismo)
-library(zoon)
-
- -

This is the workflow we will run. It might be worth running it here to make sure there are no problems.

- -
w <- workflow(UKAnophelesPlumbeus, UKAir, OneHundredBackground, LogisticRegression, PrintMap)
-
- +
library(dismo)
+library(zoon)
+

This is the workflow we will run. It might be worth running it here to make sure there are no problems.

+
w <- workflow(UKAnophelesPlumbeus, UKAir, OneHundredBackground, LogisticRegression, PrintMap)
## Warning in OneHundredBackground(.data = structure(list(df = structure(list(: There are fewer than 100 cells in the environmental raster.
-## Using all available cells (81) instead
-
- -

plot of chunk noninteractive

- -

It's worth noting that this is a simple workflow. Chaining modules will be fairly easy but depends on the module type. Workflows using list() are likely to not be easy.

- +## Using all available cells (81) instead +

+

It’s worth noting that this is a simple workflow. Chaining modules will be fairly easy but depends on the module type. Workflows using list() are likely to not be easy.

Get the modules from the zoon repository and load them into the working environment.

- -
LoadModule('UKAnophelesPlumbeus')
-
- -
## [1] "UKAnophelesPlumbeus"
-
- -
LoadModule('UKAir')
-
- -
## [1] "UKAir"
-
- -
LoadModule('OneHundredBackground')
-
- -
## [1] "OneHundredBackground"
-
- -
LoadModule('LogisticRegression')
-
- -
## [1] "LogisticRegression"
-
- -
LoadModule('PrintMap')
-
- -
## [1] "PrintMap"
-
- +
LoadModule('UKAnophelesPlumbeus')
+
## [1] "UKAnophelesPlumbeus"
+
LoadModule('UKAir')
+
## [1] "UKAir"
+
LoadModule('OneHundredBackground')
+
## [1] "OneHundredBackground"
+
LoadModule('LogisticRegression')
+
## [1] "LogisticRegression"
+
LoadModule('PrintMap')
+
## [1] "PrintMap"

Run the data modules. To chain occurrence modules, just rbind the resulting dataframes. To chain covariate modules, use raster::stack to combine the covariate data.

- -
oc <- UKAnophelesPlumbeus()
-cov <- UKAir()
-
- +
oc <- UKAnophelesPlumbeus()
+cov <- UKAir()

We have to run zoon:::ExtractAndCombData. This combines the occurrence and raster data.

- -
data <- zoon:::ExtractAndCombData(oc, cov)
-
- +
data <- zoon:::ExtractAndCombData(oc, cov)

Next run the process and model modules. To chain process modules, simply run each in turn with the output of one going into the next. The simple way to run model modules is to use the module function as below. If crossvalidation is important then you need to run the modules slightly differently (see below).

- -
proc <- OneHundredBackground(data)
-
- +
proc <- OneHundredBackground(data)
## Warning in OneHundredBackground(data): There are fewer than 100 cells in the environmental raster.
-## Using all available cells (81) instead
-
- -
mod <- LogisticRegression(proc$df)
-
- +## Using all available cells (81) instead +
mod <- LogisticRegression(proc$df)

Finally, combine some output into a list and run the output modules.

+
model <- list(model = mod, data = proc$df)
 
-
model <- list(model = mod, data = proc$df)
-
-out <- PrintMap(model, cov)
-
- -

plot of chunk output

- +out <- PrintMap(model, cov)
+

+

Cross and external validation

-

Crossvalidation requires the modules to be run using the function zoon:::RunModels which runs the model on each fold of the crossvalidating data and predicts the remaining data. It also runs a model and predicts any external validation data.

- -
modCrossvalid <- zoon:::RunModels(proc$df, 'LogisticRegression', list(), environment())
+
modCrossvalid <- zoon:::RunModels(proc$df, 'LogisticRegression', list(), environment())
 
 modelCrossvalid <- list(model = modCrossvalid$model, data = proc$df)
 
-out <- PrintMap(modelCrossvalid, cov)
-
+out <- PrintMap(modelCrossvalid, cov)
+

+
+
+

Running workflows with list.

+

As mentioned above, workflows using list() are likely to not be easy, but then these aren’t particularly required while developing a package. To run workflows using list, it would be best to use LoadModule as above and then run through the workflow source code interactively.

+
+
-

plot of chunk cross validation

-

Running workflows with list.

+
-

As mentioned above, workflows using list() are likely to not be easy, but then these aren't particularly required while developing a package. To run workflows using list, it would be best to use LoadModule as above and then run through the workflow source code interactively.

+ + + + + diff --git a/man/GetModuleList.Rd b/man/GetModuleList.Rd index e1922f1..205a1ca 100644 --- a/man/GetModuleList.Rd +++ b/man/GetModuleList.Rd @@ -15,7 +15,13 @@ A list with all module names. \description{ Get a list of all the modules available on the github repo. } +\details{ +This function will only work on a platform that supports the +method 'libcurl' in the function url. This can be tested using the function +\code{capabilities} (see example). +} \examples{ -\dontrun{GetModuleList()} +# GetModuleList requires libcurl to be supported +if(capabilities('libcurl')) GetModuleList() } diff --git a/man/GetPackage.Rd b/man/GetPackage.Rd index 4ad9620..507d247 100644 --- a/man/GetPackage.Rd +++ b/man/GetPackage.Rd @@ -16,9 +16,6 @@ load the package if it exists, else install it from CRAN and then load } \examples{ - -\dontrun{ GetPackage('gam') } -} diff --git a/man/RerunWorkflow.Rd b/man/RerunWorkflow.Rd index f15fcf5..cea415d 100644 --- a/man/RerunWorkflow.Rd +++ b/man/RerunWorkflow.Rd @@ -22,7 +22,8 @@ Takes a workflow object and reruns it. } \examples{ \dontrun{ -w <- workflow(UKAnophelesPlumbeus, UKAir, +w <- workflow(UKAnophelesPlumbeus, + UKAir, OneHundredBackground, LogisticRegression, SameTimePlaceMap) diff --git a/vignettes/Building_a_module.Rmd b/vignettes/Building_a_module.Rmd index 03a1d62..381335d 100644 --- a/vignettes/Building_a_module.Rmd +++ b/vignettes/Building_a_module.Rmd @@ -5,11 +5,11 @@ date: '`r Sys.Date()`' output: html_document: toc: yes +vignette: > + %\VignetteIndexEntry{Building modules} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} --- - ```{r, echo = FALSE} require(zoon, quietly = TRUE) @@ -412,10 +412,10 @@ Next we specify the packages our function needs. 
These should be specified by us ```{r, eval = FALSE} # Specify the packages we need using the function # GetPackage -zoon:::GetPackage("gam") +zoon::GetPackage("gam") ``` -Next we can add the code that does our modelling, here we create a simple GAM (Generalised Additive Model) using the package [gam](https://cran.r-project.org/web/packages/gam/index.html) +Next we can add the code that does our modelling, here we create a simple GAM (Generalised Additive Model) using the package [gam](https://cran.r-project.org/package=gam) ```{r, eval = FALSE} # Create a data.frame of covariate data @@ -483,7 +483,7 @@ GamGam <- function(.df){ # Specify the packages we need using the function # GetPackage - zoon:::GetPackage("gam") + zoon::GetPackage("gam") # Create a data.frame of covariate data covs <- as.data.frame(.df[, 6:ncol(.df)]) @@ -605,7 +605,10 @@ PredictNewRasterMap <- function(.model, .ras, raster = .ras){ It is important to have default values for all user defined parameters so that your module can be tested when you upload it to the zoon website. Here we set our default 'new area' raster to be the same as the raster used to create the model. Clearly this is not how we envisage the module being used in a real application (unless they genuinely wanted to predict back to the same area), however this ensures that this module will always work with its default arguments, no matter what workflow it is placed in. 
```{r, eval = FALSE} -# The first step is to extract the covariate values +# The first step is to load in the packages we need +zoon::GetPackage(raster) + +# Then extract the covariate values # from the user provided raster vals <- data.frame(getValues(raster)) colnames(vals) <- names(raster) @@ -641,6 +644,8 @@ Our function now looks like this: ```{r} PredictNewRasterMap <- function(.model, .ras, raster = .ras){ + zoon::GetPackage(raster) + # Extract the values from the user provided raster vals <- data.frame(getValues(raster)) colnames(vals) <- names(raster) @@ -802,6 +807,13 @@ Out: Anything! ![ModelModule](modelInOut.svg) ![OuputModule](outputInOut.svg) - +```{r, echo = FALSE} +# Clean up +unlink('AustraliaAir.R') +unlink('ClipOccurrence.R') +unlink('GamGam.R') +unlink('Lorem_ipsum_UK.R') +unlink('PredictNewRasterMap.R') +``` diff --git a/vignettes/basic-zoon-usage.Rmd b/vignettes/basic-zoon-usage.Rmd index a62b478..975885f 100644 --- a/vignettes/basic-zoon-usage.Rmd +++ b/vignettes/basic-zoon-usage.Rmd @@ -2,22 +2,17 @@ title: "Basic zoon usage" author: "Tim Lucas" date: "`r Sys.Date()`" -output: - html_vignette: - theme: peaksea - highlight: zenburn - pdf_document: - theme: peaksea - highlight: zenburn +output: + html_document: + toc: yes +vignette: > + %\VignetteIndexEntry{Basic zoon usage} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} --- - - -```{r setup, include=FALSE} +```{r, include=FALSE} library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") ``` @@ -25,7 +20,6 @@ opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="cente An Introduction to the **zoon** package ======================================= - **Zoon** is a package to aid reproducibility and between-model comparisons in species distribution modelling. Each step in an analysis is a 'module'. 
These modules will include: + Data collection of **occurrence** and environmental **covariate** data from online databases. + **Process** steps such as removal of spatial autocorrelation in the data or generation of background pseudoabsences. @@ -38,14 +32,14 @@ Getting set up ---------------------------- First install from github -```{r methods, eval = FALSE} +```{r, eval = FALSE} library(devtools) install_github('zoonproject/zoon') ``` and load -```{r load} +```{r} library(zoon) ``` @@ -54,7 +48,7 @@ Basic usage A basic worklow is run using the `workflow` function. We must chose a module for each type: occurrence, covariate, process, model and output. -```{r basic, warning = FALSE} +```{r, warning = FALSE} work1 <- workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = OneHundredBackground, @@ -79,13 +73,13 @@ Getting Help To find a list of modules available on the online repository use -```{r getmodlist, eval = FALSE} +```{r, eval = FALSE} GetModuleList() ``` To find help on a specific module use -```{r help, eval = FALSE} +```{r, eval = FALSE} ModuleHelp(LogisticRegression) ``` Note that you can't use `?` as the modules are held on a repository. Therefore the module documentation files are not included with the basic zoon install. @@ -97,7 +91,7 @@ More complex analyses The syntax for including arguments to modules is simply `ModuleName(parameter = 'value')`. For example, to do two fold crossvalidation we do -```{r args, warning = FALSE} +```{r, warning = FALSE} work2 <- workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = BackgroundAndCrossvalid(k = 2), @@ -114,7 +108,7 @@ We are using an output module `PerformanceMeasures` which calculates a number of We might want to combine multiple modules in our analysis. For this we use the function Chain. 
-```{r chain, warnings = FALSE} +```{r, warnings = FALSE} work3 <- workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = Chain(OneHundredBackground, Crossvalidate), @@ -136,7 +130,7 @@ The effect of `Chain` depends on the module type: If you want to run separate analyses that can then be compared for example, specifiy a list of modules. -```{r list, warning = FALSE} +```{r, warning = FALSE} work4 <- workflow(occurrence = UKAnophelesPlumbeus, covariate = UKAir, process = OneHundredBackground, @@ -153,7 +147,7 @@ Here, the analysis is split into two and both logistic regression and random for Here is an example of a larger analysis. -```{r largeAnalysis, cache = TRUE, warning = FALSE} +```{r, warning = FALSE} work5 <- workflow(occurrence = Chain(SpOcc(species = 'Eresus kollari', extent = c(-10, 10, 45, 65)), SpOcc(species = 'Eresus sandaliatus', diff --git a/vignettes/interactive_zoon_usage.Rmd b/vignettes/interactive_zoon_usage.Rmd index 65a65b9..97354d5 100644 --- a/vignettes/interactive_zoon_usage.Rmd +++ b/vignettes/interactive_zoon_usage.Rmd @@ -2,18 +2,14 @@ title: "Interactive zoon usage (for devs)" author: "Tim Lucas" date: "`r Sys.Date()`" -output: - html_vignette: - theme: peaksea - highlight: zenburn - pdf_document: - theme: peaksea - highlight: zenburn +output: + html_document: + toc: yes +vignette: > + %\VignetteIndexEntry{Interactive zoon usage} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} --- - ```{r setup, include=FALSE} @@ -21,7 +17,6 @@ library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") ``` - # Using zoon modules interactively. While the point of zoon is to run full workflows which are then reproducible, during development of modules it can be useful to run individual modules in the same way you would run normal R functions.