Merge pull request #42 from goldingn/journal_edits

Journal edits
zoonproject · Jul 26, 2017 · 313690b · 313690b
2 parents 89300b0 + 78549ba
commit 313690b
Show file tree

Hide file tree

Showing 25 changed files with 530 additions and 82 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,8 +8,9 @@ spThin.R
 .Rproj.user
 zoon_app_paper.Rproj
 
-# pdf doc
+# output formats
 ms.pdf
+ms.docx
 
 # powerpoint docs for generating figures
 *.pptx

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,3 @@
+YEAR: 2017
+COPYRIGHT HOLDER: Nick Golding
+ORGANIZATION: Zoon project
diff --git a/compile.sh b/compile.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+pandoc -s -S -V geometry:margin=1in -H response_header.tex response_to_reviewers.md -o response_to_reviewers.pdf
+
diff --git a/figs/combinefengupdate-1.png b/figs/combinefengupdate-1.png
diff --git a/figs/fengChangeWorkflow-1.png b/figs/fengChangeWorkflow-1.png
diff --git a/figs/fengworkflow-1.png b/figs/fengworkflow-1.png
diff --git a/figs/maxent_buffer-1.png b/figs/maxent_buffer-1.png
diff --git a/figs/maxent_buffer-2.png b/figs/maxent_buffer-2.png
diff --git a/figs/maxent_buffer-3.png b/figs/maxent_buffer-3.png
diff --git a/figs/maxent_buffer-4.png b/figs/maxent_buffer-4.png
diff --git a/figs/maxent_buffer-5.png b/figs/maxent_buffer-5.png
diff --git a/figs/maxent_buffer-6.png b/figs/maxent_buffer-6.png
diff --git a/figs/maxent_comparison-1.png b/figs/maxent_comparison-1.png
diff --git a/figs/maxent_comparison-2.png b/figs/maxent_comparison-2.png
diff --git a/figs/maxent_comparison-3.png b/figs/maxent_comparison-3.png
diff --git a/figs/maxent_comparison-4.png b/figs/maxent_comparison-4.png
diff --git a/figs/maxent_comparison-5.png b/figs/maxent_comparison-5.png
diff --git a/figs/maxent_comparison-6.png b/figs/maxent_comparison-6.png
diff --git a/figs/maxent_plotting-1.png b/figs/maxent_plotting-1.png
diff --git a/ms.R b/ms.R
@@ -0,0 +1,279 @@
+## ----knitrOpts, echo = FALSE, cache = FALSE, eval = TRUE-----------------
+# chunk defaults: figures written to figs/ as centred PNGs, messages and
+# warnings suppressed, and chunk results cached
+knitr::opts_chunk$set(
+  fig.path = 'figs/',
+  message = FALSE,
+  warning = FALSE,
+  fig.align = 'center',
+  dev = 'png',
+  cache = TRUE
+)
+
+## ----raster_dir, echo = FALSE, cache = FALSE, eval = TRUE----------------
+# keep downloaded rasters in a local folder so they are not fetched again
+# on every run; create the folder the first time through
+raster_dir <- './raster_data'
+if (!dir.exists(raster_dir)) {
+  dir.create(raster_dir)
+}
+options(rasterDataDir = raster_dir)
+
+## ----plotworkflow, eval = FALSE, echo = TRUE-----------------------------
+## mosquito1 <- workflow(occurrence = UKAnophelesPlumbeus,
+##                       covariate  = UKBioclim,
+##                       process    = Background(n = 500),
+##                       model      = MaxEnt,
+##                       output     = InteractiveMap)
+
+## ----workflow2, eval = FALSE, echo = TRUE--------------------------------
+## mosquito2 <- workflow(occurrence = UKAnophelesPlumbeus,
+##                       covariate  = UKBioclim,
+##                       process    = Chain(Background(n = 500),
+##                                          StandardiseCov),
+##                       model      = list(MaxEnt,
+##                                         GBM,
+##                                         RandomForest),
+##                       output     = Appify)
+
+## ----loadzoon, eval = TRUE, echo = FALSE, cache = FALSE------------------
+# Keep cache = FALSE. Not supposed to cache chunks with library()
+#  I assume we want this not echoed.
+library(zoon)
+
+set.seed(1633)
+
+
+## ----fengworkflow, eval = TRUE, dpi = 600, fig.show = "hide", results = "hide", message = FALSE, fig.height = 7, fig.width = 9----
+# Build and run the 'FengPapes' workflow: a MaxEnt model of
+# 'Dasypus novemcinctus' whose thresholded prediction map is drawn by PrintMap.
+# The same extent vector is passed to the occurrence and covariate modules.
+# NOTE(review): extent presumably ordered (xmin, xmax, ymin, ymax) - confirm
+# against the SpOcc / Bioclim module documentation.
+FengPapes <- 
+  workflow(occurrence = SpOcc('Dasypus novemcinctus',
+                              extent = c(-130, -20, -60, 60)),
+           covariate = Bioclim(extent = c(-130, -20, -60, 60),
+                               layers = c(1:4, 6, 9, 10, 12, 15)),
+           # process modules are chained, so they are applied in this order
+           process = Chain(Clean, 
+                           MESSMask,  
+                           Background(n = 10000,
+                                      bias = 200), 
+                           Crossvalidate(k = 5)),
+           model = MaxEnt,
+           # map thresholded at 0.05 using the 'falsenegative' method, without
+           # occurrence points, and restricted to the given x/y limits
+           output = PrintMap(points = FALSE,
+                             threshold = 0.05,
+                             thresholdmethod = 'falsenegative',
+                             xlim = c(-130, -70),
+                             ylim = c(20, 50)))
+
+## ----fengChangeWorkflow, eval = TRUE, fig.show = "hide", fig.height = 9, fig.width = 7.5, dev.args = list(pointsize = 16)----
+# Derive a second workflow from FengPapes in which only the output modules are
+# replaced: a response curve for the first covariate followed by an
+# interactive map.
+FengPapesUpdate <- 
+  ChangeWorkflow(workflow = FengPapes,
+                   output = Chain(ResponseCurve(cov = 1),
+                                  InteractiveMap)) 
+
+## ----combinefengupdate, eval = TRUE, echo = FALSE, dpi = 600, fig.show = "hide", fig.height = 4.5, fig.width = 4.5----
+# Combine the static map (A), the first response curve (B) and the
+# interactive map screenshot (C) into a single three-panel figure:
+# A and B share the top row, C fills the bottom row.
+
+# load the images; the response curve is read as a single layer and stacked
+# three times so that plotRGB() can draw it
+r_map1 <- brick('figs/fengworkflow-1.png')
+r_map <- brick('figs/interactive_map.png')
+r_resp <- raster('figs/fengChangeWorkflow-1.png')
+r_resp <- brick(r_resp, r_resp, r_resp)
+
+# write a panel letter at the top-left of the panel drawn most recently
+add_panel_letter <- function (letter) {
+  mtext(text = letter,
+        side = 3,
+        line = -1.5,
+        adj = 0)
+}
+
+# set up layout so that heights are the same for the top-row panels:
+# use the height (in pixels) of the shortest image as the common height
+plot_height <- min(nrow(r_map1), nrow(r_map), nrow(r_resp))
+
+# rescaling factor and resulting width for each top-row image
+rescale_map1 <- plot_height / nrow(r_map1)
+rescale_resp <- plot_height / nrow(r_resp)
+width_map1 <- ncol(r_map1) * rescale_map1
+width_resp <- ncol(r_resp) * rescale_resp
+
+# set up layout (maximum total of 200 columns) with a gap in between
+# mar won't work for these
+gap <- 2
+
+# top row: map1, gap, resp - widths proportional to the rescaled images
+widths_top <- c(width_map1, width_resp)
+widths_top <- round(widths_top * ((200 - (1 * gap)) / sum(widths_top)))
+widths_top <- c(widths_top[1], gap, widths_top[2])
+top_row <- rep(1:3, widths_top)
+
+# row heights (about 50 layout rows in total): top row, a one-row gap,
+# bottom row
+heights <- c(plot_height + 1, plot_height)
+heights <- round(heights * ((50 - (1)) / sum(heights)))
+heights <- c(heights[1], 1, heights[2])
+
+# layout matrix: panels 1-3 across the top, panel 4 is the horizontal gap,
+# panel 5 spans the whole bottom row
+top_matrix <- do.call(rbind, replicate(heights[1], top_row, simplify = FALSE))
+middle_matrix <- matrix(4, nrow = heights[2], ncol = ncol(top_matrix))
+bottom_matrix <- matrix(5, nrow = heights[3], ncol = ncol(top_matrix))
+
+mat <- rbind(top_matrix, middle_matrix, bottom_matrix)
+layout(mat)
+
+# panel 1: static map
+plotRGB(r_map1, maxpixels = Inf)
+add_panel_letter('A')
+
+# panel 2: gap
+plot.new()
+
+# panel 3: response curve, scaled to its own maximum cell value
+plotRGB(r_resp, maxpixels = Inf,
+        scale = max(maxValue(r_resp)))
+add_panel_letter('B')
+
+# panel 4: gap
+plot.new()
+
+# panel 5: interactive map
+plotRGB(r_map, maxpixels = Inf)
+add_panel_letter('C')
+
+## ----spthinmodule, eval = TRUE-------------------------------------------
+# Zoon 'process' module: spatially thin the presence records of a
+# presence-background dataset using thin() from the spThin package.
+#
+# Arguments:
+#   .data  list with an element `df`, a data frame with (at least) the columns
+#          `type`, `latitude` and `longitude`; every `type` must be either
+#          'presence' or 'background'
+#   thin   thinning parameter passed to thin() as `thin.par`
+#
+# Returns `.data` with `df` replaced by the thinned presence rows followed by
+# all remaining rows; rows containing NA are dropped first.
+spThin <- function (.data, thin = 50) {
+
+  # check these are presence-background data
+  stopifnot(all(.data$df$type %in% c('presence', 'background')))
+
+  # install & load the package
+  zoon::GetPackage('spThin')
+
+  # drop incomplete rows, then split into presence and other records.
+  # A logical mask is used rather than negative integer indices, because
+  # df[-integer(0), ] selects zero rows and would silently discard every
+  # background record if there were no presences.
+  df <- na.omit(.data$df)
+  is_pres <- df$type == 'presence'
+  pres_df <- df[is_pres, ]
+  other_df <- df[!is_pres, ]
+
+  # nothing to thin: return the cleaned data unchanged
+  if (nrow(pres_df) == 0) {
+    .data$df <- other_df
+    return (.data)
+  }
+
+  # prepare presence data subset and apply thinning.
+  # The argument `thin` is numeric, so the call thin(...) below still resolves
+  # to the function of that name (R skips non-function bindings when looking
+  # up a name in call position).
+  sub_df <- data.frame(LAT = pres_df$latitude,
+                       LONG = pres_df$longitude,
+                       SPEC = NA)
+  th <- thin(loc.data = sub_df,
+             thin.par = thin,
+             reps = 1,
+             locs.thinned.list.return = TRUE,
+             write.files = FALSE,
+             write.log.file = FALSE)
+
+  # row names of the thinned set are row positions in sub_df (and so in
+  # pres_df); keep those presences and append all other records
+  pres_keep_idx <- as.numeric(rownames(th[[1]]))
+  .data$df <- rbind(pres_df[pres_keep_idx, ],
+                    other_df)
+  return (.data)
+}
+
+## ----buildmodule, eval = TRUE, results = 'hide', fig.keep = 'none'-------
+# Turn the spThin function into a zoon 'process' module, attaching its
+# title, description, author details and parameter documentation.
+# NOTE(review): the `email` value looks like it was redacted by the page
+# scraper - restore the real address before running.
+# NOTE(review): dataType is 'presence-only' while spThin itself checks for
+# presence/background records - confirm this is the intended label.
+BuildModule(object = spThin,
+            type = 'process',
+            title = 'Spatial thinning of Presence-only Data',
+            description = paste('Apply the stochastic spatial thinning',
+                                'algorithm implemented in the spThin',
+                                'package to presence data in a',
+                                'presence-background dataset'),
+            details = paste('Full details of the algorithm are available in',
+                            'the open-access article by Aiello-Lammens',
+                            'et al. (2015): dx.doi.org/10.1111/ecog.01132'),
+            author = 'zoon Developers',
+            email = '[email protected]',
+            # one entry per module argument other than .data
+            paras = list(thin = paste('Thinning parameter - the required',
+                         'minimum distance (in kilometres) between points',
+                         'after applying the thinning procedure')),
+            dataType = 'presence-only',
+            check = TRUE)
+
+## ----maxent_comparison, dpi = 600, fig.show = "hide", eval = TRUE, out.height = 600, out.width = 600----
+# Fit six models to the Carolina wren data in a single workflow and, for each
+# one, draw a prediction map and compute performance measures.
+MaxEntComparison <- workflow(
+  occurrence = CarolinaWrenPO,
+  covariate  = CarolinaWrenRasters,
+  process    = Chain(SubsampleOccurrence(500),
+                     Background(n = 10000),
+                     CarolinaWrenValidation),
+  # listed models, in the order used for figure panels A-F and the AUC labels:
+  # MaxEnt without threshold features, default MaxNet, MaxNet with no
+  # regularisation, MaxNet with only linear features, MaxNet with only linear
+  # features and no regularisation, logistic regression
+  model      = list(MaxEnt(args = 'threshold=false'),
+                    MaxNet,
+                    MaxNet(regmult = 0.005),
+                    MaxNet(features = 'l'),
+                    MaxNet(regmult = 0.005, features = 'l'),
+                    LogisticRegression),
+  # two chained outputs per model: a map, then the performance measures
+  output     = Chain(PrintMap(points = FALSE),
+                     PerformanceMeasures))
+
+## ----maxent_buffer, eval = TRUE, echo = FALSE, dpi = 600, fig.show = "hide", out.height = 7, out.width = 7----
+
+# redraw each of the six comparison maps into a new PNG of the same pixel
+# size, with all margins removed and a 1:1 aspect ratio
+for (i in seq_len(6)) {
+  img <- brick(sprintf('figs/maxent_comparison-%i.png', i))
+  ext <- as.vector(extent(img))
+  x_range <- ext[1:2]
+  y_range <- ext[3:4]
+
+  # open the output device at the source image's dimensions
+  png(sprintf('figs/maxent_buffer-%i.png', i),
+      width = ncol(img),
+      height = nrow(img))
+
+  # empty plot covering the image extent, then paint the image into it
+  par(oma = rep(0, 4), mar = rep(0, 4))
+  plot.new()
+  plot.window(xlim = x_range, ylim = y_range, asp = 1)
+  plotRGB(img, maxpixels = Inf, add = TRUE)
+
+  dev.off()
+}
+
+## ----save_workflows, echo = FALSE, eval = TRUE---------------------------
+# write the three workflow objects to a single .RData file
+save(list = c('FengPapes', 'FengPapesUpdate', 'MaxEntComparison'),
+     file = 'zoon_applications_paper_workflows.RData')
+
+## ----maxent_plotting, eval = TRUE, echo = FALSE, dpi = 600, fig.show = "hide", fig.height = 7, fig.width = 9----
+
+# arrange the six buffered maps in a 3 x 2 grid, filled row by row, and
+# letter each panel A-F at its top-left
+par(mfrow = c(3, 2))
+
+for (i in seq_len(6)) {
+  img <- brick(sprintf('figs/maxent_buffer-%i.png', i))
+  plotRGB(img, maxpixels = Inf)
+  mtext(text = LETTERS[i], side = 3, line = -1.5, adj = 0)
+}
+
+
+
+
+## ----maxent_aucs, eval = TRUE, echo = FALSE------------------------------
+# AUCs to be embedded in the text below.
+# The report holds two outputs per model (map, then performance measures),
+# so the even-numbered elements are the six performance-measure results.
+perflist <- MaxEntComparison$report[seq(2, 12, by = 2)]
+
+# vapply insists on exactly one number per model, so an unexpected report
+# structure fails here rather than producing a list
+AUCs <- vapply(perflist, function(x) x$auc, numeric(1))
+names(AUCs) <- c('MaxEnt',
+                 'MaxNet',
+                 'MaxNet with no regularisation',
+                 'MaxNet with only linear features',
+                 'MaxNet with only linear features and no regularisation',
+                 'Logistic regression')
+AUCs <- round(AUCs, digits = 4)
+
+## ----results = 'asis', echo = FALSE--------------------------------------
+# emit "name value; name value; ..." followed by a full stop
+auc_text <- paste(names(AUCs), AUCs, collapse = '; ')
+cat(auc_text, '.', sep = '')
+
+## ----flow_diagram, echo = FALSE, fig.cap = "Figure 1. The modular SDM structure encoded by a zoon workflow. A) Description of the five module types. B) Flow diagram illustrating how objects are passed between different module types: 'data frame' - a data frame of occurrence records; 'raster' - a RasterStack object of the covariates; 'model' - a ZoonModel object, generating standardised predictions from a given model. C) The flow diagram implied by chaining two 'process' modules. D) The flow diagram implied by listing three 'model' modules. Full details of module inputs and outputs, and the effects of listing and chaining each module type are given in the zoon vignette 'Building a module'."----
+knitr::include_graphics("figs/diagrams.png")
+
+## ----feng_papes_plots, cache = FALSE, echo = FALSE, fig.cap = "Figure 2. Outputs of the workflow objects 'FengPapes' and 'FengPapesUpdate'. A) Map of the MaxEnt predicted distribution, with a 5\\% omission rate threshold, by the 'PrintMap' module in the workflow 'FengPapes' which encodes the core of a published analysis. B) A response curve produced by the 'ResponseCurve' module for the first covariate, bio1 in the workflow 'FengPapesUpdate', which modifies the original analysis workflow. C) A screenshot of the interactive map produced by the 'InteractiveMap' module in the workflow 'FengPapesUpdate', displaying raw occurrence data and predicted distribution over a global map, allowing users to interactively explore their results. White areas are masked due to being in the MESS mask. Any SDM analysis distributed as a zoon workflow can easily be explored and scrutinized by modifying its output modules using the function 'ChangeWorkflow'."----
+knitr::include_graphics("figs/combinefengupdate-1.png")
+
+## ----maxent_plots, cache = FALSE, echo = FALSE, fig.cap = "Figure 3. Prediction of the distribution of the Carolina wren from 6 different models, produced by the workflow 'MaxEntComparison'. A) MaxEnt without threshold features. B) MaxNet with default settings. C) MaxNet without regularisation. D) MaxNet with regularisation but only linear features. E) MaxNet without regularisation and only linear features. F) Logistic regression. MaxNet with full features but no regularisation (C) gave the most local complexity, indicative of overfitting to the data. The models with only linear features (D-F) had a broader distribution, indicative of underfitting. Differences between MaxNet with linear features and no  regularisation (E) and logistic regression (F) are due to the downweighting applied to background data in the former."----
+knitr::include_graphics("figs/maxent_plotting-1.png")
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#!/bin/bash
		pandoc -s -S -V geometry:margin=1in -H response_header.tex response_to_reviewers.md -o response_to_reviewers.pdf