diff --git a/.Rbuildignore b/.Rbuildignore index 9acbafd3..4a444a01 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,9 @@ logos .travis.yml docs man/figures/plot_greta_legend.R +cran-comments.md +vignettes/build_vignettes.R +vignettes/example_models_cache +vignettes/get_started_cache +vignettes/technical_details_cache +vignettes/get_started_files diff --git a/.gitignore b/.gitignore index fdc028f1..2c9bf0e3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,16 @@ .Rhistory .RData .Ruserdata + README_cache -vignettes/getting_started_cache -vignettes/how_does_this_work_cache +vignettes/example_models_cache +vignettes/get_started_cache +vignettes/technical_details_cache + vignettes/figures + *.pdf docs/*.Rmd docs/examples docs/*_cache +cran-comments.md diff --git a/.travis.yml b/.travis.yml index 0f242b18..3b60e161 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ r_packages: - knitr - rmarkdown - rsvg - - MCMCvis + - bayesplot - extraDistr r_github_packages: diff --git a/DESCRIPTION b/DESCRIPTION index b086fa19..a5562ef7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,22 +1,11 @@ Package: greta Type: Package -Title: Probabilistic Modelling with TensorFlow -Version: 0.1.9 -Date: 2017-05-28 +Title: Simple and Scalable Statistical Modelling in R +Version: 0.2.0 +Date: 2017-06-26 Authors@R: person("Nick", "Golding", role = c("aut", "cre"), email = "nick.golding.research@gmail.com") -Description: Existing tools for fitting bespoke statistical models (such as - BUGS, JAGS and STAN) are very effective for moderately-sized problems, but - don't scale so well to large datasets. These tools also require users to learn - a domain-specific language and fix errors at compile time. greta enables users - to construct probabilistic models interactively in native R code, then sample - from those models efficiently using Hamiltonian Monte Carlo. TensorFlow is used - to perform all of the calculations, so greta is particularly fast where the - model contains large linear algebra operations. greta can also be run across - distributed machines or on GPUs, just by installing the relevant version of - TensorFlow. This package is in the early stages of development. Future releases - will likely enable fitting models with fast approximate inference schemes, - different samplers, and more distributions and operations. +Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://www.tensorflow.org/> for more information).
License: Apache License 2.0 URL: https://github.com/goldingn/greta BugReports: https://github.com/goldingn/greta/issues @@ -27,27 +16,29 @@ LazyData: true Depends: R (>= 3.0) Collate: - 'greta_package.R' + 'package.R' 'overloaded.R' 'node_class.R' 'node_types.R' 'variable.R' - 'distributions.R' + 'probability_distributions.R' 'unknowns_class.R' 'greta_array_class.R' 'as_data.R' 'utils.R' - 'syntax.R' + 'distribution.R' 'operators.R' 'functions.R' - 'transformations.R' + 'transforms.R' 'structures.R' 'extract_replace_combine.R' 'dynamics_module.R' 'dag_class.R' 'greta_model_class.R' 'progress_bar.R' + 'inference.R' 'samplers.R' + 'install_tensorflow.R' Imports: R6, tensorflow, @@ -58,7 +49,8 @@ Suggests: knitr, rmarkdown, DiagrammeR, - MCMCvis, + bayesplot, + lattice, testthat, mvtnorm, MCMCpack, diff --git a/NAMESPACE b/NAMESPACE index 4268c75a..3e93a410 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ S3method(as.greta_array,logical) S3method(as.greta_array,matrix) S3method(as.greta_array,node) S3method(as.greta_array,numeric) +S3method(as.matrix,greta_array) S3method(as_data,default) S3method(as_data,greta_array) S3method(asin,greta_array) @@ -75,7 +76,6 @@ S3method(tail,greta_array) S3method(tan,greta_array) export("%*%") export("distribution<-") -export("likelihood<-") export(as_data) export(bernoulli) export(beta) @@ -91,16 +91,17 @@ export(distribution) export(dynamics) export(exponential) export(f) -export(free) export(gamma) export(greta_array) export(hypergeometric) export(icauchit) export(icloglog) export(ilogit) +export(install_tensorflow) export(inverse_gamma) export(iprobit) export(laplace) +export(lkj_correlation) export(log1pe) export(logistic) export(lognormal) @@ -111,6 +112,7 @@ export(multivariate_normal) export(negative_binomial) export(normal) export(ones) +export(opt) export(pareto) export(poisson) export(stashed_samples) @@ -132,6 +134,7 @@ importFrom(reticulate,py_set_attr) importFrom(stats,na.omit) importFrom(stats,rnorm) importFrom(stats,runif) +importFrom(tensorflow,install_tensorflow) importFrom(utils,head) importFrom(utils,setTxtProgressBar) importFrom(utils,tail) diff --git a/R/as_data.R b/R/as_data.R index 355ed7ec..ec0e40cf 100644 --- a/R/as_data.R +++ b/R/as_data.R @@ -1,5 +1,5 @@ #' @name as_data -#' @title Declare R Objects as Data +#' @title convert other objects to greta arrays #' @description define an object in an R session as a data greta array for use #' as data in a greta model. #' @param x an R object that can be coerced to a greta_array (see details). 
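For context on the as_data() hunk above, a minimal sketch of the coercion it documents; the objects y and X are illustrative, not part of the package:

library(greta)

# coerce plain R objects to data greta arrays, so they can be referred to
# later (e.g. when assigning a likelihood with distribution())
y = as_data(rnorm(5, 0, 3))
X = as_data(matrix(1:9, nrow = 3, ncol = 3))

dim(y)  # 5 x 1 - greta stores vectors as column matrices
dim(X)  # 3 x 3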
diff --git a/R/dag_class.R b/R/dag_class.R index f52e3d61..df88a6d1 100644 --- a/R/dag_class.R +++ b/R/dag_class.R @@ -110,7 +110,7 @@ dag_class <- R6Class( # define all nodes, node densities and free states in the environment lapply(self$node_list, function (x) x$define_tf(self)) - # define an overall log density and relevant gradients there + # define an overall log density and gradients, plus adjusted versions self$define_joint_density() self$define_gradients() @@ -132,6 +132,42 @@ dag_class <- R6Class( }, + # define tensor for overall log density and gradients + define_joint_density = function () { + + # get names of densities for all distribution nodes + density_names <- self$get_tf_names(types = 'distribution') + + # get TF density tensors for all distribution + densities <- lapply(density_names, get, envir = self$tf_environment) + + # reduce_sum them + summed_densities <- lapply(densities, tf$reduce_sum) + + # remove their names and sum them together + names(summed_densities) <- NULL + joint_density <- tf$add_n(summed_densities) + + # assign overall density to environment + assign('joint_density', joint_density, envir = self$tf_environment) + + # define adjusted joint density + + # get names of adjustment tensors for all variable nodes + adj_names <- paste0(self$get_tf_names(types = 'variable'), '_adj') + + # get TF density tensors for all distribution + adj <- lapply(adj_names, get, envir = self$tf_environment) + + # remove their names and sum them together + names(adj) <- NULL + total_adj <- tf$add_n(adj) + + # assign overall density to environment + assign('joint_density_adj', joint_density + total_adj, envir = self$tf_environment) + + }, + define_gradients = function () { # get names of free states for all variable nodes @@ -144,47 +180,36 @@ dag_class <- R6Class( # names of tensors free_name <- paste0(name, '_free') gradient_name <- paste0(name, '_gradient') + gradient_adj_name <- paste0(name, '_gradient_adj') + # raw gradients gradient <- tf$gradients(self$tf_environment$joint_density, self$tf_environment[[free_name]]) gradient_reshape <- tf$reshape(gradient, shape(-1)) - self$tf_environment[[gradient_name]] <- gradient_reshape + # adjusted gradients + gradient_adj <- tf$gradients(self$tf_environment$joint_density_adj, + self$tf_environment[[free_name]]) + gradient_adj_reshape <- tf$reshape(gradient_adj, shape(-1)) + self$tf_environment[[gradient_adj_name]] <- gradient_adj_reshape + } # combine the gradients into one tensor gradient_names <- paste0(variable_tf_names, '_gradient') - gradient_list <- lapply(gradient_names, get, envir = self$tf_environment) - self$tf_environment$gradients <- tf$concat(gradient_list, 0L) - }, - - # define tensor for overall log density and gradients - define_joint_density = function () { - - # get names of densities for all distribution nodes - density_names <- self$get_tf_names(types = 'distribution') - - # get TF density tensors for all distribution - densities <- lapply(density_names, get, envir = self$tf_environment) + # same for adjusted gradients + gradient_adj_names <- paste0(variable_tf_names, '_gradient_adj') + gradient_adj_list <- lapply(gradient_adj_names, + get, + envir = self$tf_environment) - # convert to double precision floats - densities_double <- lapply(densities, tf$cast, tf$float64) - - # reduce_sum them - summed_densities <- lapply(densities_double, tf$reduce_sum) - - # remove their names and sum them together - names(summed_densities) <- NULL - sum_total <- tf$add_n(summed_densities) - - # assign overall density to 
environment - assign('joint_density', sum_total, envir = self$tf_environment) + self$tf_environment$gradients_adj <- tf$concat(gradient_adj_list, 0L) }, @@ -217,17 +242,20 @@ dag_class <- R6Class( }, - log_density = function() { + # get log density and gradient of joint density w.r.t. free states of all + # variable nodes, with or without applying the jacobian adjustment + log_density = function(adjusted = TRUE) { - with(self$tf_environment, - sess$run(joint_density, feed_dict = parameter_dict)) + cleanly(with(self$tf_environment, + sess$run(joint_density_adj, feed_dict = parameter_dict))) }, - # get gradient of joint density w.r.t. free states of all variable nodes - gradients = function () { - with(self$tf_environment, - sess$run(gradients, feed_dict = parameter_dict)) + gradients = function (adjusted = TRUE) { + + cleanly(with(self$tf_environment, + sess$run(gradients_adj, feed_dict = parameter_dict))) + }, # return the current values of the traced nodes, as a named vector diff --git a/R/syntax.R b/R/distribution.R similarity index 56% rename from R/syntax.R rename to R/distribution.R index 8176f564..7c56ac1e 100644 --- a/R/syntax.R +++ b/R/distribution.R @@ -1,28 +1,20 @@ -# syntax definitions - #' @name distribution -#' @aliases distribution likelihood -#' @title Define a Distribution Over a greta Array -#' @description \code{distribution} links observed data, variables, and other -#' greta arrays to probability distributions. For example a model likelhood -#' can be set by using \code{distribution} on some observed data. -#' \code{likelihood} is an alias for \code{distribution}. It is deprecated and -#' will be removed in version 0.2. +#' @aliases distribution +#' @title define a distribution over data + +#' @description \code{distribution} defines probability distributions over +#' observed data, e.g. to set a model likelihood. #' -#' @param greta_array a greta array. For the assignment method it must be a -#' greta array that doesn't already have a probability distribution. +#' @param greta_array a data greta array. For the assignment method it must not +#' already have a probability distribution assigned #' #' @param value a greta array with a distribution (see -#' \code{\link{greta-distributions}}) +#' \code{\link{distributions}}) #' #' @details The extract method returns the greta array if it has a distribution, -#' or \code{NULL} if it doesn't. It has now real function, but is included for +#' or \code{NULL} if it doesn't. It has no real use-case, but is included for #' completeness #' -#' \code{distribution} can also be used to create truncated distributions, by -#' first defining a greta array with constraints (the truncation) and then -#' defining the distribution on that greta array. See below for an example. 
-#' #' @export #' @examples #' @@ -31,17 +23,15 @@ #' # observed data and mean parameter to be estimated #' # (explicitly coerce data to a greta array so we can refer to it later) #' y = as_data(rnorm(5, 0, 3)) -#' mu = variable() +#' +#' mu = uniform(-3, 3) +#' #' # define the distribution over y (the model likelihood) #' distribution(y) = normal(mu, 1) #' #' # get the distribution over y #' distribution(y) #' -#' # define a truncated-positive standard normal random variable -#' tn = variable(lower = 0) -#' distribution(tn) = normal(0, 1) -#' `distribution<-` <- function (greta_array, value) { # stash the old greta array to return @@ -52,17 +42,23 @@ # only for greta arrays without distributions if (!is.null(greta_array$node$distribution)) { - stop ('left hand side already has a distribution assigned', + stop ("left hand side already has a distribution assigned", call. = FALSE) } - # can only assign with greta arrays + # only for data greta arrays + if (node_type(greta_array$node) != 'data') { + stop ("distributions can only be assigned to data greta arrays", + call. = FALSE) + } + + # can only assign with greta arrays ... if (!is.greta_array(value)) { - stop ('right hand side must be a greta array', + stop ("right hand side must be a greta array", call. = FALSE) } - # that have distributions + # ... that have distributions distribution_node <- value$node$distribution if (!inherits(distribution_node, 'distribution_node')) { @@ -92,28 +88,6 @@ # also adds distribution_node as this node's distribution distribution_node$replace_target(greta_array$node) - # if the greta_array was a variable, check its constraints as truncation - if (inherits(greta_array$node, 'variable_node')) { - - truncated <- greta_array$node$lower != -Inf | - greta_array$node$upper != Inf - - if (truncated) { - - # check the distribution can handle truncation - if (is.null(distribution_node$tf_cdf_function)) { - stop(distribution_node$distribution_name, - ' distribution cannot be truncated', - call. = FALSE) - } else { - distribution_node$truncation <- c(greta_array$node$lower, - greta_array$node$upper) - } - - } - - } - # return greta_array (pre-conversion to a greta array) greta_array_tmp @@ -144,7 +118,3 @@ distribution <- function (greta_array) { distrib } - -#' @rdname distribution -#' @export -`likelihood<-` <- `distribution<-` diff --git a/R/dynamics_module.R b/R/dynamics_module.R index e9dd6f78..385a87e4 100644 --- a/R/dynamics_module.R +++ b/R/dynamics_module.R @@ -202,7 +202,7 @@ iterate_lambda_vectorised <- function(matrices, state, n, m, niter) { #' @name dynamics-module #' @aliases dynamics -#' @title Functions for modelling dynamical systems +#' @title methods for modelling structured dynamical systems #' #' @description A module providing functions specific to dynamical modelling. So #' far only for iterating Leslie matrices. \code{iterate_lambda} iterates a diff --git a/R/extract_replace_combine.R b/R/extract_replace_combine.R index 6347433d..ba54db52 100644 --- a/R/extract_replace_combine.R +++ b/R/extract_replace_combine.R @@ -1,9 +1,9 @@ #' @name extract-replace-combine #' @aliases extract replace cbind rbind c rep -#' @title Extract, Replace and Combine greta Arrays +#' @title extract, replace and combine greta arrays #' -#' @description Generic methods to extract and replace elements of greta arrays, or to -#' combine greta arrays. +#' @description Generic methods to extract and replace elements of greta arrays, +#' or to combine greta arrays. 
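To make the behavioural change in distribution() concrete: with this patch the left-hand side must be a data greta array, so a likelihood is declared roughly as in the sketch below (built from the example in the hunk above; the commented error is the message added in this diff):

y = as_data(rnorm(5, 0, 3))    # data greta array (the observations)
mu = uniform(-3, 3)            # variable greta array (the parameter)

# set the model likelihood; y must be a data greta array
distribution(y) = normal(mu, 1)

# assigning to a non-data greta array now fails, e.g.
# distribution(mu) = normal(0, 1)
# Error: distributions can only be assigned to data greta arrays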
#' #' @section Usage: \preformatted{ #' # extract diff --git a/R/functions.R b/R/functions.R index 5559794b..fcd4f2c6 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1,10 +1,10 @@ -#' @name greta-functions +#' @name functions #' -#' @title R functions that work for greta arrays +#' @title functions for greta arrays #' #' @description This is a list of functions in base R that are currently -#' implemented to transform greta arrays Also see \link{greta-operators} and -#' \link{greta-transforms}. +#' implemented to transform greta arrays. Also see \link{operators} and +#' \link{transforms}. #' #' @section Usage: \preformatted{ #' @@ -70,8 +70,8 @@ #' converted into a greta array. #' #' \code{sweep()} only works on two-dimensional greta arrays (so \code{MARGIN} -#' can only be either 1 or 2), and for subtraction, addition, division and -#' multiplication. +#' can only be either 1 or 2), and only for subtraction, addition, division +#' and multiplication. #' #' @examples #' x = as_data(matrix(1:9, nrow = 3, ncol = 3)) @@ -216,7 +216,7 @@ chol.greta_array <- function (x, ...) { op("chol", x, dimfun = dimfun, tf_operation = 'tf_chol') } -#' @rdname greta-overloaded +#' @rdname overloaded #' @export diag <- function (x = 1, nrow, ncol) UseMethod('diag', x) @@ -415,7 +415,7 @@ tf_sweep <- function (x, STATS, MARGIN, FUN) { } -#' @rdname greta-overloaded +#' @rdname overloaded #' @export sweep <- function (x, MARGIN, STATS, FUN = "-", check.margin = TRUE, ...) UseMethod('sweep', x) diff --git a/R/greta_array_class.R b/R/greta_array_class.R index 39192ab8..c5a2ffb5 100644 --- a/R/greta_array_class.R +++ b/R/greta_array_class.R @@ -234,3 +234,8 @@ tail.greta_array <- function (x, n = 6L, ...) { ans } + +# return the unknowns array for this greta array +#' @export +as.matrix.greta_array <- function (x, ...) + x$node$value() diff --git a/R/greta_model_class.R b/R/greta_model_class.R index 2e8c80eb..a16c9227 100644 --- a/R/greta_model_class.R +++ b/R/greta_model_class.R @@ -1,14 +1,14 @@ # greta_model objects -#' @name greta-model -#' @title Greta Model Objects +#' @name model +#' @title greta model objects #' @description Create a \code{greta_model} object representing a statistical #' model (using \code{model}), and plot a graphical representation of the #' model. Statistical inference can be performed on \code{greta_model} objects #' with \code{\link{mcmc}} NULL -#' @rdname greta-model +#' @rdname model #' @export #' @importFrom parallel detectCores #' @@ -172,7 +172,7 @@ model <- function (..., } -#' @name greta-model +#' @name model #' #' @details \code{define_model} is an alias for \code{model}. It is deprecated, #' and will be removed in version 0.2. @@ -188,14 +188,14 @@ as.greta_model.dag_class <- function (x, ...) { ans } -#' @rdname greta-model +#' @rdname model #' @param x a \code{greta_model} object #' @export print.greta_model <- function (x, ...) { cat('greta model') } -#' @rdname greta-model +#' @rdname model #' @param y unused default argument #' #' @details The plot method produces a visual representation of the defined diff --git a/R/greta_package.R b/R/greta_package.R deleted file mode 100644 index 069ec274..00000000 --- a/R/greta_package.R +++ /dev/null @@ -1,56 +0,0 @@ -# package file - -#' greta: Probabilistic Modelling with TensorFlow -#' @name greta -#' -#' @description greta lets you write probabilistic models interactively in -#' native R code, then sample from them efficiently using Hamiltonian Monte -#' Carlo. 
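On the new as.matrix.greta_array() method a few hunks up: it returns the node's current value array, so a data greta array yields its data and an undetermined greta array yields unknowns. A small sketch, with shapes in the comments being indicative only:

x = normal(0, 1, dim = 3)
as.matrix(x)    # a 3 x 1 array of unknowns, since x has no fixed value yet

y = as_data(matrix(1:4, 2, 2))
as.matrix(y)    # the underlying 2 x 2 data values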
-#' -#' The computational heavy lifting is done by TensorFlow, Google's automatic -#' differentiation library. greta is particularly fast where the model -#' contains lots of linear algebra, and greta models can be easily set up to -#' run across CPUs or GPUs just by installing the relevant version of -#' TensorFlow. -#' -#' See the example below for the general set up of a greta model, and -#' \link{greta-distributions}, \link{greta-operators}, \link{greta-functions}, -#' \link{greta-transforms} and \link{greta-structures} for details of the -#' currently implemented syntax and how to combine them into models -#' -#' @docType package -#' @import tensorflow -#' @import R6 -#' @importFrom grDevices colorRampPalette -#' @examples -#' \dontrun{ -#' # define a simple model -#' mu = variable() -#' sigma = lognormal(1, 0.1) -#' x = rnorm(10) -#' distribution(x) = normal(mu, sigma) -#' -#' m <- model(mu, sigma) -#' -#' # and sample from it -#' draws <- mcmc(m, -#' n_samples = 100, -#' warmup = 10) -#' } -NULL - -# crate the node list object whenever the package is loaded -.onLoad <- function (libname, pkgname) { - - # silence TF's CPU instructions message - Sys.setenv(TF_CPP_MIN_LOG_LEVEL=2) - - # warn if TF version is bad - check_tf_version('warn') - - # default float type - options(greta_tf_float = tf$float32) - -} - - diff --git a/R/inference.R b/R/inference.R new file mode 100644 index 00000000..46b9a801 --- /dev/null +++ b/R/inference.R @@ -0,0 +1,346 @@ +#' @name inference +#' @title statistical inference on greta models +#' @description Carry out statistical inference on greta models by +#' MCMC or likelihood/posterior optimisation. +NULL + +#' @rdname inference +#' @export +#' @importFrom stats na.omit +#' +#' @details If the sampler is aborted before finishing, the samples collected so +#' far can be retrieved with \code{stashed_samples()}. Only samples from the +#' sampling phase will be returned. +stashed_samples <- function () { + + stashed <- exists('trace_stash', envir = greta_stash) + + if (stashed) { + + # get them, remove the NAs, and return + draws <- greta_stash$trace_stash + draws_clean <- na.omit(draws) + draws_prepped <- prepare_draws(draws_clean) + + return (draws_prepped) + + } else { + + return (invisible()) + + } + +} + +# create an object stash in greta's namespace, to return traces to the user when +# they abort a run +greta_stash <- new.env() + +stash_trace <- function (trace) + assign('trace_stash', trace, envir = greta_stash) + +#' @rdname inference +#' @export +#' @importFrom stats rnorm runif +#' @importFrom utils setTxtProgressBar txtProgressBar +#' +#' @param model greta_model object +#' @param method the method used to sample or optimise values. Currently only +#' one method is available for each procedure: \code{hmc} and \code{adagrad} +#' @param n_samples the number of MCMC samples to draw (after any warm-up, but +#' before thinning) +#' @param thin the MCMC thinning rate; every \code{thin} samples is retained, +#' the rest are discarded +#' @param warmup the number of samples to spend warming up the mcmc sampler. +#' During this phase the sampler moves toward the highest density area and +#' tunes sampler hyperparameters. +#' @param verbose whether to print progress information to the console +#' @param pb_update how regularly to update the progress bar (in iterations) +#' @param control an optional named list of hyperparameters and options to +#' control behaviour of the sampler or optimiser. See Details. 
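A sketch of the stashed_samples() workflow documented above, assuming a greta model m built as in the examples elsewhere in this diff:

# draws <- mcmc(m, n_samples = 5000, warmup = 100)
# ...suppose the sampler is interrupted part-way through the sampling phase...

partial <- stashed_samples()   # mcmc.list of the sampling-phase draws collected so far
if (!is.null(partial))
  summary(partial)             # analyse with coda / bayesplot as usual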
+#' @param initial_values an optional named vector of initial values for the free +#' parameters in the model. These will be used as the starting point for +#' sampling/optimisation +#' +#' @details For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows +#' the number of iterations so far and the expected time to complete the phase +#' of model fitting (warmup or sampling). Updating the progress bar regularly +#' slows down sampling, by as much as 9 seconds per 1000 updates. So if you +#' want the sampler to run faster, you can change \code{pb_update} to increase +#' the number of iterations between updates of the progress bar, or turn the +#' progress bar off altogether by setting \code{verbose = FALSE}. +#' +#' Occasionally, a proposed set of parameters can cause numerical instability +#' (I.e. the log density or its gradient is \code{NA}, \code{Inf} or +#' \code{-Inf}); normally because the log joint density is so low that it +#' can't be represented as a floating point number. When this happens, the +#' progress bar will also display the proportion of samples so far that were +#' 'bad' (numerically unstable) and therefore rejected. +#' If you're getting a lot of numerical instability, you might want to +#' manually define starting values to move the sampler into a more reasonable +#' part of the parameter space. Alternatively, you could redefine the model +#' (via \code{model}) to have double precision, though this will slow down +#' sampling. +#' +#' Currently, the only implemented MCMC procedure is static Hamiltonian +#' Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the +#' leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the +#' sampler efficiency. The \code{control} argument can be used to specify the +#' initial value for epsilon, along with two other hyperparameters: \code{Lmin} +#' and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the +#' upper and lower limits to the number of leapfrog steps per iteration (from +#' which the number is selected uniformly at random). +#' +#' The default control options for HMC are: +#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} +#' +#' @return \code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using +#' functions from the coda package. This will contain mcmc samples of the +#' greta arrays used to create \code{model}. +#' +#' @examples +#' \dontrun{ +#' # define a simple model +#' mu = variable() +#' sigma = lognormal(1, 0.1) +#' x = rnorm(10) +#' distribution(x) = normal(mu, sigma) +#' m <- model(mu, sigma) +#' +#' # carry out mcmc on the model +#' draws <- mcmc(m, +#' n_samples = 100, +#' warmup = 10) +#' } +mcmc <- function (model, + method = c("hmc"), + n_samples = 1000, + thin = 1, + warmup = 100, + verbose = TRUE, + pb_update = 10, + control = list(), + initial_values = NULL) { + + method <- match.arg(method) + + # find variable names to label samples + target_greta_arrays <- model$target_greta_arrays + names <- names(target_greta_arrays) + + # check they're not data nodes, provide a useful error message if they are + are_data <- vapply(target_greta_arrays, + function (x) inherits(x$node, 'data_node'), + FUN.VALUE = FALSE) + + if (any(are_data)) { + is_are <- ifelse(sum(are_data) == 1, 'is a data greta array', 'are data greta arrays') + bad_greta_arrays <- paste(names[are_data], collapse = ', ') + msg <- sprintf('%s %s, data greta arrays cannot be sampled', + bad_greta_arrays, + is_are) + stop (msg, call. 
= FALSE) + } + + # get the dag containing the target nodes + dag <- model$dag + + # random starting locations + if (is.null(initial_values)) { + + # try several times + valid <- FALSE + attempts <- 1 + while (!valid & attempts < 10) { + + initial_values <- dag$example_parameters() + # increase the jitter each time + initial_values[] <- rnorm(length(initial_values), 0, 1 + attempts / 5) + + # test validity of values + valid <- valid_parameters(dag, initial_values) + attempts <- attempts + 1 + + } + + if (!valid) { + stop ('Could not find reasonable starting values after ', attempts, + ' attempts. Please specify initial values manually via the ', + 'initial_values argument to mcmc', + call. = FALSE) + } + + } else { + + if (!valid_parameters(dag, initial_values)) { + stop ('The log density and gradients could not be evaluated at these ', + 'initial values.', + call. = FALSE) + } + + } + + + # get default control options + con <- switch(method, + hmc = list(Lmin = 10, + Lmax = 20, + epsilon = 0.005)) + + # update them with user overrides + con[names(control)] <- control + + # fetch the algorithm + method <- switch(method, + hmc = hmc) + + # if warmup is required, do that now and update init + if (warmup > 0) { + + if (verbose) + pb_warmup <- create_progress_bar('warmup', c(warmup, n_samples), pb_update) + else + pb_warmup <- NULL + + # run it + warmup_draws <- method(dag = dag, + init = initial_values, + n_samples = warmup, + thin = thin, + verbose = verbose, + pb = pb_warmup, + tune = TRUE, + stash = FALSE, + control = con) + + # use the last draw of the full parameter vector as the init + initial_values <- attr(warmup_draws, 'last_x') + con <- attr(warmup_draws, 'control') + + } + + if (verbose) + pb_sampling <- create_progress_bar('sampling', c(warmup, n_samples), pb_update) + else + pb_sampling <- NULL + + # run the sampler + draws <- method(dag = dag, + init = initial_values, + n_samples = n_samples, + thin = thin, + verbose = verbose, + pb = pb_sampling, + tune = FALSE, + stash = TRUE, + control = con) + + # if this was successful, trash the stash, prepare and return the draws + rm('trace_stash', envir = greta_stash) + prepare_draws(draws) + +} + +#' @importFrom coda mcmc mcmc.list +prepare_draws <- function (draws) { + # given a matrix of draws returned by the sampler, prepare it and return + draws_df <- data.frame(draws) + draws_mcmc <- coda::mcmc(draws_df) + coda::mcmc.list(draws_mcmc) +} + + +#' @rdname inference +#' @export +#' +#' @param max_iterations the maximum number of iterations before giving up +#' @param tolerance the numerical tolerance for the solution, the optimiser stops when the (absolute) difference in the joint density between successive iterations drops below this level +#' +#' @details Currently, the only implemented optimisation algorithm is Adagrad +#' (\code{method = "adagrad"}). The \code{control} argument can be used to +#' specify the optimiser hyperparameters: \code{learning_rate} (default 0.8), +#' \code{initial_accumulator_value} (default 0.1) and \code{use_locking} +#' (default \code{TRUE}). 
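A minimal sketch of calling opt() with these control options; m is the model from the mcmc example above, and the hyperparameter values are illustrative:

fit <- opt(m,
           method = "adagrad",
           max_iterations = 200,
           control = list(learning_rate = 0.5))

fit$par          # named vector of the best parameter values found
fit$value        # log joint density of the model at fit$par
fit$convergence  # 0 if it stopped before hitting max_iterations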
The are passed directly to TensorFlow's optimisers, +#' see +#' \href{https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer}{the +#' TensorFlow docs} for more information +#' +#' @return \code{opt} - a list containing the following named elements: +#' \itemize{ +#' \item{par}{the best set of parameters found} +#' \item{value}{the log joint density of the model at the parameters par} +#' \item{iterations}{the number of iterations taken by the optimiser} +#' \item{convergence}{an integer code, 0 indicates successful completion, 1 +#' indicates the iteration limit max_iterations had been reached} +#' } +#' +#' @examples +#' \dontrun{ +#' # find the MAP estimate +#' opt_res <- opt(m) +#' } +opt <- function (model, + method = c("adagrad"), + max_iterations = 100, + tolerance = 1e-6, + control = list(), + initial_values = NULL) { + + # get the tensorflow environment + tfe <- model$dag$tf_environment + + # get the method + method <- match.arg(method) + optimise_fun <- switch (method, + adagrad = tf$train$AdagradOptimizer) + + # default control options + con <- switch (method, + adagrad = list(learning_rate = 0.8, + initial_accumulator_value = 0.1, + use_locking = TRUE)) + + # update them with user overrides + con[names(control)] <- control + + # set up optimiser + tfe$optimiser <- do.call(optimise_fun, con) + with(tfe, train <- optimiser$minimize(-joint_density)) + + # random initial values if unspecified + if (is.null(initial_values)) { + initial_values <- model$dag$example_parameters() + initial_values[] <- rnorm(length(initial_values)) + } + + # initialize the variables, then set the ones we care about + with(tfe, sess$run(tf$global_variables_initializer())) + parameters <- relist_tf(initial_values, model$dag$parameters_example) + + for (i in seq_along(parameters)) { + variable_name <- paste0(names(parameters)[i], '_free') + vble <- tfe[[variable_name]] + init <- tf$constant(parameters[[i]], + shape = vble$shape, + dtype = tf_float()) + tmp <- tfe$sess$run(vble$assign(init)) + } + + diff <- old_obj <- Inf + it <- 0 + + while (it < max_iterations & diff > tolerance) { + it <- it + 1 + with(tfe, sess$run(train)) + obj <- with(tfe, sess$run(-joint_density)) + diff <- abs(old_obj - obj) + old_obj <- obj + } + + list(par = model$dag$trace_values(), + value = with(tfe, sess$run(joint_density)), + iterations = it, + convergence = ifelse(it < max_iterations, 0, 1)) + +} + diff --git a/R/install_tensorflow.R b/R/install_tensorflow.R new file mode 100644 index 00000000..39773827 --- /dev/null +++ b/R/install_tensorflow.R @@ -0,0 +1,3 @@ +#' @importFrom tensorflow install_tensorflow +#' @export +tensorflow::install_tensorflow diff --git a/R/node_class.R b/R/node_class.R index 5a8f9b1e..7042439f 100644 --- a/R/node_class.R +++ b/R/node_class.R @@ -5,6 +5,7 @@ node <- R6Class( unique_name = '', children = list(), parents = list(), + representations = list(), .value = array(NA), dim = NA, distribution = NULL, diff --git a/R/node_types.R b/R/node_types.R index adc10f7c..26f3c284 100644 --- a/R/node_types.R +++ b/R/node_types.R @@ -206,21 +206,21 @@ variable_node <- R6Class ( # assign this as the free state tf_name <- dag$tf_name(self) - free_name <- sprintf('%s_free', tf_name) assign(free_name, tf_obj, envir = dag$tf_environment) - # map from the free to constrained state in a new tensor + # get the log jacobian adjustment for the free state + tf_adj <- self$tf_adjustment(dag) + adj_name <- sprintf('%s_adj', tf_name) + assign(adj_name, + tf_adj, + envir = dag$tf_environment) - # fetch the free 
node + # map from the free to constrained state in a new tensor tf_free <- get(free_name, envir = dag$tf_environment) - - # appy transformation node <- self$tf_from_free(tf_free, dag$tf_environment) - - # assign back to environment with base name (density will use this) assign(tf_name, node, envir = dag$tf_environment) @@ -242,16 +242,86 @@ variable_node <- R6Class ( } else if (self$constraint == 'low') { - y <- fl(upper) - tf_log1pe(x) + y <- fl(upper) - tf$exp(x) } else if (self$constraint == 'high') { - y <- tf_log1pe(x) + fl(lower) + y <- tf$exp(x) + fl(lower) + + } else { + + y <- x } y + }, + + # adjustments for univariate variables + tf_log_jacobian_adjustment = function (free) { + + ljac_none <- function (x) + fl(0) + + ljac_exp <- function (x) + tf$reduce_sum(x) + + ljac_logistic <- function (x) { + lrange <- log(self$upper - self$lower) + tf$reduce_sum(x - fl(2) * tf_log1pe(x) + lrange) + } + + ljac_corr_mat <- function (x) { + + # find dimension + n <- x$get_shape()$as_list()[1] + K <- (1 + sqrt(8 * n + 1)) / 2 + + # draw the rest of the owl + l1mz2 <- tf$log(1 - tf$square(tf$tanh(x))) + i <- rep(1:(K - 1), (K - 1):1) + powers <- tf$constant(K - i - 1, + dtype = tf_float(), + shape = shape(length(i), 1)) + fl(0.5) * tf$reduce_sum(powers * l1mz2) + tf$reduce_sum(l1mz2) + + } + + ljac_cov_mat <- function (x) { + + # find dimension + n <- x$get_shape()$as_list()[1] + K <- (sqrt(8 * n + 1) - 1) / 2 + + k <- seq_len(K) + powers <- tf$constant(K - k + 2, dtype = tf_float(), shape = c(K, 1)) + fl(K * log(2)) + tf$reduce_sum(powers * x[k - 1]) + + } + + fun <- switch (self$constraint, + none = ljac_none, + high = ljac_exp, + low = ljac_exp, + both = ljac_logistic, + correlation_matrix = ljac_corr_mat, + covariance_matrix = ljac_cov_mat) + + fun(free) + + }, + + # create a tensor giving the log jacobian adjustment for this variable + tf_adjustment = function (dag) { + + # find free version of node + free_tensor_name <- paste0(dag$tf_name(self), '_free') + free_tensor <- get(free_tensor_name, envir = dag$tf_environment) + + # apply jacobian adjustment to it + self$tf_log_jacobian_adjustment(free_tensor) + } ) @@ -260,8 +330,11 @@ variable_node <- R6Class ( # helper function to create a variable node # by default, make x (the node # containing the value) a free parameter of the correct dimension -vble = function(...) - variable_node$new(...) 
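To unpack ljac_exp in the hunk above: for a one-sided constraint the free-to-constrained map is, up to a shift and sign, y = exp(x), so log|dy/dx| = x and the adjustment is just the sum of the free values. A small base-R check of that change-of-variables identity (purely illustrative, not package code):

x <- 0.7                 # a free (unconstrained) value
y <- exp(x)              # the constrained (positive) value
log_jac <- x             # log|dy/dx| = log(exp(x)) = x

# an Exponential(1) log density on y, adjusted back to the free scale ...
dexp(y, log = TRUE) + log_jac
# ... equals the exact log density of X = log(Y) with Y ~ Exponential(1),
# which is x - exp(x)
x - exp(x)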
+vble = function (truncation, dim = 1) { + if (is.null(truncation)) truncation <- c(-Inf, Inf) + variable_node$new(lower = truncation[1], upper = truncation[2], dim = dim) +} + distribution_node <- R6Class ( 'distribution_node', @@ -270,10 +343,12 @@ distribution_node <- R6Class ( distribution_name = 'no distribution', discrete = NA, target = NULL, + user_node = NULL, + bounds = c(-Inf, Inf), truncation = NULL, parameters = list(), - initialize = function (name = 'no distribution', dim = NULL, discrete = FALSE) { + initialize = function (name = 'no distribution', dim = NULL, truncation = NULL, discrete = FALSE) { super$initialize(dim) @@ -282,10 +357,27 @@ distribution_node <- R6Class ( self$discrete <- discrete # initialize the target values of this distribution - self$add_target(self$create_target()) + self$add_target(self$create_target(truncation)) + + # if there's a truncation, it's different from the bounds, and it's a truncatable distribution, set the truncation + if (!is.null(truncation) & + !identical(truncation, self$bounds) & + !is.null(self$tf_cdf_function)) { + + self$truncation <- truncation + + } + + # set the target as the user node (user-facing representation) by default + self$user_node <- self$target }, + # create a target variable node (unconstrained by default) + create_target = function (truncation) { + vble(truncation, dim = self$dim) + }, + # create target node, add as a child, and give it this distribution add_target = function (new_target) { @@ -326,11 +418,16 @@ distribution_node <- R6Class ( }, + # which node to use af the *tf* target (overwritten by some distributions) + get_tf_target_node = function () { + self$target + }, + tf_log_density = function (dag) { # fetch inputs - - tf_target <- get(dag$tf_name(self$target), + tf_target_node <- self$get_tf_target_node() + tf_target <- get(dag$tf_name(tf_target_node), envir = dag$tf_environment) tf_parameters <- self$tf_fetch_parameters(dag) @@ -367,12 +464,12 @@ distribution_node <- R6Class ( lower <- self$truncation[1] upper <- self$truncation[2] - if (lower == -Inf) { + if (lower == self$bounds[1]) { # if only upper is constrained, just need the cdf at the upper offset <- self$tf_log_cdf_function(fl(upper), parameters) - } else if (upper == Inf) { + } else if (upper == self$bounds[2]) { # if only lower is constrained, get the log of the integral above it offset <- tf$log(fl(1) - self$tf_cdf_function(fl(lower), parameters)) diff --git a/R/operators.R b/R/operators.R index 77a64fb1..603ba01b 100644 --- a/R/operators.R +++ b/R/operators.R @@ -1,11 +1,10 @@ -#' @name greta-operators +#' @name operators #' -#' @title Operators for Greta Arrays +#' @title arithmetic, logical and relational operators for greta arrays #' #' @description This is a list of currently implemented arithmetic, logical and -#' relational operators, and extract/replace syntax to combine greta arrays -#' into probabilistic models. Also see \link{greta-functions} and -#' \link{greta-transforms}. +#' relational operators to combine greta arrays into probabilistic models. +#' Also see \link{functions} and \link{transforms}. 
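With truncation now handled inside the distribution constructors (and checked against each distribution's bounds) rather than via truncated variables, the user-facing idiom becomes roughly the sketch below, assuming the truncation argument documented later in this diff:

# a standard normal truncated to positive values
tn = normal(0, 1, truncation = c(0, Inf))

# distributions with bounded support check the truncation against those
# bounds, e.g. a lognormal may only be truncated within (0, Inf)
x = lognormal(0, 1, truncation = c(0.5, 2))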
#' #' @section Usage: \preformatted{ #' # arithmetic operators @@ -123,7 +122,7 @@ NULL `%*%.default` <- function (x, y) .Primitive("%*%")(x, y) -#' @rdname greta-overloaded +#' @rdname overloaded #' @export `%*%` <- function (x, y) { diff --git a/R/overloaded.R b/R/overloaded.R index 5bd9a598..b6562171 100644 --- a/R/overloaded.R +++ b/R/overloaded.R @@ -1,4 +1,4 @@ -#' @name greta-overloaded +#' @name overloaded #' #' @title Functions overloaded by greta #' diff --git a/R/package.R b/R/package.R new file mode 100644 index 00000000..f981a84c --- /dev/null +++ b/R/package.R @@ -0,0 +1,58 @@ +# package file + +#' greta: simple and scalable statistical modelling in R +#' @name greta +#' +#' @description greta lets you write statistical models interactively in native +#' R code, then sample from them efficiently using Hamiltonian Monte Carlo. +#' +#' The computational heavy lifting is done by TensorFlow, Google's automatic +#' differentiation library. So greta is particularly fast where the model +#' contains lots of linear algebra, and greta models can be run across CPU +#' clusters or on GPUs. +#' +#' See the simple example below, and take a look at the +#' \href{https://goldingn.github.io/greta}{greta website} for more information +#' including +#' \href{https://goldingn.github.io/greta/get_started.html}{tutorials} and +#' \href{https://goldingn.github.io/greta/example_models.html}{examples}. +#' +#' @docType package +#' @import tensorflow +#' @import R6 +#' @importFrom grDevices colorRampPalette +#' @examples +#' \dontrun{ +#' # a simple Bayesian regression model for the iris data +#' +#' # priors +#' int = normal(0, 5) +#' coef = normal(0, 3) +#' sd = lognormal(0, 3) +#' +#' # likelihood +#' mean <- int + coef * iris$Petal.Length +#' distribution(iris$Sepal.Length) = normal(mean, sd) +#' +#' # build and sample +#' m <- model(int, coef, sd) +#' draws <- mcmc(m, n_samples = 100) +#' } +NULL + +# crate the node list object whenever the package is loaded +.onLoad <- function (libname, pkgname) { + + # silence TF's CPU instructions message + Sys.setenv(TF_CPP_MIN_LOG_LEVEL=2) + + # warn if TF version is bad + check_tf_version('startup') + + # default float type + if (reticulate::py_module_available('tensorflow')) + options(greta_tf_float = tf$float32) + +} + + diff --git a/R/distributions.R b/R/probability_distributions.R similarity index 70% rename from R/distributions.R rename to R/probability_distributions.R index de98685a..70f3142e 100644 --- a/R/distributions.R +++ b/R/probability_distributions.R @@ -9,6 +9,9 @@ uniform_distribution <- R6Class ( initialize = function (min, max, dim) { + if (is.greta_array(min) | is.greta_array(max)) + stop ('min and max must be fixed, they cannot be another greta array') + good_types <- is.numeric(min) && length(min) == 1 & is.numeric(max) && length(max) == 1 @@ -38,6 +41,8 @@ uniform_distribution <- R6Class ( self$min <- min self$max <- max + self$bounds <- c(min, max) + # initialize the rest super$initialize('uniform', dim) @@ -50,45 +55,40 @@ uniform_distribution <- R6Class ( }, - # default value - create_target = function() { - vble(lower = self$min, - upper = self$max, - dim = self$dim) + # default value (ignore any truncation arguments) + create_target = function (...) 
{ + vble(truncation = c(self$min, self$max), + dim = self$dim) }, tf_distrib = function (parameters) { - tf$contrib$distributions$Uniform(low = parameters$min, - high = parameters$max) - }, - # weird hack to make TF see a gradient here - tf_log_density_function = function (x, parameters) { - fl(self$log_density) + x * fl(0) + tf_ld <- fl(self$log_density) + + # weird hack to make TF see a gradient here + log_prob = function (x) + tf_ld + x * fl(0) + + list(log_prob = log_prob, cdf = NULL, log_cdf = NULL) + } ) ) - normal_distribution <- R6Class ( 'normal_distribution', inherit = distribution_node, public = list( - initialize = function (mean, sd, dim) { + initialize = function (mean, sd, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(mean, sd, target_dim = dim) - super$initialize('normal', dim) + super$initialize('normal', dim, truncation) self$add_parameter(mean, 'mean') self$add_parameter(sd, 'sd') }, - # default value - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Normal(loc = parameters$mean, scale = parameters$sd) @@ -102,18 +102,15 @@ lognormal_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (meanlog, sdlog, dim) { + initialize = function (meanlog, sdlog, dim, truncation) { dim <- check_dims(meanlog, sdlog, target_dim = dim) - super$initialize('lognormal', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('lognormal', dim, truncation) self$add_parameter(meanlog, 'meanlog') self$add_parameter(sdlog, 'sdlog') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { mean <- parameters$meanlog @@ -154,11 +151,6 @@ bernoulli_distribution <- R6Class ( self$add_parameter(prob, 'prob') }, - # default value (should get overwritten anyway!) - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Bernoulli(probs = parameters$prob) }, @@ -185,11 +177,6 @@ binomial_distribution <- R6Class ( }, - # default value (should get overwritten anyway!) - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Binomial(total_count = parameters$size, probs = parameters$prob) @@ -217,11 +204,6 @@ beta_binomial_distribution <- R6Class ( }, - # default value (should get overwritten anyway!) - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { size <- parameters$size @@ -257,11 +239,6 @@ poisson_distribution <- R6Class ( self$add_parameter(lambda, 'lambda') }, - # default value (should get overwritten anyway!) - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Poisson(rate = parameters$lambda) }, @@ -286,11 +263,6 @@ negative_binomial_distribution <- R6Class ( self$add_parameter(prob, 'prob') }, - # default value (should get overwritten anyway!) - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$NegativeBinomial(total_count = parameters$size, probs = fl(1) - parameters$prob) @@ -317,11 +289,6 @@ hypergeometric_distribution <- R6Class ( self$add_parameter(k, 'k') }, - # default value (should get overwritten anyway!) 
- create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { m <- parameters$m @@ -347,20 +314,16 @@ gamma_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (shape, rate, dim) { + initialize = function (shape, rate, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(shape, rate, target_dim = dim) - super$initialize('gamma', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('gamma', dim, truncation) self$add_parameter(shape, 'shape') self$add_parameter(rate, 'rate') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - - tf_distrib = function (parameters) { tf$contrib$distributions$Gamma(concentration = parameters$shape, rate = parameters$rate) @@ -374,19 +337,16 @@ inverse_gamma_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (alpha, beta, dim) { + initialize = function (alpha, beta, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(alpha, beta, target_dim = dim) - super$initialize('inverse_gamma', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('inverse_gamma', dim, truncation) self$add_parameter(alpha, 'alpha') self$add_parameter(beta, 'beta') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$InverseGamma(concentration = parameters$alpha, rate = parameters$beta) @@ -395,25 +355,21 @@ inverse_gamma_distribution <- R6Class ( ) ) - weibull_distribution <- R6Class ( 'weibull_distribution', inherit = distribution_node, public = list( - initialize = function (shape, scale, dim) { + initialize = function (shape, scale, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(shape, scale, target_dim = dim) - super$initialize('weibull', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('weibull', dim, truncation) self$add_parameter(shape, 'shape') self$add_parameter(scale, 'scale') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { a <- parameters$shape @@ -443,18 +399,15 @@ exponential_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (rate, dim) { + initialize = function (rate, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(rate, target_dim = dim) - super$initialize('exponential', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('exponential', dim, truncation) self$add_parameter(rate, 'rate') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Exponential(rate = parameters$rate) } @@ -467,19 +420,16 @@ pareto_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (a, b, dim) { + initialize = function (a, b, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(a, b, target_dim = dim) - super$initialize('pareto', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('pareto', dim, truncation) self$add_parameter(a, 'a') self$add_parameter(b, 'b') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { 
a <- parameters$a @@ -506,20 +456,15 @@ student_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (df, mu, sigma, dim) { + initialize = function (df, mu, sigma, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(df, mu, sigma, target_dim = dim) - super$initialize('student', dim) + super$initialize('student', dim, truncation) self$add_parameter(df, 'df') self$add_parameter(mu, 'mu') self$add_parameter(sigma, 'sigma') }, - # default value - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$StudentT(df = parameters$df, loc = parameters$mu, @@ -534,19 +479,14 @@ laplace_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (mu, sigma, dim) { + initialize = function (mu, sigma, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(mu, sigma, target_dim = dim) - super$initialize('laplace', dim) + super$initialize('laplace', dim, truncation) self$add_parameter(mu, 'mu') self$add_parameter(sigma, 'sigma') }, - # default value - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Laplace(loc = parameters$mu, scale = parameters$sigma) @@ -560,19 +500,16 @@ beta_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (shape1, shape2, dim) { + initialize = function (shape1, shape2, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(shape1, shape2, target_dim = dim) - super$initialize('beta', dim) + check_unit(truncation) + self$bounds <- c(0, 1) + super$initialize('beta', dim, truncation) self$add_parameter(shape1, 'shape1') self$add_parameter(shape2, 'shape2') }, - # default value - create_target = function() { - vble(lower = 0, upper = 1, dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Beta(concentration1 = parameters$shape1, concentration0 = parameters$shape2) @@ -586,19 +523,14 @@ cauchy_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (location, scale, dim) { + initialize = function (location, scale, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(location, scale, target_dim = dim) - super$initialize('cauchy', dim) + super$initialize('cauchy', dim, truncation) self$add_parameter(location, 'location') self$add_parameter(scale, 'scale') }, - # default value - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { loc <- parameters$location @@ -625,18 +557,15 @@ chi_squared_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (df, dim) { + initialize = function (df, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(df, target_dim = dim) - super$initialize('chi_squared', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('chi_squared', dim, truncation) self$add_parameter(df, 'df') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Chi2(df = parameters$df) } @@ -644,25 +573,19 @@ chi_squared_distribution <- R6Class ( ) ) - logistic_distribution <- R6Class ( 'logistic_distribution', inherit = distribution_node, public = list( - initialize = function (location, scale, dim) { + initialize = function (location, 
scale, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(location, scale, target_dim = dim) - super$initialize('logistic', dim) + super$initialize('logistic', dim, truncation) self$add_parameter(location, 'location') self$add_parameter(scale, 'scale') }, - # default value - create_target = function() { - vble(dim = self$dim) - }, - tf_distrib = function (parameters) { tf$contrib$distributions$Logistic(loc = parameters$location, scale = parameters$scale) @@ -681,19 +604,16 @@ f_distribution <- R6Class ( inherit = distribution_node, public = list( - initialize = function (df1, df2, dim) { + initialize = function (df1, df2, dim, truncation) { # add the nodes as children and parameters dim <- check_dims(df1, df2, target_dim = dim) - super$initialize('d', dim) + check_positive(truncation) + self$bounds <- c(0, Inf) + super$initialize('d', dim, truncation) self$add_parameter(df1, 'df1') self$add_parameter(df2, 'df2') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { df1 <- parameters$df1 @@ -767,16 +687,12 @@ dirichlet_distribution <- R6Class ( # coerce the parameter arguments to nodes and add as children and # parameters - super$initialize('dirichlet', c(dim, length(alpha))) + self$bounds <- c(0, Inf) + super$initialize('dirichlet', c(dim, length(alpha)), truncation = c(0, Inf)) self$add_parameter(alpha, 'alpha') }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { # transpose and scale probs to get absolute density correct alpha <- tf$transpose(parameters$alpha) @@ -847,11 +763,6 @@ dirichlet_multinomial_distribution <- R6Class ( }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { # transpose and scale probs to get absolute density correct alpha <- tf$transpose(parameters$alpha) @@ -922,11 +833,6 @@ multinomial_distribution <- R6Class ( }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { # transpose and scale probs to get absolute density correct probs <- tf$transpose(parameters$prob) @@ -988,11 +894,6 @@ categorical_distribution <- R6Class ( }, - # default value - create_target = function() { - vble(lower = 0, dim = self$dim) - }, - tf_distrib = function (parameters) { # transpose and scale probs to get absolute density correct probs <- tf$transpose(parameters$prob) @@ -1078,13 +979,37 @@ multivariate_normal_distribution <- R6Class ( }, - # default value - create_target = function() { - vble(dim = self$dim) + # fetch the tensors for the parameters, using the cholesky factor of Sigma + # if available + tf_fetch_parameters = function (dag) { + + # find names + tf_names <- lapply(self$parameters, dag$tf_name) + + # replace Sigma's tensor with the cholesky factor, if available + cf <- self$parameters$Sigma$representations$cholesky_factor + if (!is.null(cf)) + tf_names$Sigma <- dag$tf_name(cf) + + # fetch tensors + lapply(tf_names, get, envir = dag$tf_environment) + }, tf_distrib = function (parameters) { - L <- tf$cholesky(parameters$Sigma) + + # check if Sigma (the node version) has a cholesky factor to use + cf <- self$parameters$Sigma$representations$cholesky_factor + + # how to make sure that's the value being used as the parameter? 
+ + # need to check the parameters definition bit + + if (is.null(cf)) + L <- tf$cholesky(parameters$Sigma) + else + L <- tf$transpose(parameters$Sigma) + mu = tf$transpose(parameters$mean) tf$contrib$distributions$MultivariateNormalTriL(loc = mu, scale_tril = L) @@ -1129,19 +1054,47 @@ wishart_distribution <- R6Class ( }, - # default value - create_target = function() { + # default value, cholesky factor (ignores truncation) + create_target = function (truncation) { # handle reshaping via a greta array - free_greta_array <- vble(dim = prod(self$dim)) - matrix_greta_array <- flat_to_symmetric(free_greta_array, self$dim) - matrix_greta_array$node + k <- self$dim[1] + free_greta_array <- vble(truncation = c(-Inf, Inf), + dim = k + k * (k - 1) / 2) + free_greta_array$constraint = "covariance_matrix" + + # first create a greta array for the cholesky + chol_greta_array <- flat_to_chol(free_greta_array, self$dim) + + # create symmetric matrix to return as target node + matrix_greta_array <- chol_to_symmetric(chol_greta_array) + target_node <- matrix_greta_array$node + + # assign the cholesky factor as a representation of it + target_node$representations$cholesky_factor <- chol_greta_array$node + + # return the symmetric node + target_node }, tf_distrib = function (parameters) { - tf$contrib$distributions$WishartFull(df = parameters$df, - scale = parameters$Sigma) + + # if there is a cholesky factor for Sigma,use that + is_cholesky <- !is.null(self$parameters$Sigma$representations$cholesky_factor) + + if (is_cholesky) { + + tf$contrib$distributions$WishartCholesky(df = parameters$df, + scale = parameters$Sigma) + + } else { + + tf$contrib$distributions$WishartFull(df = parameters$df, + scale = parameters$Sigma) + + } + }, tf_log_density_function = function (x, parameters) { @@ -1156,6 +1109,117 @@ wishart_distribution <- R6Class ( ) ) +lkj_correlation_distribution <- R6Class ( + 'lkj_correlation_distribution', + inherit = distribution_node, + public = list( + + initialize = function (eta, dim = 2) { + + # check dim is a scalar integer greater than 1 + dim_old <- dim + dim <- as.integer(dim) + if (length(dim) > 1 || dim <= 1 || !is.finite(dim)) { + + stop ('dim must be a scalar integer greater than one, but was: ', + capture.output(dput(dim_old)), + call. = FALSE) + + } + + if (!is.greta_array(eta)) { + + if (!is.numeric(eta) || !length(eta) == 1 || eta <= 0) { + stop ("eta must be a positive scalar value, or a scalar greta array", + call. = FALSE) + } + + } + + # add the nodes as children and parameters + eta <- as.greta_array(eta) + + if (!is_scalar(eta)) { + + stop ("eta must be a scalar, but had dimensions: ", + capture.output(dput(dim(eta))), + call. 
= FALSE) + + } + + super$initialize('lkj_correlation', c(dim, dim)) + self$add_parameter(eta, 'eta') + + # make the initial value PD + self$value(unknowns(dims = c(dim, dim), data = diag(dim))) + + }, + + # default (cholesky factor, ignores truncation) + create_target = function (truncation) { + + # handle reshaping via a greta array + k <- self$dim[1] + free_greta_array <- vble(truncation = c(-Inf, Inf), + dim = k * (k - 1) / 2) + free_greta_array$constraint = "correlation_matrix" + + # first create a greta array for the cholesky + chol_greta_array <- flat_to_chol(free_greta_array, self$dim, correl = TRUE) + + # create symmetric matrix to return as target node + matrix_greta_array <- chol_to_symmetric(chol_greta_array) + target_node <- matrix_greta_array$node + + # assign the cholesky factor as a representation of it + target_node$representations$cholesky_factor <- chol_greta_array$node + + # return the symmetric node + target_node + + }, + + # if the target has a cholesky factor, use that + get_tf_target_node = function () { + + tf_target_node <- self$target$representations$cholesky_factor + + if (is.null(tf_target_node)) + tf_target_node <- self$target + + tf_target_node + + }, + + tf_distrib = function (parameters) { + + eta <- parameters$eta + + # if the cholesky factor exists, we'll be using that + is_cholesky <- !is.null(self$target$representations$cholesky_factor) + + log_prob = function (x) { + + if (!is_cholesky) + x <- tf$cholesky(x) + + diags <- tf$diag_part(x) + det <- tf$square(tf$reduce_prod(diags)) + prob <- det ^ (eta - fl(1)) + tf$log(prob) + + } + + list(log_prob = log_prob, cdf = NULL, log_cdf = NULL) + + }, + + # no CDF for multivariate distributions + tf_cdf_function = NULL, + tf_log_cdf_function = NULL + + ) +) # shorthand for distribution parameter constructors distrib <- function (distribution, ...) { @@ -1164,22 +1228,25 @@ distrib <- function (distribution, ...) { constructor <- get(paste0(distribution, '_distribution')) distrib <- constructor$new(...) - # return the value node as a greta array - value <- distrib$target + # return the user-facing representation of the node as a greta array + value <- distrib$user_node as.greta_array(value) } - # export constructors -#' @name greta-distributions -#' @title greta probability distributions +#' @name distributions +#' @title probability distributions #' @description These functions can be used to define random variables in a #' greta model. They return a variable greta array that follows the specified #' distribution. This variable greta array can be used to represent a #' parameter with prior distribution, or used with \code{\link{distribution}} -#' to define a distribution over an existing greta array. +#' to define a distribution over a data greta array. +#' +#' @param truncation a length-two vector giving values between which to truncate +#' the distribution, similarly to the \code{lower} and \code{upper} arguments +#' to \code{\link{variable}} #' #' @param min,max scalar values giving optional limits to \code{uniform} #' variables. Like \code{lower} and \code{upper}, these must be specified as @@ -1189,7 +1256,7 @@ distrib <- function (distribution, ...) { #' #' @param mean,meanlog,location,mu unconstrained parameters #' -#' @param sd,sdlog,sigma,lambda,shape,rate,df,scale,shape1,shape2,alpha,beta,df1,df2,a,b +#' @param sd,sdlog,sigma,lambda,shape,rate,df,scale,shape1,shape2,alpha,beta,df1,df2,a,b,eta #' positive parameters, \code{alpha} must be a vector for \code{dirichlet} and \code{dirichlet_multinomial}. 
#' #' @param size,m,n,k positive integer parameter @@ -1242,33 +1309,34 @@ distrib <- function (distribution, ...) { #' corresponds: #' #' \tabular{ll}{ greta \tab reference\cr -#' \code{uniform} \tab \code{\link[stats:dunif]{stats::dunif}}\cr -#' \code{normal} \tab \code{\link[stats:dnorm]{stats::dnorm}}\cr -#' \code{lognormal} \tab \code{\link[stats:dlnorm]{stats::dlnorm}}\cr -#' \code{bernoulli} \tab \code{\link[extraDistr:dbern]{extraDistr::dbern}}\cr -#' \code{binomial} \tab \code{\link[stats:dbinom]{stats::dbinom}}\cr -#' \code{beta_binomial} \tab \code{\link[extraDistr:dbbinom]{extraDistr::dbbinom}}\cr -#' \code{negative_binomial} \tab \code{\link[stats:dnbinom]{stats::dnbinom}}\cr -#' \code{hypergeometric} \tab \code{\link[stats:dhyper]{stats::dhyper}}\cr -#' \code{poisson} \tab \code{\link[stats:dpois]{stats::dpois}}\cr -#' \code{gamma} \tab \code{\link[stats:dgamma]{stats::dgamma}}\cr -#' \code{inverse_gamma} \tab \code{\link[extraDistr:dinvgamma]{extraDistr::dinvgamma}}\cr -#' \code{weibull} \tab \code{\link[stats:dweibull]{stats::dweibull}}\cr -#' \code{exponential} \tab \code{\link[stats:dexp]{stats::dexp}}\cr -#' \code{pareto} \tab \code{\link[extraDistr:dpareto]{extraDistr::dpareto}}\cr -#' \code{student} \tab \code{\link[extraDistr:dnst]{extraDistr::dnst}}\cr -#' \code{laplace} \tab \code{\link[extraDistr:dlaplace]{extraDistr::dlaplace}}\cr -#' \code{beta} \tab \code{\link[stats:dbeta]{stats::dbeta}}\cr -#' \code{cauchy} \tab \code{\link[stats:dcauchy]{stats::dcauchy}}\cr -#' \code{chi_squared} \tab \code{\link[stats:dchisq]{stats::dchisq}}\cr -#' \code{logistic} \tab \code{\link[stats:dlogis]{stats::dlogis}}\cr -#' \code{f} \tab \code{\link[stats:df]{stats::df}}\cr -#' \code{multivariate_normal} \tab \code{\link[mvtnorm:dmvnorm]{mvtnorm::dmvnorm}}\cr -#' \code{multinomial} \tab \code{\link[stats:dmultinom]{stats::dmultinom}}\cr -#' \code{categorical} \tab {\code{\link[stats:dmultinom]{stats::dmultinom}} (size = 1)}\cr -#' \code{dirichlet} \tab \code{\link[extraDistr:ddirichlet]{extraDistr::ddirichlet}}\cr -#' \code{dirichlet_multinomial} \tab \code{\link[extraDistr:ddirmnom]{extraDistr::ddirmnom}}\cr -#' \code{wishart} \tab \code{\link[MCMCpack:dwish]{MCMCpack::dwish}}\cr } +#' \code{uniform} \tab \link[stats:dunif]{stats::dunif}\cr +#' \code{normal} \tab \link[stats:dnorm]{stats::dnorm}\cr +#' \code{lognormal} \tab \link[stats:dlnorm]{stats::dlnorm}\cr +#' \code{bernoulli} \tab \link[extraDistr:dbern]{extraDistr::dbern}\cr +#' \code{binomial} \tab \link[stats:dbinom]{stats::dbinom}\cr +#' \code{beta_binomial} \tab \link[extraDistr:dbbinom]{extraDistr::dbbinom}\cr +#' \code{negative_binomial} \tab \link[stats:dnbinom]{stats::dnbinom}\cr +#' \code{hypergeometric} \tab \link[stats:dhyper]{stats::dhyper}\cr +#' \code{poisson} \tab \link[stats:dpois]{stats::dpois}\cr +#' \code{gamma} \tab \link[stats:dgamma]{stats::dgamma}\cr +#' \code{inverse_gamma} \tab \link[extraDistr:dinvgamma]{extraDistr::dinvgamma}\cr +#' \code{weibull} \tab \link[stats:dweibull]{stats::dweibull}\cr +#' \code{exponential} \tab \link[stats:dexp]{stats::dexp}\cr +#' \code{pareto} \tab \link[extraDistr:dpareto]{extraDistr::dpareto}\cr +#' \code{student} \tab \link[extraDistr:dnst]{extraDistr::dnst}\cr +#' \code{laplace} \tab \link[extraDistr:dlaplace]{extraDistr::dlaplace}\cr +#' \code{beta} \tab \link[stats:dbeta]{stats::dbeta}\cr +#' \code{cauchy} \tab \link[stats:dcauchy]{stats::dcauchy}\cr +#' \code{chi_squared} \tab \link[stats:dchisq]{stats::dchisq}\cr +#' \code{logistic} \tab 
\link[stats:dlogis]{stats::dlogis}\cr +#' \code{f} \tab \link[stats:df]{stats::df}\cr +#' \code{multivariate_normal} \tab \link[mvtnorm:dmvnorm]{mvtnorm::dmvnorm}\cr +#' \code{multinomial} \tab \link[stats:dmultinom]{stats::dmultinom}\cr +#' \code{categorical} \tab {\link[stats:dmultinom]{stats::dmultinom} (size = 1)}\cr +#' \code{dirichlet} \tab \link[extraDistr:ddirichlet]{extraDistr::ddirichlet}\cr +#' \code{dirichlet_multinomial} \tab \link[extraDistr:ddirmnom]{extraDistr::ddirmnom}\cr +#' \code{wishart} \tab \link[stats:rWishart]{stats::rWishart}\cr +#' \code{lkj_correlation} \tab \href{https://rdrr.io/github/rmcelreath/rethinking/man/dlkjcorr.html}{rethinking::dlkjcorr}\cr } #' #' @examples #' # a uniform parameter constrained to be between 0 and 1 @@ -1307,143 +1375,142 @@ distrib <- function (distribution, ...) { #' theta = wishart(df = 5, Sigma = Sig) NULL -#' @rdname greta-distributions +#' @rdname distributions #' @export -uniform <- function (min, max, dim = NULL) { - - if (is.greta_array(min) | is.greta_array(max)) - stop ('min and max must be fixed, they cannot be another greta array') - +uniform <- function (min, max, dim = NULL) distrib('uniform', min, max, dim) -} - -#' @rdname greta-distributions +#' @rdname distributions #' @export -normal <- function (mean, sd, dim = NULL) - distrib('normal', mean, sd, dim) +normal <- function (mean, sd, dim = NULL, truncation = c(-Inf, Inf)) + distrib('normal', mean, sd, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -lognormal <- function (meanlog, sdlog, dim = NULL) - distrib('lognormal', meanlog, sdlog, dim) +lognormal <- function (meanlog, sdlog, dim = NULL, truncation = c(0, Inf)) + distrib('lognormal', meanlog, sdlog, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export bernoulli <- function (prob, dim = NULL) distrib('bernoulli', prob, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export binomial <- function (size, prob, dim = NULL) distrib('binomial', size, prob, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export beta_binomial <- function (size, alpha, beta, dim = NULL) distrib('beta_binomial', size, alpha, beta, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export negative_binomial <- function (size, prob, dim = NULL) distrib('negative_binomial', size, prob, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export hypergeometric <- function (m, n, k, dim = NULL) distrib('hypergeometric', m, n, k, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export poisson <- function (lambda, dim = NULL) distrib('poisson', lambda, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export -gamma <- function (shape, rate, dim = NULL) - distrib('gamma', shape, rate, dim) +gamma <- function (shape, rate, dim = NULL, truncation = c(0, Inf)) + distrib('gamma', shape, rate, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -inverse_gamma <- function (alpha, beta, dim = NULL) - distrib('inverse_gamma', alpha, beta, dim) +inverse_gamma <- function (alpha, beta, dim = NULL, truncation = c(0, Inf)) + distrib('inverse_gamma', alpha, beta, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -weibull <- function (shape, scale, dim = NULL) - distrib('weibull', shape, scale, dim) +weibull <- function (shape, scale, dim = NULL, truncation = c(0, Inf)) + distrib('weibull', shape, scale, dim, truncation) 
-#' @rdname greta-distributions +#' @rdname distributions #' @export -exponential <- function (rate, dim = NULL) - distrib('exponential', rate, dim) +exponential <- function (rate, dim = NULL, truncation = c(0, Inf)) + distrib('exponential', rate, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -pareto <- function (a, b, dim = NULL) - distrib('pareto', a, b, dim) +pareto <- function (a, b, dim = NULL, truncation = c(0, Inf)) + distrib('pareto', a, b, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -student <- function (df, mu, sigma, dim = NULL) - distrib('student', df, mu, sigma, dim) +student <- function (df, mu, sigma, dim = NULL, truncation = c(-Inf, Inf)) + distrib('student', df, mu, sigma, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -laplace <- function (mu, sigma, dim = NULL) - distrib('laplace', mu, sigma, dim) +laplace <- function (mu, sigma, dim = NULL, truncation = c(-Inf, Inf)) + distrib('laplace', mu, sigma, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -beta <- function (shape1, shape2, dim = NULL) - distrib('beta', shape1, shape2, dim) +beta <- function (shape1, shape2, dim = NULL, truncation = c(0, 1)) + distrib('beta', shape1, shape2, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -cauchy <- function (location, scale, dim = NULL) - distrib('cauchy', location, scale, dim) +cauchy <- function (location, scale, dim = NULL, truncation = c(-Inf, Inf)) + distrib('cauchy', location, scale, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -chi_squared <- function (df, dim = NULL) - distrib('chi_squared', df, dim) +chi_squared <- function (df, dim = NULL, truncation = c(0, Inf)) + distrib('chi_squared', df, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -logistic <- function (location, scale, dim = NULL) - distrib('logistic', location, scale, dim) +logistic <- function (location, scale, dim = NULL, truncation = c(-Inf, Inf)) + distrib('logistic', location, scale, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export -f <- function (df1, df2, dim = NULL) - distrib('f', df1, df2, dim) +f <- function (df1, df2, dim = NULL, truncation = c(0, Inf)) + distrib('f', df1, df2, dim, truncation) -#' @rdname greta-distributions +#' @rdname distributions #' @export multivariate_normal <- function (mean, Sigma, dim = 1) distrib('multivariate_normal', mean, Sigma, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export wishart <- function (df, Sigma) distrib('wishart', df, Sigma) -#' @rdname greta-distributions +#' @rdname distributions +#' @export +lkj_correlation <- function (eta, dim = 2) + distrib('lkj_correlation', eta, dim) + +#' @rdname distributions #' @export multinomial <- function (size, prob, dim = 1) distrib('multinomial', size, prob, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export categorical <- function (prob, dim = 1) distrib('categorical', prob, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export dirichlet <- function (alpha, dim = 1) distrib('dirichlet', alpha, dim) -#' @rdname greta-distributions +#' @rdname distributions #' @export dirichlet_multinomial <- function (size, alpha, dim = 1) distrib('dirichlet_multinomial', size, alpha, dim) diff --git a/R/progress_bar.R b/R/progress_bar.R index 266c1400..cbb1e889 
100644 --- a/R/progress_bar.R +++ b/R/progress_bar.R @@ -6,7 +6,8 @@ # 'phase' must be either 'warmup' or 'sampling' # 'iter' must be a length-two vector giving the total warmup and sampling # iterations respectively -create_progress_bar <- function (phase, iter, ...) { +# 'pb_update' gives the number of iterations between updates of the progress bar +create_progress_bar <- function (phase, iter, pb_update, ...) { # name for formatting name <- switch(phase, @@ -28,13 +29,23 @@ create_progress_bar <- function (phase, iter, ...) { name, count_pad) + pb <- progress::progress_bar$new(format = format_text, + total = iter_this, + incomplete = ' ', + clear = FALSE, + show_after = 0, + ...) + + # add the increment information and return + pb_update <- round(pb_update) + + if (!is.numeric(pb_update) || length(pb_update) != 1 || !is.finite(pb_update) || pb_update <= 0) + stop ("pb_update must be a finite, positive, scalar integer") + + assign("pb_update", pb_update, envir = pb$.__enclos_env__) + + pb - progress::progress_bar$new(format = format_text, - total = iter_this, - incomplete = ' ', - clear = FALSE, - show_after = 0, - ...) } @@ -42,29 +53,37 @@ create_progress_bar <- function (phase, iter, ...) { # to numerical instability # 'pb' is a progress_bar R6 object created by create_progress_bar # 'it' is the current iteration -# 'rejects' is the total number of rejections so far due o numerical instability +# 'rejects' is the total number of rejections so far due to numerical instability iterate_progress_bar <- function (pb, it, rejects) { - if (rejects > 0) { - reject_perc <- 100 * rejects / it - if (reject_perc < 1) { - reject_perc_string <- '<1' + increment <- pb$.__enclos_env__$pb_update + + if (it %% increment == 0) { + + if (rejects > 0) { + reject_perc <- 100 * rejects / it + if (reject_perc < 1) { + reject_perc_string <- '<1' + } else { + reject_perc_string <- prettyNum(round(reject_perc)) + } + # pad the end of the line to keep the update bar a consistent width + pad_char <- pmax(0, 2 - nchar(reject_perc_string)) + pad <- paste0(rep(' ', pad_char), collapse = '') + + reject_text <- paste0('| ', reject_perc_string, '% bad', pad) } else { - reject_perc_string <- prettyNum(round(reject_perc)) + reject_text <- ' ' } - # pad the end of the line to keep the update bar a consistent width - pad_char <- pmax(0, 2 - nchar(reject_perc_string)) - pad <- paste0(rep(' ', pad_char), collapse = '') - reject_text <- paste0('| ', reject_perc_string, '% bad', pad) - } else { - reject_text <- ' ' - } + total <- pb$.__enclos_env__$private$total + iter_pretty <- prettyNum(it, width = nchar(total)) - total <- pb$.__enclos_env__$private$total - iter_pretty <- prettyNum(it, width = nchar(total)) + amount <- ifelse(it > 0, increment, 0) + invisible(pb$tick(amount, + tokens = list(iter = iter_pretty, + rejection = reject_text))) - invisible(pb$tick(tokens = list(iter = iter_pretty, - rejection = reject_text))) + } } diff --git a/R/samplers.R b/R/samplers.R index cecacf42..4aaa56b4 100644 --- a/R/samplers.R +++ b/R/samplers.R @@ -1,250 +1,3 @@ -#' @name greta-inference -#' @title Statistical Inference on Greta Models -#' @description Carry out statistical inference on parameters of interest by -#' MCMC. \code{mcmc} runs the MCMC sampler. If the sampler is aborted before -#' finishing, the samples collected so far can be retrieved with -#' \code{stashed_samples()}. 
-NULL - -#' @rdname greta-inference -#' @export -#' @importFrom stats rnorm runif -#' @importFrom utils setTxtProgressBar txtProgressBar -#' -#' @param model greta_model object -#' @param method the method used to sample values. Currently only \code{hmc} is -#' implemented -#' @param n_samples the number of samples to draw (after any warm-up, but before -#' thinning) -#' @param thin the thinning rate; every \code{thin} samples is retained, the -#' rest are discarded -#' @param warmup the number of samples to spend warming up the sampler. During -#' this phase the sampler moves toward the highest density area and may tune -#' sampler hyperparameters. -#' @param verbose whether to print progress information to the console -#' @param control an optional named list of hyperparameters and options to -#' control behaviour of the sampler. See Details. -#' @param initial_values an optional named vector of initial values for the free -#' parameters in the model -#' - -#' @details If \code{verbose = TRUE}, the progress bar shows the number of -#' iterations so far and the expected time to complete the phase of model -#' fitting (warmup or sampling). Occasionally, a proposed set of parameters -#' can cause numerical instability (I.e. the log density or its gradient is -#' \code{NA}, \code{Inf} or \code{-Inf}); normally because the log joint -#' density is so low that it can't be represented as a floating point number. -#' When this happens, the progress bar will also display the proportion of -#' samples so far that were 'bad' (numerically unstable) and therefore -#' rejected. -#' If you're getting a lot of numerical instability, you might want to -#' manually define starting values to move the sampler into a more reasonable -#' part of the parameter space. Alternatively, you could redefine the model -#' (via \code{model}) to have double precision, though this will slow down -#' sampling. -#' -#' Currently, the only implemented MCMC procedure is static Hamiltonian -#' Monte Carlo (\code{method = 'hmc'}). During the warmup iterations, the -#' leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the -#' sampler efficiency. The \code{control} argument can be used to specify the -#' initial value for epsilon, along with two other hyperparameters: \code{Lmin} -#' and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the -#' upper and lower limits to the number of leapfrog steps per iteration (from -#' which the number is selected uniformly at random). -#' -#' The default control options for HMC are: -#' \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} -#' -#' @return an \code{mcmc.list} object that can be analysed using -#' functions from the coda package. This will contain mcmc samples of the -#' greta arrays used to create \code{model}. 
-#' -#' @examples -#' \dontrun{ -#' -#' # define a simple model -#' mu = variable() -#' sigma = lognormal(1, 0.1) -#' x = rnorm(10) -#' distribution(x) = normal(mu, sigma) -#' m <- model(mu, sigma) -#' -#' # carry out mcmc on the model -#' draws <- mcmc(m, -#' n_samples = 100, -#' warmup = 10) -#' } -mcmc <- function (model, - method = c('hmc'), - n_samples = 1000, - thin = 1, - warmup = 100, - verbose = TRUE, - control = list(), - initial_values = NULL) { - - method <- match.arg(method) - - # find variable names to label samples - target_greta_arrays <- model$target_greta_arrays - names <- names(target_greta_arrays) - - # check they're not data nodes, provide a useful error message if they are - are_data <- vapply(target_greta_arrays, - function (x) inherits(x$node, 'data_node'), - FUN.VALUE = FALSE) - - if (any(are_data)) { - is_are <- ifelse(sum(are_data) == 1, 'is a data greta array', 'are data greta arrays') - bad_greta_arrays <- paste(names[are_data], collapse = ', ') - msg <- sprintf('%s %s, data greta arrays cannot be sampled', - bad_greta_arrays, - is_are) - stop (msg, call. = FALSE) - } - - # get the dag containing the target nodes - dag <- model$dag - - # random starting locations - if (is.null(initial_values)) { - - # try several times - valid <- FALSE - attempts <- 1 - while (!valid & attempts < 10) { - - initial_values <- dag$example_parameters() - # increase the jitter each time - initial_values[] <- rnorm(length(initial_values), 0, 1 + attempts / 5) - - # test validity of values - valid <- valid_parameters(dag, initial_values) - attempts <- attempts + 1 - - } - - if (!valid) { - stop ('Could not find reasonable starting values after ', attempts, - ' attempts. Please specify initial values manually via the ', - 'initial_values argument to mcmc', - call. = FALSE) - } - - } else { - - if (!valid_parameters(dag, initial_values)) { - stop ('The log density and gradients could not be evaluated at these ', - 'initial values.', - call. 
= FALSE) - } - - } - - - # get default control options - con <- switch(method, - hmc = list(Lmin = 10, - Lmax = 20, - epsilon = 0.005)) - - # update them with user overrides - con[names(control)] <- control - - # fetch the algorithm - method <- switch(method, - hmc = hmc) - - # if warmup is required, do that now and update init - if (warmup > 0) { - - if (verbose) - pb_warmup <- create_progress_bar('warmup', c(warmup, n_samples)) - else - pb_warmup <- NULL - - # run it - warmup_draws <- method(dag = dag, - init = initial_values, - n_samples = warmup, - thin = thin, - verbose = verbose, - pb = pb_warmup, - tune = TRUE, - stash = FALSE, - control = con) - - # use the last draw of the full parameter vector as the init - initial_values <- attr(warmup_draws, 'last_x') - con <- attr(warmup_draws, 'control') - - } - - if (verbose) - pb_sampling <- create_progress_bar('sampling', c(warmup, n_samples)) - else - pb_sampling <- NULL - - # run the sampler - draws <- method(dag = dag, - init = initial_values, - n_samples = n_samples, - thin = thin, - verbose = verbose, - pb = pb_sampling, - tune = FALSE, - stash = TRUE, - control = con) - - # if this was successful, trash the stash, prepare and return the draws - rm('trace_stash', envir = greta_stash) - prepare_draws(draws) - -} - - -#' @importFrom coda mcmc mcmc.list -prepare_draws <- function (draws) { - # given a matrix of draws returned by the sampler, prepare it and return - draws_df <- data.frame(draws) - draws_mcmc <- coda::mcmc(draws_df) - coda::mcmc.list(draws_mcmc) -} - -#' @rdname greta-inference -#' @export -#' @importFrom stats na.omit -#' -#' @details \code{stashed_samples()} returns an \code{mcmc.list} object if (and -#' only if) the sampler was aborted during the sampling phase. -stashed_samples <- function () { - - stashed <- exists('trace_stash', envir = greta_stash) - - if (stashed) { - - # get them, remove the NAs, and return - draws <- greta_stash$trace_stash - draws_clean <- na.omit(draws) - draws_prepped <- prepare_draws(draws_clean) - - return (draws_prepped) - - } else { - - return (invisible()) - - } - -} - -# create an object stash in greta's namespace, to return traces to the user when -# they abort a run -greta_stash <- new.env() - -stash_trace <- function (trace) - assign('trace_stash', trace, envir = greta_stash) - hmc <- function (dag, init, n_samples, @@ -270,6 +23,10 @@ hmc <- function (dag, numerical_rejections <- 0 + # start the progress bar + if (verbose) + iterate_progress_bar(pb = pb, it = 0, rejects = 0) + # set initial location, log joint density and gradients x <- init dag$send_parameters(x) @@ -292,9 +49,6 @@ hmc <- function (dag, if (stash) on.exit(stash_trace(trace)) - # set up log joint density store - ljd <- rep(NA, n_samples) - # track acceptance accept_trace <- rep(0, n_samples) @@ -310,20 +64,20 @@ hmc <- function (dag, x_old <- x logprob_old <- logprob grad_old <- grad - p_old <- rnorm(npar) + p <- p_old <- rnorm(npar) # start leapfrog steps reject <- FALSE - p <- p_old + 0.5 * epsilon * grad + # p <- p_old + 0.5 * epsilon * grad n_steps <- base::sample(Lmin:Lmax, 1) for (l in seq_len(n_steps)) { # step + p <- p + 0.5 * epsilon * grad x <- x + epsilon * p # send parameters dag$send_parameters(x) - logprob <- dag$log_density() grad <- dag$gradients() # check gradients are finite @@ -332,12 +86,10 @@ hmc <- function (dag, break() } - p <- p + epsilon * grad + p <- p + 0.5 * epsilon * grad } - p <- p - 0.5 * epsilon * grad - # if the step was bad, reject it out of hand if (reject) { @@ -351,11 +103,12 @@ 
hmc <- function (dag, # otherwise do the Metropolis accept/reject step # inner products - p_prod <- (t(p) %*% p)[1, 1] - p_prod_old <- (t(p_old) %*% p_old)[1, 1] + p_prod <- 0.5 * sum(p ^ 2) + p_prod_old <- 0.5 * sum(p_old ^ 2) # acceptance ratio - log_accept_ratio = logprob - 0.5 * p_prod - logprob_old + 0.5 * p_prod_old + logprob <- dag$log_density() + log_accept_ratio = logprob - p_prod - logprob_old + p_prod_old log_u = log(runif(1)) if (log_u < log_accept_ratio) { @@ -382,7 +135,6 @@ hmc <- function (dag, if (i %% thin == 0) { dag$send_parameters(x) trace[i / thin, ] <- dag$trace_values() - ljd[i / thin] <- dag$log_density() } if (verbose) @@ -417,7 +169,6 @@ hmc <- function (dag, control$epsilon <- mean(epsilon_trace[start:end], na.rm = TRUE) } - attr(trace, 'density') <- -ljd attr(trace, 'last_x') <- x attr(trace, 'control') <- control trace diff --git a/R/structures.R b/R/structures.R index 147b8da4..9917e133 100644 --- a/R/structures.R +++ b/R/structures.R @@ -1,5 +1,5 @@ -#' @name greta-structures -#' @title greta Data Structures +#' @name structures +#' @title create data greta arrays #' @description These structures can be used to set up more complex models. For #' example, scalar parameters can be embedded in a greta array by first #' creating a greta array with \code{zeros()} or \code{ones()}, and then @@ -18,7 +18,7 @@ NULL #' @export -#' @rdname greta-structures +#' @rdname structures #' @examples #' # a 3 row, 4 column greta array of 0s #' z <- zeros(3, 4) @@ -27,7 +27,7 @@ zeros <- function (...) as.greta_array(array(data = 0, dim = c(...))) #' @export -#' @rdname greta-structures +#' @rdname structures #' @examples #' # a 3x3x3 greta array of 1s #' z <- ones(3, 3, 3) @@ -36,7 +36,7 @@ ones <- function (...) as.greta_array(array(data = 1, dim = c(...))) #' @export -#' @rdname greta-structures +#' @rdname structures #' @examples #' # a 2x4 greta array filled with pi #' z <- greta_array(pi, dim = c(2, 4)) diff --git a/R/transformations.R b/R/transforms.R similarity index 84% rename from R/transformations.R rename to R/transforms.R index 2169a27c..a5131cc6 100644 --- a/R/transformations.R +++ b/R/transforms.R @@ -1,10 +1,9 @@ -#' @name greta-transforms -#' @aliases greta-inverse-links -#' @title Transformation Functions for Greta Arrays +#' @name transforms +#' @aliases inverse-links +#' @title transformation functions for greta arrays #' -#' @description transformations for greta arrays, which may also be -#' used as inverse link functions. Also see \link{greta-operators} and -#' \link{greta-functions}. +#' @description transformations for greta arrays, which may also be used as +#' inverse link functions. Also see \link{operators} and \link{functions}. #' #' @param x a real-valued (i.e. 
values ranging from -Inf to Inf) greta array to #' transform to a constrained value @@ -51,31 +50,31 @@ tf_icauchit <- function (x) tf_log1pe <- function (x) tf$nn$softplus(x) -#' @rdname greta-transforms +#' @rdname transforms #' @export iprobit <- function (x) op('iprobit', x, tf_operation = 'tf_iprobit') -#' @rdname greta-transforms +#' @rdname transforms #' @export ilogit <- function (x) op('ilogit', x, tf_operation = 'tf_ilogit') -#' @rdname greta-transforms +#' @rdname transforms #' @export icloglog <- function (x) op('icloglog', x, tf_operation = 'tf_icloglog') -#' @rdname greta-transforms +#' @rdname transforms #' @export icauchit <- function (x) op('icauchit', x, tf_operation = 'tf_icauchit') -#' @rdname greta-transforms +#' @rdname transforms #' @export log1pe <- function (x) op('log1pe', x, diff --git a/R/utils.R b/R/utils.R index 990a3e6e..48aaa7d9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -190,33 +190,50 @@ all_greta_arrays <- function (env = parent.frame(), } -# check the version of tensorflow is valid. error, warn, or message if not and -# (if not an error) return an invisible logical saying whether it is valid -check_tf_version <- function (alert = c('error', 'warn', 'message')) { +# check tensorflow is installed and the version of tensorflow is valid. error, +# warn, or message if not and (if not an error) return an invisible logical +# saying whether it is valid +check_tf_version <- function (alert = c('none', 'error', 'warn', 'message', 'startup')) { alert <- match.arg(alert) + text <- NULL + available <- TRUE - tf_version <- tf$`__version__` - tf_version_split <- strsplit(tf_version, '.', fixed = TRUE)[[1]] - tf_version_valid <- as.numeric(tf_version_split[1]) >= 1 + if (!reticulate::py_module_available('tensorflow')) { - if (!tf_version_valid) { + text <- "TensorFlow isn't installed." + available <- FALSE - text <- paste0('\n\n greta requires TensorFlow version 1.0.0 or higher, ', - 'but you have version ', tf_version, '\n ', - 'You can write models, but not sample from them.\n ', - 'See https://www.tensorflow.org/install for installation ', - 'instructions.\n\n') + } else { + + tf_version <- tf$`__version__` + tf_version_split <- strsplit(tf_version, '.', fixed = TRUE)[[1]] + tf_version_valid <- as.numeric(tf_version_split[1]) >= 1 + + if (!tf_version_valid) { + + text <- paste0("you have version ", tf_version) + available <- FALSE + + } + + } + + if (!is.null(text)) { + text <- paste0("\n\n greta requires TensorFlow version 1.0.0 or higher, ", + "but ", text, "\n ", + "Use install_tensorflow() to install the latest version.", + "\n\n") switch(alert, error = stop (text, call. = FALSE), warn = warning (text, call. 
= FALSE), - message = message(text)) - + message = message(text), + startup = packageStartupMessage(text), + none = NULL) } - # if not an error, return a logical on whether it was valid - invisible(tf_version_valid) + invisible(available) } @@ -231,31 +248,115 @@ tf_lbeta <- function (a, b) # given a flat tensor, convert it into a square symmetric matrix by considering # it as the non-zero elements of the lower-triangular decomposition of the # square matrix -tf_flat_to_symmetric = function (x, dims) { +tf_flat_to_chol = function (x, dims) { - # create a dummy array to find the indices + # indices to the cholesky factor L_dummy <- dummy(dims) - indices <- sort(L_dummy[upper.tri(L_dummy, diag = TRUE)]) + indices_diag <- diag(L_dummy) + indices_offdiag <- sort(L_dummy[upper.tri(L_dummy, diag = FALSE)]) + + # indices to the free state + x_index_diag <- seq_along(indices_diag) - 1 + x_index_offdiag <- length(indices_diag) + seq_along(indices_offdiag) - 1 # create an empty vector to fill with the values - values <- tf$zeros(shape(prod(dims), 1), dtype = tf_float()) - values <- tf_recombine(values, indices, x) + values_0 <- tf$zeros(shape(prod(dims), 1), dtype = tf_float()) + values_0_diag <- tf_recombine(values_0, indices_diag, tf$exp(x[x_index_diag])) + values_z <- tf_recombine(values_0_diag, indices_offdiag, x[x_index_offdiag]) + + # reshape into lower triangular and return + tf$reshape(values_z, shape(dims[1], dims[2])) + +} + +# convert an unconstrained vector into symmetric correlation matrix +tf_flat_to_chol_correl = function (x, dims) { + + # to -1, 1 scale + y <- tf$tanh(x) + + k <- dims[1] + + # list of indices mapping relevant part of each row to an element of y + y_index_list <- list() + count <- 0 + for (i in 1:(k - 1)) { + nelem <- k - i + y_index_list[[i]] <- count + seq_len(nelem) - 1 + count <- count + nelem + } + + # dummy list to store transformed versions of rows + values_list <- y_index_list + values_list[[1]] <- tf$reshape(y[y_index_list[[1]]], shape(k - 1)) + sum_sqs <- tf$square(values_list[[1]]) + + if (k > 2) { + + for (i in 2:(k - 1)) { + # relevant columns (0-indexed) + idx <- i:(k - 1) - 1 + # components of z on this row (straight from y) + z <- tf$reshape(y[y_index_list[[i]]], shape(k - i)) + # assign to w, using relevant parts of the sum of squares + values_list[[i]] <- z * tf$sqrt(fl(1) - sum_sqs[idx]) + # increment sum of squares + sum_sqs_part <- tf$square(values_list[[i]]) + sum_sqs[idx] + sum_sqs <- tf_recombine(sum_sqs, idx, sum_sqs_part) + } + + } + + # dummy array to find the indices + L_dummy <- dummy(dims) + indices_diag <- diag(L_dummy) + indices_offdiag <- sort(L_dummy[upper.tri(L_dummy, diag = FALSE)]) + + # diagonal & off-diagonal elements + values_diag <- tf$concat(list(tf$ones(1L, dtype = tf_float()), + sqrt(fl(1) - sum_sqs)), 0L) + values_offdiag <- tf$concat(values_list, 0L) + + # plug elements into a vector of 0s + values_0 <- tf$zeros(shape(prod(dims), 1), dtype = tf_float()) + values_0_diag <- tf_recombine(values_0, indices_diag, values_diag) + values_z <- tf_recombine(values_0_diag, indices_offdiag, values_offdiag) + + # reshape into cholesky and return + tf$reshape(values_z, shape(dims[1], dims[2])) - # reshape into lower triangular, then symmetric matrix - L <- tf$reshape(values, shape(dims[1], dims[2])) - tf$matmul(tf$transpose(L), L) } -flat_to_symmetric <- function (x, dim) { +tf_chol_to_symmetric <- function (U) + tf$matmul(tf$transpose(U), U) + +flat_to_chol <- function (x, dim, correl = FALSE) { dimfun <- function (elem_list) dim + fun 
<- ifelse(correl, + "tf_flat_to_chol_correl", + "tf_flat_to_chol") + # sum the elements - op('flat_to_symmetric', + op('flat_to_chol', x, operation_args = list(dims = dim), - tf_operation = 'tf_flat_to_symmetric', + tf_operation = fun, + dimfun = dimfun) + +} + +chol_to_symmetric <- function (L) { + + dimfun <- function (elem_list) + dim(elem_list[[1]]) + + # sum the elements + op('chol_to_symmetric', + L, + tf_operation = 'tf_chol_to_symmetric', dimfun = dimfun) } @@ -312,3 +413,47 @@ tf_int <- function () # cast a scalar as a float or integer of the correct type in TF code fl <- function(x) tf$constant(x, dtype = tf_float()) + +# evaluate expressions (dag density or gradient), capturing numerical errors +# like matrix inversions as bad samples, and erroring otherwise +cleanly <- function (expr) { + + res <- tryCatch(expr, error = function(e) e) + + # if it errored + if (inherits(res, 'error')) { + + numerical_messages <- c("is not invertible", + "Cholesky decomposition was not successful") + + numerical_errors <- vapply(numerical_messages, + grepl, + res$message, + FUN.VALUE = 0) == 1 + + # if it was just a numerical error, quietly return a bad value + if (any(numerical_errors)) + res <- NA + else + stop("greta hit a tensorflow error:\n\n", res, call. = FALSE) + + } + + res + +} + +# check truncation for different distributions +check_positive <- function (truncation) { + if (truncation[1] < 0) { + stop ("lower bound must be 0 or higher", + call. = FALSE) + } +} + +check_unit <- function (truncation) { + if (truncation[1] < 0 | truncation[2] > 1) { + stop ("lower and upper bounds must be between 0 and 1", + call. = FALSE) + } +} diff --git a/R/variable.R b/R/variable.R index 94bfc5c0..b5a5d824 100644 --- a/R/variable.R +++ b/R/variable.R @@ -1,14 +1,11 @@ #' @rdname variable #' @export -#' @title greta variables +#' @title create greta variables #' @description \code{variable()} creates greta arrays representing unknown -#' parameters, to be lerned during model fitting. These parameters are not +#' parameters, to be learned during model fitting. These parameters are not #' associated with a probability distribution. To create a variable greta #' array following a specific probability distribution, see -#' \code{\link{greta-distributions}}. -#' -#' \code{free()} is an alias for \code{variable()}. It is deprecated and -#' will be removed in version 0.2 +#' \code{\link{distributions}}. #' #' @param lower,upper scalar values giving optional limits to variables. These #' must be specified as numerics, they cannot be greta arrays (though see @@ -50,7 +47,3 @@ variable <- function (lower = -Inf, upper = Inf, dim = 1) { as.greta_array(node) } - -#' @rdname variable -#' @export -free <- variable diff --git a/README.Rmd b/README.Rmd index 6e4e6537..ce0b9408 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,13 +16,13 @@ knitr::include_graphics('README_files/top_banner.png') ### greta is an R package for writing statistical models and fitting them by MCMC. -greta let you write your own model like in BUGS, JAGS and Stan, except that you write models right in R, it scales well to massive datasets, and it's easy to extend and build on. +greta lets you write your own model like in BUGS, JAGS and Stan, except that you write models right in R, it scales well to massive datasets, and it's easy to extend and build on. 
-### See [the website](https://goldingn.github.io/greta/) for more information, [tutorials](https://goldingn.github.io/greta/getting_started.html), [examples](https://goldingn.github.io/greta/example_models.html), and [package documentation](https://goldingn.github.io/greta/reference-index.html). +### See the [website](https://goldingn.github.io/greta/) for more information, [tutorials](https://goldingn.github.io/greta/get_started.html), [examples](https://goldingn.github.io/greta/example_models.html), and [package documentation](https://goldingn.github.io/greta/reference-index.html). -You can install the package from GitHub: +You can install the package from CRAN: ```r -devtools::install_github('goldingn/greta') +install.packages("greta") ``` I would love to hear any feedback, bug reports or feature requests via the [issues tracker](https://github.com/goldingn/greta/issues). I would also be very keen for contributions from anyone with time to spare! diff --git a/README.md b/README.md index 7c89fb56..8278798e 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,14 @@ ### greta is an R package for writing statistical models and fitting them by MCMC. -greta let you write your own model like in BUGS, JAGS and Stan, except that you write models right in R, it scales well to massive datasets, and it's easy to extend and build on. +greta lets you write your own model like in BUGS, JAGS and Stan, except that you write models right in R, it scales well to massive datasets, and it's easy to extend and build on. -### See [the website](https://goldingn.github.io/greta/) for more information, [tutorials](https://goldingn.github.io/greta/getting_started.html), [examples](https://goldingn.github.io/greta/example_models.html), and [package documentation](https://goldingn.github.io/greta/reference-index.html). +### See the [website](https://goldingn.github.io/greta/) for more information, [tutorials](https://goldingn.github.io/greta/get_started.html), [examples](https://goldingn.github.io/greta/example_models.html), and [package documentation](https://goldingn.github.io/greta/reference-index.html). -You can install the package from GitHub: +You can install the package from CRAN: ``` r -devtools::install_github('goldingn/greta') +install.packages("greta") ``` I would love to hear any feedback, bug reports or feature requests via the [issues tracker](https://github.com/goldingn/greta/issues). I would also be very keen for contributions from anyone with time to spare! diff --git a/docs/as_data.html b/docs/as_data.html index 42f2c20f..4d1488f3 100644 --- a/docs/as_data.html +++ b/docs/as_data.html @@ -11,7 +11,7 @@ -Declare R Objects as Data +convert other objects to greta arrays @@ -335,7 +335,7 @@ -

-Declare R Objects as Data
+convert other objects to greta arrays

diff --git a/docs/build_website.R b/docs/build_website.R index ca860186..6d385b42 100644 --- a/docs/build_website.R +++ b/docs/build_website.R @@ -4,11 +4,13 @@ write_topic <- function (data) { rmd <- whisker::whisker.render(topic_template, data) cat(rmd, file = paste0("docs/", data$name, ".Rmd")) } + write_index <- function (data) { index_template <- readLines('docs/_index_template.txt') rmd <- whisker::whisker.render(index_template, data) cat(rmd, file = "docs/reference-index.Rmd") } + # codeface and split argument names in a topic split_args <- function (topic) { for (i in seq_along(topic$arguments)) { @@ -103,16 +105,16 @@ data_index <- pkgdown:::data_reference_index(pkg) # name the sections and their members, by path sections <- list(list(title = "creating greta arrays", desc = "Create greta arrays representing observed data or fixed values", - members = c("as_data.html", "greta-structures.html")), + members = c("structures.html", "as_data.html")), list(title = "variables & distributions", desc = "Create variables and assign probability distributions over greta arrays", - members = c("variable.html", "distribution.html", "greta-distributions.html")), + members = c("variable.html", "distributions.html", "distribution.html")), list(title = "manipulating greta arrays", desc = "Functions and operations for modifying greta arrays", - members = c("greta-operators.html", "greta-functions.html", "extract-replace-combine.html", "greta-transforms.html")), + members = c("operators.html", "functions.html", "extract-replace-combine.html", "transforms.html")), list(title = "modelling", desc = "Define and visualise models and fit them to data", - members = c("greta-model.html", "greta-inference.html")), + members = c("model.html", "inference.html")), list(title = "modules", desc = "Collections of methods for doing more specialist analyses", members = c("dynamics-module.html"))) diff --git a/docs/distribution.html b/docs/distribution.html index f864057d..c9a4bf79 100644 --- a/docs/distribution.html +++ b/docs/distribution.html @@ -11,7 +11,7 @@ -Define a Distribution Over a greta Array +define a distribution over data @@ -335,7 +335,7 @@ -

-Define a Distribution Over a greta Array
+define a distribution over data

@@ -344,35 +344,30 @@

Description

-distribution links observed data, variables, and other greta arrays to probability distributions. For example a model likelhood can be set by using distribution on some observed data. likelihood is an alias for distribution. It is deprecated and will be removed in version 0.2. +distribution defines probability distributions over observed data, e.g. to set a model likelihood.

Usage

distribution(greta_array) <- value
 
-distribution(greta_array)
-
-likelihood(greta_array) <- value 
+distribution(greta_array)

Arguments

greta_array

-a greta array. For the assignment method it must be a greta array that doesn’t already have a probability distribution. +a data greta array. For the assignment method it must not already have a probability distribution assigned

value

-a greta array with a distribution (see greta-distributions) +a greta array with a distribution (see distributions)

Details

-The extract method returns the greta array if it has a distribution, or NULL if it doesn’t. It has now real function, but is included for completeness -

-

-distribution can also be used to create truncated distributions, by first defining a greta array with constraints (the truncation) and then defining the distribution on that greta array. See below for an example. +The extract method returns the greta array if it has a distribution, or NULL if it doesn’t. It has no real use-case, but is included for completeness

Examples @@ -383,16 +378,14 @@

# observed data and mean parameter to be estimated # (explicitly coerce data to a greta array so we can refer to it later) y = as_data(rnorm(5, 0, 3)) -mu = variable() + +mu = uniform(-3, 3) + # define the distribution over y (the model likelihood) -distribution(y) = normal(mu, 1) +distribution(y) = normal(mu, 1) # get the distribution over y -distribution(y) - -# define a truncated-positive standard normal random variable -tn = variable(lower = 0) -distribution(tn) = normal(0, 1) +distribution(y) diff --git a/docs/distributions.html b/docs/distributions.html index 5dd389fc..fb1b4252 100644 --- a/docs/distributions.html +++ b/docs/distributions.html @@ -476,7 +476,7 @@

uniform                  stats::dunif
normal                   stats::dnorm
lognormal                stats::dlnorm
bernoulli                extraDistr::dbern
binomial                 stats::dbinom
beta_binomial            extraDistr::dbbinom
negative_binomial        stats::dnbinom
hypergeometric           stats::dhyper
poisson                  stats::dpois
gamma                    stats::dgamma
inverse_gamma            extraDistr::dinvgamma
weibull                  stats::dweibull
exponential              stats::dexp
pareto                   extraDistr::dpareto
student                  extraDistr::dnst
laplace                  extraDistr::dlaplace
beta                     stats::dbeta
cauchy                   stats::dcauchy
chi_squared              stats::dchisq
logistic                 stats::dlogis
f                        stats::df
multivariate_normal      mvtnorm::dmvnorm
multinomial              stats::dmultinom
categorical              stats::dmultinom (size = 1)
dirichlet                extraDistr::ddirichlet
dirichlet_multinomial    extraDistr::ddirmnom
wishart                  stats::rWishart

lkj_correlation -rethinking::dlkjcorr +rethinking::dlkjcorr diff --git a/docs/dynamics-module.html b/docs/dynamics-module.html index ea54ddd6..ed89501a 100644 --- a/docs/dynamics-module.html +++ b/docs/dynamics-module.html @@ -11,7 +11,7 @@ -Functions for modelling dynamical systems +methods for modelling structured dynamical systems @@ -335,7 +335,7 @@ -

-Functions for modelling dynamical systems
+methods for modelling structured dynamical systems

diff --git a/docs/example_models.html b/docs/example_models.html index 97b9a71a..06b2aadf 100644 --- a/docs/example_models.html +++ b/docs/example_models.html @@ -342,13 +342,13 @@

Example models

BUGS models

-

The OpenBUGS website provides a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The Stan wiki provides Stan implementations of these models.

-

The following sections provide greta implementations of some of these example models, alongside the BUGS code from openbugs.net and Stan code and R version of the data from the Stan example models wiki.

+

The BUGS project provides a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The Stan wiki provides Stan implementations of these models.

+

The following sections provide greta implementations of some of these example models, alongside the BUGS code from WinBUGS examples volume 2 (pdf), and the Stan code and an R version of the data from the Stan example models wiki.


Air

Air analyses reported respiratory illness versus exposure to nitrogen dioxide in 103 children. The parameters alpha, beta and sigma2 are known in advance, and the data are grouped into three categories.

-

See the BUGS page for details

+

See WinBUGS examples volume 2 (pdf) for details.

data

@@ -422,7 +422,7 @@

Stan code

Beetles

Beetles considers dose-response data from an experiment applying carbon disulphide to 8 beetles. The original example compares three different link functions; the logit, probit and complementary log-log. Here, only the code for the logit link is shown. You can implement the other two link functions in greta by changing ilogit to iprobit or icloglog.
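For instance, a minimal sketch of swapping the link function (not part of the BUGS example itself; eta here stands in for the linear predictor built earlier in that model):

# logit link, as used in the example code
p <- ilogit(eta)

# probit or complementary log-log links instead (eta is assumed, for illustration)
# p <- iprobit(eta)
# p <- icloglog(eta)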

-

See the BUGS page for details

+

See WinBUGS examples volume 2 (pdf) for details.

data

diff --git a/docs/extract-replace-combine.html b/docs/extract-replace-combine.html index 37fed4b7..ebb42009 100644 --- a/docs/extract-replace-combine.html +++ b/docs/extract-replace-combine.html @@ -11,7 +11,7 @@ -Extract, Replace and Combine greta Arrays +extract, replace and combine greta arrays @@ -335,7 +335,7 @@ -

-Extract, Replace and Combine greta Arrays
+extract, replace and combine greta arrays

diff --git a/docs/figures/full_graph.png b/docs/figures/full_graph.png index 44b3810b..2f1c9986 100644 Binary files a/docs/figures/full_graph.png and b/docs/figures/full_graph.png differ diff --git a/docs/figures/likelihood_graph.png b/docs/figures/likelihood_graph.png index caf916ac..4af299fc 100644 Binary files a/docs/figures/likelihood_graph.png and b/docs/figures/likelihood_graph.png differ diff --git a/docs/get_started.html b/docs/get_started.html index 2c882cdd..81bc0e8b 100644 --- a/docs/get_started.html +++ b/docs/get_started.html @@ -11,7 +11,7 @@ -Getting started with greta +Get started with greta @@ -335,12 +335,35 @@ -

-Getting started with greta
+Get started with greta


+
+

Installation

+

You can install the stable version of greta from CRAN:

+
install.packages("greta")
+

Alternatively, you can install the latest release, or the development version, from GitHub using the devtools package:

+
devtools::install_github("goldingn/greta")  # latest release
+devtools::install_github("goldingn/greta@dev")  # development version
+
library(greta)
+
+
+

TensorFlow

+

Before you can fit models with greta, you will also need to have a working installation of Google’s TensorFlow Python package (version 1.0.0 or higher). greta exports install_tensorflow() from the tensorflow R package, which will attempt to install the latest version of TensorFlow from within your R session.

+
install_tensorflow()
+

You can use this function to install different versions of TensorFlow, including versions with GPU acceleration. If you’re having trouble with this step, this guide may help.

+
+
+
+

DiagrammeR

+

greta’s plotting functionality depends on the DiagrammeR package. Because DiagrammeR automatically installs a lot of other packages, it can take a long time to install, so DiagrammeR isn’t installed automatically with greta. If you want to plot models, you can install DiagrammeR from CRAN.

+
install.packages('DiagrammeR')
+
+
+

How greta works

greta lets us build statistical models interactively in R, and then sample from them by MCMC. We build greta models with greta array objects, which behave much like R’s array, matrix and vector objects for numeric data. Like those numeric data objects, greta arrays can be manipulated with functions and mathematical operators to create new greta arrays.
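As a minimal illustration (not from the vignette itself; it only uses as_data and the operators and functions greta already provides):

library(greta)

# a 3 x 1 data greta array from an R vector
x <- as_data(c(1.2, 0.3, -0.5))

# operators and functions on greta arrays return new greta arrays
z <- x * 2 + 1
xtx <- t(x) %*% x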

@@ -379,12 +402,12 @@

How greta works

[3,] ? ? ?

This allows us to create a wide range of models, like in the general-purpose modelling languages like BUGS and Stan. But unlike those languages we build greta models interactively in R so get feedback immediately if there’s a mistake like a misspelled variable name or if one of our greta arrays has the wrong shape.

greta also lets us declare that a greta array follows a probability distribution, allowing us to train models using observed data, and to define prior distributions over our parameters, for Bayesian analyses.
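For example, an illustrative sketch (not part of the original text) combining a prior over a parameter, the new truncation argument, and a distribution over observed data:

# a positive prior on a standard deviation, via the truncation argument
sigma = normal(0, 1, truncation = c(0, Inf))

# observed data, with a distribution declared over it (the likelihood)
y <- as_data(rnorm(5))
distribution(y) = normal(0, sigma)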

-

The rest of this vignette walks through an example of fitting a model using greta. If you’d like to see examples of some common models fitted in greta and with equivalent BUGS and Stan code, take a look at Example models. If you’d like more technical details about how greta works under the hood, check out How does this work?.

+

The rest of this vignette walks through an example of fitting a model using greta. If you’d like to see examples of some common models fitted in greta and with equivalent BUGS and Stan code, take a look at Example models. If you’d like more technical details about how greta works under the hood, check out Technical details.


Building a model

-

The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We’ll be stepping through a model for linear regression between two of the variables in the iris dataset, which is included with base R. The model is semi-Bayesian, just to illustrate how to do both Bayesian (specifying priors over variables) and frequentist (no priors) inference in greta. In a real analysis it might make more sense just to pick one of those two approaches.

+

The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We’ll be stepping through a model for linear regression between two of the variables in the iris dataset, which is included with base R. The model is Bayesian (we specify priors over the variables), though it is also possible to do frequentist (no priors) inference in greta, using variable() instead of a probability distribution to create random variables.

Here’s the whole script to specify and fit the model:

library(greta)
 
@@ -393,9 +416,9 @@ 

Building a model

y <- as_data(iris$Sepal.Length) # variables and priors -int = variable() +int = normal(0, 1) coef = normal(0, 3) -sd = lognormal(0, 3) +sd = student(3, 0, 1, truncation = c(0, Inf)) # operations mean <- int + coef * x @@ -410,7 +433,7 @@

Building a model

plot(m) # sampling -draws <- mcmc(model, n_samples = 1000)
+draws <- mcmc(m, n_samples = 1000)

Throughout this example, and the rest of greta’s documentation, <- is used when assigning deterministic values, and = when assigning variable or stochastic values. This makes the model code a little bit clearer to read, since = corresponds to the ~ symbol used to define stochastic relationships in BUGS and Stan. However, it doesn’t actually make any difference to the model which assignment operator you use.


@@ -482,13 +505,21 @@

data structures

Variables and priors

The second section of the script creates three greta arrays to represent the parameters in our model:

-
int = variable()
+
int = normal(0, 1)
 coef = normal(0, 3)
-sd = lognormal(0, 3)
-
-

variables

-

The first line uses the variable() function to create a greta array int representing a variable - an unknown value that we will learn when we fit the model to data. variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar).

-
int
+sd = student(3, 0, 1, truncation = c(0, Inf))
+

Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with probability distributions, it will be clearer to first demonstrate the alternative: variables without probability distributions.

+
+

variables without probability distributions

+

If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the variable() function. E.g. in a frequentist version of the model we could create int with:

+
(int = variable())
+
greta array (variable)
+
+     [,1]
+[1,]  ?  
+

variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf, meaning the variable can take any value on the real number line. The third argument gives the dimensions of the greta array to return; in this case we left it at its default value of 1x1 (a scalar).
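For instance (mirroring an example from the original vignette), if we had instead wanted a 2x3 array of positive variables we could have created it with:

variable(lower = 0, dim = c(2, 3))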

+

We can create a variable constrained between two values by specifying lower and upper. So we could have created the positive variable sd (the standard deviation of the likelihood) with:

+
(sd = variable(lower = 0))
greta array (variable)
 
      [,1]
@@ -503,43 +534,18 @@ 

variables

variables with probability distributions

-

Variable greta arrays like int that are created with variable(), are not associated with probability distributions. I.e. we haven’t made any assumptions or statements about what distribution we expect their values to follow. That’s normally the case for a frequentist analysis, but for a Bayesian model we need to specify prior distributions over our parameters, stating the distribution we expect them to be drawn from (before considering the data we’re using in the model).

-

In our example script, we simultaneously create the variables coef and sd, and state the prior distributions for them, using two of greta’s probability distribution functions. Each of these distributions takes as arguments the distribution’s parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions argument at its default value to create scalar greta arrays:

-
coef
-
greta array (variable following a normal distribution)
-
-     [,1]
-[1,]  ?  
-
sd
-
greta array (variable following a lognormal distribution)
-
-     [,1]
-[1,]  ?  
-

greta implements a number of common probability distributions, and more are being added. You can see a list of the currently available distributions in the ?greta-distributions helpfile.

-
-
-

creating priors with distribution()

-

greta’s probability distributions have two roles: to create variables following a certain distribution; and, when used in combination with the distribution() function, to state the distribution of an existing greta array.

-

In fact, creating a new variable using a probability distribution is the same as first creating a variable (with the appropriate constraint) and then stating that it follows a distribution. So our code in the script for creating coef is equivalent to this:

-
coef = variable()
-distribution(coef) = normal(0, 3)
-

The former is somewhat similar to the BUGS approach, where the variable and its distribution are defined at the same time. The latter approach (using distribution()) is similar to Stan code in that the variable is defined first, and the distribution statement added later.

-

As well as taking more lines of code, the second approach relies on us to get the constraints of the variable right. Because sd must be positive to be a valid standard deviation parameter, the distribution() approach for that parameter would look like this:

-
sd = variable(lower = 0)
-distribution(sd) = lognormal(0, 3)
+

In our example script, when we created the variables int, coef and sd, we simultaneously stated the prior distributions for them using some of greta’s probability distribution functions. You can see a list of the currently available distributions in the ?greta::distributions helpfile. Each of these distribution functions takes as arguments the distribution’s parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions arguments at their default value to create scalar greta arrays.
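As a quick illustration of the dim argument (a hypothetical snippet, not part of the example script), we could create a column vector of four independent standard normal variables with:

normal(0, 1, dim = 4)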

+

Both int and coef were given zero-mean normal distributions, which are a common choice of prior for unconstrained variables in Bayesian analyses. For the strictly positive parameter sd, we chose a slightly unconventional option: a positive-truncated (non-standard) Student's t distribution, which we create using greta's built-in support for truncated distributions.

-
-

Truncated distributions

-

In general, it’s easier to define parameters with priors just by using the probability distributions, like we did in the example script. However distribution() has two other uses: it can be used to define a likelihood over observed data; and it can be used to create variables with truncated distributions.

-

If we define a variable with constraints (using lower and upper), we can then assign it a probability distribution, and greta will instead use the truncated probability distribution, usng the specified constraints. For example, to define a variable z that has a standard normal distribution truncated between -1 and 2, we can do:

-
z = variable(lower = -1, upper = 2)
-distribution(z) = normal(0, 1)
-z
+
+

variables with truncated distributions

+

Some of greta’s probability distributions (those that are continuous and univariate) can be specified as truncated distributions. By setting the truncation argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do:

+
(z = normal(0, 1, truncation = c(-1, 1)))
greta array (variable following a normal distribution)
 
      [,1]
 [1,]  ?  
-

Truncation like this isn’t yet implemented for all probability distributions, but greta will let you know if it can’t truncate the distribution.

+

greta will account for this truncation when calculating the density of this distribution, rescaling it to be a valid probability distribution. We can only truncate to within the support of the distribution; e.g. greta will throw an error if we try to truncate a lognormal distribution (which must be positive) to have a lower bound of -1.
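For example (an illustrative sketch, not part of the example script), truncating within the support is fine, so a lognormal variable restricted to values above 1 could be created with:

lognormal(0, 1, truncation = c(1, Inf))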


@@ -560,7 +566,7 @@

Operations

[4,]  ?
[5,]  ?
[6,]  ?
-

greta arrays can be manipulated using R’s standard arithmetic, logical and relational operators; including +, * and many others. The ?`greta-operators`) helpfile lists the operators that are implemeneted for greta arrays. You can also use a lot of common R functions for numeric data, such as sum(), log() and others. the available functions are listed in the ?`greta-functions` helpfile. All of these mathematical manipulations of greta arrays produce ‘operation’ greta arrays.

+

greta arrays can be manipulated using R’s standard arithmetic, logical and relational operators, including +, * and many others. The ?greta::operators helpfile lists the operators that are implemented for greta arrays. You can also use many common R functions for numeric data, such as sum() and log(); the available functions are listed in the ?greta::functions helpfile. All of these mathematical manipulations of greta arrays produce ‘operation’ greta arrays.
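For instance (a hypothetical snippet; the names log_mean and mean_total are not part of the example script), applying a function or an operator to mean yields new operation greta arrays:

log_mean <- log(mean)         # element-wise log of the linear predictor
mean_total <- sum(mean) + 1   # a scalar operation greta array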

Extract and replace

We can use R’s extract and replace syntax (using [) on greta arrays, just like with R’s vectors, matrices and arrays. E.g. to extract elements from mean we can do:

@@ -610,16 +616,17 @@

Functions

Likelihood

-

With the predicted sepal length mean, we can now define a likelihood for the observed sepal length data y:

+

So far, we have created greta arrays representing the variables in our model (with prior distributions), and derived other greta arrays from them and from some fixed, independent data. To perform statistical inference on this model, we also need to link it to some observed dependent data. By comparing the sepal lengths predicted under different parameter values with the observed sepal lengths, we can estimate the most plausible values of those parameters. We do that by defining a likelihood for the observed sepal length data y.

+

By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we’re trying to work out the parameters of that distribution. In greta we do that with the distribution() assignment function:

distribution(y) = normal(mean, sd)
-

With the syntax distribution(<lhs>) <- <rhs>, we are stating that the greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.

+

With the syntax distribution(<lhs>) = <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. Here we’re temporarily creating a random variable with a normal distribution (with parameters given by the greta arrays mean and sd), and then stating that values of that distribution have been observed (y). Both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.


Defining the model

Now that all of the greta arrays making up the model have been created, we need to combine them and set up the model so that we can sample from it, using model():

m <- model(int, coef, sd)
-

() returns a ‘greta model’ object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to () to flag them as the parameters we’re interested in. When sampling from the model with mcmc() those will be the greta arrays for which samples will be returned. Alternatively, we can run () without passing any greta arrays, in which all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead.

+

model() returns a ‘greta model’ object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to model() to flag them as the parameters we’re interested in. When sampling from the model with mcmc() those will be the greta arrays for which samples will be returned. Alternatively, we can run model() without passing any greta arrays, in which case all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead.
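A quick sketch of the two usages described above:

# flag the parameters we want samples for
m <- model(int, coef, sd)

# or let greta use all non-data greta arrays in the working environment
m <- model()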


@@ -630,7 +637,7 @@

Plotting

The greta arrays in your workspace that are used in the model are all represented as nodes (shapes) with names. These are either data (squares; x and y), variables (large circles; int, coef, sd) or the results of operations (small circles; mean). The operations used to create the operation greta arrays are printed on the arrows from the arrays they were made from. There are also nodes for greta arrays that were implicitly defined in our model. The data nodes (squares) with numbers are the parameters used to define the prior distributions, and there’s also an intermediate operation node (small circle), which was the result of multiplying coef and x (before adding int to create mean).

-

Here’s a legend for the plot (it’s in the ?`greta-model` helpfile too):

+

Here’s a legend for the plot (it’s in the ?greta::model helpfile too):

@@ -643,16 +650,11 @@

Plotting

-
-

Installing DiagrammeR

-

These plots are made possible by the DiagrammeR R package. Installing greta doesn’t automatically install DiagrammeR, because DiagrammeR auto-installs a number of other packages, such as igraph, that can be time consuming and tedious to install, and aren’t necessary when running greta on high-performance computing systems. If we try to plot but don’t already have DiagrammeR installed, greta will prompt us to do so. We can install DiagrammeR and its dependencies with:

-
install.packages('DiagrammeR')

-

Sampling

-

When defining the model, greta combines all of the distributions together to define the joint density of the model, a measure of how likely (or probable, if we’re being Bayesian) are a particular candidate set of values for the variables in the model.

+

When defining the model, greta combines all of the distributions together to define the joint density of the model: a measure of how ‘good’ (or, if we’re being Bayesian, how probable) a particular candidate set of values for the variables in the model is.

Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. We do that using a Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we’re interested in, using the mcmc() function:

draws <- mcmc(m, n_samples = 1000)

Here we’re using 1000 steps of the static Hamiltonian Monte Carlo (HMC) algorithm, which uses the gradients of the joint density to efficiently explore the set of parameters. By default, greta also spends 100 iterations ‘warming up’ (tuning the sampler parameters) and ‘burning in’ (moving to the area of highest probability) the sampler.

@@ -678,16 +680,16 @@

Sampling

int  4.1299 4.2257 4.2793 4.3409 4.4422
coef 0.3782 0.3997 0.4144 0.4287 0.4490
sd   0.3717 0.3922 0.4085 0.4255 0.4521
-

The MCMCvis package makes some nice plots of the MCMC chain and the parameter estimates

-
library (MCMCvis)
-MCMCtrace(draws)
-MCMCplot(draws, xlim = c(-1, 5))
+

The bayesplot package makes some nice plots of the MCMC chain and the parameter estimates:

+
library (bayesplot)
+mcmc_trace(draws)
+mcmc_intervals(draws)

If your model is taking a long time in the sampling phase and you want to take a look at the samples so far, you can stop the sampler (e.g. using the stop button in RStudio) and then retrieve the samples drawn so far using stashed_samples(). Note that this won’t return anything if you stop the model during the warmup phase (since those aren’t valid posterior samples) or if the sampler completed successfully.
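For example (a minimal sketch; draws_so_far is just an illustrative name), after interrupting a long-running mcmc() call we could retrieve and summarise the partial chain with:

draws_so_far <- stashed_samples()
summary(draws_so_far)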

Tweaking the sampler

Static HMC is currently the only sampling algorithm available in greta. The sampler will automatically tune itself during the warmup phase, to make it as efficient as possible. If the chain looks like it’s moving too slowly, or if you are getting a lot of messages about proposals being rejected, the first thing to try is increasing the length of the warmup period from its default of 100 iterations (via the warmup argument). If you’re still getting a lot of rejected samples, it’s often a good idea to manually set the initial values for the sampler (via the initial_values argument). This is often the case when you have lots of data; the more information there is, the more extreme the log probability, and the higher the risk of numerical problems.
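For example (a sketch of the first tweak mentioned above; the value 500 is arbitrary), we could lengthen the warmup period like this, and initial values could be supplied in the same call via the initial_values argument:

draws <- mcmc(m, n_samples = 1000, warmup = 500)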

-

A downside of HMC is that it can’t be used to sample discrete variables (e.g. integers), so we can’t specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it’s in the likelihood. If we try to build such a model, greta will give us an error when we run () stage. Future versions of greta will implement samplers for discrete MCMC, as well as self-tuning versions of HMC.

+

A downside of HMC is that it can’t be used to sample discrete variables (e.g. integers), so we can’t specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it’s in the likelihood. If we try to build such a model, greta will give us an error when we run model(). Future versions of greta will implement a wider range of MCMC samplers, including some for discrete variables.
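For example (an illustrative sketch with made-up count data; it follows the same pattern as the distribution() helpfile example, so the scalar rate parameter is assumed to apply to every observation), a Poisson distribution is fine as the likelihood over observed counts:

counts <- as_data(rpois(10, lambda = 3))   # observed count data
rate = lognormal(0, 1)                     # prior on the (positive) Poisson rate
distribution(counts) = poisson(rate)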


diff --git a/docs/get_started_files/figure-html/mcmcvis-1.png b/docs/get_started_files/figure-html/mcmcvis-1.png index 93d33812..d2968cff 100644 Binary files a/docs/get_started_files/figure-html/mcmcvis-1.png and b/docs/get_started_files/figure-html/mcmcvis-1.png differ diff --git a/docs/get_started_files/figure-html/mcmcvis-2.png b/docs/get_started_files/figure-html/mcmcvis-2.png index 2ae55c21..398edf32 100644 Binary files a/docs/get_started_files/figure-html/mcmcvis-2.png and b/docs/get_started_files/figure-html/mcmcvis-2.png differ diff --git a/docs/greta.html b/docs/greta.html index 06773bfb..43a63ec9 100644 --- a/docs/greta.html +++ b/docs/greta.html @@ -11,7 +11,7 @@ -greta: Probabilistic Modelling with TensorFlow +greta: simple and scalable statistical modelling in R @@ -335,7 +335,7 @@ -

greta: Probabilistic Modelling with TensorFlow

+

greta: simple and scalable statistical modelling in R

@@ -344,13 +344,13 @@

Description

-greta lets you write probabilistic models interactively in native R code, then sample from them efficiently using Hamiltonian Monte Carlo. +greta lets you write statistical models interactively in native R code, then sample from them efficiently using Hamiltonian Monte Carlo.

-The computational heavy lifting is done by TensorFlow, Google’s automatic differentiation library. greta is particularly fast where the model contains lots of linear algebra, and greta models can be easily set up to run across CPUs or GPUs just by installing the relevant version of TensorFlow. +The computational heavy lifting is done by TensorFlow, Google’s automatic differentiation library. So greta is particularly fast where the model contains lots of linear algebra, and greta models can be run across CPU clusters or on GPUs.

-See the example below for the general set up of a greta model, and greta-distributions, greta-operators, greta-functions, greta-transforms and greta-structures for details of the currently implemented syntax and how to combine them into models +See the simple example below, and take a look at the greta website for more information including tutorials and examples.

Usage @@ -359,18 +359,20 @@

Examples

-# define a simple model
-mu = variable()
-sigma = lognormal(1, 0.1)
-x = rnorm(10)
-distribution(x) = normal(mu, sigma)
-
-m <- model(mu, sigma)
-
-# and sample from it
-draws <- mcmc(m,
-              n_samples = 100,
-              warmup = 10)
+# a simple Bayesian regression model for the iris data + +# priors +int = normal(0, 5) +coef = normal(0, 3) +sd = lognormal(0, 3) + +# likelihood +mean <- int + coef * iris$Petal.Length +distribution(iris$Sepal.Length) = normal(mean, sd) + +# build and sample +m <- model(int, coef, sd) +draws <- mcmc(m, n_samples = 100) diff --git a/docs/index.Rmd b/docs/index.Rmd index 1609824c..05c915d9 100644 --- a/docs/index.Rmd +++ b/docs/index.Rmd @@ -101,7 +101,7 @@ a, a:hover{ ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE, cache = TRUE) -set.seed(1) +set.seed(2017-06-19) ``` @@ -344,17 +344,12 @@

Description

-variable() creates greta arrays representing unknown parameters, to be lerned during model fitting. These parameters are not associated with a probability distribution. To create a variable greta array following a specific probability distribution, see greta-distributions. -

-

-free() is an alias for variable(). It is deprecated and will be removed in version 0.2 +variable() creates greta arrays representing unknown parameters, to be learned during model fitting. These parameters are not associated with a probability distribution. To create a variable greta array following a specific probability distribution, see distributions.

Usage

-
variable(lower = -Inf, upper = Inf, dim = 1)
-
-free(lower = -Inf, upper = Inf, dim = 1) 
+
variable(lower = -Inf, upper = Inf, dim = 1) 

Arguments

diff --git a/docs/why_greta.Rmd b/docs/why_greta.Rmd index d3f62a40..3cf6e1b7 100644 --- a/docs/why_greta.Rmd +++ b/docs/why_greta.Rmd @@ -26,7 +26,7 @@ body{
-There's a recent convention of naming statistical modelling software after pioneers in the field (e.g. [STAN](https://en.wikipedia.org/wiki/Stanislaw_Ulam) and [Edward](https://en.wikipedia.org/wiki/George_E._P._Box)). +There's a recent convention of naming statistical modelling software after pioneers in the field (e.g. [Stan](https://en.wikipedia.org/wiki/Stanislaw_Ulam) and [Edward](https://en.wikipedia.org/wiki/George_E._P._Box)). [Grete Hermann](https://en.wikipedia.org/wiki/Grete_Hermann) wasn't a statistician. She wrote [the first algorithms](http://dl.acm.org/citation.cfm?id=307342&coll=portal&dl=ACM) for computer algebra; in the 1920s, well before the first electronic computer was built. This work laid the foundations for the computer algebra libraries that enable modern statistical modelling. diff --git a/docs/why_greta.html b/docs/why_greta.html index 6a2876ff..ce8c4d4b 100644 --- a/docs/why_greta.html +++ b/docs/why_greta.html @@ -254,7 +254,7 @@

why ‘greta’?

-

There’s a recent convention of naming statistical modelling software after pioneers in the field (e.g. STAN and Edward).

+

There’s a recent convention of naming statistical modelling software after pioneers in the field (e.g. Stan and Edward).

Grete Hermann wasn’t a statistician. She wrote the first algorithms for computer algebra; in the 1920s, well before the first electronic computer was built. This work laid the foundations for the computer algebra libraries that enable modern statistical modelling.

In case that’s not enough reason to admire her, Grete Hermann also disproved a popular theorem in quantum theory and was part of the German resistance against the Nazi regime prior to World War Two.


diff --git a/greta.Rproj b/greta.Rproj index e19a96bc..57cdc9a0 100644 --- a/greta.Rproj +++ b/greta.Rproj @@ -18,6 +18,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source --no-lock -PackageBuildArgs: --no-build-vignettes PackageCheckArgs: --as-cran PackageRoxygenize: rd,collate,namespace diff --git a/inst/doc/example_models.R b/inst/doc/example_models.R deleted file mode 100644 index c6ac8d4a..00000000 --- a/inst/doc/example_models.R +++ /dev/null @@ -1,4 +0,0 @@ -## ----setup, include = FALSE---------------------------------------------- -knitr::opts_chunk$set(comment = NA, cache = TRUE) -library (greta) - diff --git a/inst/doc/example_models.Rmd b/inst/doc/example_models.Rmd deleted file mode 100644 index 809baba1..00000000 --- a/inst/doc/example_models.Rmd +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Example models" -output: - html_document: - css: greta.css - toc: yes - toc_float: - collapsed: false - toc_depth: 3 - theme: lumen - highlight: default -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{Example models} - %\usepackage[utf8]{inputenc} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set(comment = NA, cache = TRUE) -library (greta) -``` - -## BUGS models - -The [OpenBUGS website](http://www.openbugs.net/w/Examples) provides a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The [Stan wiki](https://github.com/stan-dev/example-models/wiki/BUGS-Examples-Sorted-Alphabetically) provides Stan implementations of these models. - -The following sections provide greta implementations of some of these example models, alongside the BUGS code from [openbugs.net](http://www.openbugs.net) and Stan code and R version of the data from the [Stan example models wiki](https://github.com/stan-dev/example-models/wiki). - -
- -```{r child='examples/air.Rmd'} -``` - -
- -```{r child='examples/beetles.Rmd'} -``` - -
diff --git a/inst/doc/get_started.R b/inst/doc/get_started.R deleted file mode 100644 index 7f698719..00000000 --- a/inst/doc/get_started.R +++ /dev/null @@ -1,125 +0,0 @@ -## ----setup, include=FALSE------------------------------------------------ - -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - cache = TRUE, - comment = NA, - progress = FALSE) - -set.seed(123) - -library(greta) - -if (!file.exists('figures')) - dir.create('figures') - -file.copy('../man/figures/plotlegend.png', - 'figures/plotlegend.png') - -## ----truncated----------------------------------------------------------- -z = variable(lower = -1, upper = 2) -distribution(z) = normal(0, 1) -z - -## ----linear_predictor---------------------------------------------------- -mean <- int + coef * x - -## ----mean---------------------------------------------------------------- -dim(mean) -head(mean) - -## ----extract------------------------------------------------------------- -mean[1:3] - -## ----replace------------------------------------------------------------- -z <- zeros(4, 3) -z[, 1] <- normal(0, 1, dim = 4) -z - -## ----drop---------------------------------------------------------------- -z <- matrix(1, nrow = 2, ncol = 2) -dim(z[, 1]) -dim(z[, 1, drop = FALSE]) - -## ----drop_greta---------------------------------------------------------- -z_greta <- as_data(z) -dim(z_greta[, 1]) - -## ----function1----------------------------------------------------------- -atanh <- function (z) - (log(1 + z) - log(1 - z)) / 2 - -atanh(z_greta) - -## ----likelihood---------------------------------------------------------- -distribution(y) = normal(mean, sd) - -## ----hidden_model, echo = FALSE------------------------------------------ -x <- as_data(iris$Petal.Length) -y <- as_data(iris$Sepal.Length) -int = variable() -coef = normal(0, 3) -sd = lognormal(0, 3) -mean <- int + coef * x -distribution(y) = normal(mean, sd) - -## ----define_model-------------------------------------------------------- -m <- model(int, coef, sd) - -## ----plot, eval = FALSE-------------------------------------------------- -# plot(m) - -## ----plot_hidden, echo = FALSE, results='hide'--------------------------- -gr <- plot(m) -fname <- "figures/full_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = 'png', - width = 895 * 2, - height = 313 * 2) - -## ----plot_coef, echo = FALSE, results='hide'----------------------------- -coef = normal(0, 3) -m_coef <- model(coef) -gr <- plot(m_coef) -fname <- "figures/coef_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = 'png', - width = 325 * 2, - height = 123 * 2) - -## ----plot_likelihood, echo = FALSE, results='hide'----------------------- -sd = variable() -y <- as_data(iris$Sepal.Length) -mean <- ones(150) -distribution(y) = normal(mean, sd) -m_likelihood <- model(sd) -gr <- plot(m_likelihood) -idx <- which(gr$nodes_df$label == 'mean\n') -gr$nodes_df$shape[idx] <- 'circle' -gr$nodes_df$fillcolor[idx] <- 'lightgray' -gr$nodes_df$width[idx] <- 0.2 -gr$nodes_df$height[idx] <- 0.2 -gr$nodes_df <- gr$nodes_df[c(3, 1, 2, 4), ] -fname <- "figures/likelihood_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = "png", - width = 325 * 2, - height = 105 * 2) - -## ----install_diagrammer, eval = FALSE------------------------------------ -# install.packages('DiagrammeR') - -## ----mcmc, message=FALSE, results='hide', progress = FALSE--------------- -draws <- mcmc(m, n_samples = 1000) - -## 
----coda_summary-------------------------------------------------------- -summary(draws) - -## ----mcmcvis, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'---- -library (MCMCvis) -MCMCtrace(draws) -MCMCplot(draws, xlim = c(-1, 5)) - diff --git a/inst/doc/get_started.Rmd b/inst/doc/get_started.Rmd deleted file mode 100644 index f3da0a8d..00000000 --- a/inst/doc/get_started.Rmd +++ /dev/null @@ -1,434 +0,0 @@ ---- -title: "Getting started with greta" -output: - html_document: - css: greta.css - toc: yes - toc_float: - collapsed: false - toc_depth: 4 - theme: lumen - highlight: default -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{Getting started with greta} - %\usepackage[utf8]{inputenc} ---- - -```{r setup, include=FALSE} - -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - cache = TRUE, - comment = NA, - progress = FALSE) - -set.seed(123) - -library(greta) - -if (!file.exists('figures')) - dir.create('figures') - -file.copy('../man/figures/plotlegend.png', - 'figures/plotlegend.png') -``` - -
- -## How greta works - -greta lets us build statistical models interactively in R, and then sample from them by MCMC. We build greta models with *greta array* objects, which behave much like R's array, matrix and vector objects for numeric data. Like those numeric data objects, greta arrays can be manipulated with functions and mathematical operators to create new greta arrays. - -The key difference between greta arrays and numeric data objects is that when you do something to a greta array, greta *doesn't calculate the values* of the new greta array. Instead, it just remembers what operation to do, and works out the size and shape of the result. - -For example, we can create a greta array `z` representing some data (a 3x3 matrix of 1s): -```{r ones} -(z <- ones(3, 3)) -``` -we can then create a new greta array `z2` by doing something to `z`: -```{r ones_op} -(z2 <- z + z ^ 2) -``` -greta knows the result must also be a 3x3 matrix, but it doesn't try to calculate the results. Instead it treats the new greta array `z2` like a placeholder, for which it will calculate the results later. - -Because greta only creates *placeholders* when we're building our model, we can construct models using greta arrays that represent unknown variables. For example, if we create a new greta array `a` representing some unknown values, we can still manipulate it as though it were data: - -```{r variable} -(a <- variable(dim = c(3, 3))) -(a2 <- a + a ^ 2) -``` - -This allows us to create a wide range of models, like in the general-purpose modelling languages like BUGS and Stan. But unlike those languages we build greta models interactively in R so get feedback immediately if there's a mistake like a misspelled variable name or if one of our greta arrays has the wrong shape. - -greta also lets us declare that a greta array follows a probability distribution, allowing us to train models using observed data, and to define prior distributions over our parameters, for Bayesian analyses. - -The rest of this vignette walks through an example of fitting a model using greta. -If you'd like to see examples of some common models fitted in greta and with equivalent BUGS and Stan code, take a look at [Example models](example_models.html). -If you'd like more technical details about how greta works under the hood, check out [How does this work?](how_does_this_work.html). - -
- -## Building a model - -The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We'll be stepping through a model for linear regression between two of the variables in the [`iris`](https://en.wikipedia.org/wiki/Iris_flower_data_set) dataset, which is included with base R. The model is *semi-Bayesian*, just to illustrate how to do both Bayesian (specifying priors over variables) and frequentist (no priors) inference in greta. In a real analysis it might make more sense just to pick one of those two approaches. - -Here's the whole script to specify and fit the model: - -```{r first_model, eval = FALSE} -library(greta) - -# data -x <- as_data(iris$Petal.Length) -y <- as_data(iris$Sepal.Length) - -# variables and priors -int = variable() -coef = normal(0, 3) -sd = lognormal(0, 3) - -# operations -mean <- int + coef * x - -# likelihood -distribution(y) = normal(mean, sd) - -# defining the model -m <- model(int, coef, sd) - -# plotting -plot(m) - -# sampling -draws <- mcmc(model, n_samples = 1000) -``` - -Throughout this example, and the rest of greta's documentation, `<-` is used when assigning *deterministic* values, and `=` when assigning variable or *stochastic* values. This makes the model code a little bit clearer to read, since `=` corresponds to the `~` symbol used to define stochastic relationships in BUGS and Stan. However, it doesn't actually make any difference to the model which assignment operator you use. - -
- -### Data - -The first section of the script takes the iris data (which is automatically loaded) and converts the two columns we care about into greta arrays: - -```{r data} -x <- as_data(iris$Petal.Length) -y <- as_data(iris$Sepal.Length) -``` - -The greta function `as_data()` converts other R objects into greta arrays. In this case it's converting numeric vectors (the two columns of the iris dataframe) into greta arrays. `as_data()` can also convert matrices and R arrays with numeric, integer or logical (`TRUE` or `FALSE`) values into greta arrays. It can also convert dataframes to greta arrays, so long as all elements are either numeric, integer or logical. - -E.g. we can convert the first 5 rows and 4 columns of the iris dataframe, and print the result: - -```{r print_greta_array} -as_data(iris[1:5, 1:4]) -``` - -Whenever `as_data()` converts logical values to greta arrays, it converts them to 1s (for `TRUE`) and 0s (for `FALSE`). E.g. if we first convert the last column of `iris` from a factor into a logical vector, we can see this: - -```{r logical_data} -(is_setosa <- iris$Species[c(1, 41, 81, 121)] == 'setosa') -as_data(is_setosa) -``` - -You can also see from this example that greta arrays always consider a vector as either a column vector (the default) or a row vector, and greta arrays always have at least two dimensions: -```{r dim} -dim(as_data(is_setosa)) -``` - -#### explicit vs. automatic conversion - -For many models, we don't *have* to explicitly convert data to greta arrays, the R objects will be converted automatically when we do an [operation](#Operations) on them. That's handy for when we want to use constants in our model because it saves us manually converting numbers each time. However, it's good practice to explicitly convert your data to a greta array using `as_data()`. This has two advantages: it lets greta work out the names of your data greta arrays (e.g. `x` and `y` in our example) which it can use when [plotting](#Plotting) the model; and `as_data()` will check your data for missing (`NA`) or non-finite (`Inf` or `-Inf`) values, which will break the model. - -#### data structures - -greta also provides some convenience functions to generate fixed numeric values. `ones()` and `zeros()` create greta arrays filled with either 1 or zero, and with the specified dimensions: - -```{r structures} -ones(1, 3) -zeros(2, 2) -``` -The `greta_array()` function is generalises this to let you create greta arrays with any values, in the same way as R's `array()` function: - -```{r greta_array} -greta_array(pi, dim = c(2, 2)) -greta_array(0:1, dim = c(3, 3)) -``` - -`greta_array()` is just a thin wrapper around `array()`, provided for convenience. A command like `greta_array()` has exactly the same effect as: `as_data(array)`. - -
- -### Variables and priors - -The second section of the script creates three greta arrays to represent the parameters in our model: -```{r variables} -int = variable() -coef = normal(0, 3) -sd = lognormal(0, 3) -``` - -#### variables - -The first line uses the `variable()` function to create a greta array `int` representing a variable - an unknown value that we will learn when we fit the model to data. `variable()` has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of `lower = -Inf, upper = Inf` meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar). - -```{r int} -int -``` - -If we had instead wanted a 2x3 array of positive variables we could have created it like this: -```{r positive_variable} -variable(lower = 0, dim = c(2, 3)) -``` - -#### variables with probability distributions - -Variable greta arrays like `int` that are created with `variable()`, are not associated with probability distributions. I.e. we haven't made any assumptions or statements about what distribution we expect their values to follow. That's normally the case for a frequentist analysis, but for a Bayesian model we need to specify *prior distributions* over our parameters, stating the distribution we expect them to be drawn from (*before* considering the data we're using in the model). - -In our example script, we simultaneously create the variables `coef` and `sd`, and state the prior distributions for them, using two of greta's probability distribution functions. Each of these distributions takes as arguments the distribution's parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions argument at its default value to create scalar greta arrays: - -```{r priors} -coef -sd -``` - -greta implements a number of common probability distributions, and more are being added. You can see a list of the currently available distributions in the `?greta-distributions` helpfile. - -#### creating priors with `distribution()` - -greta's probability distributions have two roles: to create variables following a certain distribution; and, when used in combination with the `distribution()` function, to state the distribution of an existing greta array. - -In fact, creating a new variable using a probability distribution is the same as first creating a variable (with the appropriate constraint) and then stating that it follows a distribution. So our code in the script for creating `coef` is equivalent to this: -```{r normal_prior2, eval = FALSE} -coef = variable() -distribution(coef) = normal(0, 3) -``` - -The former is somewhat similar to the BUGS approach, where the variable and its distribution are defined at the same time. The latter approach (using `distribution()`) -is similar to Stan code in that the variable is defined first, and the distribution statement added later. - -As well as taking more lines of code, the second approach relies on us to get the constraints of the variable right. 
Because `sd` must be positive to be a valid standard deviation parameter, the `distribution()` approach for that parameter would look like this: - -```{r sd_distribution, eval = FALSE} -sd = variable(lower = 0) -distribution(sd) = lognormal(0, 3) -``` - -#### Truncated distributions - -In general, it's easier to define parameters with priors just by using the probability distributions, like we did in the example script. However `distribution()` has two other uses: it can be used to define a [likelihood](#Likelihood) over observed data; and it can be used to create variables with truncated distributions. - -If we define a variable with constraints (using `lower` and `upper`), we can then assign it a probability distribution, and greta will instead use the truncated probability distribution, usng the specified constraints. For example, to define a variable `z` that has a standard normal distribution truncated between -1 and 2, we can do: - -```{r truncated} -z = variable(lower = -1, upper = 2) -distribution(z) = normal(0, 1) -z -``` - -Truncation like this isn't yet implemented for all probability distributions, but greta will let you know if it can't truncate the distribution. - -
- -### Operations - -The third section of the script uses mathematical operators to combine some of our parameters and data, to calculate the predicted sepal lengths, for a given parameter set: - -```{r linear_predictor} -mean <- int + coef * x -``` - -Because both `int` and `coef` are both scalars, the resulting greta array `mean` has the same dimensions as `x`; a column vector with 150 elements: - -```{r mean} -dim(mean) -head(mean) -``` - -greta arrays can be manipulated using R's standard arithmetic, logical and relational operators; including `+`, `*` and many others. The `` ?`greta-operators` ``) helpfile lists the operators that are implemeneted for greta arrays. You can also use a lot of common R functions for numeric data, such as `sum()`, `log()` and others. the available functions are listed in the `` ?`greta-functions` `` helpfile. All of these mathematical manipulations of greta arrays produce 'operation' greta arrays. - -#### Extract and replace - -We can use R's extract and replace syntax (using `[`) on greta arrays, just like with R's vectors, matrices and arrays. E.g. to extract elements from `mean` we can do: - -```{r extract} -mean[1:3] -``` - -We can assign values from one greta array to another too. For example, if we wanted to create a matrix that has random normal variables in the first column, but zeros everywhere else, we could do: - -```{r replace} -z <- zeros(4, 3) -z[, 1] <- normal(0, 1, dim = 4) -z -``` - -R's subset operator `[` has an argument `drop`, which determines whether to reduce the number of dimensions of a array or matrix when the object has zero elements in that dimension. By default, `drop = TRUE` for R objects, so matrices are automatically converted into vectors (which have dimension `NULL`) if you take out a single column: - -```{r drop} -z <- matrix(1, nrow = 2, ncol = 2) -dim(z[, 1]) -dim(z[, 1, drop = FALSE]) -``` - -greta arrays must always have two dimensions, so greta always acts as though `drop=FALSE`: - -```{r drop_greta} -z_greta <- as_data(z) -dim(z_greta[, 1]) -``` - -#### Functions - -We can write our own functions for greta arrays, using the existing operators and functions. For example, we could define the [inverse hyperbolic tangent function](https://en.wikipedia.org/wiki/Inverse_hyperbolic_function#Inverse_hyperbolic_tangent) for greta arrays like this: - -```{r function1} -atanh <- function (z) - (log(1 + z) - log(1 - z)) / 2 - -atanh(z_greta) -``` - -
- -### Likelihood - -With the predicted sepal length `mean`, we can now define a likelihood for the observed sepal length data `y`: -```{r likelihood} -distribution(y) = normal(mean, sd) -``` - -With the syntax `distribution() <- `, we are stating that the greta array on the left `` has the same distribution as the greta array on the right ``. In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters. - -
- -### Defining the model - -Now all of the greta arrays making up the model have been created, we need to combine them and set up the model so that we can sample from it, using `model()`: - -```{r hidden_model, echo = FALSE} -x <- as_data(iris$Petal.Length) -y <- as_data(iris$Sepal.Length) -int = variable() -coef = normal(0, 3) -sd = lognormal(0, 3) -mean <- int + coef * x -distribution(y) = normal(mean, sd) -``` -```{r define_model} -m <- model(int, coef, sd) -``` - -`()` returns a 'greta model' object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to `()` to flag them as the parameters we're interested in. When sampling from the model with `mcmc()` those will be the greta arrays for which samples will be returned. Alternatively, we can run `()` without passing any greta arrays, in which all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead. - -
- -### Plotting - -greta provides a plot function for greta models to help you visualise and check the model before sampling from it. -```{r plot, eval = FALSE} -plot(m) -``` -```{r plot_hidden, echo = FALSE, results='hide'} -gr <- plot(m) -fname <- "figures/full_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = 'png', - width = 895 * 2, - height = 313 * 2) -``` -

- -The greta arrays in your workspace that are used in the model are all represented as nodes (shapes) with names. These are either data (squares; `x` and `y`), variables (large circles; `int`, `coef`, `sd`) or the results of operations (small circles; `mean`). The operations used to create the operation greta arrays are printed on the arrows from the arrays they were made from. -There are also nodes for greta arrays that were *implicitly defined* in our model. The data nodes (squares) with numbers are the parameters used to define the prior distributions, and there's also an intermediate operation node (small circle), which was the result of multiplying `coef` and `x` (before adding `int` to create `mean`). - -Here's a legend for the plot (it's in the `` ?`greta-model` `` helpfile too): - -

- -The fourth type of node (diamonds) represents probability distributions. These have greta arrays as parameters (linked via solid lines), and have other greta arrays as *values*(linked via dashed lines). Distributions calculate the probability density of the *values*, given the parameters and their distribution type. - -For example, a plot of just the prior distribution over `coef` (defined as `coef = normal(0, 3)`) shows the parameters as data leading into the normal distribution, and a dashed arrow leading out to the distribution's value, the variable `coef`: - -```{r plot_coef, echo = FALSE, results='hide'} -coef = normal(0, 3) -m_coef <- model(coef) -gr <- plot(m_coef) -fname <- "figures/coef_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = 'png', - width = 325 * 2, - height = 123 * 2) -``` -

- -It's the same for the model likelihood, but this time the distribution's parameters are a variable (`sd`) and the result of an operation (`mean`), and the distribution's value is given by data (the observed sepal lengths `y`): - -```{r plot_likelihood, echo = FALSE, results='hide'} -sd = variable() -y <- as_data(iris$Sepal.Length) -mean <- ones(150) -distribution(y) = normal(mean, sd) -m_likelihood <- model(sd) -gr <- plot(m_likelihood) -idx <- which(gr$nodes_df$label == 'mean\n') -gr$nodes_df$shape[idx] <- 'circle' -gr$nodes_df$fillcolor[idx] <- 'lightgray' -gr$nodes_df$width[idx] <- 0.2 -gr$nodes_df$height[idx] <- 0.2 -gr$nodes_df <- gr$nodes_df[c(3, 1, 2, 4), ] -fname <- "figures/likelihood_graph.png" -DiagrammeR::export_graph(gr, - file_name = fname, - file_type = "png", - width = 325 * 2, - height = 105 * 2) -``` -

- -#### Installing DiagrammeR - -These plots are made possible by the [DiagrammeR R package](http://rich-iannone.github.io/DiagrammeR/). Installing greta doesn't automatically install DiagrammeR, because DiagrammeR auto-installs a number of other packages, such as igraph, that can be time consuming and tedious to install, and aren't necessary when running greta on high-performance computing systems. If we try to plot but don't already have DiagrammeR installed, greta will prompt us to do so. We can install DiagrammeR and its dependencies with: - -```{r install_diagrammer, eval = FALSE} -install.packages('DiagrammeR') -``` - -
- -### Sampling - -When defining the model, greta combines all of the distributions together to define the *joint density* of the model, a measure of how likely (or probable, if we're being Bayesian) are a particular candidate set of values for the variables in the model. - -Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. We do that using an Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we're interested in, using the `mcmc()` function: - -```{r mcmc, message=FALSE, results='hide', progress = FALSE} -draws <- mcmc(m, n_samples = 1000) -``` - -Here we're using 1000 steps of the static Hamiltonian Monte Carlo (HMC) algorithm, which uses the gradients of the joint density to efficiently explore the set of parameters. By default, greta also spends 100 iterations 'warming up' (tuning the sampler parameters) and 'burning in' (moving to the area of highest probability) the sampler. - -`draws` is an `mcmc.list` object, from the coda R package. So we can use functions from coda, or one of the many other MCMC software packages that use this format, to plot and summarise the MCMC samples. - -```{r coda_summary} -summary(draws) -``` -The MCMCvis package makes some nice plots of the MCMC chain and the parameter estimates -```{r mcmcvis, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} -library (MCMCvis) -MCMCtrace(draws) -MCMCplot(draws, xlim = c(-1, 5)) -``` - -If your model is taking a long time whilst in the sampling phase and you want to take a look at the samples. You can stop the sampler (e.g. using the stop button in RStudio) and then retrieve the samples drawn so far, by using `stashed_samples()`. Note that this won't return anything if you stop the model during the warmup phase (since those aren't valid posterior samples) or if the sampler completed successfully. - -#### Tweaking the sampler - -Static HMC is currently the only sampling algorithm available in greta. The sampler will automatically tune itself during the warmup phase, to make it as efficient as possible. If the chain looks like it's moving too slowly, or if you are getting a lot of messages about propsals being rejected, the first thing to try is increasing the length of the warmup period from its default of 100 interstions (via the `warmup` argument). If you're still getting a lot of rejected samples, it's often a good idea to manual set the initial values for the sampler (via the `initial_values` argument). This is often the case when you have lots of data; the more information there is, the more extreme the log probability, and the higher the risk of numerical problems. - -A downside of HMC is that it can't be used to sample discrete variables (e.g. integers), so we can't specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it's in the likelihood. If we try to build such a model, greta will give us an error when we run `()` stage. Future versions of greta will implement samplers for discrete MCMC, as well as self-tuning versions of HMC. - -
- diff --git a/inst/doc/how_does_this_work.R b/inst/doc/how_does_this_work.R deleted file mode 100644 index e9102ca0..00000000 --- a/inst/doc/how_does_this_work.R +++ /dev/null @@ -1,59 +0,0 @@ -## ----setup, include=FALSE------------------------------------------------ -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - cache = TRUE, - comment = NA, - progress = FALSE) -set.seed(1) -library(greta) - -## ----greta_array1-------------------------------------------------------- -x <- ones(3, 3) -is.list(x) -class(x) - -## ----greta_array2-------------------------------------------------------- -class(x$node) - -## ----nodes1-------------------------------------------------------------- -# data nodes have no children -length(x$node$children) - -# operation nodes have one or more children -z <- x * 3 -length(z$node$children) - -## ----nodes2-------------------------------------------------------------- -x$node$value() -# R calls this a matrix because it's 2d, but it's an array -class(x$node$value()) -z$node$value() -class(z$node$value()) - -## ----tensors1------------------------------------------------------------ -x$node$tf - -## ----tensors2------------------------------------------------------------ -z$node$tf - -## ----free_state---------------------------------------------------------- -a = variable(lower = 0) -class(a$node) -a$node$tf_from_free - -## ----distributions1------------------------------------------------------ -b = normal(0, 1) -class(b$node) -class(b$node$distribution) -# a is the target of its own distribution -class(b$node$distribution$target) - -## ----distributions2------------------------------------------------------ -b$node$distribution$tf_log_density - -## ----dag1---------------------------------------------------------------- -model <- model(b) -model$dag$send_parameters -model$dag$log_density -model$dag$gradients - diff --git a/man/as_data.Rd b/man/as_data.Rd index fbaca33d..5597b961 100644 --- a/man/as_data.Rd +++ b/man/as_data.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/as_data.R \name{as_data} \alias{as_data} -\title{Declare R Objects as Data} +\title{convert other objects to greta arrays} \usage{ as_data(x) } diff --git a/man/distribution.Rd b/man/distribution.Rd index 5e0029e9..f42be846 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -1,41 +1,30 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/syntax.R +% Please edit documentation in R/distribution.R \name{distribution} \alias{distribution} \alias{distribution<-} -\alias{likelihood} \alias{distribution} -\alias{likelihood<-} -\title{Define a Distribution Over a greta Array} +\title{define a distribution over data} \usage{ distribution(greta_array) <- value distribution(greta_array) - -likelihood(greta_array) <- value } \arguments{ -\item{greta_array}{a greta array. For the assignment method it must be a -greta array that doesn't already have a probability distribution.} +\item{greta_array}{a data greta array. For the assignment method it must not +already have a probability distribution assigned} \item{value}{a greta array with a distribution (see -\code{\link{greta-distributions}})} +\code{\link{distributions}})} } \description{ -\code{distribution} links observed data, variables, and other - greta arrays to probability distributions. For example a model likelhood - can be set by using \code{distribution} on some observed data. - \code{likelihood} is an alias for \code{distribution}. It is deprecated and - will be removed in version 0.2. 
+\code{distribution} defines probability distributions over + observed data, e.g. to set a model likelihood. } \details{ The extract method returns the greta array if it has a distribution, - or \code{NULL} if it doesn't. It has now real function, but is included for + or \code{NULL} if it doesn't. It has no real use-case, but is included for completeness - - \code{distribution} can also be used to create truncated distributions, by - first defining a greta array with constraints (the truncation) and then - defining the distribution on that greta array. See below for an example. } \examples{ @@ -44,15 +33,13 @@ The extract method returns the greta array if it has a distribution, # observed data and mean parameter to be estimated # (explicitly coerce data to a greta array so we can refer to it later) y = as_data(rnorm(5, 0, 3)) -mu = variable() + +mu = uniform(-3, 3) + # define the distribution over y (the model likelihood) distribution(y) = normal(mu, 1) # get the distribution over y distribution(y) -# define a truncated-positive standard normal random variable -tn = variable(lower = 0) -distribution(tn) = normal(0, 1) - } diff --git a/man/greta-distributions.Rd b/man/distributions.Rd similarity index 62% rename from man/greta-distributions.Rd rename to man/distributions.Rd index 410c9c4e..88b37f8e 100644 --- a/man/greta-distributions.Rd +++ b/man/distributions.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distributions.R -\name{greta-distributions} -\alias{greta-distributions} +% Please edit documentation in R/probability_distributions.R +\name{distributions} +\alias{distributions} \alias{uniform} \alias{normal} \alias{lognormal} @@ -25,17 +25,18 @@ \alias{f} \alias{multivariate_normal} \alias{wishart} +\alias{lkj_correlation} \alias{multinomial} \alias{categorical} \alias{dirichlet} \alias{dirichlet_multinomial} -\title{greta probability distributions} +\title{probability distributions} \usage{ uniform(min, max, dim = NULL) -normal(mean, sd, dim = NULL) +normal(mean, sd, dim = NULL, truncation = c(-Inf, Inf)) -lognormal(meanlog, sdlog, dim = NULL) +lognormal(meanlog, sdlog, dim = NULL, truncation = c(0, Inf)) bernoulli(prob, dim = NULL) @@ -49,34 +50,36 @@ hypergeometric(m, n, k, dim = NULL) poisson(lambda, dim = NULL) -gamma(shape, rate, dim = NULL) +gamma(shape, rate, dim = NULL, truncation = c(0, Inf)) -inverse_gamma(alpha, beta, dim = NULL) +inverse_gamma(alpha, beta, dim = NULL, truncation = c(0, Inf)) -weibull(shape, scale, dim = NULL) +weibull(shape, scale, dim = NULL, truncation = c(0, Inf)) -exponential(rate, dim = NULL) +exponential(rate, dim = NULL, truncation = c(0, Inf)) -pareto(a, b, dim = NULL) +pareto(a, b, dim = NULL, truncation = c(0, Inf)) -student(df, mu, sigma, dim = NULL) +student(df, mu, sigma, dim = NULL, truncation = c(-Inf, Inf)) -laplace(mu, sigma, dim = NULL) +laplace(mu, sigma, dim = NULL, truncation = c(-Inf, Inf)) -beta(shape1, shape2, dim = NULL) +beta(shape1, shape2, dim = NULL, truncation = c(0, 1)) -cauchy(location, scale, dim = NULL) +cauchy(location, scale, dim = NULL, truncation = c(-Inf, Inf)) -chi_squared(df, dim = NULL) +chi_squared(df, dim = NULL, truncation = c(0, Inf)) -logistic(location, scale, dim = NULL) +logistic(location, scale, dim = NULL, truncation = c(-Inf, Inf)) -f(df1, df2, dim = NULL) +f(df1, df2, dim = NULL, truncation = c(0, Inf)) multivariate_normal(mean, Sigma, dim = 1) wishart(df, Sigma) +lkj_correlation(eta, dim = 2) + multinomial(size, prob, dim = 1) categorical(prob, dim = 
1) @@ -97,7 +100,11 @@ or a vector of positive integers. See details.} \item{mean, meanlog, location, mu}{unconstrained parameters} -\item{sd, sdlog, sigma, lambda, shape, rate, df, scale, shape1, shape2, alpha, beta, df1, df2, a, b}{positive parameters, \code{alpha} must be a vector for \code{dirichlet} and \code{dirichlet_multinomial}.} +\item{sd, sdlog, sigma, lambda, shape, rate, df, scale, shape1, shape2, alpha, beta, df1, df2, a, b, eta}{positive parameters, \code{alpha} must be a vector for \code{dirichlet} and \code{dirichlet_multinomial}.} + +\item{truncation}{a length-two vector giving values between which to truncate +the distribution, similarly to the \code{lower} and \code{upper} arguments +to \code{\link{variable}}} \item{prob}{probability parameter (\code{0 < prob < 1}), must be a vector for \code{multinomial} and \code{categorical}} @@ -111,7 +118,7 @@ These functions can be used to define random variables in a greta model. They return a variable greta array that follows the specified distribution. This variable greta array can be used to represent a parameter with prior distribution, or used with \code{\link{distribution}} - to define a distribution over an existing greta array. + to define a distribution over a data greta array. } \details{ The discrete probability distributions (\code{bernoulli}, @@ -154,33 +161,34 @@ The discrete probability distributions (\code{bernoulli}, corresponds: \tabular{ll}{ greta \tab reference\cr - \code{uniform} \tab \code{\link[stats:dunif]{stats::dunif}}\cr - \code{normal} \tab \code{\link[stats:dnorm]{stats::dnorm}}\cr - \code{lognormal} \tab \code{\link[stats:dlnorm]{stats::dlnorm}}\cr - \code{bernoulli} \tab \code{\link[extraDistr:dbern]{extraDistr::dbern}}\cr - \code{binomial} \tab \code{\link[stats:dbinom]{stats::dbinom}}\cr - \code{beta_binomial} \tab \code{\link[extraDistr:dbbinom]{extraDistr::dbbinom}}\cr - \code{negative_binomial} \tab \code{\link[stats:dnbinom]{stats::dnbinom}}\cr - \code{hypergeometric} \tab \code{\link[stats:dhyper]{stats::dhyper}}\cr - \code{poisson} \tab \code{\link[stats:dpois]{stats::dpois}}\cr - \code{gamma} \tab \code{\link[stats:dgamma]{stats::dgamma}}\cr - \code{inverse_gamma} \tab \code{\link[extraDistr:dinvgamma]{extraDistr::dinvgamma}}\cr - \code{weibull} \tab \code{\link[stats:dweibull]{stats::dweibull}}\cr - \code{exponential} \tab \code{\link[stats:dexp]{stats::dexp}}\cr - \code{pareto} \tab \code{\link[extraDistr:dpareto]{extraDistr::dpareto}}\cr - \code{student} \tab \code{\link[extraDistr:dnst]{extraDistr::dnst}}\cr - \code{laplace} \tab \code{\link[extraDistr:dlaplace]{extraDistr::dlaplace}}\cr - \code{beta} \tab \code{\link[stats:dbeta]{stats::dbeta}}\cr - \code{cauchy} \tab \code{\link[stats:dcauchy]{stats::dcauchy}}\cr - \code{chi_squared} \tab \code{\link[stats:dchisq]{stats::dchisq}}\cr - \code{logistic} \tab \code{\link[stats:dlogis]{stats::dlogis}}\cr - \code{f} \tab \code{\link[stats:df]{stats::df}}\cr - \code{multivariate_normal} \tab \code{\link[mvtnorm:dmvnorm]{mvtnorm::dmvnorm}}\cr - \code{multinomial} \tab \code{\link[stats:dmultinom]{stats::dmultinom}}\cr - \code{categorical} \tab {\code{\link[stats:dmultinom]{stats::dmultinom}} (size = 1)}\cr - \code{dirichlet} \tab \code{\link[extraDistr:ddirichlet]{extraDistr::ddirichlet}}\cr - \code{dirichlet_multinomial} \tab \code{\link[extraDistr:ddirmnom]{extraDistr::ddirmnom}}\cr - \code{wishart} \tab \code{\link[MCMCpack:dwish]{MCMCpack::dwish}}\cr } + \code{uniform} \tab \link[stats:dunif]{stats::dunif}\cr + \code{normal} \tab 
\link[stats:dnorm]{stats::dnorm}\cr + \code{lognormal} \tab \link[stats:dlnorm]{stats::dlnorm}\cr + \code{bernoulli} \tab \link[extraDistr:dbern]{extraDistr::dbern}\cr + \code{binomial} \tab \link[stats:dbinom]{stats::dbinom}\cr + \code{beta_binomial} \tab \link[extraDistr:dbbinom]{extraDistr::dbbinom}\cr + \code{negative_binomial} \tab \link[stats:dnbinom]{stats::dnbinom}\cr + \code{hypergeometric} \tab \link[stats:dhyper]{stats::dhyper}\cr + \code{poisson} \tab \link[stats:dpois]{stats::dpois}\cr + \code{gamma} \tab \link[stats:dgamma]{stats::dgamma}\cr + \code{inverse_gamma} \tab \link[extraDistr:dinvgamma]{extraDistr::dinvgamma}\cr + \code{weibull} \tab \link[stats:dweibull]{stats::dweibull}\cr + \code{exponential} \tab \link[stats:dexp]{stats::dexp}\cr + \code{pareto} \tab \link[extraDistr:dpareto]{extraDistr::dpareto}\cr + \code{student} \tab \link[extraDistr:dnst]{extraDistr::dnst}\cr + \code{laplace} \tab \link[extraDistr:dlaplace]{extraDistr::dlaplace}\cr + \code{beta} \tab \link[stats:dbeta]{stats::dbeta}\cr + \code{cauchy} \tab \link[stats:dcauchy]{stats::dcauchy}\cr + \code{chi_squared} \tab \link[stats:dchisq]{stats::dchisq}\cr + \code{logistic} \tab \link[stats:dlogis]{stats::dlogis}\cr + \code{f} \tab \link[stats:df]{stats::df}\cr + \code{multivariate_normal} \tab \link[mvtnorm:dmvnorm]{mvtnorm::dmvnorm}\cr + \code{multinomial} \tab \link[stats:dmultinom]{stats::dmultinom}\cr + \code{categorical} \tab {\link[stats:dmultinom]{stats::dmultinom} (size = 1)}\cr + \code{dirichlet} \tab \link[extraDistr:ddirichlet]{extraDistr::ddirichlet}\cr + \code{dirichlet_multinomial} \tab \link[extraDistr:ddirmnom]{extraDistr::ddirmnom}\cr + \code{wishart} \tab \link[stats:rWishart]{stats::rWishart}\cr + \code{lkj_correlation} \tab \href{https://rdrr.io/github/rmcelreath/rethinking/man/dlkjcorr.html}{rethinking::dlkjcorr}\cr } } \examples{ # a uniform parameter constrained to be between 0 and 1 diff --git a/man/dynamics-module.Rd b/man/dynamics-module.Rd index af2a3e9d..c8a00081 100644 --- a/man/dynamics-module.Rd +++ b/man/dynamics-module.Rd @@ -6,7 +6,7 @@ \alias{iterate_lambda} \alias{iterate_state} \alias{iterate_lambda_vectorised} -\title{Functions for modelling dynamical systems} +\title{methods for modelling structured dynamical systems} \arguments{ \item{matrix}{a square, two-dimensional (i.e. matrix-like) greta array representing transition probabilities between states} diff --git a/man/extract-replace-combine.Rd b/man/extract-replace-combine.Rd index 3f771595..37205ac1 100644 --- a/man/extract-replace-combine.Rd +++ b/man/extract-replace-combine.Rd @@ -8,7 +8,7 @@ \alias{rbind} \alias{c} \alias{rep} -\title{Extract, Replace and Combine greta Arrays} +\title{extract, replace and combine greta arrays} \arguments{ \item{i, j}{indices specifying elements to extract or replace} @@ -21,8 +21,8 @@ \item{drop, recursive}{generic arguments that are ignored for greta arrays} } \description{ -Generic methods to extract and replace elements of greta arrays, or to - combine greta arrays. +Generic methods to extract and replace elements of greta arrays, + or to combine greta arrays. 
} \section{Usage}{ \preformatted{ diff --git a/man/greta-functions.Rd b/man/functions.Rd similarity index 85% rename from man/greta-functions.Rd rename to man/functions.Rd index 3fa85261..8f7bdef3 100644 --- a/man/greta-functions.Rd +++ b/man/functions.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/functions.R -\name{greta-functions} -\alias{greta-functions} -\title{R functions that work for greta arrays} +\name{functions} +\alias{functions} +\title{functions for greta arrays} \description{ This is a list of functions in base R that are currently - implemented to transform greta arrays Also see \link{greta-operators} and - \link{greta-transforms}. + implemented to transform greta arrays. Also see \link{operators} and + \link{transforms}. } \details{ TensorFlow only enables rounding to integers, so \code{round()} will @@ -22,8 +22,8 @@ TensorFlow only enables rounding to integers, so \code{round()} will converted into a greta array. \code{sweep()} only works on two-dimensional greta arrays (so \code{MARGIN} - can only be either 1 or 2), and for subtraction, addition, division and - multiplication. + can only be either 1 or 2), and only for subtraction, addition, division + and multiplication. } \section{Usage}{ \preformatted{ diff --git a/man/greta-inference.Rd b/man/greta-inference.Rd deleted file mode 100644 index d8e6edc9..00000000 --- a/man/greta-inference.Rd +++ /dev/null @@ -1,95 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/samplers.R -\name{greta-inference} -\alias{greta-inference} -\alias{mcmc} -\alias{stashed_samples} -\title{Statistical Inference on Greta Models} -\usage{ -mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100, - verbose = TRUE, control = list(), initial_values = NULL) - -stashed_samples() -} -\arguments{ -\item{model}{greta_model object} - -\item{method}{the method used to sample values. Currently only \code{hmc} is -implemented} - -\item{n_samples}{the number of samples to draw (after any warm-up, but before -thinning)} - -\item{thin}{the thinning rate; every \code{thin} samples is retained, the -rest are discarded} - -\item{warmup}{the number of samples to spend warming up the sampler. During -this phase the sampler moves toward the highest density area and may tune -sampler hyperparameters.} - -\item{verbose}{whether to print progress information to the console} - -\item{control}{an optional named list of hyperparameters and options to -control behaviour of the sampler. See Details.} - -\item{initial_values}{an optional named vector of initial values for the free -parameters in the model} -} -\value{ -an \code{mcmc.list} object that can be analysed using - functions from the coda package. This will contain mcmc samples of the - greta arrays used to create \code{model}. -} -\description{ -Carry out statistical inference on parameters of interest by - MCMC. \code{mcmc} runs the MCMC sampler. If the sampler is aborted before - finishing, the samples collected so far can be retrieved with - \code{stashed_samples()}. -} -\details{ -If \code{verbose = TRUE}, the progress bar shows the number of - iterations so far and the expected time to complete the phase of model - fitting (warmup or sampling). Occasionally, a proposed set of parameters - can cause numerical instability (I.e. 
the log density or its gradient is - \code{NA}, \code{Inf} or \code{-Inf}); normally because the log joint - density is so low that it can't be represented as a floating point number. - When this happens, the progress bar will also display the proportion of - samples so far that were 'bad' (numerically unstable) and therefore - rejected. - If you're getting a lot of numerical instability, you might want to - manually define starting values to move the sampler into a more reasonable - part of the parameter space. Alternatively, you could redefine the model - (via \code{model}) to have double precision, though this will slow down - sampling. - - Currently, the only implemented MCMC procedure is static Hamiltonian - Monte Carlo (\code{method = 'hmc'}). During the warmup iterations, the - leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the - sampler efficiency. The \code{control} argument can be used to specify the - initial value for epsilon, along with two other hyperparameters: \code{Lmin} - and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the - upper and lower limits to the number of leapfrog steps per iteration (from - which the number is selected uniformly at random). - - The default control options for HMC are: - \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} - -\code{stashed_samples()} returns an \code{mcmc.list} object if (and - only if) the sampler was aborted during the sampling phase. -} -\examples{ -\dontrun{ - -# define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) -m <- model(mu, sigma) - -# carry out mcmc on the model -draws <- mcmc(m, - n_samples = 100, - warmup = 10) -} -} diff --git a/man/greta.Rd b/man/greta.Rd index 0e99af2e..631dae20 100644 --- a/man/greta.Rd +++ b/man/greta.Rd @@ -1,39 +1,40 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/greta_package.R +% Please edit documentation in R/package.R \docType{package} \name{greta} \alias{greta} \alias{greta-package} -\title{greta: Probabilistic Modelling with TensorFlow} +\title{greta: simple and scalable statistical modelling in R} \description{ -greta lets you write probabilistic models interactively in - native R code, then sample from them efficiently using Hamiltonian Monte - Carlo. +greta lets you write statistical models interactively in native + R code, then sample from them efficiently using Hamiltonian Monte Carlo. The computational heavy lifting is done by TensorFlow, Google's automatic - differentiation library. greta is particularly fast where the model - contains lots of linear algebra, and greta models can be easily set up to - run across CPUs or GPUs just by installing the relevant version of - TensorFlow. + differentiation library. So greta is particularly fast where the model + contains lots of linear algebra, and greta models can be run across CPU + clusters or on GPUs. - See the example below for the general set up of a greta model, and - \link{greta-distributions}, \link{greta-operators}, \link{greta-functions}, - \link{greta-transforms} and \link{greta-structures} for details of the - currently implemented syntax and how to combine them into models + See the simple example below, and take a look at the + \href{https://goldingn.github.io/greta}{greta website} for more information + including + \href{https://goldingn.github.io/greta/get_started.html}{tutorials} and + \href{https://goldingn.github.io/greta/example_models.html}{examples}. 
} \examples{ \dontrun{ -# define a simple model -mu = variable() -sigma = lognormal(1, 0.1) -x = rnorm(10) -distribution(x) = normal(mu, sigma) +# a simple Bayesian regression model for the iris data -m <- model(mu, sigma) +# priors +int = normal(0, 5) +coef = normal(0, 3) +sd = lognormal(0, 3) -# and sample from it -draws <- mcmc(m, - n_samples = 100, - warmup = 10) +# likelihood +mean <- int + coef * iris$Petal.Length +distribution(iris$Sepal.Length) = normal(mean, sd) + +# build and sample +m <- model(int, coef, sd) +draws <- mcmc(m, n_samples = 100) } } diff --git a/man/inference.Rd b/man/inference.Rd new file mode 100644 index 00000000..48437a17 --- /dev/null +++ b/man/inference.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inference.R +\name{inference} +\alias{inference} +\alias{stashed_samples} +\alias{mcmc} +\alias{opt} +\title{statistical inference on greta models} +\usage{ +stashed_samples() + +mcmc(model, method = c("hmc"), n_samples = 1000, thin = 1, warmup = 100, + verbose = TRUE, pb_update = 10, control = list(), + initial_values = NULL) + +opt(model, method = c("adagrad"), max_iterations = 100, tolerance = 1e-06, + control = list(), initial_values = NULL) +} +\arguments{ +\item{model}{greta_model object} + +\item{method}{the method used to sample or optimise values. Currently only +one method is available for each procedure: \code{hmc} and \code{adagrad}} + +\item{n_samples}{the number of MCMC samples to draw (after any warm-up, but +before thinning)} + +\item{thin}{the MCMC thinning rate; every \code{thin} samples is retained, +the rest are discarded} + +\item{warmup}{the number of samples to spend warming up the mcmc sampler. +During this phase the sampler moves toward the highest density area and +tunes sampler hyperparameters.} + +\item{verbose}{whether to print progress information to the console} + +\item{pb_update}{how regularly to update the progress bar (in iterations)} + +\item{control}{an optional named list of hyperparameters and options to +control behaviour of the sampler or optimiser. See Details.} + +\item{initial_values}{an optional named vector of initial values for the free +parameters in the model. These will be used as the starting point for +sampling/optimisation} + +\item{max_iterations}{the maximum number of iterations before giving up} + +\item{tolerance}{the numerical tolerance for the solution, the optimiser stops when the (absolute) difference in the joint density between successive iterations drops below this level} +} +\value{ +\code{mcmc} & \code{stashed_samples} - an \code{mcmc.list} object that can be analysed using + functions from the coda package. This will contain mcmc samples of the + greta arrays used to create \code{model}. + +\code{opt} - a list containing the following named elements: + \itemize{ + \item{par}{the best set of parameters found} + \item{value}{the log joint density of the model at the parameters par} + \item{iterations}{the number of iterations taken by the optimiser} + \item{convergence}{an integer code, 0 indicates successful completion, 1 + indicates the iteration limit max_iterations had been reached} + } +} +\description{ +Carry out statistical inference on greta models by + MCMC or likelihood/posterior optimisation. +} +\details{ +If the sampler is aborted before finishing, the samples collected so + far can be retrieved with \code{stashed_samples()}. Only samples from the + sampling phase will be returned. 
+ +For \code{mcmc()} if \code{verbose = TRUE}, the progress bar shows + the number of iterations so far and the expected time to complete the phase + of model fitting (warmup or sampling). Updating the progress bar regularly + slows down sampling by as much as 9 seconds per 1000 updates. So if you + want the sampler to run faster, you can change \code{pb_update} to increase + the number of iterations between updates of the progress bar, or turn the + progress bar off altogether by setting \code{verbose = FALSE}. + + Occasionally, a proposed set of parameters can cause numerical instability + (i.e. the log density or its gradient is \code{NA}, \code{Inf} or + \code{-Inf}); normally because the log joint density is so low that it + can't be represented as a floating point number. When this happens, the + progress bar will also display the proportion of samples so far that were + 'bad' (numerically unstable) and therefore rejected. + If you're getting a lot of numerical instability, you might want to + manually define starting values to move the sampler into a more reasonable + part of the parameter space. Alternatively, you could redefine the model + (via \code{model}) to have double precision, though this will slow down + sampling. + + Currently, the only implemented MCMC procedure is static Hamiltonian + Monte Carlo (\code{method = "hmc"}). During the warmup iterations, the + leapfrog stepsize hyperparameter \code{epsilon} is tuned to maximise the + sampler efficiency. The \code{control} argument can be used to specify the + initial value for epsilon, along with two other hyperparameters: \code{Lmin} + and \code{Lmax}; positive integers (with \code{Lmax > Lmin}) giving the + upper and lower limits to the number of leapfrog steps per iteration (from + which the number is selected uniformly at random). + + The default control options for HMC are: + \code{control = list(Lmin = 10, Lmax = 20, epsilon = 0.005)} + +Currently, the only implemented optimisation algorithm is Adagrad + (\code{method = "adagrad"}). The \code{control} argument can be used to + specify the optimiser hyperparameters: \code{learning_rate} (default 0.8), + \code{initial_accumulator_value} (default 0.1) and \code{use_locking} + (default \code{TRUE}). 
These are passed directly to TensorFlow's optimisers; + see + \href{https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer}{the + TensorFlow docs} for more information. +} +\examples{ +\dontrun{ +# define a simple model +mu = variable() +sigma = lognormal(1, 0.1) +x = rnorm(10) +distribution(x) = normal(mu, sigma) +m <- model(mu, sigma) + +# carry out mcmc on the model +draws <- mcmc(m, + n_samples = 100, + warmup = 10) +} +\dontrun{ +# find the MAP estimate +opt_res <- opt(m) +} +} diff --git a/man/greta-model.Rd b/man/model.Rd similarity index 97% rename from man/greta-model.Rd rename to man/model.Rd index 4162aa36..7f5961eb 100644 --- a/man/greta-model.Rd +++ b/man/model.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/greta_model_class.R -\name{greta-model} -\alias{greta-model} +\name{model} +\alias{model} +\alias{model} \alias{model} -\alias{greta-model} \alias{define_model} \alias{print.greta_model} \alias{plot.greta_model} -\title{Greta Model Objects} +\title{greta model objects} \usage{ model(..., precision = c("single", "double"), n_cores = NULL, compile = TRUE) diff --git a/man/greta-operators.Rd b/man/operators.Rd similarity index 78% rename from man/greta-operators.Rd rename to man/operators.Rd index 60612ecf..cc5b0f6a 100644 --- a/man/greta-operators.Rd +++ b/man/operators.Rd @@ -1,13 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/operators.R -\name{greta-operators} -\alias{greta-operators} -\title{Operators for Greta Arrays} +\name{operators} +\alias{operators} +\title{arithmetic, logical and relational operators for greta arrays} \description{ This is a list of currently implemented arithmetic, logical and - relational operators, and extract/replace syntax to combine greta arrays - into probabilistic models. Also see \link{greta-functions} and - \link{greta-transforms}. + relational operators to combine greta arrays into probabilistic models. + Also see \link{functions} and \link{transforms}. } \details{ greta's operators are used just like R's the standard arithmetic, diff --git a/man/greta-overloaded.Rd b/man/overloaded.Rd similarity index 94% rename from man/greta-overloaded.Rd rename to man/overloaded.Rd index f657b731..23e1a70c 100644 --- a/man/greta-overloaded.Rd +++ b/man/overloaded.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/overloaded.R, R/operators.R, R/functions.R -\name{greta-overloaded} -\alias{greta-overloaded} +\name{overloaded} +\alias{overloaded} \alias{\%*\%} \alias{diag} \alias{sweep} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 00000000..c94ec96b --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/install_tensorflow.R +\docType{import} +\name{reexports} +\alias{reexports} +\alias{install_tensorflow} +\title{Objects exported from other packages} +\keyword{internal} +\description{ +These objects are imported from other packages. Follow the links +below to see their documentation. 
+ +\describe{ + \item{tensorflow}{\code{\link[tensorflow]{install_tensorflow}}} +}} + diff --git a/man/greta-structures.Rd b/man/structures.Rd similarity index 94% rename from man/greta-structures.Rd rename to man/structures.Rd index 32a56008..86b7c163 100644 --- a/man/greta-structures.Rd +++ b/man/structures.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/structures.R -\name{greta-structures} -\alias{greta-structures} +\name{structures} +\alias{structures} \alias{zeros} \alias{ones} \alias{greta_array} -\title{greta Data Structures} +\title{create data greta arrays} \usage{ zeros(...) diff --git a/man/greta-transforms.Rd b/man/transforms.Rd similarity index 79% rename from man/greta-transforms.Rd rename to man/transforms.Rd index b87d4026..973238fa 100644 --- a/man/greta-transforms.Rd +++ b/man/transforms.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transformations.R -\name{greta-transforms} -\alias{greta-transforms} -\alias{greta-inverse-links} +% Please edit documentation in R/transforms.R +\name{transforms} +\alias{transforms} +\alias{inverse-links} \alias{iprobit} \alias{ilogit} \alias{icloglog} \alias{icauchit} \alias{log1pe} -\title{Transformation Functions for Greta Arrays} +\title{transformation functions for greta arrays} \usage{ iprobit(x) @@ -25,9 +25,8 @@ log1pe(x) transform to a constrained value} } \description{ -transformations for greta arrays, which may also be - used as inverse link functions. Also see \link{greta-operators} and - \link{greta-functions}. +transformations for greta arrays, which may also be used as + inverse link functions. Also see \link{operators} and \link{functions}. } \details{ greta does not allow you to state the transformation/link on the diff --git a/man/variable.Rd b/man/variable.Rd index 72f771f3..fbc43d9c 100644 --- a/man/variable.Rd +++ b/man/variable.Rd @@ -2,12 +2,9 @@ % Please edit documentation in R/variable.R \name{variable} \alias{variable} -\alias{free} -\title{greta variables} +\title{create greta variables} \usage{ variable(lower = -Inf, upper = Inf, dim = 1) - -free(lower = -Inf, upper = Inf, dim = 1) } \arguments{ \item{lower, upper}{scalar values giving optional limits to variables. These @@ -21,13 +18,10 @@ or a vector of positive integers. See details.} } \description{ \code{variable()} creates greta arrays representing unknown - parameters, to be lerned during model fitting. These parameters are not + parameters, to be learned during model fitting. These parameters are not associated with a probability distribution. To create a variable greta array following a specific probability distribution, see - \code{\link{greta-distributions}}. - - \code{free()} is an alias for \code{variable()}. It is deprecated and - will be removed in version 0.2 + \code{\link{distributions}}. } \details{ \code{lower} and \code{upper} must be fixed, they cannot be greta arrays. 
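The renamed help files above describe the main user-facing changes: distributions gain a truncation argument, distribution() assignment is limited to data greta arrays, and inference is split between mcmc() and the new opt() optimiser. As a quick orientation, here is a minimal illustrative sketch (not part of this diff; the parameter values are arbitrary) that assumes only the signatures documented in distributions.Rd, variable.Rd and inference.Rd:

# illustrative sketch only -- assumes the API documented in the .Rd files above
library(greta)

# observed data as a data greta array
y <- as_data(rnorm(5, mean = 2, sd = 0.1))

# a half-normal prior via the new truncation argument, and an unconstrained variable
sd = normal(0, 1, truncation = c(0, Inf))
mu = variable()

# distributions can only be assigned to data greta arrays
distribution(y) = normal(mu, sd)

m <- model(mu, sd)

# MAP estimate with the default Adagrad optimiser, then HMC sampling
map <- opt(m, max_iterations = 100)
draws <- mcmc(m, n_samples = 100, warmup = 100, pb_update = 50)

If the documentation above holds, opt() returns a list with elements par, value, iterations and convergence, while mcmc() returns an mcmc.list that can be summarised with functions from the coda package.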
diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 895e3734..9a6bafd6 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -29,6 +29,23 @@ set_distribution <- function(dist, data) { data$node$register() } +# evaluate the (unadjusted) density of distribution greta array at some data +get_density <- function (distrib, data) { + + x <- as_data(data) + distribution(x) = distrib + + # create dag and define the density + dag <- greta:::dag_class$new(list(x)) + x$node$distribution$define_tf(dag) + + # get the log density as a vector + tensor_name <- dag$tf_name(distrib$node$distribution) + tensor <- get(tensor_name, envir = dag$tf_environment) + as.vector(grab(tensor)) + +} + compare_distribution <- function (greta_fun, r_fun, parameters, x) { # calculate the absolute difference in the log density of some data between # greta and a r benchmark. @@ -232,9 +249,7 @@ compare_truncated_distribution <- function (greta_fun, r_log_density <- log(r_fun(x)) # create greta array for truncated distribution - z <- variable(truncation[1], truncation[2]) - dist = do.call(greta_fun, parameters) - distribution(z) = dist + dist = do.call(greta_fun, c(parameters, list(dim = 1, truncation = truncation))) # set data as the target x_ <- as_data(x) @@ -299,9 +314,10 @@ cpb <- eval(parse(text = capture.output(dput(greta:::create_progress_bar)))) mock_create_progress_bar <- function(...) cpb(..., stream = stdout(), force = TRUE) -mock_mcmc <- function (n_samples = 101) { - pb <- create_progress_bar('sampling', c(0, n_samples)) - iterate_progress_bar(pb, n_samples, rejects = 1) +mock_mcmc <- function (n_samples = 1010) { + pb <- create_progress_bar('sampling', c(0, n_samples), pb_update = 10) + # for (i in seq_len(n_samples)) + iterate_progress_bar(pb, n_samples, rejects = 10) } # apparently testthat can't see these @@ -321,7 +337,7 @@ qstudent <- extraDistr::qnst # mock up pareto to have differently named parameters (a and b are use for the # truncation) -preto <- function(a_, b_) pareto(a_, b_) +preto <- function(a_, b_, dim, truncation) pareto(a_, b_, dim, truncation) dpreto <- function(x, a_, b_) extraDistr::dpareto(x, a_, b_) ppreto <- function(q, a_, b_) extraDistr::ppareto(q, a_, b_) qpreto <- function(p, a_, b_) extraDistr::qpareto(p, a_, b_) diff --git a/tests/testthat/test_distributions.R b/tests/testthat/test_distributions.R index e0181d3c..06c0d576 100644 --- a/tests/testthat/test_distributions.R +++ b/tests/testthat/test_distributions.R @@ -2,6 +2,7 @@ context('distributions') test_that('normal distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::normal, @@ -15,6 +16,7 @@ test_that('normal distribution has correct density', { test_that('uniform distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::uniform, @@ -28,6 +30,7 @@ test_that('uniform distribution has correct density', { test_that('lognormal distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::lognormal, @@ -41,6 +44,7 @@ test_that('lognormal distribution has correct density', { test_that('bernoulli distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::bernoulli, @@ -54,6 +58,7 @@ test_that('bernoulli distribution has correct density', { test_that('binomial distribution has correct density', { + 
skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::binomial, @@ -67,6 +72,7 @@ test_that('binomial distribution has correct density', { test_that('beta-binomial distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::beta_binomial, @@ -80,6 +86,7 @@ test_that('beta-binomial distribution has correct density', { test_that('negative binomial distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::negative_binomial, @@ -93,6 +100,7 @@ test_that('negative binomial distribution has correct density', { test_that('hypergeometric distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::hypergeometric, @@ -106,6 +114,7 @@ test_that('hypergeometric distribution has correct density', { test_that('poisson distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::poisson, @@ -119,6 +128,7 @@ test_that('poisson distribution has correct density', { test_that('gamma distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::gamma, @@ -133,6 +143,7 @@ test_that('gamma distribution has correct density', { test_that('inverse gamma distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::inverse_gamma, @@ -146,6 +157,7 @@ test_that('inverse gamma distribution has correct density', { test_that('weibull distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::weibull, @@ -159,6 +171,7 @@ test_that('weibull distribution has correct density', { test_that('exponential distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::exponential, @@ -172,6 +185,7 @@ test_that('exponential distribution has correct density', { test_that('pareto distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::pareto, @@ -185,6 +199,7 @@ test_that('pareto distribution has correct density', { test_that('student distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::student, @@ -198,6 +213,7 @@ test_that('student distribution has correct density', { test_that('laplace distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::laplace, @@ -211,6 +227,7 @@ test_that('laplace distribution has correct density', { test_that('beta distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::beta, @@ -224,6 +241,7 @@ test_that('beta distribution has correct density', { test_that('cauchy distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::cauchy, @@ -237,6 +255,7 @@ test_that('cauchy distribution has correct density', { test_that('logistic distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::logistic, @@ -251,6 +270,7 @@ test_that('logistic distribution has 
correct density', { test_that('f distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::f, @@ -264,6 +284,7 @@ test_that('f distribution has correct density', { test_that('chi squared distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') difference <- compare_distribution(greta::chi_squared, @@ -277,12 +298,13 @@ test_that('chi squared distribution has correct density', { test_that('multivariate normal distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test m <- 5 mn <- rnorm(m) - sig <- rWishart(1, m + 1, diag(m))[, , 1] + sig <- MCMCpack::rwish(m + 1, diag(m)) # function converting Sigma to sigma dmvnorm2 <- function (x, mean, Sigma, log = FALSE) @@ -299,12 +321,13 @@ test_that('multivariate normal distribution has correct density', { test_that('Wishart distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test m <- 5 df <- m + 1 - sig <- rWishart(1, df, diag(m))[, , 1] + sig <- MCMCpack::rwish(df, diag(m)) # wrapper for argument names dwishart <- function (x, df, Sigma, log = FALSE) { @@ -319,7 +342,40 @@ test_that('Wishart distribution has correct density', { compare_distribution(greta::wishart, dwishart, parameters = list(df = df, Sigma = sig), - x = rWishart(1, df, sig)[, , 1])) + x = MCMCpack::rwish(df, sig))) + + expect_true(all(difference < 1e-4)) + +}) + +test_that('lkj distribution has correct density', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # parameters to test + m <- 5 + eta <- 3 + + # lkj density + dlkj_correlation <- function (x, eta, log = FALSE) { + res <- det(x) ^ (eta - 1) + if (log) res <- log(res) + return (res) + } + + rcorrelation <- function (m) { + wish <- MCMCpack::rwish(m + 1, diag(m)) + iwish <- solve(wish) + cov2cor(iwish) + } + + # no vectorised lkj, so loop through all of these + difference <- replicate(10, + compare_distribution(greta::lkj_correlation, + dlkj_correlation, + parameters = list(eta = eta), + x = rcorrelation(m))) expect_true(all(difference < 1e-4)) @@ -327,6 +383,7 @@ test_that('Wishart distribution has correct density', { test_that('multinomial distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test @@ -349,6 +406,7 @@ test_that('multinomial distribution has correct density', { test_that('categorical distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test @@ -370,6 +428,7 @@ test_that('categorical distribution has correct density', { test_that('dirichlet distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test @@ -384,8 +443,10 @@ test_that('dirichlet distribution has correct density', { expect_true(all(difference < 1e-4)) }) + test_that('dirichlet-multinomial distribution has correct density', { + skip_if_not(check_tf_version()) source('helpers.R') # parameters to test @@ -404,6 +465,7 @@ test_that('dirichlet-multinomial distribution has correct density', { test_that('scalar-valued distributions can be defined in models', { + skip_if_not(check_tf_version()) source('helpers.R') x <- randn(5) @@ -470,22 +532,23 @@ test_that('scalar-valued distributions can be defined in models', { model(uniform(-13, 2.4)) # multivariate continuous distributions - sig <- rWishart(4, 3, diag(3))[, , 1] + sig <- MCMCpack::rwish(4, diag(3)) 
model(multivariate_normal(rnorm(3), sig)) model(wishart(4, sig)) + model(lkj_correlation(5, dim = 3)) model(dirichlet(runif(3))) }) test_that('array-valued distributions can be defined in models', { + skip_if_not(check_tf_version()) source('helpers.R') dim <- c(5, 2) x <- randn(5, 2) y <- round(randu(5, 2)) - p <- iprobit(normal(0, 1, dim = dim)) # variable (need to define a likelihood) a <- variable(dim = dim) @@ -493,21 +556,27 @@ test_that('array-valued distributions can be defined in models', { model(a) # univariate discrete distributions + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = bernoulli(p) model(p) + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = binomial(1, p) model(p) + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = beta_binomial(1, p, 0.2) model(p) + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = negative_binomial(1, p) model(p) + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = hypergeometric(10, 5, p) model(p) + p <- iprobit(normal(0, 1, dim = dim)) distribution(y) = poisson(p) model(p) @@ -546,19 +615,21 @@ test_that('array-valued distributions can be defined in models', { model(f(24.3, 2.4, dim = dim)) # multivariate continuous distributions - sig <- rWishart(4, 3, diag(3))[, , 1] + sig <- MCMCpack:::rwish(4, diag(3)) model(multivariate_normal(rnorm(3), sig, dim = dim[1])) model(dirichlet(runif(3), dim = dim[1])) + model(wishart(4, sig)) + model(lkj_correlation(3, dim = dim[1])) }) test_that('distributions can be sampled from', { + skip_if_not(check_tf_version()) source('helpers.R') x <- randn(100) y <- round(randu(100)) - p <- iprobit(normal(0, 1, dim = 100)) # variable (with a density) a <- variable() @@ -578,21 +649,27 @@ test_that('distributions can be sampled from', { sample_distribution(d) # univariate discrete + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = bernoulli(p) sample_distribution(p) + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = binomial(1, p) sample_distribution(p) + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = negative_binomial(1, p) sample_distribution(p) + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = hypergeometric(10, 5, p) sample_distribution(p) + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = poisson(p) sample_distribution(p) + p <- iprobit(normal(0, 1, dim = 100)) distribution(y) = beta_binomial(1, p, 0.3) sample_distribution(p) @@ -632,9 +709,10 @@ test_that('distributions can be sampled from', { sample_distribution(uniform(-13, 2.4), lower = -13, upper = 2.4) # multivariate continuous - sig <- rWishart(4, 3, diag(3))[, , 1] + sig <- MCMCpack::rwish(4, diag(3)) sample_distribution(multivariate_normal(rnorm(3), sig)) sample_distribution(wishart(4, sig)) + sample_distribution(lkj_correlation(4, dim = 3)) sample_distribution(dirichlet(runif(3))) }) @@ -703,6 +781,37 @@ test_that('wishart distribution errors informatively', { }) + +test_that('lkj_correlation distribution errors informatively', { + + source('helpers.R') + + dim <- 3 + + expect_true(inherits(lkj_correlation(3, dim), + 'greta_array')) + + expect_error(lkj_correlation(-1, dim), + "^eta must be a positive scalar value, or a scalar greta array") + + expect_error(lkj_correlation(c(3, 3), dim), + "^eta must be a positive scalar value, or a scalar greta array") + + expect_error(lkj_correlation(uniform(0, 1, dim = 2), dim), + "^eta must be a scalar, but had dimensions") + + expect_error(lkj_correlation(4, dim = -1), + "dim must be a scalar integer greater than one, but was:") + + 
expect_error(lkj_correlation(4, dim = c(3, 3)), + "dim must be a scalar integer greater than one, but was:") + + expect_error(lkj_correlation(4, dim = NA), + "dim must be a scalar integer greater than one, but was:") + + +}) + test_that('multivariate_normal distribution errors informatively', { source('helpers.R') @@ -842,8 +951,6 @@ test_that('dirichlet distribution errors informatively', { }) - - test_that('dirichlet-multinomial distribution errors informatively', { source('helpers.R') @@ -864,7 +971,6 @@ test_that('dirichlet-multinomial distribution errors informatively', { expect_error(dirichlet_multinomial(c(1, 2), alpha_a), 'size must be a scalar, but has dimensions 2 x 1') - # scalars expect_error(dirichlet_multinomial(size, alpha = 1), 'the dirichlet distribution is for vectors, but the parameters were scalar') @@ -876,3 +982,15 @@ test_that('dirichlet-multinomial distribution errors informatively', { '^dim must be a scalar positive integer, but was:') }) + +test_that('Wishart can use a choleskied Sigma', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + sig <- lkj_correlation(3, dim = 4) + w <- wishart(5, sig) + m <- model(w) + draws <- mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE) + +}) diff --git a/tests/testthat/test_dynamics_module.R b/tests/testthat/test_dynamics_module.R index 7f0fa700..ba61ee2c 100644 --- a/tests/testthat/test_dynamics_module.R +++ b/tests/testthat/test_dynamics_module.R @@ -2,6 +2,7 @@ context('dynamics module') test_that('lambda iteration works', { + skip_if_not(check_tf_version()) source('helpers.R') n <- 10 @@ -27,6 +28,7 @@ test_that('lambda iteration works', { test_that('state iteration works', { + skip_if_not(check_tf_version()) source('helpers.R') n <- 10 @@ -54,6 +56,7 @@ test_that('state iteration works', { test_that('vectorised lambda iteration works', { + skip_if_not(check_tf_version()) source('helpers.R') n <- 10 diff --git a/tests/testthat/test_extract_replace_combine.R b/tests/testthat/test_extract_replace_combine.R index 19121506..62d275e8 100644 --- a/tests/testthat/test_extract_replace_combine.R +++ b/tests/testthat/test_extract_replace_combine.R @@ -2,6 +2,7 @@ context('extract/replace/combine') test_that('extract works like R', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(10) @@ -59,6 +60,7 @@ test_that('extract works like R', { test_that('replace works like R', { + skip_if_not(check_tf_version()) source('helpers.R') # check using expressions, and comparing the whole object to which replacement @@ -174,6 +176,7 @@ test_that('replace works like R', { test_that('rep works like R', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(10) @@ -217,6 +220,7 @@ test_that('rep works like R', { test_that('rbind, cbind and c work like R', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(5, 1) @@ -294,6 +298,7 @@ test_that('stochastic and operation greta arrays can be extracted', { test_that('extract, replace, combine work in models', { + skip_if_not(check_tf_version()) source('helpers.R') # extract diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R index 59f6ab51..0a9271dd 100644 --- a/tests/testthat/test_functions.R +++ b/tests/testthat/test_functions.R @@ -2,6 +2,7 @@ context('functions') test_that('simple functions work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') x <- randn(25, 4) @@ -41,6 +42,7 @@ test_that('simple functions work as expected', { test_that('matrix functions work as expected', { + 
skip_if_not(check_tf_version()) source('helpers.R') a <- rWishart(1, 6, diag(5))[, , 1] @@ -57,6 +59,7 @@ test_that('matrix functions work as expected', { test_that('reducing functions work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(1, 3) @@ -71,6 +74,7 @@ test_that('reducing functions work as expected', { test_that('sweep works as expected', { + skip_if_not(check_tf_version()) source('helpers.R') stats_list <- list(randn(5), randn(25)) diff --git a/tests/testthat/test_greta_array_class.R b/tests/testthat/test_greta_array_class.R index 4b2fb06b..db8fe4b1 100644 --- a/tests/testthat/test_greta_array_class.R +++ b/tests/testthat/test_greta_array_class.R @@ -81,6 +81,7 @@ test_that('length and dim work', { test_that('head and tail work', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(10, 1) @@ -98,5 +99,26 @@ test_that('head and tail work', { }) +test_that('as.matrix works', { + + source('helpers.R') + + # data + d <- greta_array(0:1, dim = c(3, 3)) + d_mat <- as.matrix(d) + expect_true(inherits(d_mat, 'matrix')) + + # variable + v = normal(0, 1, dim = 2) + v_mat <- as.matrix(v) + expect_true(inherits(v_mat, 'matrix')) + + # operation + o <- v[1] + o_mat <- as.matrix(o) + expect_true(inherits(o_mat, 'matrix')) + +}) + diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R new file mode 100644 index 00000000..84808388 --- /dev/null +++ b/tests/testthat/test_inference.R @@ -0,0 +1,149 @@ +context('inference methods') + +test_that('opt converges', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(5, 2, 0.1) + z = variable(dim = 5) + distribution(x) = normal(z, 0.1) + + m <- model(z) + o <- opt(m) + + # should have converged + expect_equal(o$convergence, 0) + + # should be fewer than 100 iterations + expect_lte(o$iterations, 100) + + # should be close to the truth + expect_true(all(abs(x - o$par) < 1e-3)) + +}) + + +test_that('opt accepts initial values', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(5, 2, 0.1) + z = variable(dim = 5) + distribution(x) = normal(z, 0.1) + + m <- model(z) + o <- opt(m, initial_values = rnorm(5)) + + # should have converged + expect_equal(o$convergence, 0) + + # should be fewer than 100 iterations + expect_lte(o$iterations, 100) + + # should be close to the truth + expect_true(all(abs(x - o$par) < 1e-3)) + +}) + +test_that('rejected mcmc proposals', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # set up for numerical rejection of initial location + x <- rnorm(10000, 1e6, 1) + z = normal(-1e6, 1e-6) + distribution(x) = normal(z, 1e6) + m <- model(z) + + with_mock( + `greta:::create_progress_bar` = mock_create_progress_bar, + m <- model(z), + out <- capture_output(mcmc(m, n_samples = 10, warmup = 0)), + expect_match(out, '100% bad') + ) + + # bad initial values + expect_error(mcmc(m, n_samples = 1, warmup = 0, initial_values = 1e20), + 'could not be evaluated at these initial values') + + # really bad proposals + x <- rnorm(100000, 1e12, 1) + z = normal(-1e12, 1e-12) + distribution(x) = normal(z, 1e-12) + m <- model(z) + expect_error(mcmc(m, n_samples = 1, warmup = 0), + 'Could not find reasonable starting values after 10 attempts') + +}) + +test_that('mcmc works with verbosity and warmup', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + x <- rnorm(10) + z = normal(0, 1) + distribution(x) = normal(z, 1) + m <- model(z) + mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) + +}) + +test_that('progress 
bar gives a range of messages', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # 10/1010 should be <1% + with_mock( + `greta:::create_progress_bar` = mock_create_progress_bar, + `greta:::mcmc` = mock_mcmc, + out <- capture_output(mcmc(1010)), + expect_match(out, '<1% bad') + ) + + # 10/500 should be 2% + with_mock( + `greta:::create_progress_bar` = mock_create_progress_bar, + `greta:::mcmc` = mock_mcmc, + out <- capture_output(mcmc(500)), + expect_match(out, '2% bad') + ) + + # 10/10 should be 100% + with_mock( + `greta:::create_progress_bar` = mock_create_progress_bar, + `greta:::mcmc` = mock_mcmc, + out <- capture_output(mcmc(10)), + expect_match(out, '100% bad') + ) + +}) + +test_that('stashed_samples works', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + # set up model + a <- normal(0, 1) + m <- model(a) + + draws <- mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE) + + # with a completed sample, this should be NULL + ans <- stashed_samples() + expect_null(ans) + + # mock up a stash + stash <- greta:::greta_stash + assign('trace_stash', as.matrix(rnorm(17)), envir = stash) + + # should convert to an mcmc.list + ans <- stashed_samples() + expect_s3_class(ans, 'mcmc.list') + +}) diff --git a/tests/testthat/test_misc.R b/tests/testthat/test_misc.R index 18ec2e66..512c2bab 100644 --- a/tests/testthat/test_misc.R +++ b/tests/testthat/test_misc.R @@ -2,27 +2,35 @@ context('miscellaneous methods') test_that('check_tf_version works', { + skip_if_not(check_tf_version()) + # record the true version and forge an old version true_version <- tf$`__version__` tf$`__version__` <- "0.9.0" # expected text - expected_message <- - paste0("\n\n greta requires TensorFlow version 1.0.0 or higher, but you ", - "have version 0.9.0\n You can write models, but not sample from ", - "them.\n See https://www.tensorflow.org/install for installation ", - "instructions.\n\n") + expected_message <- "you have version 0.9.0" - expect_error(greta:::check_tf_version('error'), + expect_error(check_tf_version('error'), expected_message) - expect_warning(greta:::check_tf_version('warn'), + expect_warning(check_tf_version('warn'), expected_message) - expect_message(greta:::check_tf_version('message'), + expect_message(check_tf_version('message'), expected_message) # reset the true version tf$`__version__` <- true_version + # forge a missing installation + expected_message <- "isn't installed" + + with_mock( + `reticulate::py_module_available` = function(x) {FALSE}, + expect_error(check_tf_version('error'), expected_message), + expect_warning(check_tf_version('warn'), expected_message), + expect_message(check_tf_version('message'), expected_message) + ) + }) test_that('.onLoad runs', { @@ -33,6 +41,8 @@ test_that('tensorflow coercion works', { + skip_if_not(check_tf_version()) + float <- greta:::tf_as_float(1) integer <- greta:::tf_as_integer(1) logical <- greta:::tf_as_logical(1) @@ -62,6 +72,8 @@ test_that('all_greta_arrays works', { test_that('greta_model objects print', { + skip_if_not(check_tf_version()) + m <- model(normal(0, 1)) message <- capture.output(print(m)) expect_equal(message, 'greta model') @@ -70,6 +82,7 @@ test_that('define and mcmc error informatively', { + skip_if_not(check_tf_version()) source('helpers.R') x <- as_data(randn(10)) @@ -138,39 +151,9 @@ test_that('check_dims errors informatively', { }) -test_that('rejected mcmc proposals', { - - source('helpers.R') - - # set up for numerical rejection 
of initial location - x <- rnorm(10000, 1e6, 1) - z = normal(-1e6, 1e-6) - distribution(x) = normal(z, 1e6) - m <- model(z) - - with_mock( - `greta:::create_progress_bar` = mock_create_progress_bar, - m <- model(z), - out <- capture_output(mcmc(m, n_samples = 1, warmup = 0)), - expect_match(out, '100% bad') - ) - - # bad initial values - expect_error(mcmc(m, n_samples = 1, warmup = 0, initial_values = 1e20), - 'could not be evaluated at these initial values') - - # reallybad proposals - x <- rnorm(100000, 1e12, 1) - z = normal(-1e12, 1e-12) - distribution(x) = normal(z, 1e-12) - m <- model(z) - expect_error(mcmc(m, n_samples = 1, warmup = 0), - 'Could not find reasonable starting values after 10 attempts') - -}) - test_that('disjoint graphs are checked', { + skip_if_not(check_tf_version()) source('helpers.R') # if the target nodes aren't related, they sould be checked separately @@ -195,6 +178,7 @@ test_that('disjoint graphs are checked', { test_that("plotting models doesn't error", { + skip_if_not(check_tf_version()) source('helpers.R') a = uniform(0, 1) @@ -205,9 +189,9 @@ test_that("plotting models doesn't error", { }) - test_that("structures work correctly", { + skip_if_not(check_tf_version()) source('helpers.R') a <- ones(2, 2) @@ -220,67 +204,22 @@ test_that("structures work correctly", { }) -test_that('mcmc works with verbosity and warmup', { - - x <- rnorm(10) - z = normal(0, 1) - distribution(x) = normal(z, 1) - m <- model(z) - mcmc(m, n_samples = 50, warmup = 50, verbose = TRUE) - -}) - -test_that('progress bar gives a range of messages', { - - source('helpers.R') - - # 1/101 should be <1% - with_mock( - `greta:::create_progress_bar` = mock_create_progress_bar, - `greta:::mcmc` = mock_mcmc, - out <- capture_output(mcmc(101)), - expect_match(out, '<1% bad') - ) - - # 1/50 should be 50% - with_mock( - `greta:::create_progress_bar` = mock_create_progress_bar, - `greta:::mcmc` = mock_mcmc, - out <- capture_output(mcmc(50)), - expect_match(out, '2% bad') - ) - - # 1/1 should be 100% - with_mock( - `greta:::create_progress_bar` = mock_create_progress_bar, - `greta:::mcmc` = mock_mcmc, - out <- capture_output(mcmc(1)), - expect_match(out, '100% bad') - ) - -}) - - -test_that('stashed_samples works', { +test_that("cleanly() handles TF errors nicely", { source('helpers.R') - # set up model - a <- normal(0, 1) - m <- model(a) - - draws <- mcmc(m, warmup = 10, n_samples = 10, verbose = FALSE) + inversion_stop <- function () + stop ("this non-invertible thing is not invertible") - # with a completed sample, this should be NULL - ans <- stashed_samples() - expect_null(ans) + cholesky_stop <- function () + stop ("Cholesky decomposition was not successful") - # mock up a stash - stash <- greta:::greta_stash - assign('trace_stash', as.matrix(rnorm(17)), envir = stash) + other_stop <- function () + stop ("Fetchez la vache!") - # should convert to an mcmc.list - ans <- stashed_samples() - expect_s3_class(ans, 'mcmc.list') + expect_equal(cleanly(inversion_stop()), NA) + expect_equal(cleanly(cholesky_stop()), NA) + expect_error(cleanly(other_stop()), + "greta hit a tensorflow error:") }) diff --git a/tests/testthat/test_operators.R b/tests/testthat/test_operators.R index 1388ec92..bbfbe200 100644 --- a/tests/testthat/test_operators.R +++ b/tests/testthat/test_operators.R @@ -2,6 +2,7 @@ context('operators') test_that('arithmetic operators work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(25, 4) @@ -21,6 +22,7 @@ test_that('arithmetic operators work as expected', { 
test_that('logical operators work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(25, 4) > 0 @@ -36,6 +38,7 @@ test_that('relational operators work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(25, 4) @@ -52,6 +55,7 @@ test_that('random strings of operators work as expected', { + skip_if_not(check_tf_version()) source('helpers.R') for (i in 1:10) { diff --git a/tests/testthat/test_syntax.R b/tests/testthat/test_syntax.R index 84083656..6e2ef77c 100644 --- a/tests/testthat/test_syntax.R +++ b/tests/testthat/test_syntax.R @@ -2,42 +2,18 @@ context('syntax') test_that('`distribution<-` works in models', { + skip_if_not(check_tf_version()) source('helpers.R') # with a distribution parameter y <- as_data(randn(5)) expect_equal(node_type(y$node), 'data') - y_op <- y * 1 - expect_equal(node_type(y_op$node), 'operation') # data mu <- normal(0, 1) distribution(y) = normal(mu, 2) sample_distribution(mu) - # operation - mu <- normal(0, 1) - distribution(y_op) = normal(mu, 1) - sample_distribution(mu) - - # with a variable - y <- as_data(randn(5)) - expect_equal(node_type(y$node), 'data') - y_op <- y * 1 - expect_equal(node_type(y_op$node), 'operation') - - # data - mu <- variable() - distribution(y) = normal(mu, 2) - sample_distribution(mu) - - # op - mu <- variable() - distribution(y_op) = normal(mu, 1) - sample_distribution(mu) - - # test truncation - }) test_that('distribution() works', { @@ -45,30 +21,18 @@ source('helpers.R') a = normal(0, 1) - b = variable() - c = as_data(randn(5)) - d = c * 1 + x = as_data(randn(5)) # when run on a distribution, should just return the same greta array expect_identical(distribution(a), a) # when run on something without a distribution, should return NULL - expect_null(distribution(b)) - expect_null(distribution(c)) - expect_null(distribution(d)) + expect_null(distribution(x)) # once assigned, should return the original distribution a2 = normal(0, 1) - distribution(b) = a2 - expect_equal(distribution(b), b) - - a2 = normal(0, 1) - distribution(c) = a2 - expect_equal(distribution(c), c) - - a3 = normal(0, 1) - distribution(d) = a3 - expect_equal(distribution(d), d) + distribution(x) = a2 + expect_equal(distribution(x), x) }) @@ -108,14 +72,20 @@ expect_error({distribution(y2) = d}, 'right hand side has already been assigned fixed values') - # unsupported truncation - z = variable(lower = 0) - expect_error({distribution(z) = bernoulli(0.4)}, - 'distribution cannot be truncated') - - # shouldn't error with -Inf, Inf + # assignment to a variable z = variable() - distribution(z) = student(5, 0, 1) + expect_error({distribution(z) = normal(0, 1)}, + 'distributions can only be assigned to data greta arrays') + + # assignment to an op + z2 = z ^ 2 + expect_error({distribution(z2) = normal(0, 1)}, + 'distributions can only be assigned to data greta arrays') + + # assignment to another distribution + u = uniform(0, 1) + expect_error({distribution(u) = normal(0, 1)}, + 'distributions can only be assigned to data greta arrays') }) diff --git a/tests/testthat/test_transformations.R b/tests/testthat/test_transformations.R index 18b2564c..14026e88 100644 --- a/tests/testthat/test_transformations.R +++ b/tests/testthat/test_transformations.R @@ -2,6 +2,7 @@ context('transformations') test_that('transformations work as 
expected', { + skip_if_not(check_tf_version()) source('helpers.R') a <- randn(25, 4) diff --git a/tests/testthat/test_truncated.R b/tests/testthat/test_truncated.R index b90fdaba..cdba30ca 100644 --- a/tests/testthat/test_truncated.R +++ b/tests/testthat/test_truncated.R @@ -2,6 +2,7 @@ context('truncated distributions') test_that('truncated normal has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated normal @@ -40,6 +41,7 @@ test_that('truncated normal has correct densities', { test_that('truncated lognormal has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -47,7 +49,7 @@ test_that('truncated lognormal has correct densities', { 'lnorm', parameters = list(meanlog = -1, sdlog = 2.4), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -63,7 +65,7 @@ test_that('truncated lognormal has correct densities', { 'lnorm', parameters = list(meanlog = -1, sdlog = 2.4), - truncation = c(-Inf, 2)) + truncation = c(0, 2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -76,84 +78,9 @@ test_that('truncated lognormal has correct densities', { }) -test_that('truncated uniform has correct densities', { - - source('helpers.R') - - # non-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(-Inf, Inf)) - expect_true(all(difference < 1e-4)) - - # positive-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(0, Inf)) - expect_true(all(difference < 1e-4)) - - # negative-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(-Inf, 4)) - expect_true(all(difference < 1e-4)) - - # fully-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(1, 2)) - expect_true(all(difference < 1e-4)) - -}) - -test_that('truncated uniform has correct densities', { - - source('helpers.R') - - # non-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(-Inf, Inf)) - expect_true(all(difference < 1e-4)) - - # positive-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(0, Inf)) - expect_true(all(difference < 1e-4)) - - # negative-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(-Inf, 4)) - expect_true(all(difference < 1e-4)) - - # fully-truncated - difference <- compare_truncated_distribution(uniform, - 'unif', - parameters = list(min = -1, - max = 5), - truncation = c(1, 2)) - expect_true(all(difference < 1e-4)) - -}) - test_that('truncated gamma has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -161,7 +88,7 @@ test_that('truncated gamma has correct densities', { 'gamma', parameters = list(shape = 2, rate = 2), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -177,7 +104,7 @@ test_that('truncated gamma has correct densities', { 'gamma', parameters = list(shape = 2, rate = 2), - truncation = c(-Inf, 2)) + truncation = c(0, 2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -192,6 +119,7 @@ 
test_that('truncated gamma has correct densities', { test_that('truncated inverse gamma has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -199,7 +127,7 @@ test_that('truncated inverse gamma has correct densities', { 'invgamma', parameters = list(alpha = 2, beta = 1.2), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -215,7 +143,7 @@ test_that('truncated inverse gamma has correct densities', { 'invgamma', parameters = list(alpha = 2, beta = 1.2), - truncation = c(-Inf, 2)) + truncation = c(0, 2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -230,6 +158,7 @@ test_that('truncated inverse gamma has correct densities', { test_that('truncated weibull has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -237,7 +166,7 @@ test_that('truncated weibull has correct densities', { 'weibull', parameters = list(shape = 2, scale = 1.2), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -253,7 +182,7 @@ test_that('truncated weibull has correct densities', { 'weibull', parameters = list(shape = 2, scale = 1.2), - truncation = c(-Inf, 2)) + truncation = c(0, 2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -268,13 +197,14 @@ test_that('truncated weibull has correct densities', { test_that('truncated exponential has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated difference <- compare_truncated_distribution(exponential, 'exp', parameters = list(rate = 2), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -288,7 +218,7 @@ test_that('truncated exponential has correct densities', { difference <- compare_truncated_distribution(exponential, 'exp', parameters = list(rate = 2), - truncation = c(-Inf, 2)) + truncation = c(0, 2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -302,13 +232,14 @@ test_that('truncated exponential has correct densities', { test_that('truncated pareto has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated difference <- compare_truncated_distribution(preto, 'preto', parameters = list(a_ = 1.9, b_ = 4.3), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -322,7 +253,7 @@ test_that('truncated pareto has correct densities', { difference <- compare_truncated_distribution(preto, 'preto', parameters = list(a_ = 1.9, b_ = 4.3), - truncation = c(-Inf, 21.3)) + truncation = c(0, 21.3)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -336,6 +267,7 @@ test_that('truncated pareto has correct densities', { test_that('truncated student has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -379,6 +311,7 @@ test_that('truncated student has correct densities', { test_that('truncated laplace has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -417,6 +350,7 @@ test_that('truncated laplace has correct densities', { test_that('truncated beta has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -424,7 +358,7 @@ test_that('truncated beta has correct densities', { 'beta', parameters = list(shape1 = 2.1, shape2 = 2.3), - truncation = c(-Inf, Inf)) + truncation = c(0, 1)) expect_true(all(difference < 
1e-4)) # positive-truncated @@ -432,7 +366,7 @@ test_that('truncated beta has correct densities', { 'beta', parameters = list(shape1 = 2.1, shape2 = 2.3), - truncation = c(0.1, Inf)) + truncation = c(0.1, 1)) expect_true(all(difference < 1e-4)) # negative-truncated @@ -440,7 +374,7 @@ test_that('truncated beta has correct densities', { 'beta', parameters = list(shape1 = 2.1, shape2 = 2.3), - truncation = c(-Inf, 0.2)) + truncation = c(0, 0.2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -455,6 +389,7 @@ test_that('truncated beta has correct densities', { test_that('truncated cauchy has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -493,6 +428,7 @@ test_that('truncated cauchy has correct densities', { test_that('truncated logistic has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -531,6 +467,7 @@ test_that('truncated logistic has correct densities', { test_that('truncated f has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated @@ -538,7 +475,7 @@ test_that('truncated f has correct densities', { 'f', parameters = list(df1 = 1.3, df2 = 4.7), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -554,7 +491,7 @@ test_that('truncated f has correct densities', { 'f', parameters = list(df1 = 1.3, df2 = 4.7), - truncation = c(-Inf, 0.2)) + truncation = c(0, 0.2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -569,13 +506,14 @@ test_that('truncated f has correct densities', { test_that('truncated chi squared has correct densities', { + skip_if_not(check_tf_version()) source('helpers.R') # non-truncated difference <- compare_truncated_distribution(chi_squared, 'chisq', parameters = list(df = 9.3), - truncation = c(-Inf, Inf)) + truncation = c(0, Inf)) expect_true(all(difference < 1e-4)) # positive-truncated @@ -589,7 +527,7 @@ test_that('truncated chi squared has correct densities', { difference <- compare_truncated_distribution(chi_squared, 'chisq', parameters = list(df = 9.3), - truncation = c(-Inf, 0.2)) + truncation = c(0, 0.2)) expect_true(all(difference < 1e-4)) # fully-truncated @@ -600,3 +538,17 @@ test_that('truncated chi squared has correct densities', { expect_true(all(difference < 1e-4)) }) + + +test_that('bad truncations error', { + + skip_if_not(check_tf_version()) + source('helpers.R') + + expect_error(lognormal(0, 1, truncation = c(-1, Inf)), + "lower bound must be 0 or higher") + + expect_error(beta(1, 1, truncation = c(-1, 2)), + "lower and upper bounds must be between 0 and 1") + +}) diff --git a/vignettes/build_vignettes.R b/vignettes/build_vignettes.R new file mode 100644 index 00000000..c357236c --- /dev/null +++ b/vignettes/build_vignettes.R @@ -0,0 +1,3 @@ +rmarkdown::render('vignettes/get_started.Rmd', output_format = 'all') +rmarkdown::render('vignettes/example_models.Rmd', output_format = 'all') +rmarkdown::render('vignettes/technical_details.Rmd', output_format = 'all') diff --git a/vignettes/example_models.Rmd b/vignettes/example_models.Rmd index 809baba1..537f9fd3 100644 --- a/vignettes/example_models.Rmd +++ b/vignettes/example_models.Rmd @@ -16,15 +16,17 @@ vignette: > --- ```{r setup, include = FALSE} -knitr::opts_chunk$set(comment = NA, cache = TRUE) +knitr::opts_chunk$set(comment = NA, + eval = greta:::check_tf_version("message"), + cache = TRUE) library (greta) ``` ## BUGS models -The [OpenBUGS website](http://www.openbugs.net/w/Examples) 
provides a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The [Stan wiki](https://github.com/stan-dev/example-models/wiki/BUGS-Examples-Sorted-Alphabetically) provides Stan implementations of these models. +The BUGS project provide a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The [Stan wiki](https://github.com/stan-dev/example-models/wiki/BUGS-Examples-Sorted-Alphabetically) provides Stan implementations of these models. -The following sections provide greta implementations of some of these example models, alongside the BUGS code from [openbugs.net](http://www.openbugs.net) and Stan code and R version of the data from the [Stan example models wiki](https://github.com/stan-dev/example-models/wiki). +The following sections provide greta implementations of some of these example models, alongside the BUGS code from [WinBUGS examples volume 2](https://www.mrc-bsu.cam.ac.uk/wp-content/uploads/WinBUGS_Vol2.pdf) (pdf) and Stan code and an R version of the data from the [Stan example models wiki](https://github.com/stan-dev/example-models/wiki).
diff --git a/inst/doc/example_models.html b/vignettes/example_models.html similarity index 99% rename from inst/doc/example_models.html rename to vignettes/example_models.html index d387fd13..31f5ddb6 100644 --- a/inst/doc/example_models.html +++ b/vignettes/example_models.html @@ -227,13 +227,13 @@

Example models

BUGS models

-

The OpenBUGS website provides a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The Stan wiki provides Stan implementations of these models.

-

The following sections provide greta implementations of some of these example models, alongside the BUGS code from openbugs.net and Stan code and R version of the data from the Stan example models wiki.

+

The BUGS project provide a number of example models written in the BUGS modelling language. These models will run in WinBUGS and OpenBUGS, and likely also in JAGS. The Stan wiki provides Stan implementations of these models.

+

The following sections provide greta implementations of some of these example models, alongside the BUGS code from WinBUGS examples volume 2 (pdf) and Stan code and an R version of the data from the Stan example models wiki.


Air

Air analyses reported respiratory illness versus exposure to nitrogen dioxide in 103 children. The parameters alpha, beta and sigma2 are known in advance, and the data are grouped into three categories.

-

See the BUGS page for details

+

See WinBUGS examples volume 2 (pdf) for details.

data

@@ -307,7 +307,7 @@

Stan code

Beetles

Beetles considers dose-response data from an experiment applying carbon disulphide to 8 beetles. The original example compares three different link functions; the logit, probit and complementary log-log. Here, only the code for the logit link is shown. You can implement the other two link functions in greta by changing ilogit to iprobit or icloglog.

-

See the BUGS page for details

+

See WinBUGS examples volume 2 (pdf) for details.

data

diff --git a/vignettes/examples/air.Rmd b/vignettes/examples/air.Rmd index f860d3ec..40e80446 100644 --- a/vignettes/examples/air.Rmd +++ b/vignettes/examples/air.Rmd @@ -2,7 +2,7 @@ *Air* analyses reported respiratory illness versus exposure to nitrogen dioxide in 103 children. The parameters `alpha`, `beta` and `sigma2` are known in advance, and the data are grouped into three categories. -[See the BUGS page for details](http://www.openbugs.net/Examples/Air.html) +See [WinBUGS examples volume 2](https://www.mrc-bsu.cam.ac.uk/wp-content/uploads/WinBUGS_Vol2.pdf) (pdf) for details. #### data
diff --git a/vignettes/examples/beetles.Rmd b/vignettes/examples/beetles.Rmd index fa9dbbed..be375481 100644 --- a/vignettes/examples/beetles.Rmd +++ b/vignettes/examples/beetles.Rmd @@ -2,7 +2,8 @@ *Beetles* considers dose-response data from an experiment applying carbon disulphide to 8 beetles. The original example compares three different link functions; the logit, probit and complementary log-log. Here, only the code for the logit link is shown. You can implement the other two link functions in greta by changing `ilogit` to `iprobit` or `icloglog`. -[See the BUGS page for details](http://www.openbugs.net/Examples/Beetles.html) +See [WinBUGS examples volume 2](https://www.mrc-bsu.cam.ac.uk/wp-content/uploads/WinBUGS_Vol2.pdf) (pdf) for details. + #### data
diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index f3da0a8d..6a1c56f5 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -1,5 +1,5 @@ --- -title: "Getting started with greta" +title: "Get started with greta" output: html_document: css: greta.css @@ -11,14 +11,14 @@ output: highlight: default vignette: > %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{Getting started with greta} + %\VignetteIndexEntry{Get started with greta} %\usepackage[utf8]{inputenc} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, + eval = greta:::check_tf_version("message"), cache = TRUE, comment = NA, progress = FALSE) @@ -34,6 +34,51 @@ file.copy('../man/figures/plotlegend.png', 'figures/plotlegend.png') ``` +
+ +## Installation + +You can install the stable version of greta from CRAN: + +```{r install_greta, eval = FALSE} +install.packages("greta") +``` + +Alternatively, you can install the latest release, or the development version, from GitHub using the devtools package: + +```{r install_greta_github, eval = FALSE} +devtools::install_github("goldingn/greta") # latest release +devtools::install_github("goldingn/greta@dev") # development version +``` + +```{r load} +library(greta) +``` + +
+ +### TensorFlow + +Before you can fit models with greta, you will also need to have a working installation of Google's [TensorFlow](https://www.tensorflow.org/) python package (version 1.0.0 or higher). greta exports `install_tensorflow()` from the `tensorflow` R package, which will attempt to install the latest version of TensorFlow from within your R session. + +```{r install_tensorflow, eval = FALSE} +install_tensorflow() +``` + +You can use this function to install different versions of TensorFlow, including versions with GPU acceleration. If you're having trouble with this step, [this guide](https://tensorflow.rstudio.com/installation.html) may help. + +
+ +### DiagrammeR + +greta's [plotting functionality](#plotting) depends on the [DiagrammeR package](http://rich-iannone.github.io/DiagrammeR/). Because Diagrammer automatically installs a lot of other packages it can take a long time to install, so DiagrammeR isn't installed automatically with greta. If you want to plot models, you can install DiagrammeR from CRAN. + +```{r install_diagrammer, eval = FALSE} +install.packages('DiagrammeR') +``` + + +
## How greta works @@ -65,13 +110,13 @@ greta also lets us declare that a greta array follows a probability distribution The rest of this vignette walks through an example of fitting a model using greta. If you'd like to see examples of some common models fitted in greta and with equivalent BUGS and Stan code, take a look at [Example models](example_models.html). -If you'd like more technical details about how greta works under the hood, check out [How does this work?](how_does_this_work.html). +If you'd like more technical details about how greta works under the hood, check out [Technical details](technical_details.html).
## Building a model -The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We'll be stepping through a model for linear regression between two of the variables in the [`iris`](https://en.wikipedia.org/wiki/Iris_flower_data_set) dataset, which is included with base R. The model is *semi-Bayesian*, just to illustrate how to do both Bayesian (specifying priors over variables) and frequentist (no priors) inference in greta. In a real analysis it might make more sense just to pick one of those two approaches. +The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We'll be stepping through a model for linear regression between two of the variables in the [`iris`](https://en.wikipedia.org/wiki/Iris_flower_data_set) dataset, which is included with base R. The model is *Bayesian* (we specify priors over the variables), though it is also possible to do frequentist (no priors) inference in greta, using `variable()` instead of a probability distribution to create random variables. Here's the whole script to specify and fit the model: @@ -83,9 +128,9 @@ x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) # variables and priors -int = variable() +int = normal(0, 1) coef = normal(0, 3) -sd = lognormal(0, 3) +sd = student(3, 0, 1, truncation = c(0, Inf)) # operations mean <- int + coef * x @@ -100,7 +145,7 @@ m <- model(int, coef, sd) plot(m) # sampling -draws <- mcmc(model, n_samples = 1000) +draws <- mcmc(m, n_samples = 1000) ``` Throughout this example, and the rest of greta's documentation, `<-` is used when assigning *deterministic* values, and `=` when assigning variable or *stochastic* values. This makes the model code a little bit clearer to read, since `=` corresponds to the `~` symbol used to define stochastic relationships in BUGS and Stan. However, it doesn't actually make any difference to the model which assignment operator you use. @@ -163,70 +208,48 @@ greta_array(0:1, dim = c(3, 3)) The second section of the script creates three greta arrays to represent the parameters in our model: ```{r variables} -int = variable() +int = normal(0, 1) coef = normal(0, 3) -sd = lognormal(0, 3) +sd = student(3, 0, 1, truncation = c(0, Inf)) ``` -#### variables +Each of these is a variable greta array, and each is assumed *a priori* (before fitting to data) to follow a different probability distribution. In other wirds, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with proability distributions, it will be clearer to first demonstrate the alternative; variables without probabiltiy distributions. -The first line uses the `variable()` function to create a greta array `int` representing a variable - an unknown value that we will learn when we fit the model to data. `variable()` has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of `lower = -Inf, upper = Inf` meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar). +#### variables without probability distributions -```{r int} -int -``` +If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the `variable()` function. E.g. 
in a frequentist version of the model we could create `int` with: -If we had instead wanted a 2x3 array of positive variables we could have created it like this: -```{r positive_variable} -variable(lower = 0, dim = c(2, 3)) +```{r int_variable} +(int = variable()) ``` -#### variables with probability distributions - -Variable greta arrays like `int` that are created with `variable()`, are not associated with probability distributions. I.e. we haven't made any assumptions or statements about what distribution we expect their values to follow. That's normally the case for a frequentist analysis, but for a Bayesian model we need to specify *prior distributions* over our parameters, stating the distribution we expect them to be drawn from (*before* considering the data we're using in the model). +`variable()` has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of `lower = -Inf, upper = Inf` meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar). -In our example script, we simultaneously create the variables `coef` and `sd`, and state the prior distributions for them, using two of greta's probability distribution functions. Each of these distributions takes as arguments the distribution's parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions argument at its default value to create scalar greta arrays: +We can create a variable constrained between two values by specifying `lower` and `upper`. So we could have created the positive variable `sd` (the standard deviation of the likelihood) with: -```{r priors} -coef -sd +```{r positive_variable} +(sd = variable(lower = 0)) ``` -greta implements a number of common probability distributions, and more are being added. You can see a list of the currently available distributions in the `?greta-distributions` helpfile. - -#### creating priors with `distribution()` - -greta's probability distributions have two roles: to create variables following a certain distribution; and, when used in combination with the `distribution()` function, to state the distribution of an existing greta array. - -In fact, creating a new variable using a probability distribution is the same as first creating a variable (with the appropriate constraint) and then stating that it follows a distribution. So our code in the script for creating `coef` is equivalent to this: -```{r normal_prior2, eval = FALSE} -coef = variable() -distribution(coef) = normal(0, 3) +If we had instead wanted a 2x3 array of positive variables we could have created it like this: +```{r matrix_variable} +variable(lower = 0, dim = c(2, 3)) ``` -The former is somewhat similar to the BUGS approach, where the variable and its distribution are defined at the same time. The latter approach (using `distribution()`) -is similar to Stan code in that the variable is defined first, and the distribution statement added later. - -As well as taking more lines of code, the second approach relies on us to get the constraints of the variable right. 
Because `sd` must be positive to be a valid standard deviation parameter, the `distribution()` approach for that parameter would look like this: +#### variables with probability distributions -```{r sd_distribution, eval = FALSE} -sd = variable(lower = 0) -distribution(sd) = lognormal(0, 3) -``` +In our example script, when we created the variables `int`, `coef` and `sd`, we simultaneously stated the prior distributions for them using some of greta's probability distribution functions. You can see a list of the currently available distributions in the [`?greta::distributions`](https://goldingn.github.io/greta/distributions.html) helpfile. Each of these distribution functions takes as arguments the distribution's parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions arguments at their default value to create scalar greta arrays. -#### Truncated distributions +Both `int` and `coef` were given zero-mean normal distributions, which are a common choice of prior for unconstrained variables in Bayesian analyses. For the strictly positive parameter `sd`, we chose a slightly unconvential option, a positive-truncated (non-standard) student's t distribution, which we create using greta's built-in support for truncated distributions. -In general, it's easier to define parameters with priors just by using the probability distributions, like we did in the example script. However `distribution()` has two other uses: it can be used to define a [likelihood](#Likelihood) over observed data; and it can be used to create variables with truncated distributions. +#### variables with truncated distributions -If we define a variable with constraints (using `lower` and `upper`), we can then assign it a probability distribution, and greta will instead use the truncated probability distribution, usng the specified constraints. For example, to define a variable `z` that has a standard normal distribution truncated between -1 and 2, we can do: +Some of greta's probability distributions (those that are continous and univariate) can be specified as truncated distributions. By modifying the `truncation` argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do: -```{r truncated} -z = variable(lower = -1, upper = 2) -distribution(z) = normal(0, 1) -z +```{r truncated1} +(z = normal(0, 1, truncation = c(-1, 1))) ``` - -Truncation like this isn't yet implemented for all probability distributions, but greta will let you know if it can't truncate the distribution. +greta will account for this truncation when calculating the density of this distribution; rescaling it to be a valid probability distribution. We can only truncate to within the support of the distribution; e.g. greta will throw an error if we try to truncate a lognormal distribution (which must be positive) to have a lower bound of -1.
@@ -245,7 +268,7 @@ dim(mean) head(mean) ``` -greta arrays can be manipulated using R's standard arithmetic, logical and relational operators; including `+`, `*` and many others. The `` ?`greta-operators` ``) helpfile lists the operators that are implemeneted for greta arrays. You can also use a lot of common R functions for numeric data, such as `sum()`, `log()` and others. the available functions are listed in the `` ?`greta-functions` `` helpfile. All of these mathematical manipulations of greta arrays produce 'operation' greta arrays. +greta arrays can be manipulated using R's standard arithmetic, logical and relational operators; including `+`, `*` and many others. The [`?greta::operators`](https://goldingn.github.io/greta/operators.html) helpfile lists the operators that are implemeneted for greta arrays. You can also use a lot of common R functions for numeric data, such as `sum()`, `log()` and others. the available functions are listed in the [`?greta::functions`](https://goldingn.github.io/greta/functions.html) helpfile. All of these mathematical manipulations of greta arrays produce 'operation' greta arrays. #### Extract and replace @@ -293,12 +316,14 @@ atanh(z_greta) ### Likelihood -With the predicted sepal length `mean`, we can now define a likelihood for the observed sepal length data `y`: +So far, we have created greta arrays representing the variables in our model (with prior distributions) and created other greta arrays from them and some fixed, *independent* data. To perform statistical inference on this model, we also need to link it to some observed *dependent* data. By comparing the sepal lengths predicted under different parameter values with the observed sepal lengths, we can estimate the most plausible values of those parameters. We do that by defining a likelihood for the observed sepal length data `y`. + +By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we're trying to work out the parameters of that distribution. In greta we do that with the `distribution()` assignment function: ```{r likelihood} distribution(y) = normal(mean, sd) ``` -With the syntax `distribution() <- `, we are stating that the greta array on the left `` has the same distribution as the greta array on the right ``. In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters. +With the syntax `distribution() = `, we are stating that the data greta array on the left `` has the same distribution as the greta array on the right ``. In this case, we're temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays `mean` and `sd`), but then stating that values of that distribution have been observed (`y`). In this case both `` (`y`) and `` are column vectors with the same dimensions, so each element in `y` has a normal distribution with the corresponding parameters.
@@ -309,9 +334,9 @@ Now all of the greta arrays making up the model have been created, we need to co ```{r hidden_model, echo = FALSE} x <- as_data(iris$Petal.Length) y <- as_data(iris$Sepal.Length) -int = variable() +int = normal(0, 1) coef = normal(0, 3) -sd = lognormal(0, 3) +sd = student(3, 0, 1, truncation = c(0, Inf)) mean <- int + coef * x distribution(y) = normal(mean, sd) ``` @@ -319,7 +344,7 @@ distribution(y) = normal(mean, sd) m <- model(int, coef, sd) ``` -`()` returns a 'greta model' object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to `()` to flag them as the parameters we're interested in. When sampling from the model with `mcmc()` those will be the greta arrays for which samples will be returned. Alternatively, we can run `()` without passing any greta arrays, in which all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead. +`model()` returns a 'greta model' object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to `model()` to flag them as the parameters we're interested in. When sampling from the model with `mcmc()` those will be the greta arrays for which samples will be returned. Alternatively, we can run `model()` without passing any greta arrays, in which case all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead.
@@ -335,15 +360,15 @@ fname <- "figures/full_graph.png" DiagrammeR::export_graph(gr, file_name = fname, file_type = 'png', - width = 895 * 2, - height = 313 * 2) + width = 958 * 2, + height = 450 * 2) ```

The greta arrays in your workspace that are used in the model are all represented as nodes (shapes) with names. These are either data (squares; `x` and `y`), variables (large circles; `int`, `coef`, `sd`) or the results of operations (small circles; `mean`). The operations used to create the operation greta arrays are printed on the arrows from the arrays they were made from. There are also nodes for greta arrays that were *implicitly defined* in our model. The data nodes (squares) with numbers are the parameters used to define the prior distributions, and there's also an intermediate operation node (small circle), which was the result of multiplying `coef` and `x` (before adding `int` to create `mean`). -Here's a legend for the plot (it's in the `` ?`greta-model` `` helpfile too): +Here's a legend for the plot (it's in the [`?greta::model`](https://goldingn.github.io/greta/model.html) helpfile too):

@@ -376,6 +401,7 @@ gr <- plot(m_likelihood) idx <- which(gr$nodes_df$label == 'mean\n') gr$nodes_df$shape[idx] <- 'circle' gr$nodes_df$fillcolor[idx] <- 'lightgray' +gr$nodes_df$color[idx] <- 'lightgray' gr$nodes_df$width[idx] <- 0.2 gr$nodes_df$height[idx] <- 0.2 gr$nodes_df <- gr$nodes_df[c(3, 1, 2, 4), ] @@ -388,19 +414,11 @@ DiagrammeR::export_graph(gr, ```

-#### Installing DiagrammeR - -These plots are made possible by the [DiagrammeR R package](http://rich-iannone.github.io/DiagrammeR/). Installing greta doesn't automatically install DiagrammeR, because DiagrammeR auto-installs a number of other packages, such as igraph, that can be time consuming and tedious to install, and aren't necessary when running greta on high-performance computing systems. If we try to plot but don't already have DiagrammeR installed, greta will prompt us to do so. We can install DiagrammeR and its dependencies with: - -```{r install_diagrammer, eval = FALSE} -install.packages('DiagrammeR') -``` -
### Sampling -When defining the model, greta combines all of the distributions together to define the *joint density* of the model, a measure of how likely (or probable, if we're being Bayesian) are a particular candidate set of values for the variables in the model. +When defining the model, greta combines all of the distributions together to define the *joint density* of the model, a measure of how 'good' (or how probable if we're being Bayesian) are a particular candidate set of values for the variables in the model. Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. We do that using an Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we're interested in, using the `mcmc()` function: @@ -415,11 +433,18 @@ Here we're using 1000 steps of the static Hamiltonian Monte Carlo (HMC) algorith ```{r coda_summary} summary(draws) ``` -The MCMCvis package makes some nice plots of the MCMC chain and the parameter estimates -```{r mcmcvis, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} -library (MCMCvis) -MCMCtrace(draws) -MCMCplot(draws, xlim = c(-1, 5)) +The bayesplot package makes some nice plots of the MCMC chain and the parameter estimates + +```{r mcmcvis_public, eval = FALSE} +library (bayesplot) +mcmc_trace(draws) +mcmc_intervals(draws) +``` + +```{r mcmcvis, echo = FALSE, message = FALSE, out.width=c('400px', '400px'), fig.height=4, fig.width=5, fig.show='hold'} +library (bayesplot) +mcmc_trace(draws, facet_args = list(nrow = 3, ncol = 1)) +mcmc_intervals(draws) ``` If your model is taking a long time whilst in the sampling phase and you want to take a look at the samples. You can stop the sampler (e.g. using the stop button in RStudio) and then retrieve the samples drawn so far, by using `stashed_samples()`. Note that this won't return anything if you stop the model during the warmup phase (since those aren't valid posterior samples) or if the sampler completed successfully. @@ -428,7 +453,7 @@ If your model is taking a long time whilst in the sampling phase and you want to Static HMC is currently the only sampling algorithm available in greta. The sampler will automatically tune itself during the warmup phase, to make it as efficient as possible. If the chain looks like it's moving too slowly, or if you are getting a lot of messages about propsals being rejected, the first thing to try is increasing the length of the warmup period from its default of 100 interstions (via the `warmup` argument). If you're still getting a lot of rejected samples, it's often a good idea to manual set the initial values for the sampler (via the `initial_values` argument). This is often the case when you have lots of data; the more information there is, the more extreme the log probability, and the higher the risk of numerical problems. -A downside of HMC is that it can't be used to sample discrete variables (e.g. integers), so we can't specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it's in the likelihood. If we try to build such a model, greta will give us an error when we run `()` stage. Future versions of greta will implement samplers for discrete MCMC, as well as self-tuning versions of HMC. +A downside of HMC is that it can't be used to sample discrete variables (e.g. integers), so we can't specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it's in the likelihood. 
If we try to build such a model, greta will give us an error when we run `model()`. Future versions of greta will implement a wider range of MCMC samplers, including some for discrete variables.
diff --git a/inst/doc/get_started.html b/vignettes/get_started.html similarity index 76% rename from inst/doc/get_started.html rename to vignettes/get_started.html index 836eb4f2..799f1b5a 100644 --- a/inst/doc/get_started.html +++ b/vignettes/get_started.html @@ -11,7 +11,7 @@ -Getting started with greta +Get started with greta @@ -220,12 +220,35 @@ -

Getting started with greta

+

Get started with greta


+
+

Installation

+

You can install the stable version of greta from CRAN:

+
install.packages("greta")
+

Alternatively, you can install the latest release, or the development version, from GitHub using the devtools package:

+
devtools::install_github("goldingn/greta")  # latest release
+devtools::install_github("goldingn/greta@dev")  # development version
+
library(greta)
+
+
+

TensorFlow

+

Before you can fit models with greta, you will also need to have a working installation of Google’s TensorFlow python package (version 1.0.0 or higher). greta exports install_tensorflow() from the tensorflow R package, which will attempt to install the latest version of TensorFlow from within your R session.

+
install_tensorflow()
+

You can use this function to install different versions of TensorFlow, including versions with GPU acceleration. If you’re having trouble with this step, this guide may help.

+
+
+
+

DiagrammeR

+

greta’s plotting functionality depends on the DiagrammeR package. Because DiagrammeR automatically installs a lot of other packages, it can take a long time to install, so DiagrammeR isn’t installed automatically with greta. If you want to plot models, you can install DiagrammeR from CRAN.

+
install.packages('DiagrammeR')
+
+
+

How greta works

greta lets us build statistical models interactively in R, and then sample from them by MCMC. We build greta models with greta array objects, which behave much like R’s array, matrix and vector objects for numeric data. Like those numeric data objects, greta arrays can be manipulated with functions and mathematical operators to create new greta arrays.

@@ -269,7 +292,7 @@

How greta works

Building a model

-

The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We’ll be stepping through a model for linear regression between two of the variables in the iris dataset, which is included with base R. The model is semi-Bayesian, just to illustrate how to do both Bayesian (specifying priors over variables) and frequentist (no priors) inference in greta. In a real analysis it might make more sense just to pick one of those two approaches.

+

The rest of the vignette explains step-by-step how to build, visualise and fit a model with greta. We’ll be stepping through a model for linear regression between two of the variables in the iris dataset, which is included with base R. The model is Bayesian (we specify priors over the variables), though it is also possible to do frequentist (no priors) inference in greta, using variable() instead of a probability distribution to create random variables.

Here’s the whole script to specify and fit the model:

library(greta)
 
@@ -278,9 +301,9 @@ 

Building a model

y <- as_data(iris$Sepal.Length) # variables and priors -int = variable() +int = normal(0, 1) coef = normal(0, 3) -sd = lognormal(0, 3) +sd = student(3, 0, 1, truncation = c(0, Inf)) # operations mean <- int + coef * x @@ -295,7 +318,7 @@

Building a model

plot(m) # sampling -draws <- mcmc(model, n_samples = 1000)
+draws <- mcmc(m, n_samples = 1000)

Throughout this example, and the rest of greta’s documentation, <- is used when assigning deterministic values, and = when assigning variable or stochastic values. This makes the model code a little bit clearer to read, since = corresponds to the ~ symbol used to define stochastic relationships in BUGS and Stan. However, it doesn’t actually make any difference to the model which assignment operator you use.


@@ -367,13 +390,21 @@

data structures

Variables and priors

The second section of the script creates three greta arrays to represent the parameters in our model:

-
int = variable()
+
int = normal(0, 1)
 coef = normal(0, 3)
-sd = lognormal(0, 3)
-
-

variables

-

The first line uses the variable() function to create a greta array int representing a variable - an unknown value that we will learn when we fit the model to data. variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar).

-
int
+sd = student(3, 0, 1, truncation = c(0, Inf))
+

Each of these is a variable greta array, and each is assumed a priori (before fitting to data) to follow a different probability distribution. In other words, these are prior distributions over variables, which we need to specify to make this a full Bayesian analysis. Before going through how to specify variables with probability distributions, it will be clearer to first demonstrate the alternative: variables without probability distributions.

+
+

variables without probability distributions

+

If we were carrying out a frequentist analysis of this model, we could create variable greta arrays (values we want to learn) without probability distributions using the variable() function. E.g. in a frequentist version of the model we could create int with:

+
(int = variable())
+
greta array (variable)
+
+     [,1]
+[1,]  ?  
+

variable() has three arguments. The first two arguments determine the constraints on this parameter; we left them at their default setting of lower = -Inf, upper = Inf meaning the variables can take any value on the real number line. The third argument gives the dimensions of the greta array to return, in this case we left it at its default value of 1x1 (a scalar).

+

We can create a variable constrained between two values by specifying lower and upper. So we could have created the positive variable sd (the standard deviation of the likelihood) with:

+
(sd = variable(lower = 0))
greta array (variable)
 
      [,1]
@@ -388,43 +419,18 @@ 

variables

variables with probability distributions

-

Variable greta arrays like int that are created with variable(), are not associated with probability distributions. I.e. we haven’t made any assumptions or statements about what distribution we expect their values to follow. That’s normally the case for a frequentist analysis, but for a Bayesian model we need to specify prior distributions over our parameters, stating the distribution we expect them to be drawn from (before considering the data we’re using in the model).

-

In our example script, we simultaneously create the variables coef and sd, and state the prior distributions for them, using two of greta’s probability distribution functions. Each of these distributions takes as arguments the distribution’s parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions argument at its default value to create scalar greta arrays:

-
coef
-
greta array (variable following a normal distribution)
-
-     [,1]
-[1,]  ?  
-
sd
-
greta array (variable following a lognormal distribution)
-
-     [,1]
-[1,]  ?  
-

greta implements a number of common probability distributions, and more are being added. You can see a list of the currently available distributions in the ?greta-distributions helpfile.

-
-
-

creating priors with distribution()

-

greta’s probability distributions have two roles: to create variables following a certain distribution; and, when used in combination with the distribution() function, to state the distribution of an existing greta array.

-

In fact, creating a new variable using a probability distribution is the same as first creating a variable (with the appropriate constraint) and then stating that it follows a distribution. So our code in the script for creating coef is equivalent to this:

-
coef = variable()
-distribution(coef) = normal(0, 3)
-

The former is somewhat similar to the BUGS approach, where the variable and its distribution are defined at the same time. The latter approach (using distribution()) is similar to Stan code in that the variable is defined first, and the distribution statement added later.

-

As well as taking more lines of code, the second approach relies on us to get the constraints of the variable right. Because sd must be positive to be a valid standard deviation parameter, the distribution() approach for that parameter would look like this:

-
sd = variable(lower = 0)
-distribution(sd) = lognormal(0, 3)
+

In our example script, when we created the variables int, coef and sd, we simultaneously stated the prior distributions for them using some of greta’s probability distribution functions. You can see a list of the currently available distributions in the ?distributions helpfile. Each of these distribution functions takes as arguments the distribution’s parameters (either as numbers or other greta arrays), as well as the dimensions of the resulting greta array. As before, we left the dimensions arguments at their default value to create scalar greta arrays.

+

Both int and coef were given zero-mean normal distributions, which are a common choice of prior for unconstrained variables in Bayesian analyses. For the strictly positive parameter sd, we chose a slightly unconventional option, a positive-truncated (non-standard) student’s t distribution, which we create using greta’s built-in support for truncated distributions.
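To illustrate the dimensions argument mentioned above, here is a sketch of requesting a whole array of independent variables with the same prior; the argument name `dim` is an assumption here (it matches `variable()` and `greta_array()` elsewhere in this vignette), so check the distributions helpfile before relying on it:

```r
# a scalar prior, as in the example script
coef = normal(0, 3)

# a 2x3 greta array of independent normal(0, 3) variables
# (`dim` is assumed to be the name of the dimensions argument)
coefs = normal(0, 3, dim = c(2, 3))
dim(coefs)
```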

-
-

Truncated distributions

-

In general, it’s easier to define parameters with priors just by using the probability distributions, like we did in the example script. However distribution() has two other uses: it can be used to define a likelihood over observed data; and it can be used to create variables with truncated distributions.

-

If we define a variable with constraints (using lower and upper), we can then assign it a probability distribution, and greta will instead use the truncated probability distribution, usng the specified constraints. For example, to define a variable z that has a standard normal distribution truncated between -1 and 2, we can do:

-
z = variable(lower = -1, upper = 2)
-distribution(z) = normal(0, 1)
-z
+
+

variables with truncated distributions

+

Some of greta’s probability distributions (those that are continuous and univariate) can be specified as truncated distributions. By modifying the truncation argument, we can state that the resulting distribution should be truncated between the two truncation bounds. So to create a standard normal distribution truncated between -1 and 1 we can do:

+
(z = normal(0, 1, truncation = c(-1, 1)))
greta array (variable following a normal distribution)
 
      [,1]
 [1,]  ?  
-

Truncation like this isn’t yet implemented for all probability distributions, but greta will let you know if it can’t truncate the distribution.

+

greta will account for this truncation when calculating the density of this distribution; rescaling it to be a valid probability distribution. We can only truncate to within the support of the distribution; e.g. greta will throw an error if we try to truncate a lognormal distribution (which must be positive) to have a lower bound of -1.
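A short sketch of that last point, using a lognormal distribution; the error message shown in the comment is the one checked by the truncation tests added elsewhere in this changeset:

```r
# truncating within the lognormal's (positive) support is fine
tau = lognormal(0, 1, truncation = c(0, 5))

# truncating below the support should fail with
# "lower bound must be 0 or higher"
lognormal(0, 1, truncation = c(-1, Inf))
```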


@@ -495,16 +501,17 @@

Functions

Likelihood

-

With the predicted sepal length mean, we can now define a likelihood for the observed sepal length data y:

+

So far, we have created greta arrays representing the variables in our model (with prior distributions) and created other greta arrays from them and some fixed, independent data. To perform statistical inference on this model, we also need to link it to some observed dependent data. By comparing the sepal lengths predicted under different parameter values with the observed sepal lengths, we can estimate the most plausible values of those parameters. We do that by defining a likelihood for the observed sepal length data y.

+

By defining a likelihood over observed data, we are stating that these observed data are actually a random sample from some probability distribution, and we’re trying to work out the parameters of that distribution. In greta we do that with the distribution() assignment function:

distribution(y) = normal(mean, sd)
-

With the syntax distribution(<lhs>) <- <rhs>, we are stating that the greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.

+

With the syntax distribution(<lhs>) = <rhs>, we are stating that the data greta array on the left <lhs> has the same distribution as the greta array on the right <rhs>. In this case, we’re temporarily creating a random variable with a normal distribution (with parameters determined by the greta arrays mean and sd), but then stating that values of that distribution have been observed (y). Both <lhs> (y) and <rhs> are column vectors with the same dimensions, so each element in y has a normal distribution with the corresponding parameters.
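For example, a sketch using the iris-based greta arrays defined earlier (iris has 150 rows, so both sides of the assignment are 150 x 1):

```r
# the observed data and the predicted means have matching dimensions,
# so each observed sepal length gets its own normal density
dim(y)     # 150 1
dim(mean)  # 150 1
```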


Defining the model

Now all of the greta arrays making up the model have been created, we need to combine them and set up the model so that we can sample from it, using model():

m <- model(int, coef, sd)
-

() returns a ‘greta model’ object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to () to flag them as the parameters we’re interested in. When sampling from the model with mcmc() those will be the greta arrays for which samples will be returned. Alternatively, we can run () without passing any greta arrays, in which all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead.

+

model() returns a ‘greta model’ object, which combines all of the greta arrays that make up the model. We can pass greta arrays as arguments to model() to flag them as the parameters we’re interested in. When sampling from the model with mcmc() those will be the greta arrays for which samples will be returned. Alternatively, we can run model() without passing any greta arrays, in which case all of the greta arrays (except for data greta arrays) in the working environment will be set as the targets for sampling instead.
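As a short sketch of that last point (using the greta arrays from the example script):

```r
# only return samples for the named greta arrays ...
m <- model(int, coef, sd)

# ... or let model() pick up every non-data greta array in the workspace
m_all <- model()
```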


@@ -512,7 +519,7 @@

Plotting

greta provides a plot function for greta models to help you visualise and check the model before sampling from it.

plot(m)

- +

The greta arrays in your workspace that are used in the model are all represented as nodes (shapes) with names. These are either data (squares; x and y), variables (large circles; int, coef, sd) or the results of operations (small circles; mean). The operations used to create the operation greta arrays are printed on the arrows from the arrays they were made from. There are also nodes for greta arrays that were implicitly defined in our model. The data nodes (squares) with numbers are the parameters used to define the prior distributions, and there’s also an intermediate operation node (small circle), which was the result of multiplying coef and x (before adding int to create mean).

Here’s a legend for the plot (it’s in the ?`greta-model` helpfile too):

@@ -526,18 +533,13 @@

Plotting

It’s the same for the model likelihood, but this time the distribution’s parameters are a variable (sd) and the result of an operation (mean), and the distribution’s value is given by data (the observed sepal lengths y):

- +

-
-

Installing DiagrammeR

-

These plots are made possible by the DiagrammeR R package. Installing greta doesn’t automatically install DiagrammeR, because DiagrammeR auto-installs a number of other packages, such as igraph, that can be time consuming and tedious to install, and aren’t necessary when running greta on high-performance computing systems. If we try to plot but don’t already have DiagrammeR installed, greta will prompt us to do so. We can install DiagrammeR and its dependencies with:

-
install.packages('DiagrammeR')

-

Sampling

-

When defining the model, greta combines all of the distributions together to define the joint density of the model, a measure of how likely (or probable, if we’re being Bayesian) are a particular candidate set of values for the variables in the model.

+

When defining the model, greta combines all of the distributions together to define the joint density of the model, a measure of how ‘good’ (or how probable, if we’re being Bayesian) a particular candidate set of values for the variables in the model is.

Now we have a greta model that will give us the joint density for a candidate set of values, so we can use that to carry out inference on the model. We do that using a Markov chain Monte Carlo (MCMC) algorithm to sample values of the parameters we’re interested in, using the mcmc() function:

draws <- mcmc(m, n_samples = 1000)

Here we’re using 1000 steps of the static Hamiltonian Monte Carlo (HMC) algorithm, which uses the gradients of the joint density to efficiently explore the set of parameters. By default, greta also spends 100 iterations ‘warming up’ (tuning the sampler parameters) and ‘burning in’ (moving to the area of highest probability) the sampler.

@@ -553,26 +555,26 @@

Sampling

plus standard error of the mean: Mean SD Naive SE Time-series SE -int 4.3088 0.07731 0.0024448 0.0019365 -coef 0.4084 0.01886 0.0005965 0.0005538 -sd 0.4084 0.02100 0.0006642 0.0005674 +int 4.2839 0.08063 0.0025497 0.0022142 +coef 0.4139 0.01952 0.0006172 0.0006316 +sd 0.4097 0.02221 0.0007025 0.0007025 2. Quantiles for each variable: 2.5% 25% 50% 75% 97.5% -int 4.1531 4.2596 4.3058 4.3580 4.4692 -coef 0.3708 0.3957 0.4085 0.4206 0.4454 -sd 0.3692 0.3942 0.4078 0.4224 0.4544
-

The MCMCvis package makes some nice plots of the MCMC chain and the parameter estimates

-
library (MCMCvis)
-MCMCtrace(draws)
-MCMCplot(draws, xlim = c(-1, 5))
-

+int 4.1299 4.2257 4.2793 4.3409 4.4422 +coef 0.3782 0.3997 0.4144 0.4287 0.4490 +sd 0.3717 0.3922 0.4085 0.4255 0.4521 +

The bayesplot package makes some nice plots of the MCMC chain and the parameter estimates

+
library (bayesplot)
+mcmc_trace(draws)
+mcmc_intervals(draws)
+

If your model is taking a long time in the sampling phase and you want to take a look at the samples drawn so far, you can stop the sampler (e.g. using the stop button in RStudio) and then retrieve those samples using stashed_samples(). Note that this won’t return anything if you stop the model during the warmup phase (since those aren’t valid posterior samples) or if the sampler completed successfully.
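For instance, a sketch of that workflow (as described above, this only returns something once the sampling phase has started):

```r
# after interrupting mcmc() part-way through the sampling phase
draws_so_far <- stashed_samples()

# NULL if the sampler was stopped during warmup, or if it finished normally
is.null(draws_so_far)
```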

Tweaking the sampler

Static HMC is currently the only sampling algorithm available in greta. The sampler will automatically tune itself during the warmup phase, to make it as efficient as possible. If the chain looks like it’s moving too slowly, or if you are getting a lot of messages about proposals being rejected, the first thing to try is increasing the length of the warmup period from its default of 100 iterations (via the warmup argument). If you’re still getting a lot of rejected samples, it’s often a good idea to manually set the initial values for the sampler (via the initial_values argument). This is often the case when you have lots of data; the more information there is, the more extreme the log probability, and the higher the risk of numerical problems.
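A hedged sketch of those two suggestions, using the argument names mentioned above; the format expected by initial_values is an assumption here, so check its helpfile before relying on it:

```r
# a longer warmup than the default 100 iterations
draws <- mcmc(m, n_samples = 1000, warmup = 500)

# also supplying starting values manually
# (assumed here to be one value per scalar parameter: int, coef, sd)
draws <- mcmc(m, n_samples = 1000, warmup = 500,
              initial_values = c(4, 0.4, 0.4))
```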

-

A downside of HMC is that it can’t be used to sample discrete variables (e.g. integers), so we can’t specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it’s in the likelihood. If we try to build such a model, greta will give us an error when we run () stage. Future versions of greta will implement samplers for discrete MCMC, as well as self-tuning versions of HMC.

+

A downside of HMC is that it can’t be used to sample discrete variables (e.g. integers), so we can’t specify a model with a discrete distribution (e.g. Poisson or Binomial), unless it’s in the likelihood. If we try to build such a model, greta will give us an error when we run model(). Future versions of greta will implement a wider range of MCMC samplers, including some for discrete variables.


diff --git a/vignettes/get_started_files/figure-html/mcmcvis-1.png b/vignettes/get_started_files/figure-html/mcmcvis-1.png new file mode 100644 index 00000000..bbedf346 Binary files /dev/null and b/vignettes/get_started_files/figure-html/mcmcvis-1.png differ diff --git a/vignettes/get_started_files/figure-html/mcmcvis-2.png b/vignettes/get_started_files/figure-html/mcmcvis-2.png new file mode 100644 index 00000000..397a6d52 Binary files /dev/null and b/vignettes/get_started_files/figure-html/mcmcvis-2.png differ diff --git a/vignettes/how_does_this_work.Rmd b/vignettes/how_does_this_work.Rmd deleted file mode 100644 index 057139e5..00000000 --- a/vignettes/how_does_this_work.Rmd +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: "How does this work?" -output: - html_document: - css: greta.css - toc: yes - toc_float: - collapsed: false - toc_depth: 4 - theme: lumen - highlight: default -vignette: > - %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{How does this work?} - %\usepackage[utf8]{inputenc} ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, - cache = TRUE, - comment = NA, - progress = FALSE) -set.seed(1) -library(greta) -``` - -This page provides technical implementation details for potential contributors or the curious. You don't need to read this to be able to use greta. - -## greta arrays, nodes and tensors - -There are three layers to how greta defines a model: users manipulate *greta arrays*, these define *nodes*, and nodes then define *Tensors*. - -### greta arrays - -greta arrays are the user-facing representation of the model. greta arrays are actually just `lists`, but with the classes `greta_array` and `array`. - -```{r greta_array1} -x <- ones(3, 3) -is.list(x) -class(x) -``` - -### nodes - -The only list element of a greta array is `node`, an R6 object inheriting from the R6 class 'node', as well as one of the three node types: 'data', 'operation' or 'variable'. - -```{r greta_array2} -class(x$node) -``` - -There is a fourth node type: 'distribution', but these are never directly associated with greta arrays. - -These R6 node objects are where the magic happens. -When created, each node points to its 'child' nodes - the nodes for the greta arrays that were used to create this one. - -```{r nodes1} -# data nodes have no children -length(x$node$children) - -# operation nodes have one or more children -z <- x * 3 -length(z$node$children) -``` - -Each node also has a list of its parents, the nodes that have been created from this one. - -When `model()` is called, that inheritance information is used to construct the directed acyclic graph (DAG) that defines the model. The inheritance also preserves intermediate nodes, such as those creates in multi-part operations, but not assigned as greta arrays. - -Nodes also have a value member, which is an array for data nodes or an 'unknowns' object for other node types. The unknowns class is a thin wrapper around arrays, which prints the question marks. Generic functions for working on arrays (e.g. `dim`, `length`, `print`) make use these node values to return something familiar to the user. -```{r nodes2} -x$node$value() -# R calls this a matrix because it's 2d, but it's an array -class(x$node$value()) -z$node$value() -class(z$node$value()) -``` - -### Tensors - -In addition to remembering their shape and size and where they are in the DAG, each node has methods to define a corresponding TensorFlow Tensor object in a specified environment. 
That doesn't happen until the user runs `model()`, which creates a 'dag_class' object to store the relevant nodes, the environment for the tensors, and methods to talk to the TensorFlow graph. - -The node `tf()` method takes the dag as an argument, and defines a tensor representing itself in the tensorflow environment, with a name determined by the dag object. - -```{r tensors1} -x$node$tf -``` - -Because R6 objects are pass-by-reference (rather than pass-by-copy), the dag accumulates all of the defined tensors, rather than being re-written each time. Similarly, because nodes are R6 objects and know which are their children, they can make sure those children are defined as tensors before they are. The `define_tf()` member function ensures that that happens, enabling operation nodes to use the tensors for their children when defining their own tensor. - -```{r tensors2} -z$node$tf -``` - -## variables and free states - -Hamiltonian Monte Carlo (HMC) requires all of the parameters to be transformed to a continuous scale for sampling. Variable nodes are therefore converted to tensors by first defining a 'free' (unconstrained) version of themselves as a tensor, and then applying a transformation function to convert them back to the scale of interest. - -```{r free_state} -a = variable(lower = 0) -class(a$node) -a$node$tf_from_free -``` - -## distributions - -distribution nodes are node objects just like the others, but they are not *directly* associated with greta arrays. Instead, greta arrays may have a distribution node in their `distribution` slot to indicate that their values are assumed to follow that distribution. The distribution node will also be listed as a parent node, and likewise the 'target node' will be listed as a child of the distribution. Distribution nodes also have child nodes (data, variables or operations) representing their parameters. - -```{r distributions1} -b = normal(0, 1) -class(b$node) -class(b$node$distribution) -# a is the target of its own distribution -class(b$node$distribution$target) -``` - -When they define themselves as tensors, distribution nodes define the log density of their target node/tensor given the tensors representing their parameters. - -```{r distributions2} -b$node$distribution$tf_log_density -``` - -If the distribution was truncated, the log density is normalised using the cumulative distribution function. - -## Joint density - -Those log-densities for these distributions are summed on the TensorFlow graph to create a Tensor for the joint log-density of the model. -TensorFlow's automatic gradient capabilities are then used to define a Tensor for the gradient of the log-density with respect to each parameter in the model. - -The `dag` R6 object contained within the model then exposes methods to send parameters to the TensorFlow graph and return the joint density and gradient. - -```{r dag1} -model <- model(b) -model$dag$send_parameters -model$dag$log_density -model$dag$gradients -``` - -These methods are used by MCMC samplers to explore the model parameters. diff --git a/inst/doc/how_does_this_work.Rmd b/vignettes/technical_details.Rmd similarity index 89% rename from inst/doc/how_does_this_work.Rmd rename to vignettes/technical_details.Rmd index 057139e5..778fb1f2 100644 --- a/inst/doc/how_does_this_work.Rmd +++ b/vignettes/technical_details.Rmd @@ -1,5 +1,5 @@ --- -title: "How does this work?" 
+title: "Technical details" output: html_document: css: greta.css @@ -11,13 +11,13 @@ output: highlight: default vignette: > %\VignetteEngine{knitr::rmarkdown} - %\VignetteIndexEntry{How does this work?} + %\VignetteIndexEntry{Technical details} %\usepackage[utf8]{inputenc} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, - eval = TRUE, + eval = greta:::check_tf_version("message"), cache = TRUE, comment = NA, progress = FALSE) @@ -86,7 +86,7 @@ The node `tf()` method takes the dag as an argument, and defines a tensor repres x$node$tf ``` -Because R6 objects are pass-by-reference (rather than pass-by-copy), the dag accumulates all of the defined tensors, rather than being re-written each time. Similarly, because nodes are R6 objects and know which are their children, they can make sure those children are defined as tensors before they are. The `define_tf()` member function ensures that that happens, enabling operation nodes to use the tensors for their children when defining their own tensor. +Because R6 objects are pass-by-reference (rather than pass-by-value), the dag accumulates all of the defined tensors, rather than being re-written each time. Similarly, because nodes are R6 objects and know which are their children, they can make sure those children are defined as tensors before they are. The `define_tf()` member function ensures that that happens, enabling operation nodes to use the tensors for their children when defining their own tensor. ```{r tensors2} z$node$tf diff --git a/inst/doc/how_does_this_work.html b/vignettes/technical_details.html similarity index 99% rename from inst/doc/how_does_this_work.html rename to vignettes/technical_details.html index d4842609..c4070f05 100644 --- a/inst/doc/how_does_this_work.html +++ b/vignettes/technical_details.html @@ -11,7 +11,7 @@ -How does this work? +Technical details @@ -220,7 +220,7 @@ -

-How does this work?
+Technical details

@@ -276,26 +276,38 @@

Tensors

In addition to remembering their shape and size and where they are in the DAG, each node has methods to define a corresponding TensorFlow Tensor object in a specified environment. That doesn’t happen until the user runs model(), which creates a ‘dag_class’ object to store the relevant nodes, the environment for the tensors, and methods to talk to the TensorFlow graph.

The node tf() method takes the dag as an argument, and defines a tensor representing itself in the tensorflow environment, with a name determined by the dag object.

x$node$tf
-
function (dag) 
-{
-    assign(dag$tf_name(self), tf$constant(self$value(), dtype = tf_float()), 
-        envir = dag$tf_environment)
-}
-<environment: 0x10434d790>
-

Because R6 objects are pass-by-reference (rather than pass-by-copy), the dag accumulates all of the defined tensors, rather than being re-written each time. Similarly, because nodes are R6 objects and know which are their children, they can make sure those children are defined as tensors before they are. The define_tf() member function ensures that that happens, enabling operation nodes to use the tensors for their children when defining their own tensor.

+
function (dag) {
+      assign(dag$tf_name(self),
+             tf$constant(self$value(), dtype = tf_float()),
+             envir = dag$tf_environment)
+    }
+<environment: 0x128eed298>
+

Because R6 objects are pass-by-reference (rather than pass-by-value), the dag accumulates all of the defined tensors, rather than being re-written each time. Similarly, because nodes are R6 objects and know which are their children, they can make sure those children are defined as tensors before they are. The define_tf() member function ensures that that happens, enabling operation nodes to use the tensors for their children when defining their own tensor.
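As a reminder of what reference semantics means here, a generic R6 example (unrelated to greta's own classes): two names bound to the same R6 object see each other's modifications, which is why the dag can accumulate state as nodes define themselves.

```r
library(R6)

Counter <- R6Class("Counter", public = list(
  n = 0,
  add = function() {
    self$n <- self$n + 1
    invisible(self)
  }
))

a <- Counter$new()
b <- a    # b points to the same object, not a copy
b$add()
a$n       # 1: the change made via b is visible through a
```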

z$node$tf
-
function (dag) 
-{
-    op <- self$switch_op(self$operation)
-    fun <- eval(parse(text = op))
-    arg_tf_names <- lapply(self$children, dag$tf_name)
-    args <- lapply(arg_tf_names, get, envir = dag$tf_environment)
-    if (length(self$operation_args) > 0) 
+
function (dag) {
+
+      # switch out the op for non-sugared variety
+      op <- self$switch_op(self$operation)
+
+      # get the function
+      fun <- eval(parse(text = op))
+
+      # fetch the tensors for the environment
+      arg_tf_names <- lapply(self$children, dag$tf_name)
+      args <- lapply(arg_tf_names, get, envir = dag$tf_environment)
+
+      # fetch additional (non-tensor) arguments, if any
+      if (length(self$operation_args) > 0)
         args <- c(args, self$operation_args)
-    node <- do.call(fun, args)
-    assign(dag$tf_name(self), node, envir = dag$tf_environment)
-}
-<environment: 0x132ddc298>
+
+      # apply function on tensors
+      node <- do.call(fun, args)
+
+      # assign it in the environment
+      assign(dag$tf_name(self), node, envir = dag$tf_environment)
+
+    }
+<environment: 0x11d9e24d8>
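The child-first recursion described above can be sketched in plain R. This is illustrative only, not greta's actual `define_tf()` method; it reuses the `children`, `dag$tf_name()` and `dag$tf_environment` objects shown in these listings:

```r
# illustrative sketch: make sure all child tensors exist, then define this one
define_tf_sketch <- function(node, dag) {
  for (child in node$children) {
    # the dag environment acts as a cache, so each tensor is defined only once
    if (!exists(dag$tf_name(child), envir = dag$tf_environment))
      define_tf_sketch(child, dag)
  }
  # with all children defined, the node can define its own tensor
  node$tf(dag)
}
```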
@@ -305,25 +317,37 @@

variables and free states

class(a$node)
[1] "variable_node" "node"          "R6"           
a$node$tf_from_free
-
function (x, env) 
-{
-    upper <- self$upper
-    lower <- self$lower
-    if (self$constraint == "none") {
+
function (x, env) {
+
+      upper <- self$upper
+      lower <- self$lower
+
+      if (self$constraint == 'none') {
+
         y <- x
-    }
-    else if (self$constraint == "both") {
+
+      } else if (self$constraint == 'both') {
+
         y <- tf_ilogit(x) * fl(upper - lower) + fl(lower)
+
+      } else if (self$constraint == 'low') {
+
+        y <- fl(upper) - tf$exp(x)
+
+      } else if (self$constraint == 'high') {
+
+        y <- tf$exp(x) + fl(lower)
+
+      } else {
+
+        y <- x
+
+      }
+
+      y
+
     }
-    else if (self$constraint == "low") {
-        y <- fl(upper) - tf_log1pe(x)
-    }
-    else if (self$constraint == "high") {
-        y <- tf_log1pe(x) + fl(lower)
-    }
-    y
-}
-<environment: 0x12bd11060>
+<environment: 0x128086eb0>
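In plain R, the constraint transformations above look roughly like this (a sketch, with `plogis()` standing in for `tf_ilogit()`; not greta's code):

```r
# map an unconstrained 'free' value x back to the constrained scale
from_free <- function(x, lower = -Inf, upper = Inf) {
  if (is.finite(lower) && is.finite(upper)) {
    plogis(x) * (upper - lower) + lower   # constrained on both sides
  } else if (is.finite(upper)) {
    upper - exp(x)                        # upper bound only
  } else if (is.finite(lower)) {
    exp(x) + lower                        # lower bound only
  } else {
    x                                     # unconstrained
  }
}

from_free(-2, lower = 0)  # always positive, however negative x is
```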

distributions

@@ -339,16 +363,24 @@

distributions

[1] "variable_node" "node"          "R6"           

When they define themselves as tensors, distribution nodes define the log density of their target node/tensor given the tensors representing their parameters.

b$node$distribution$tf_log_density
-
function (dag) 
-{
-    tf_target <- get(dag$tf_name(self$target), envir = dag$tf_environment)
-    tf_parameters <- self$tf_fetch_parameters(dag)
-    ld <- self$tf_log_density_function(tf_target, tf_parameters)
-    if (!is.null(self$truncation)) 
+
function (dag) {
+
+      # fetch inputs
+      tf_target_node <- self$get_tf_target_node()
+      tf_target <- get(dag$tf_name(tf_target_node),
+                       envir = dag$tf_environment)
+      tf_parameters <- self$tf_fetch_parameters(dag)
+
+      # calculate log density
+      ld <- self$tf_log_density_function(tf_target, tf_parameters)
+
+      # check for truncation
+      if (!is.null(self$truncation))
         ld <- ld - self$tf_log_density_offset(tf_parameters)
-    ld
-}
-<environment: 0x12bc2c098>
+
+      ld
+    }
+<environment: 0x11fec6bc0>

If the distribution was truncated, the log density is normalised using the cumulative distribution function.
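Concretely, for a normal distribution truncated to (lower, upper) the offset is the log probability mass inside the truncation bounds (a plain-R sketch, not greta's implementation):

```r
# log density of a normal truncated to (lower, upper):
# the usual log density minus the log probability of falling inside the bounds
log_density_truncated_normal <- function(x, mean, sd, lower, upper) {
  dnorm(x, mean, sd, log = TRUE) -
    log(pnorm(upper, mean, sd) - pnorm(lower, mean, sd))
}

log_density_truncated_normal(0.5, mean = 0, sd = 1, lower = 0, upper = Inf)
```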

@@ -357,27 +389,35 @@

Joint density

The dag R6 object contained within the model then exposes methods to send parameters to the TensorFlow graph and return the joint density and gradient.

model <- model(b)
 model$dag$send_parameters
-
function (parameters, flat = TRUE) 
-{
-    parameters <- relist_tf(parameters, self$parameters_example)
-    names(parameters) <- paste0(names(parameters), "_free")
-    self$tf_environment$parameters <- parameters
-    with(self$tf_environment, parameter_dict <- do.call(dict, 
-        parameters))
-}
-<environment: 0x12873e1c0>
+
function (parameters, flat = TRUE) {
+
+      # convert parameters to a named list and change TF names to free versions
+      parameters <- relist_tf(parameters, self$parameters_example)
+      names(parameters) <- paste0(names(parameters), '_free')
+
+      # create a feed dict in the TF environment
+      self$tf_environment$parameters <- parameters
+      with(self$tf_environment,
+           parameter_dict <- do.call(dict, parameters))
+
+    }
+<environment: 0x110ece698>
model$dag$log_density
-
function () 
-{
-    with(self$tf_environment, sess$run(joint_density, feed_dict = parameter_dict))
-}
-<environment: 0x12873e1c0>
+
function(adjusted = TRUE) {
+
+      cleanly(with(self$tf_environment,
+                   sess$run(joint_density_adj, feed_dict = parameter_dict)))
+
+    }
+<environment: 0x110ece698>
model$dag$gradients
-
function () 
-{
-    with(self$tf_environment, sess$run(gradients, feed_dict = parameter_dict))
-}
-<environment: 0x12873e1c0>
+
function (adjusted = TRUE) {
+
+      cleanly(with(self$tf_environment,
+                   sess$run(gradients_adj, feed_dict = parameter_dict)))
+
+    }
+<environment: 0x110ece698>

These methods are used by MCMC samplers to explore the model parameters.
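For instance, a single density-and-gradient evaluation inside a sampler might look like this (a sketch assuming the model built above; the free-state value `theta` is a hypothetical name):

```r
theta <- rnorm(1)                   # a value for the model's single free parameter
model$dag$send_parameters(theta)    # push it into the TensorFlow feed dict
lp   <- model$dag$log_density()     # joint log density at theta
grad <- model$dag$gradients()       # gradient with respect to the free state
```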