1.58 - Book release drum beat

Various tuning adjustments, and one feature addition in this version. Feature: - coerce_index now works with multiple columns. See ?coerce_index for details. Tuning: - simplehist much more efficient now - add stancode method for stanfit objects - map2stan uses rstan's own multicore code now, instead of my implementation. Should be equivalent, but produces less Terminal spam now. - Various documentation edits.
rmcelreath · Dec 24, 2015 · e71dbc1 · e71dbc1
1 parent a9acd53
commit e71dbc1
Show file tree

Hide file tree

Showing 10 changed files with 129 additions and 29 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 1.57
-Date: 2015-09-25
+Version: 1.58
+Date: 2015-12-24
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
 Imports: coda, MASS, mvtnorm, loo

diff --git a/R/map2stan-class.r b/R/map2stan-class.r
@@ -67,6 +67,12 @@ function(object) {
     return( invisible( object@model ) )
 }
 )
+setMethod("stancode", "stanfit",
+function(object) {
+    cat( object@stanmodel@model_code )
+    return( invisible( object@stanmodel@model_code ) )
+}
+)
 setMethod("stancode", "list",
 function(object) {
     cat( object$model )
@@ -148,7 +154,11 @@ setMethod("summary", "map2stan", function(object){
 
 # resample from compiled map2stan fit
 # can also run on multiple cores
-resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , data , ... ) {
+resample <- function( object , ... ) {
+    if ( class(object)!="map2stan" ) stop("Requires previous map2stan fit.")
+    map2stan(object,...)
+}
+resample_old <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , data , ... ) {
     if ( !(class(object)%in%(c("map2stan"))) )
         stop( "Requires map2stan fit" )
     if ( missing(data) ) data <- object@data

diff --git a/R/map2stan.r b/R/map2stan.r
@@ -1344,7 +1344,24 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
         #initlist <- list()
         #for ( achain in 1:chains ) initlist[[achain]] <- start
         initlist <- start # should have one list for each chain
+
+        if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )
+        if ( is.null(previous_stanfit) )
+            fit <- try(stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , warmup=warmup , chains=chains , cores=cores , pars=pars , seed=rng_seed , ... ))
+        else
+            fit <- try(stan( fit=previous_stanfit , model_name=modname , data=d , init=initlist , iter=iter , warmup=warmup , chains=chains , cores=cores , pars=pars , seed=rng_seed , ... ))
+
+        if ( class(fit)=="try-error" ) {
+                # something went wrong in at least one chain
+                msg <- attr(fit,"condition")$message
+                if ( cores > 1 )
+                    stop(concat("Something went wrong in at least one chain. Debug your model while setting chains=1 and cores=1. Once the model is working with a single chain and core, try using multiple chains/cores again.\n",msg))
+                else
+                    stop(concat("Something went wrong, when calling Stan. Check any debug messages for clues, detective.\n",msg))
+            }
 
+        ###### PREVIOUS multicore code, before Stan added its own ########
+        if ( FALSE ) {
         if ( chains==1 | cores==1 ) {
 
             # single core
@@ -1404,6 +1421,9 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
             }
 
         }#multicore
+        }
+        # END PREVIOUS multicore code
+        #############################
 
     } else {
         fit <- NULL

diff --git a/R/plotting.r b/R/plotting.r
@@ -155,7 +155,12 @@ show.naive.posterior <- function( est , se , model=NULL , level=0.95 , xlab="est
 }
 
 # simple histogram
-simplehist <- function( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 , lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... ) {
+simplehist <- function( x , round=TRUE , ylab="Frequency" , ... ) {
+    if ( round==TRUE ) x <- round(x)
+    plot(table(x),ylab=ylab,...)
+}
+
+simplehist_old <- function( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 , lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... ) {
     # first, check if integers only or continuous.
     freqs <- {}
     x2 <- round(x)

diff --git a/R/utilities.r b/R/utilities.r
@@ -221,10 +221,10 @@ mcreplicate <- function (n, expr, refresh = 0.1, mc.cores=2 ) {
     }
     result <- simplify2array(mclapply(1:n, eval.parent(substitute(function(i, 
         ...) {
-        show_progress(i)
+        if (refresh>0) show_progress(i)
         expr
     })),mc.cores=mc.cores))
-    cat("\n")
+    if (refresh>0) cat("\n")
     result
 }
 
@@ -239,7 +239,35 @@ check_index <- function( x ) {
     if ( any(diffs)!=1 ) message( "At least one gap in consecutive values" )
 }
 
-coerce_index <- function( x ) as.integer(as.factor(as.character(x)))
+# old vector-only coerce_index
+# coerce_index <- function( x ) as.integer(as.factor(as.character(x)))
+
+# new coerce_index that can take multiple vectors
+# ensures labels in all are part of same index set
+coerce_index <- function( ... ) {
+    L <- list(...)
+    if ( is.list(L[[1]]) && length(L)==1 ) L <- L[[1]]
+    if ( length(L)==1 ) {
+        return( as.integer(as.factor(as.character(L[[1]]))) )
+    } else {
+        # multiple inputs
+        vnames <- match.call()
+        vnames <- as.character(vnames)[2:(length(L)+1)]
+        # generate levels that include all labels from all inputs
+        M <- L
+        for ( i in 1:length(L) ) M[[i]] <- as.character(L[[i]])
+        Mall <- M[[1]]
+        for ( i in 2:length(L) ) Mall <- c( Mall , M[[i]] )
+        Mall <- unique(Mall)
+        new_levels <- levels(as.factor(Mall))
+        for ( i in 1:length(L) ) {
+            M[[i]] <- factor(M[[i]],levels=new_levels)
+            M[[i]] <- as.integer(M[[i]])
+        }
+        names(M) <- paste( vnames , "_idx" , sep="" )
+        return(M)
+    } 
+}
 
 # sd and var functions that don't use n-1 denominator
 sd2 <- function( x , na.rm=TRUE ) {

diff --git a/R/zWAIC-methods.R b/R/zWAIC-methods.R
@@ -12,10 +12,10 @@ function( object , n=1000 , refresh=0.1 , pointwise=FALSE , ... ) {
 # extracts log_lik matrix from stanfit and computes WAIC
 setMethod("WAIC", "stanfit",
 function( object , n=0 , refresh=0.1 , pointwise=FALSE , log_lik="log_lik" , ... ) {
-    if ( !is.null(extract.samples(object)[[log_lik]]) )
-        ll_matrix <- extract.samples(object)[[log_lik]]
-    else
-        stop(concat("Log-likelihood matrix '",log_lik,"'' not found."))
+
+    ll_matrix <- extract.samples(object,pars=log_lik)[[1]]
+
+    # stop(concat("Log-likelihood matrix '",log_lik,"'' not found."))
 
     n_obs <- ncol(ll_matrix)
     n_samples <- nrow(ll_matrix)

diff --git a/README.md b/README.md
@@ -21,13 +21,15 @@ You can find a manual with expanded installation and usage instructions here: ``
 
 Here's the brief verison. 
 
-You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. Then you can install ``rethinking`` from within R using:
+You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. The biggest challenge is getting a C++ compiler configured to work with your installation of R. The instructions at ``https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started`` are quite thorough. Obey them, and you'll likely succeed.
+
+Then you can install ``rethinking`` from within R using:
 ```
-install.packages(c("coda","mvtnorm","devtools"))
+install.packages(c("coda","mvtnorm","devtools","loo"))
 library(devtools)
 devtools::install_github("rmcelreath/rethinking")
 ```
-If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has nothing to do with it. See the manual linked above for some hints about getting ``rstan`` installed.
+If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has little to do with it. See the manual linked above for some hints about getting ``rstan`` installed. But always consult the RStan section of the website at ``mc-stan.org`` for the latest information on RStan.
 
 ## MAP estimation
 
@@ -95,7 +97,7 @@ fit.stan <- map2stan(
     f , 
     data=list(y=c(-1,1)) , 
     start=list(mu=0,sigma=1) ,
-    chains=4 , cores=4 , iter=1e4 , warmup=1000
+    chains=4 , cores=4 , iter=2000 , warmup=1000
 )
 ```
 The ``parallel`` package is used here, relying upon ``mclapply`` (Mac, UNIX) or ``parLapply`` (Windows). It is best to run parallel operations in the Terminal/Command Prompt, as GUI interfaces sometimes crash when forking processes.
@@ -191,7 +193,7 @@ Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling. It wi
 
 ## Semi-automated Bayesian imputation
 
-It is possible to code simple Bayesian imputations this way. For example, let's simulate a simple regression with missing predictor values:
+It is possible to code simple Bayesian imputations. For example, let's simulate a simple regression with missing predictor values:
 ```
 N <- 100
 N_miss <- 10
@@ -247,7 +249,7 @@ mGP <- map2stan(
     ),
     warmup=1000 , iter=5000 , chains=4 )
 ```
-Note the use of the ``constraints`` list to pass custom parameter constraints to Stan. This example is explored in more detail in the (in prep) book. 
+Note the use of the ``constraints`` list to pass custom parameter constraints to Stan. This example is explored in more detail in the book. 
 
 ## Information criteria
 

diff --git a/man/coerce_index.Rd b/man/coerce_index.Rd
@@ -0,0 +1,40 @@
+\name{coerce_index}
+\alias{coerce_index}
+\alias{check_index}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Build and check integer index variables}
+\description{
+  These functions assist with building (\code{coerce_index}) and checking (\code{check_index}) integer index variables of the kind needed to define varying effect models.
+}
+\usage{
+coerce_index( ... )
+check_index( x )
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{...}{A comma-separted list of variables. See details.}
+  \item{x}{A vector of integers to check for contiguity}
+}
+\details{
+  Varying effect models often require index variables that begin at 1 and comprise only integers. These variables are used to lookup specific parameter values inside of the model. For example, it is common to define varying intercepts with a linear model like \code{a0 + a_id[id[i]]}. Here the variable \code{id} is an index variable. It has one value for each case, defining which individual applies to that case.
+
+  When raw data exist as factors, these index variables much be converted to integers. This is trickier than it sounds, because R uses an internal integer represntation for factors, \code{levels}, that can conflict with ordinary integer representations.
+
+  The function \code{coerce_index} deals with that complication. When the input is a single vector of factors, it returns an integer vector beginning at 1 and with contiguous values. 
+
+  When the input is instead a comma-separated list of factors, it returns a list in which each factor has been converted to integers, but all levels in all factors were merged so that the same labels across factors always have the same integer values in the result. For example, suppose cases refer to dyads and there are two factors, \code{id1} and \code{id2}, that indicate which pair of individuals are present in each dyad. The labels in these variables should refer to the same individuals. Passing both simultaneously to \code{coerce_index} ensures that the results respect that fact.
+
+  The function \code{check_index} merely checks an integer vector to see if it is contiguous.
+}
+\value{
+For \code{coerce_index}, the result is either a single vector of integers (if the input was a single vector) or rather a list of vectors of integers (if the input was a list of vectors).
+}
+\references{}
+\author{Richard McElreath}
+\seealso{}
+\examples{
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{ }
+
diff --git a/man/rethinking.Rd b/man/rethinking.Rd
@@ -11,8 +11,8 @@
   \tabular{ll}{
     Package: \tab rethinking\cr
     Type: \tab Package\cr
-    Version: \tab 1.56\cr
-    Date: \tab 10 July 2015\cr
+    Version: \tab 1.58\cr
+    Date: \tab 24 Dec 2015\cr
     License: \tab GPL-3 \cr
   }
 
@@ -21,7 +21,7 @@
   Richard McElreath
 }
 \references{
-  McElreath (2016). Statistical Rethinking: A Bayesian Course with R Examples. CRC Press.
+  McElreath (2016). Statistical Rethinking: A Bayesian Course with Examples in R and Stan. CRC Press.
 }
 
 \examples{}  

diff --git a/man/simplehist.Rd b/man/simplehist.Rd
@@ -6,22 +6,17 @@
   Simple integer-valued histograms, for displaying count distributions.
 }
 \usage{
-simplehist( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 , 
-lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... )
+simplehist( x , round=TRUE , ylab="Frequency" , ... )
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
   \item{x}{Vector of values to construct histogram from}
+  \item{round}{When \code{TRUE}, rounds values in \code{x} before plotting}
   \item{ylab}{Label on vertical axis}
-  \item{xlab}{Label on horizontal axis}
-  \item{ycounts}{?}
-  \item{lcol}{Line color}
-  \item{bins}{Bins, defaults to zero and positive integers}
-  \item{show.counts}{?}
-  \item{...}{Other parameters to pass to plotting functions.}
+  \item{...}{Other parameters to pass to plot}
 }
 \details{
-  This function constructs clean histograms for count data. It needs to be rewritten, though.
+  This function constructs clean histograms for count data. Non-integer data can be rounded to nearest integer before plotting. Internally, this function is little more than \code{plot(table(x))}.
 }
 \value{
 }