Skip to content

Commit

Permalink
1.58 - Book release drum beat
Browse files Browse the repository at this point in the history
Various tuning adjustments, and one feature addition in this version.

Feature:
- coerce_index now works with multiple columns. See ?coerce_index for
details.

Tuning:
- simplehist much more efficient now
- add stancode method for stanfit objects
- map2stan uses rstan's own multicore code now, instead of my
implementation. Should be equivalent, but produces less Terminal spam
now.
- Various documentation edits.
  • Loading branch information
Richard McElreath committed Dec 24, 2015
1 parent a9acd53 commit e71dbc1
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 29 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.57
Date: 2015-09-25
Version: 1.58
Date: 2015-12-24
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS, mvtnorm, loo
Expand Down
12 changes: 11 additions & 1 deletion R/map2stan-class.r
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ function(object) {
return( invisible( object@model ) )
}
)
setMethod("stancode", "stanfit",
function(object) {
cat( object@stanmodel@model_code )
return( invisible( object@stanmodel@model_code ) )
}
)
setMethod("stancode", "list",
function(object) {
cat( object$model )
Expand Down Expand Up @@ -148,7 +154,11 @@ setMethod("summary", "map2stan", function(object){

# resample from compiled map2stan fit
# can also run on multiple cores
resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , data , ... ) {
resample <- function( object , ... ) {
if ( class(object)!="map2stan" ) stop("Requires previous map2stan fit.")
map2stan(object,...)
}
resample_old <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , data , ... ) {
if ( !(class(object)%in%(c("map2stan"))) )
stop( "Requires map2stan fit" )
if ( missing(data) ) data <- object@data
Expand Down
20 changes: 20 additions & 0 deletions R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,24 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
#initlist <- list()
#for ( achain in 1:chains ) initlist[[achain]] <- start
initlist <- start # should have one list for each chain

if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )
if ( is.null(previous_stanfit) )
fit <- try(stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , warmup=warmup , chains=chains , cores=cores , pars=pars , seed=rng_seed , ... ))
else
fit <- try(stan( fit=previous_stanfit , model_name=modname , data=d , init=initlist , iter=iter , warmup=warmup , chains=chains , cores=cores , pars=pars , seed=rng_seed , ... ))

if ( class(fit)=="try-error" ) {
# something went wrong in at least one chain
msg <- attr(fit,"condition")$message
if ( cores > 1 )
stop(concat("Something went wrong in at least one chain. Debug your model while setting chains=1 and cores=1. Once the model is working with a single chain and core, try using multiple chains/cores again.\n",msg))
else
stop(concat("Something went wrong, when calling Stan. Check any debug messages for clues, detective.\n",msg))
}

###### PREVIOUS multicore code, before Stan added its own ########
if ( FALSE ) {
if ( chains==1 | cores==1 ) {

# single core
Expand Down Expand Up @@ -1404,6 +1421,9 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
}

}#multicore
}
# END PREVIOUS multicore code
#############################

} else {
fit <- NULL
Expand Down
7 changes: 6 additions & 1 deletion R/plotting.r
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,12 @@ show.naive.posterior <- function( est , se , model=NULL , level=0.95 , xlab="est
}

# simple histogram
simplehist <- function( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 , lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... ) {
simplehist <- function( x , round=TRUE , ylab="Frequency" , ... ) {
if ( round==TRUE ) x <- round(x)
plot(table(x),ylab=ylab,...)
}

simplehist_old <- function( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 , lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... ) {
# first, check if integers only or continuous.
freqs <- {}
x2 <- round(x)
Expand Down
34 changes: 31 additions & 3 deletions R/utilities.r
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ mcreplicate <- function (n, expr, refresh = 0.1, mc.cores=2 ) {
}
result <- simplify2array(mclapply(1:n, eval.parent(substitute(function(i,
...) {
show_progress(i)
if (refresh>0) show_progress(i)
expr
})),mc.cores=mc.cores))
cat("\n")
if (refresh>0) cat("\n")
result
}

Expand All @@ -239,7 +239,35 @@ check_index <- function( x ) {
if ( any(diffs)!=1 ) message( "At least one gap in consecutive values" )
}

coerce_index <- function( x ) as.integer(as.factor(as.character(x)))
# old vector-only coerce_index
# coerce_index <- function( x ) as.integer(as.factor(as.character(x)))

# new coerce_index that can take multiple vectors
# ensures labels in all are part of same index set
coerce_index <- function( ... ) {
L <- list(...)
if ( is.list(L[[1]]) && length(L)==1 ) L <- L[[1]]
if ( length(L)==1 ) {
return( as.integer(as.factor(as.character(L[[1]]))) )
} else {
# multiple inputs
vnames <- match.call()
vnames <- as.character(vnames)[2:(length(L)+1)]
# generate levels that include all labels from all inputs
M <- L
for ( i in 1:length(L) ) M[[i]] <- as.character(L[[i]])
Mall <- M[[1]]
for ( i in 2:length(L) ) Mall <- c( Mall , M[[i]] )
Mall <- unique(Mall)
new_levels <- levels(as.factor(Mall))
for ( i in 1:length(L) ) {
M[[i]] <- factor(M[[i]],levels=new_levels)
M[[i]] <- as.integer(M[[i]])
}
names(M) <- paste( vnames , "_idx" , sep="" )
return(M)
}
}

# sd and var functions that don't use n-1 denominator
sd2 <- function( x , na.rm=TRUE ) {
Expand Down
8 changes: 4 additions & 4 deletions R/zWAIC-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ function( object , n=1000 , refresh=0.1 , pointwise=FALSE , ... ) {
# extracts log_lik matrix from stanfit and computes WAIC
setMethod("WAIC", "stanfit",
function( object , n=0 , refresh=0.1 , pointwise=FALSE , log_lik="log_lik" , ... ) {
if ( !is.null(extract.samples(object)[[log_lik]]) )
ll_matrix <- extract.samples(object)[[log_lik]]
else
stop(concat("Log-likelihood matrix '",log_lik,"'' not found."))

ll_matrix <- extract.samples(object,pars=log_lik)[[1]]

# stop(concat("Log-likelihood matrix '",log_lik,"'' not found."))

n_obs <- ncol(ll_matrix)
n_samples <- nrow(ll_matrix)
Expand Down
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ You can find a manual with expanded installation and usage instructions here: ``

Here's the brief verison.

You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. Then you can install ``rethinking`` from within R using:
You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. The biggest challenge is getting a C++ compiler configured to work with your installation of R. The instructions at ``https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started`` are quite thorough. Obey them, and you'll likely succeed.

Then you can install ``rethinking`` from within R using:
```
install.packages(c("coda","mvtnorm","devtools"))
install.packages(c("coda","mvtnorm","devtools","loo"))
library(devtools)
devtools::install_github("rmcelreath/rethinking")
```
If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has nothing to do with it. See the manual linked above for some hints about getting ``rstan`` installed.
If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has little to do with it. See the manual linked above for some hints about getting ``rstan`` installed. But always consult the RStan section of the website at ``mc-stan.org`` for the latest information on RStan.

## MAP estimation

Expand Down Expand Up @@ -95,7 +97,7 @@ fit.stan <- map2stan(
f ,
data=list(y=c(-1,1)) ,
start=list(mu=0,sigma=1) ,
chains=4 , cores=4 , iter=1e4 , warmup=1000
chains=4 , cores=4 , iter=2000 , warmup=1000
)
```
The ``parallel`` package is used here, relying upon ``mclapply`` (Mac, UNIX) or ``parLapply`` (Windows). It is best to run parallel operations in the Terminal/Command Prompt, as GUI interfaces sometimes crash when forking processes.
Expand Down Expand Up @@ -191,7 +193,7 @@ Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling. It wi

## Semi-automated Bayesian imputation

It is possible to code simple Bayesian imputations this way. For example, let's simulate a simple regression with missing predictor values:
It is possible to code simple Bayesian imputations. For example, let's simulate a simple regression with missing predictor values:
```
N <- 100
N_miss <- 10
Expand Down Expand Up @@ -247,7 +249,7 @@ mGP <- map2stan(
),
warmup=1000 , iter=5000 , chains=4 )
```
Note the use of the ``constraints`` list to pass custom parameter constraints to Stan. This example is explored in more detail in the (in prep) book.
Note the use of the ``constraints`` list to pass custom parameter constraints to Stan. This example is explored in more detail in the book.

## Information criteria

Expand Down
40 changes: 40 additions & 0 deletions man/coerce_index.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
\name{coerce_index}
\alias{coerce_index}
\alias{check_index}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Build and check integer index variables}
\description{
These functions assist with building (\code{coerce_index}) and checking (\code{check_index}) integer index variables of the kind needed to define varying effect models.
}
\usage{
coerce_index( ... )
check_index( x )
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{...}{A comma-separted list of variables. See details.}
\item{x}{A vector of integers to check for contiguity}
}
\details{
Varying effect models often require index variables that begin at 1 and comprise only integers. These variables are used to lookup specific parameter values inside of the model. For example, it is common to define varying intercepts with a linear model like \code{a0 + a_id[id[i]]}. Here the variable \code{id} is an index variable. It has one value for each case, defining which individual applies to that case.

When raw data exist as factors, these index variables much be converted to integers. This is trickier than it sounds, because R uses an internal integer represntation for factors, \code{levels}, that can conflict with ordinary integer representations.

The function \code{coerce_index} deals with that complication. When the input is a single vector of factors, it returns an integer vector beginning at 1 and with contiguous values.

When the input is instead a comma-separated list of factors, it returns a list in which each factor has been converted to integers, but all levels in all factors were merged so that the same labels across factors always have the same integer values in the result. For example, suppose cases refer to dyads and there are two factors, \code{id1} and \code{id2}, that indicate which pair of individuals are present in each dyad. The labels in these variables should refer to the same individuals. Passing both simultaneously to \code{coerce_index} ensures that the results respect that fact.

The function \code{check_index} merely checks an integer vector to see if it is contiguous.
}
\value{
For \code{coerce_index}, the result is either a single vector of integers (if the input was a single vector) or rather a list of vectors of integers (if the input was a list of vectors).
}
\references{}
\author{Richard McElreath}
\seealso{}
\examples{
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ }

6 changes: 3 additions & 3 deletions man/rethinking.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
\tabular{ll}{
Package: \tab rethinking\cr
Type: \tab Package\cr
Version: \tab 1.56\cr
Date: \tab 10 July 2015\cr
Version: \tab 1.58\cr
Date: \tab 24 Dec 2015\cr
License: \tab GPL-3 \cr
}

Expand All @@ -21,7 +21,7 @@
Richard McElreath
}
\references{
McElreath (2016). Statistical Rethinking: A Bayesian Course with R Examples. CRC Press.
McElreath (2016). Statistical Rethinking: A Bayesian Course with Examples in R and Stan. CRC Press.
}

\examples{}
Expand Down
13 changes: 4 additions & 9 deletions man/simplehist.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,17 @@
Simple integer-valued histograms, for displaying count distributions.
}
\usage{
simplehist( x , ylab="Frequency" , xlab="Count" , ycounts=TRUE , adjust=1 ,
lcol="black" , bins=NULL , show.counts=0 , xlim=NULL , ylim=NULL , ... )
simplehist( x , round=TRUE , ylab="Frequency" , ... )
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{x}{Vector of values to construct histogram from}
\item{round}{When \code{TRUE}, rounds values in \code{x} before plotting}
\item{ylab}{Label on vertical axis}
\item{xlab}{Label on horizontal axis}
\item{ycounts}{?}
\item{lcol}{Line color}
\item{bins}{Bins, defaults to zero and positive integers}
\item{show.counts}{?}
\item{...}{Other parameters to pass to plotting functions.}
\item{...}{Other parameters to pass to plot}
}
\details{
This function constructs clean histograms for count data. It needs to be rewritten, though.
This function constructs clean histograms for count data. Non-integer data can be rounded to nearest integer before plotting. Internally, this function is little more than \code{plot(table(x))}.
}
\value{
}
Expand Down

0 comments on commit e71dbc1

Please sign in to comment.