diff --git a/DESCRIPTION b/DESCRIPTION index 7c3b643..aaa9976 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: rethinking Type: Package Title: Statistical Rethinking book package -Version: 2.12 -Date: 2020-06-26 +Version: 2.13 +Date: 2020-08-26 Author: Richard McElreath Maintainer: Richard McElreath Imports: coda, MASS, mvtnorm, loo, shape diff --git a/R/distributions.r b/R/distributions.r index 1d65ced..0a9eacb 100644 --- a/R/distributions.r +++ b/R/distributions.r @@ -179,6 +179,12 @@ rgampois <- function( n , mu , scale ) { rnbinom( n , size=shape , prob=prob ) } +rgampois2 <- function (n , mu , scale) { + p_p <- scale / (scale + mu) + p_n <- scale + rnbinom(n, size = p_n, prob = p_p) +} + # laplace (double exponential) dlaplace <- function(x,location=0,lambda=1,log=FALSE) { # f(y) = (1/(2b)) exp( -|y-a|/b ) diff --git a/R/sim.r b/R/sim.r index f003956..7948665 100644 --- a/R/sim.r +++ b/R/sim.r @@ -41,7 +41,11 @@ sim_core <- function( fit , data , post , vars , n , refresh=0 , replace=list() } # get simulation partner function rlik <- flik - if ( ll==FALSE ) substr( rlik , 1 , 1 ) <- "r" + if ( ll==FALSE ) { + substr( rlik , 1 , 1 ) <- "r" + # hook for gampois, which needs rgampois2 + if ( rlik=="rgampois" ) rlik <- "rgampois2" + } # check for aggregated binomial, but only when ll==TRUE aggregated_binomial <- FALSE @@ -295,6 +299,8 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , ... ) { # get simulation partner function rlik <- flik substr( rlik , 1 , 1 ) <- "r" + # hook for gampois, which needs rgampois2 + if ( rlik=="rgampois" ) rlik <- "rgampois2" # pull out parameters in likelihood pars <- vector(mode="list",length=length(lik[[3]])-1) diff --git a/R/ulam-class.R b/R/ulam-class.R index f301bb8..759ab15 100644 --- a/R/ulam-class.R +++ b/R/ulam-class.R @@ -17,6 +17,11 @@ setMethod("precis", "ulam", function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasing=FALSE , lp__=FALSE , omit=NULL ,... ) { low <- (1-prob)/2 upp <- 1-low + + # when fit with cmdstan, all parameters/variable in result + # so want to filter at minimum by object@pars + if ( missing(pars) ) pars <- object@pars + result <- summary(object@stanfit,pars=pars,probs=c(low,upp))$summary[,c(1,3:7)] result <- as.data.frame( result ) @@ -42,11 +47,13 @@ function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasin return( new( "precis" , result , digits=digits ) ) }) - +# models fit with cmdstan=TRUE include all parameters/variables +# so need to trim what is returned using object@pars setMethod("extract.samples","ulam", -function(object,n,clean=TRUE,...) { +function(object,n,clean=TRUE,pars,...) { #require(rstan) - p <- rstan::extract(object@stanfit,...) + if (missing(pars)) pars <- object@pars + p <- rstan::extract(object@stanfit,pars=pars,...) # get rid of dev and lp__ if ( clean==TRUE ) { p[['dev']] <- NULL diff --git a/R/ulam-function.R b/R/ulam-function.R index 9d4a041..a196069 100644 --- a/R/ulam-function.R +++ b/R/ulam-function.R @@ -19,7 +19,7 @@ ulam_options <- new.env(parent=emptyenv()) ulam_options$use_cmdstan <- FALSE set_ulam_cmdstan <- function(x=TRUE) assign( "use_cmdstan" , x , env=ulam_options ) -ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , ... ) { +ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , warmup , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , ... ) { if ( !is.null(file) ) { rds_file_name <- concat( file , ".rds" ) @@ -33,6 +33,8 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , if ( !missing(data) ) data <- as.list(data) + if ( missing(warmup) ) warmup <- floor(iter/2) + # check for previous fit passed instead of formula prev_stanfit <- FALSE if ( class(flist)=="ulam" ) { @@ -1403,7 +1405,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , if ( prev_stanfit==FALSE ) { if ( cmdstan==FALSE ) # rstan interface - stanfit <- stan( model_code = model_code , data = data , pars=use_pars , chains=chains , cores=cores , iter=iter , control=control , ... ) + stanfit <- stan( model_code = model_code , data = data , pars=use_pars , chains=chains , cores=cores , iter=iter , control=control , warmup=warmup , ... ) else { # use cmdstanr interface require( cmdstanr , quietly=TRUE ) @@ -1414,24 +1416,57 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , compile=filex[[3]], cpp_options=list(stan_threads=TRUE) ) # set_num_threads( threads ) - cmdstanfit <- mod$sample( data=data , chains=chains , parallel_chains=cores , iter_sampling=iter , adapt_delta=as.numeric(control[['adapt_delta']]) , threads_per_chain=threads , ... ) + # iter means only post-warmup samples for cmdstanr + # so need to compute iter explicitly + cmdstanfit <- mod$sample( + data=data , + chains=chains , parallel_chains=cores , + iter_warmup=warmup, iter_sampling=floor(iter-warmup) , + save_warmup=TRUE , + adapt_delta=as.numeric(control[['adapt_delta']]) , + threads_per_chain=threads , ... ) stanfit <- rstan::read_stan_csv(cmdstanfit$output_files()) } - } else - # rstan interface, previous stanfit object - stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , - chains=chains , cores=cores , iter=iter , control=control , ... ) + } else { + if ( cmdstan==FALSE ) { + # rstan interface, previous stanfit object + stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , + chains=chains , cores=cores , iter=iter , control=control , warmup=warmup , ... ) + } else { + # SAME AS ABOVE FOR NOW - how to referece exe? + # use cmdstanr interface + require( cmdstanr , quietly=TRUE ) + filex <- cmdstanr_model_write( model_code ) + mod <- cmdstan_model( + stan_file=filex[[1]], + # exe_file=filex[[2]], + compile=filex[[3]], + cpp_options=list(stan_threads=TRUE) ) + # set_num_threads( threads ) + # iter means only post-warmup samples for cmdstanr + # so need to compute iter explicitly + cmdstanfit <- mod$sample( + data=data , + chains=chains , parallel_chains=cores , + iter_warmup=warmup, iter_sampling=floor(iter-warmup) , + save_warmup=TRUE , + adapt_delta=as.numeric(control[['adapt_delta']]) , + threads_per_chain=threads , ... ) + stanfit <- rstan::read_stan_csv(cmdstanfit$output_files()) + } + } } else { # WITH explicit start list + # Still needs cmdstanr interface f_init <- "random" if ( class(start)=="list" ) f_init <- function() return(start) if ( class(start)=="function" ) f_init <- start if ( prev_stanfit==FALSE ) stanfit <- stan( model_code = model_code , data = data , pars=use_pars , - chains=chains , cores=cores , iter=iter , control=control , init=f_init , ... ) + chains=chains , cores=cores , iter=iter , control=control , init=f_init , warmup=warmup , ... ) else stanfit <- stan( fit = prev_stanfit_object , data = data , pars=use_pars , - chains=chains , cores=cores , iter=iter , control=control , init=f_init , ... ) + chains=chains , cores=cores , iter=iter , control=control , init=f_init , warmup=warmup , ... ) } } diff --git a/R/utilities.r b/R/utilities.r index 185d53b..1bea0d2 100644 --- a/R/utilities.r +++ b/R/utilities.r @@ -276,6 +276,9 @@ var2 <- function( x , na.rm=TRUE ) { # use E(x^2) - E(x)^2 form mean(x^2) - mean(x)^2 } +# aliases +sd_pop <- sd2 +var_pop <- var2 # function for formatting summary table output in various show methods # x should be a list or data frame diff --git a/README.md b/README.md index f72851e..e3bd6a9 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ There are some advantages to accessing Stan through ``cmdstanr`` rather than rst ``` devtools::install_github("stan-dev/cmdstanr") ``` -If you haven't installed cmdstan previously, you will also need to do that with ``install_cmdstan()``. Then you need to add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan. +If you haven't installed cmdstan previously, you will also need to do that with ``install_cmdstan()``. Then you need to add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan. To use cmdstan as the default interface, do ``set_ulam_cmdstan(TRUE)``. Once rstan and cmdstan are installed (almost there), then you can install ``rethinking`` from within R using: ``` @@ -419,7 +419,7 @@ x <- rnorm(N) m <- 1 + rpois(N,2) y <- rbinom( N , size=m , prob=inv_logit(-3+x) ) dat <- list( y=y , x=x , m=m ) -# single thread +# two threads m1 <- ulam( alist( y ~ binomial_logit( m , logit_p ), diff --git a/data/Kline.csv b/data/Kline.csv old mode 100755 new mode 100644 diff --git a/data/elephants.csv b/data/elephants.csv old mode 100755 new mode 100644 diff --git a/data/galapagos.csv b/data/galapagos.csv old mode 100755 new mode 100644 diff --git a/data/homeworkch3.R b/data/homeworkch3.R old mode 100755 new mode 100644 diff --git a/data/islands.csv b/data/islands.csv old mode 100755 new mode 100644 diff --git a/data/rugged.csv b/data/rugged.csv old mode 100755 new mode 100644 diff --git a/data/salamanders.csv b/data/salamanders.csv old mode 100755 new mode 100644 diff --git a/data/tulips.csv b/data/tulips.csv old mode 100755 new mode 100644 diff --git a/tests/book_chapters/test_chapter13.R b/tests/book_chapters/test_chapter13.R index afb04b8..a24fbc2 100644 --- a/tests/book_chapters/test_chapter13.R +++ b/tests/book_chapters/test_chapter13.R @@ -8,6 +8,7 @@ expect_equiv_eps <- function( x , y , eps=0.01 ) { expect_equivalent( x , y , tolerance=eps ) } +# set_ulam_cmdstan(TRUE) ## R code 13.1 library(rethinking) diff --git a/tests/rethinking_tests/test_ulam1.R b/tests/rethinking_tests/test_ulam1.R index a642b46..4d30c27 100644 --- a/tests/rethinking_tests/test_ulam1.R +++ b/tests/rethinking_tests/test_ulam1.R @@ -8,6 +8,8 @@ expect_equiv_eps <- function( x , y , eps=0.01 ) { expect_equivalent( x , y , tolerance=eps ) } +set_ulam_cmdstan(TRUE) + # student_t data(Howell1) @@ -23,7 +25,7 @@ mt <- ulam( b ~ normal( 0 , 1 ) , sigma ~ exponential( 1 ), nu ~ gamma(2, 0.1) - ) , data=d2 , sample=TRUE , refresh=-1 , seed=1 ) + ) , data=d2 , sample=TRUE , seed=1 ) test_that("Student_t regression code match", expect_known_hash( mt@model , "98532fc851" ) @@ -94,6 +96,7 @@ UCBadmit$male <- as.integer( ifelse( UCBadmit$applicant.gender=="male" , 1 , 0 ) UCBadmit$dept <- rep( 1:6 , each=2 ) UCBadmit$ag <- UCBadmit$applicant.gender UCBadmit$applicant.gender <- NULL +UCBadmit$reject <- NULL test_that("quap to ulam binomial",{ z <- quap( @@ -103,7 +106,7 @@ test_that("quap to ulam binomial",{ a ~ dnorm(0,4) ), data=UCBadmit ) zz <- ulam( z , sample=FALSE ) - expect_known_hash( zz$model , "93f10cbc50" ) + expect_known_hash( zz$model , "e1f6f2c9a3" ) }) test_that("ulam constraints",{ @@ -115,7 +118,7 @@ test_that("ulam constraints",{ ), constraints=list(a="lower=0"), data=UCBadmit , sample=FALSE ) - expect_known_hash( z$model , "ea06a8ca36" ) + expect_known_hash( z$model , "608f2ad66b" ) }) test_that("ulam binomial log_lik",{ @@ -126,7 +129,7 @@ test_that("ulam binomial log_lik",{ a ~ normal(0,1), b ~ normal(0,0.5) ), data=UCBadmit , sample=TRUE , log_lik=TRUE ) - expect_known_hash( z@model , "097208dacb" ) + expect_known_hash( z@model , "2add972dae" ) expect_equivalent( length(WAIC(z)) , 4 ) }) @@ -160,7 +163,7 @@ test_that("ulam continuous missing data 1",{ b ~ normal(0,1), bx ~ normal(0,1) ), data=UCBadmit , sample=TRUE , log_lik=FALSE ) - expect_known_hash( z2b2@model , "ed1532565d" ) + expect_known_hash( z2b2@model , "6300da1946" ) expect_equivalent( dim(precis(z2b2,2)) , c(5,6) ) }) @@ -177,7 +180,7 @@ test_that("ulam continuous missing data 2",{ bx ~ normal(0,1) ), data=UCBadmit , sample=TRUE , log_lik=TRUE ) - expect_known_hash( z2b_auto@model , "0656b0bec9" ) + expect_known_hash( z2b_auto@model , "12312ebec6" ) expect_equivalent( dim(precis(z2b_auto,2)) , c(5,6) ) }) @@ -197,7 +200,7 @@ test_that("ulam continuous missing data 3",{ b ~ normal(0,1), c(bx,bx2) ~ normal(0,1) ), data=UCBadmit , sample=FALSE ) - expect_known_hash( z2c$model , "db529f4ba3" ) + expect_known_hash( z2c$model , "111badfdd3" ) }) @@ -210,6 +213,7 @@ UCBadmit$applicant.gender <- NULL UCBadmit$male2 <- UCBadmit$male UCBadmit$male2[1:2] <- (-9) # missingness code UCBadmit$male2 <- as.integer(UCBadmit$male2) +UCBadmit$reject <- NULL test_that("ulam discrete missing 1",{ z <- ulam( @@ -227,7 +231,7 @@ test_that("ulam discrete missing 1",{ a[dept] ~ normal(0,1), b ~ normal(0,1) ), data=UCBadmit , sample=TRUE , log_lik=FALSE ) - expect_known_hash( z@model , "db97f3f2de" ) + expect_known_hash( z@model , "aed7571a87" ) }) # same but with custom() @@ -247,7 +251,7 @@ test_that("ulam discrete missing 2",{ a[dept] ~ normal(0,4), b ~ normal(0,1) ), data=UCBadmit , sample=FALSE , log_lik=FALSE ) - expect_known_hash( z2d$model , "6ca98ae43e" ) + expect_known_hash( z2d$model , "73cd87c7f2" ) }) # log_sum_exp form @@ -267,6 +271,6 @@ test_that("ulam discrete missing 3",{ a ~ normal(0,4), b ~ normal(0,1) ), data=UCBadmit , sample=FALSE ) - expect_known_hash( z2e$model , "eded9b0b6e" ) + expect_known_hash( z2e$model , "dfca821d69" ) })