From cc673d1c8f9310b3c54c831563adf200c3181480 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Wed, 20 Nov 2024 11:27:07 -0700 Subject: [PATCH] Lfmcmc rand engine (#51) * Removing vscode cached * Updating news file and correcting how seeds are set * Adding another set of tests * Modified vignette with better example * Remove unneeded code in kernelfun in lfmcmc vignette * Move test from test-lfmcmc-other-models.R to test-lfmcmc.R * Add Andrew Pulsipher as co-author * Fix LFMCMC unit tests * Fix errors in examples in LFMCMC.R * Document data type coercion in lfmcmc.R * Add test of type coercion to test-lfmcmc.R * Fix type coercion tests in test-lfmcmc.R to use correct values --------- Co-authored-by: Andrew Pulsipher --- DESCRIPTION | 2 + NEWS.md | 18 ++++++ R/LFMCMC.R | 73 ++++++++++++++++------- inst/tinytest/test-lfmcmc.R | 96 ++++++++++++++++++++++++++---- man/LFMCMC.Rd | 25 ++++---- man/epiworldR-package.Rd | 1 + src/cpp11.cpp | 8 +-- src/lfmcmc.cpp | 27 +++++---- vignettes/likelihood-free-mcmc.Rmd | 21 +++---- 9 files changed, 201 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bd48843f..1bdb0678 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,8 @@ Authors@R: c( email="g.vegayon@gmail.com", comment = c(ORCID = "0000-0002-3171-0844")), person(given="Derek", family="Meyer", role=c("aut"), email="derekmeyer37@gmail.com", comment = c(ORCID = "0009-0005-1350-6988")), + person(given="Andrew", family="Pulsipher", role=c("aut"), + email="pulsipher.a@gmail.com", comment = c(ORCID = "0000-0002-0773-3210")), person(given="Susan", family="Holmes", role = "rev", comment = c(what = "JOSS reviewer", ORCID="0000-0002-2208-8168")), person(given="Abinash", family="Satapathy", role = "rev", comment = diff --git a/NEWS.md b/NEWS.md index d0be8221..6d273637 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +# epiworldR 0.4-3 (development version) + +## New features + +* The package now includes the `LFMCMC` module that implements + the likelihood-free Markov Chain Monte Carlo algorithm. This + module is used to estimate the parameters of the models. + +* The new function `add_param()` allows the user to add parameters + to the model. + +* The new function `rm_globalevent()` allows the user to remove + global events from the model. + +* The function `today()` returns the current day (step) of the + simulation. + + # epiworldR 0.3-2 * Starting version 0.3-0, `epiworldR` is versioned using the same version as the C++ library, `epiworld`. diff --git a/R/LFMCMC.R b/R/LFMCMC.R index b18ba818..6b8da1fe 100644 --- a/R/LFMCMC.R +++ b/R/LFMCMC.R @@ -1,10 +1,12 @@ #' Likelihood-Free Markhov Chain Monte Carlo (LFMCMC) #' -#' #' @aliases epiworld_lfmcmc +#' @param model A model of class [epiworld_model] or `NULL` (see details). #' @details -#' Performs a Likelihood-Free Markhov Chain Monte Carlo simulation -#' @param model A model of class [epiworld_model] +#' Performs a Likelihood-Free Markhov Chain Monte Carlo simulation. When +#' `model` is not `NULL`, the model uses the same random-number generator +#' engine as the model. Otherwise, when `model` is `NULL`, a new random-number +#' generator engine is created. #' @returns #' The `LFMCMC` function returns a model of class [epiworld_lfmcmc]. #' @examples @@ -24,14 +26,14 @@ #' #' ## Setup LFMCMC #' # Extract the observed data from the model -#' obs_data <- unname(as.integer(get_today_total(model_sir))) +#' obs_data <- get_today_total(model_sir) #' #' # Define the simulation function #' simfun <- function(params) { #' set_param(model_sir, "Recovery rate", params[1]) #' set_param(model_sir, "Transmission rate", params[2]) #' run(model_sir, ndays = 50) -#' res <- unname(as.integer(get_today_total(model_sir))) +#' res <- get_today_total(model_sir) #' return(res) #' } #' @@ -50,9 +52,9 @@ #' #' ## Run LFMCMC simulation #' # Set initial parameters -#' par0 <- as.double(c(0.1, 0.5)) +#' par0 <- c(0.1, 0.5) #' n_samp <- 2000 -#' epsil <- as.double(1.0) +#' epsil <- 1.0 #' #' # Run the LFMCMC simulation #' run_lfmcmc( @@ -73,43 +75,74 @@ #' get_params_mean(lfmcmc_model) #' #' @export -LFMCMC <- function(model) { - if (!inherits(model, "epiworld_model")) - stop("model should be of class 'epiworld_model'. It is of class ", class(model)) +LFMCMC <- function(model = NULL) { + + if ((length(model) > 0) && !inherits(model, "epiworld_model")) + stop( + "model should be of class 'epiworld_model'. It is of class ", + paste(class(model), collapse = "\", ") + ) structure( LFMCMC_cpp(model), class = c("epiworld_lfmcmc") ) + } #' @rdname LFMCMC #' @param lfmcmc LFMCMC model -#' @param params_init_ Initial model parameters -#' @param n_samples_ Number of samples -#' @param epsilon_ Epsilon parameter +#' @param params_init_ Initial model parameters, treated as double +#' @param n_samples_ Number of samples, treated as integer +#' @param epsilon_ Epsilon parameter, treated as double #' @param seed Random engine seed #' @returns The simulated model of class [epiworld_lfmcmc]. #' @export -run_lfmcmc <- function(lfmcmc, params_init_, n_samples_, epsilon_, seed = NULL) UseMethod("run_lfmcmc") +run_lfmcmc <- function( + lfmcmc, params_init_, n_samples_, epsilon_, + seed = NULL + ) { + UseMethod("run_lfmcmc") +} #' @export -run_lfmcmc.epiworld_lfmcmc <- function(lfmcmc, params_init_, n_samples_, epsilon_, seed = NULL) { - if (length(seed)) set.seed(seed) - run_lfmcmc_cpp(lfmcmc, params_init_, n_samples_, epsilon_, sample.int(1e4, 1)) +run_lfmcmc.epiworld_lfmcmc <- function( + lfmcmc, params_init_, n_samples_, epsilon_, + seed = NULL + ) { + + if (length(seed)) + set.seed(seed) + + run_lfmcmc_cpp( + lfmcmc, + as.double(params_init_), + as.integer(n_samples_), + as.double(epsilon_), + sample.int(1e4, 1) + ) + invisible(lfmcmc) + } #' @rdname LFMCMC #' @param lfmcmc LFMCMC model -#' @param observed_data_ Observed data +#' @param observed_data_ Observed data, treated as double #' @returns The lfmcmc model with the observed data added #' @export -set_observed_data <- function(lfmcmc, observed_data_) UseMethod("set_observed_data") +set_observed_data <- function( + lfmcmc, observed_data_ + ) { + UseMethod("set_observed_data") +} #' @export set_observed_data.epiworld_lfmcmc <- function(lfmcmc, observed_data_) { - set_observed_data_cpp(lfmcmc, observed_data_) + set_observed_data_cpp( + lfmcmc, + as.double(observed_data_) + ) invisible(lfmcmc) } diff --git a/inst/tinytest/test-lfmcmc.R b/inst/tinytest/test-lfmcmc.R index 4c476281..1425b42d 100644 --- a/inst/tinytest/test-lfmcmc.R +++ b/inst/tinytest/test-lfmcmc.R @@ -5,13 +5,15 @@ model_seed <- 122 # Create and run SIR Model for LFMCMC simulation ------------------------------- model_sir <- ModelSIR(name = "COVID-19", prevalence = .1, - transmission_rate = .9, recovery_rate = .3) + transmission_rate = .3, recovery_rate = .3) agents_smallworld(model_sir, n = 1000, k = 5, d = FALSE, p = 0.01) verbose_off(model_sir) run(model_sir, ndays = 50, seed = model_seed) +# Check init of LFMCMC model without epiworld model ---------------------------- +expect_silent(lfmcmc_nomodel <- LFMCMC()) + # Check bad init of LFMCMC model ----------------------------------------------- -expect_error(lfmcmc_bad <- LFMCMC(), 'argument "model" is missing') expect_error(lfmcmc_bad <- LFMCMC(c("not_a_model")), "model should be of class 'epiworld_model'") # Create LFMCMC model ---------------------------------------------------------- @@ -22,7 +24,7 @@ expect_inherits(lfmcmc_model, "epiworld_lfmcmc") expect_length(class(lfmcmc_model), 1) # Extract observed data from the model -obs_data <- unname(as.integer(get_today_total(model_sir))) +obs_data <- get_today_total(model_sir) expect_silent(set_observed_data(lfmcmc_model, obs_data)) @@ -31,20 +33,19 @@ simfun <- function(params) { set_param(model_sir, "Recovery rate", params[1]) set_param(model_sir, "Transmission rate", params[2]) run(model_sir, ndays = 50) - res <- unname(as.integer(get_today_total(model_sir))) + res <- get_today_total(model_sir) return(res) } sumfun <- function(dat) { return(dat) } propfun <- function(params_prev) { - res <- params_prev + rnorm(length(params_prev), ) + res <- plogis(qlogis(params_prev) + rnorm(length(params_prev))) return(res) } kernelfun <- function(stats_now, stats_obs, epsilon) { - ans <- sum(mapply(function(v1, v2) (v1 - v2)^2, stats_obs, stats_now)) - return(ifelse(sqrt(ans) < epsilon, 1.0, 0.0)) + dnorm(sqrt(sum((stats_now - stats_obs)^2))) } # Check adding functions to LFMCMC @@ -55,9 +56,9 @@ expect_silent(set_kernel_fun(lfmcmc_model, kernelfun)) # Run LFMCMC simulation -------------------------------------------------------- # Initial parameters -par0 <- as.double(c(0.1, 0.5)) +par0 <- c(0.1, 0.5) n_samp <- 2000 -epsil <- as.double(1.0) +epsil <- 1.0 expect_silent(run_lfmcmc( lfmcmc = lfmcmc_model, @@ -72,8 +73,8 @@ expect_silent(set_par_names(lfmcmc_model, c("Immune recovery", "Infectiousness") expect_stdout(print(lfmcmc_model)) -expect_equal(get_stats_mean(lfmcmc_model), c(4.45, 2.6135, 992.4365)) -expect_equal(get_params_mean(lfmcmc_model), c(11.58421, 18.96851), tolerance = 0.00001) +expect_equal(get_stats_mean(lfmcmc_model), c(284.7140, 0.8485, 713.9375)) +expect_equal(get_params_mean(lfmcmc_model), c(0.3132901, 0.2782186), tolerance = 0.00001) # Check LFMCMC using factory functions ----------------------------------------- expect_silent(use_proposal_norm_reflective(lfmcmc_model)) @@ -87,6 +88,22 @@ expect_silent(run_lfmcmc( seed = model_seed )) +# Check LFMCMC type coercion of parameters and observed data ------------------- +obs_data_int <- as.integer(obs_data) +expect_silent(set_observed_data(lfmcmc_model, obs_data_int)) + +par0_int <- as.integer(c(1, 5)) +n_samp_double <- as.double(2000.0) +epsil_int <- as.integer(1) + +expect_silent(run_lfmcmc( + lfmcmc = lfmcmc_model, + params_init_ = par0_int, + n_samples_ = n_samp_double, + epsilon_ = epsil_int, + seed = model_seed +)) + # Check running LFMCMC with missing parameters --------------------------------- expect_silent(run_lfmcmc( lfmcmc = lfmcmc_model, @@ -120,3 +137,60 @@ expect_error(run_lfmcmc( )) expect_error(run_lfmcmc(lfmcmc = lfmcmc_model)) + +# Check running LFMCMC without epiworld model ---------------------------------- +model_seed <- 4222 +set.seed(model_seed) +Y <- rnorm(2000, mean = -5, sd = 2.5) + +# Define LFMCMC functions +simfun <- function(par) { + rnorm(2000, mean = par[1], sd = par[2]) +} + +sumfun <- function(x) { + c(mean(x), sd(x)) +} + +propfun <- function(par) { + + par_new <- par + rnorm(2, sd = 0.1) + + # Reflecting par2 + if (par_new[2] < 0) { + par_new[2] <- par[2] - (par_new[2] - par[2]) + } + + return(par_new) +} + +kernelfun <- function(stats_now, stats_obs, epsilon) { + + dnorm(sqrt(sum((stats_obs - stats_now)^2))) + +} + +# Setup LFMCMC +lfmcmc_model <- LFMCMC() |> + set_simulation_fun(simfun) |> + set_summary_fun(sumfun) |> + set_proposal_fun(propfun) |> + set_kernel_fun(kernelfun) |> + set_observed_data(Y) + +# Run LFMCMC +x <- run_lfmcmc( + lfmcmc = lfmcmc_model, + params_init_ = c(0, 1), + n_samples_ = 3000, + epsilon_ = 1.0, + seed = model_seed +) + +x_means <- get_params_mean(x) + +expect_equivalent( + x_means, + c(-5, 2.5), + tolerance = 0.1 +) diff --git a/man/LFMCMC.Rd b/man/LFMCMC.Rd index 7e9204da..be54c14a 100644 --- a/man/LFMCMC.Rd +++ b/man/LFMCMC.Rd @@ -18,7 +18,7 @@ \alias{print.epiworld_lfmcmc} \title{Likelihood-Free Markhov Chain Monte Carlo (LFMCMC)} \usage{ -LFMCMC(model) +LFMCMC(model = NULL) run_lfmcmc(lfmcmc, params_init_, n_samples_, epsilon_, seed = NULL) @@ -47,19 +47,19 @@ get_stats_mean(lfmcmc) \method{print}{epiworld_lfmcmc}(x, ...) } \arguments{ -\item{model}{A model of class \link{epiworld_model}} +\item{model}{A model of class \link{epiworld_model} or \code{NULL} (see details).} \item{lfmcmc}{LFMCMC model} -\item{params_init_}{Initial model parameters} +\item{params_init_}{Initial model parameters, treated as double} -\item{n_samples_}{Number of samples} +\item{n_samples_}{Number of samples, treated as integer} -\item{epsilon_}{Epsilon parameter} +\item{epsilon_}{Epsilon parameter, treated as double} \item{seed}{Random engine seed} -\item{observed_data_}{Observed data} +\item{observed_data_}{Observed data, treated as double} \item{fun}{The LFMCMC kernel function} @@ -102,7 +102,10 @@ The lfmcmc model Likelihood-Free Markhov Chain Monte Carlo (LFMCMC) } \details{ -Performs a Likelihood-Free Markhov Chain Monte Carlo simulation +Performs a Likelihood-Free Markhov Chain Monte Carlo simulation. When +\code{model} is not \code{NULL}, the model uses the same random-number generator +engine as the model. Otherwise, when \code{model} is \code{NULL}, a new random-number +generator engine is created. } \examples{ ## Setup an SIR model to use in the simulation @@ -121,14 +124,14 @@ run(model_sir, ndays = 50, seed = model_seed) ## Setup LFMCMC # Extract the observed data from the model -obs_data <- unname(as.integer(get_today_total(model_sir))) +obs_data <- get_today_total(model_sir) # Define the simulation function simfun <- function(params) { set_param(model_sir, "Recovery rate", params[1]) set_param(model_sir, "Transmission rate", params[2]) run(model_sir, ndays = 50) - res <- unname(as.integer(get_today_total(model_sir))) + res <- get_today_total(model_sir) return(res) } @@ -147,9 +150,9 @@ lfmcmc_model <- LFMCMC(model_sir) |> ## Run LFMCMC simulation # Set initial parameters -par0 <- as.double(c(0.1, 0.5)) +par0 <- c(0.1, 0.5) n_samp <- 2000 -epsil <- as.double(1.0) +epsil <- 1.0 # Run the LFMCMC simulation run_lfmcmc( diff --git a/man/epiworldR-package.Rd b/man/epiworldR-package.Rd index 33015655..56f0aed4 100644 --- a/man/epiworldR-package.Rd +++ b/man/epiworldR-package.Rd @@ -24,6 +24,7 @@ Useful links: Authors: \itemize{ \item Derek Meyer \email{derekmeyer37@gmail.com} (\href{https://orcid.org/0009-0005-1350-6988}{ORCID}) + \item Andrew Pulsipher \email{pulsipher.a@gmail.com} (\href{https://orcid.org/0000-0002-0773-3210}{ORCID}) } Other contributors: diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 4c6addcf..499185ec 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -398,17 +398,17 @@ extern "C" SEXP _epiworldR_LFMCMC_cpp(SEXP model) { END_CPP11 } // lfmcmc.cpp -SEXP run_lfmcmc_cpp(SEXP lfmcmc, std::vector params_init_, size_t n_samples_, epiworld_double epsilon_, int seed); +SEXP run_lfmcmc_cpp(SEXP lfmcmc, std::vector params_init_, size_t n_samples_, double epsilon_, int seed); extern "C" SEXP _epiworldR_run_lfmcmc_cpp(SEXP lfmcmc, SEXP params_init_, SEXP n_samples_, SEXP epsilon_, SEXP seed) { BEGIN_CPP11 - return cpp11::as_sexp(run_lfmcmc_cpp(cpp11::as_cpp>(lfmcmc), cpp11::as_cpp>>(params_init_), cpp11::as_cpp>(n_samples_), cpp11::as_cpp>(epsilon_), cpp11::as_cpp>(seed))); + return cpp11::as_sexp(run_lfmcmc_cpp(cpp11::as_cpp>(lfmcmc), cpp11::as_cpp>>(params_init_), cpp11::as_cpp>(n_samples_), cpp11::as_cpp>(epsilon_), cpp11::as_cpp>(seed))); END_CPP11 } // lfmcmc.cpp -SEXP set_observed_data_cpp(SEXP lfmcmc, std::vector< int > observed_data_); +SEXP set_observed_data_cpp(SEXP lfmcmc, std::vector< double > observed_data_); extern "C" SEXP _epiworldR_set_observed_data_cpp(SEXP lfmcmc, SEXP observed_data_) { BEGIN_CPP11 - return cpp11::as_sexp(set_observed_data_cpp(cpp11::as_cpp>(lfmcmc), cpp11::as_cpp>>(observed_data_))); + return cpp11::as_sexp(set_observed_data_cpp(cpp11::as_cpp>(lfmcmc), cpp11::as_cpp>>(observed_data_))); END_CPP11 } // lfmcmc.cpp diff --git a/src/lfmcmc.cpp b/src/lfmcmc.cpp index 5b615c55..98453140 100644 --- a/src/lfmcmc.cpp +++ b/src/lfmcmc.cpp @@ -9,7 +9,7 @@ using namespace epiworld; -#define TData_default std::vector< int > +#define TData_default std::vector< double > #define WrapLFMCMC(a) \ cpp11::external_pointer> (a) @@ -29,8 +29,13 @@ SEXP LFMCMC_cpp( new LFMCMC() ); - cpp11::external_pointer> modelptr(model); - lfmcmc_ptr->set_rand_engine(modelptr->get_rand_endgine()); + if (Rf_inherits(model, "epiworld_model")) { + cpp11::external_pointer> modelptr(model); + lfmcmc_ptr->set_rand_engine(modelptr->get_rand_endgine()); + } else { + auto new_ptr = std::make_shared(std::mt19937()); + lfmcmc_ptr->set_rand_engine(new_ptr); + } return lfmcmc_ptr; } @@ -42,9 +47,9 @@ SEXP LFMCMC_cpp( [[cpp11::register]] SEXP run_lfmcmc_cpp( SEXP lfmcmc, - std::vector params_init_, + std::vector params_init_, size_t n_samples_, - epiworld_double epsilon_, + double epsilon_, int seed ) { WrapLFMCMC(lfmcmc_ptr)(lfmcmc); @@ -60,7 +65,7 @@ SEXP run_lfmcmc_cpp( [[cpp11::register]] SEXP set_observed_data_cpp( SEXP lfmcmc, - std::vector< int > observed_data_ + std::vector< double > observed_data_ ) { WrapLFMCMC(lfmcmc_ptr)(lfmcmc); lfmcmc_ptr->set_observed_data(observed_data_); @@ -105,7 +110,7 @@ SEXP use_proposal_norm_reflective_cpp( SEXP lfmcmc ) { WrapLFMCMC(lfmcmc_ptr)(lfmcmc); - lfmcmc_ptr->set_proposal_fun(make_proposal_norm_reflective>(.5, 0, 1)); + lfmcmc_ptr->set_proposal_fun(make_proposal_norm_reflective>(.5, 0, 1)); return lfmcmc; } @@ -123,7 +128,7 @@ SEXP set_simulation_fun_cpp( auto params_doubles = cpp11::doubles(params); return cpp11::as_cpp( - cpp11::integers(fun(params_doubles)) + cpp11::doubles(fun(params_doubles)) ); }; @@ -146,8 +151,8 @@ SEXP set_summary_fun_cpp( LFMCMC* ) -> void { - auto dat_int = cpp11::integers(dat); - auto res_tmp = cpp11::integers(fun(dat_int)); + auto dat_int = cpp11::doubles(dat); + auto res_tmp = cpp11::doubles(fun(dat_int)); if (res.size() == 0u) res.resize(res_tmp.size()); @@ -198,7 +203,7 @@ SEXP use_kernel_fun_gaussian_cpp( SEXP lfmcmc ) { WrapLFMCMC(lfmcmc_ptr)(lfmcmc); - lfmcmc_ptr->set_kernel_fun(kernel_fun_gaussian>); + lfmcmc_ptr->set_kernel_fun(kernel_fun_gaussian>); return lfmcmc; } diff --git a/vignettes/likelihood-free-mcmc.Rmd b/vignettes/likelihood-free-mcmc.Rmd index c5e87fe4..99901034 100644 --- a/vignettes/likelihood-free-mcmc.Rmd +++ b/vignettes/likelihood-free-mcmc.Rmd @@ -32,7 +32,7 @@ model_seed <- 122 model_sir <- ModelSIR( name = "COVID-19", prevalence = .1, - transmission_rate = .9, + transmission_rate = .3, recovery_rate = .3 ) @@ -58,7 +58,7 @@ summary(model_sir) ## Setup LFMCMC ```{r lfmcmc-setup} # Extract the observed data from the model -obs_data <- unname(as.integer(get_today_total(model_sir))) +obs_data <- get_today_total(model_sir) # Define the LFMCMC simulation function simfun <- function(params) { @@ -71,8 +71,8 @@ simfun <- function(params) { ndays = 50 ) - res <- unname(as.integer(get_today_total(model_sir))) - return(res) + get_today_total(model_sir) + } # Define the LFMCMC summary function @@ -83,19 +83,14 @@ sumfun <- function(dat) { # Define the LFMCMC proposal function # - Based on proposal_fun_normal from lfmcmc-meat.hpp propfun <- function(params_prev) { - res <- params_prev + rnorm(length(params_prev), ) + res <- plogis(qlogis(params_prev) + rnorm(length(params_prev))) return(res) } # Define the LFMCMC kernel function # - Based on kernel_fun_uniform from lfmcmc-meat.hpp kernelfun <- function(stats_now, stats_obs, epsilon) { - - ans <- sum(mapply(function(v1, v2) (v1 - v2)^2, - stats_obs, - stats_now)) - - return(ifelse(sqrt(ans) < epsilon, 1.0, 0.0)) + dnorm(sqrt(sum((stats_now - stats_obs)^2))) } # Create the LFMCMC model @@ -110,9 +105,9 @@ lfmcmc_model <- LFMCMC(model_sir) |> ## Run LFMCMC simulation ```{r lfmcmc-run} # Set initial parameters -par0 <- as.double(c(0.1, 0.5)) +par0 <- c(0.1, 0.5) n_samp <- 2000 -epsil <- as.double(1.0) +epsil <- 1.0 # Run the LFMCMC simulation run_lfmcmc(