Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tidy-friendly wrapper for synthdid_estimate #90

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ Description: Estimate average treatment effects in panel data.
Depends:
R (>= 3.4)
Imports:
abind,
mvtnorm
License: GPL (>= 2) | BSD_3_clause + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Suggests:
testthat,
ggplot2,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(lindsey_density_estimate)
export(panel.matrices)
export(sc_estimate)
export(simulate_dgp)
export(synthdid)
export(synthdid_controls)
export(synthdid_effect_curve)
export(synthdid_estimate)
Expand Down
1 change: 1 addition & 0 deletions R/synthdid-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@
#'}
#'
#' @keywords internal
#' @aliases synthdid-package
"_PACKAGE"
125 changes: 111 additions & 14 deletions R/synthdid.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @param v a vector
sparsify_function = function(v) { v[v <= max(v)/4] = 0; v/sum(v) }


#' Computes the synthetic diff-in-diff estimate for an average treatment effect on a treated block.
#'
#' See 'Synthetic Difference in Differences' by Arkhangelsky et al. This implements Algorithm 1.
Expand Down Expand Up @@ -43,11 +44,11 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
weights = list(omega = NULL, lambda = NULL),
update.omega = is.null(weights$omega), update.lambda = is.null(weights$lambda),
min.decrease = 1e-5 * noise.level, max.iter = 1e4,
sparsify = sparsify_function,
max.iter.pre.sparsify = 100) {
sparsify = sparsify_function,
max.iter.pre.sparsify = 100) {
stopifnot(nrow(Y) > N0, ncol(Y) > T0, length(dim(X)) %in% c(2, 3), dim(X)[1:2] == dim(Y), is.list(weights),
is.null(weights$lambda) || length(weights$lambda) == T0, is.null(weights$omega) || length(weights$omega) == N0,
!is.null(weights$lambda) || update.lambda, !is.null(weights$omega) || update.omega)
is.null(weights$lambda) || length(weights$lambda) == T0, is.null(weights$omega) || length(weights$omega) == N0,
!is.null(weights$lambda) || update.lambda, !is.null(weights$omega) || update.omega)
if (length(dim(X)) == 2) { dim(X) = c(dim(X), 1) }
if (is.null(sparsify)) { max.iter.pre.sparsify = max.iter }
N1 = nrow(Y) - N0
Expand All @@ -60,10 +61,10 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
if (update.lambda) {
Yc = collapsed.form(Y, N0, T0)
lambda.opt = sc.weight.fw(Yc[1:N0, ], zeta = zeta.lambda, intercept = lambda.intercept, lambda=weights$lambda,
min.decrease = min.decrease, max.iter = max.iter.pre.sparsify)
min.decrease = min.decrease, max.iter = max.iter.pre.sparsify)
if(!is.null(sparsify)) {
lambda.opt = sc.weight.fw(Yc[1:N0, ], zeta = zeta.lambda, intercept = lambda.intercept, lambda=sparsify(lambda.opt$lambda),
min.decrease = min.decrease, max.iter = max.iter)
lambda.opt = sc.weight.fw(Yc[1:N0, ], zeta = zeta.lambda, intercept = lambda.intercept, lambda=sparsify(lambda.opt$lambda),
min.decrease = min.decrease, max.iter = max.iter)
}
weights$lambda = lambda.opt$lambda
weights$lambda.vals = lambda.opt$vals
Expand All @@ -72,10 +73,10 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
if (update.omega) {
Yc = collapsed.form(Y, N0, T0)
omega.opt = sc.weight.fw(t(Yc[, 1:T0]), zeta = zeta.omega, intercept = omega.intercept, lambda=weights$omega,
min.decrease = min.decrease, max.iter = max.iter.pre.sparsify)
min.decrease = min.decrease, max.iter = max.iter.pre.sparsify)
if(!is.null(sparsify)) {
omega.opt = sc.weight.fw(t(Yc[, 1:T0]), zeta = zeta.omega, intercept = omega.intercept, lambda=sparsify(omega.opt$lambda),
min.decrease = min.decrease, max.iter = max.iter)
omega.opt = sc.weight.fw(t(Yc[, 1:T0]), zeta = zeta.omega, intercept = omega.intercept, lambda=sparsify(omega.opt$lambda),
min.decrease = min.decrease, max.iter = max.iter)
}
weights$omega = omega.opt$lambda
weights$omega.vals = omega.opt$vals
Expand All @@ -87,9 +88,9 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
Xc = apply(X, 3, function(Xi) { collapsed.form(Xi, N0, T0) })
dim(Xc) = c(dim(Yc), dim(X)[3])
weights = sc.weight.fw.covariates(Yc, Xc, zeta.lambda = zeta.lambda, zeta.omega = zeta.omega,
lambda.intercept = lambda.intercept, omega.intercept = omega.intercept,
min.decrease = min.decrease, max.iter = max.iter,
lambda = weights$lambda, omega = weights$omega, update.lambda = update.lambda, update.omega = update.omega)
lambda.intercept = lambda.intercept, omega.intercept = omega.intercept,
min.decrease = min.decrease, max.iter = max.iter,
lambda = weights$lambda, omega = weights$omega, update.lambda = update.lambda, update.omega = update.omega)
}

X.beta = contract3(X, weights$beta)
Expand All @@ -106,6 +107,102 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
return(estimate)
}

#' A wrapper function for estimating one or more synthetic diff-in-diffs given a model
#' formula and a data frame.
#' @param formula a formula representing the relationship to be estimated. For example,
#' \code{formula = y ~ d | unit + time} where y is the dependent variable,
#' d is the time-and-unit-varying treatment variable, and unit and time are
#' unit and time identifiers, respectively. The unit identifier must precede
#' the time identifier. Optional control variables \code{x1} and \code{x2} may
#' be added by appending \code{| ~ x1 + x2} to the end of the formula.
#' Additionally, you can estimate models of multiple outcomes by expressing the
#' left hand side as a vector of multiple variables \code{c(y1, y2, y3)}.
#' @param data a data.frame object including each variable in formula.
#' @param method the estimator to use. One of `did_estimate`, `sc_estimate`, or `synthdid_estimate`.
#' Default is `synthdid_estimate`.
#' @param ... optional parameters to pass on to synthdid_estimate.
#' @return A list of average treatment effect estimates with 'weights' and 'setup' attached as attributes.
#' 'weights' contains the estimated weights lambda and omega and corresponding intercepts,
#' as well as regression coefficients beta if X is passed.
#' 'setup' is a list describing the problem passed in: Y, N0, T0, X.
#' @examples
#' data(CPS)
#' # No controls
#' formula_1 <- c(urate, hours) ~ min_wage | state + year
#' # Controlling for log wage
#' formula_2 <- c(urate, hours) ~ min_wage | state + year | ~ log_wage
#' # Estimate models
#' estimates_nocontrol <- synthdid(formula_1, CPS)
#' estimates_lwcontrol <- synthdid(formula_2, CPS)
#'
#' @export synthdid
synthdid <- function(formula,
data,
method = synthdid_estimate,
...){

# Flag whether controls are included
has_controls <- length(formula[[3]][[2]]) == 3

# Define panel dimensions
if(has_controls){
panel_dims <- formula[[3]][[2]][[3]]
} else {
panel_dims <- formula[[3]][[3]]
}
unit_var <- as.character(panel_dims[[2]])
time_var <- as.character(panel_dims[[3]])

# Define all dependent variables
dep_vars <- as.character(formula[[2]])
if(length(dep_vars) > 1){
n_dep_vars <- length(dep_vars) - 1
dep_vars <- tail(dep_vars, n_dep_vars)
}

# Define treatment variable
if(has_controls){
treat_var <- as.character(formula[[3]][[2]][[2]])
} else {
treat_var <- as.character(formula[[3]][[2]])
}

# Extract controls
if(has_controls){
control_formula <- update(as.formula(formula[[3]][[3]]), ~ . -1)
control_matrix <- model.matrix(control_formula, data)

# Create N x t matrix for each variable
control_matlist <- apply(control_matrix, 2, function(x){
d <- data.frame(unit = data[[unit_var]],
time = data[[time_var]],
x)
reshape(d, direction = "wide", idvar = "unit", timevar = "time")[, -1]
})

# Bind matrices into array
X <- abind::abind(control_matlist, along = 3)
}

# Data can't be a tibble
data <- data.frame(data)

# Estimate multiple models
estimates <- lapply(dep_vars, function(dep_var) {
setup <- panel.matrices(data, unit_var, time_var, dep_var, treat_var)

if(!has_controls) X <- array(dim = c(dim(setup$Y), 0))
estimate <- method(setup$Y, setup$N0, setup$T0, X = X, ...)
estimate
})

# Rename list elements with model names
names(estimates) <- dep_vars
return(estimates)
}



#' synthdid_estimate for synthetic control estimates.
#' Takes all the same parameters, but by default, passes options to use the synthetic control estimator
#' By default, this uses only 'infinitesimal' ridge regularization when estimating the weights.
Expand All @@ -118,7 +215,7 @@ synthdid_estimate <- function(Y, N0, T0, X = array(dim = c(dim(Y), 0)),
#' @export sc_estimate
sc_estimate = function(Y, N0, T0, eta.omega = 1e-6, ...) {
estimate = synthdid_estimate(Y, N0, T0, eta.omega = eta.omega,
weights = list(lambda = rep(0, T0)), omega.intercept = FALSE, ...)
weights = list(lambda = rep(0, T0)), omega.intercept = FALSE, ...)
attr(estimate, 'estimator') = "sc_estimate"
estimate
}
Expand Down
2 changes: 1 addition & 1 deletion man/synthdid-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions man/synthdid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions tests/testthat/test_synthdid.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@


test_that("a simple workflow doesn't error", {
setup = random.low.rank()
tau.hat = synthdid_estimate(setup$Y,setup$N0,setup$T0)
Expand All @@ -12,6 +14,60 @@ test_that("a simple workflow doesn't error", {
expect_equal(1, 1)
})

test_that("a simple workflow doesn't error when estimating multiple outcomes", {
data(CPS)
# No controls
formula_1 <- c(urate, hours) ~ min_wage | state + year
# Controlling for log wage
formula_2 <- c(urate, hours) ~ min_wage | state + year | ~ log_wage
# Controlling for multiple variables
formula_3 <- c(urate, hours) ~ min_wage | state + year | ~ log_wage + open_carry + abort_ban
# Estimate models
estimates_nocontrol <- synthdid(formula_1, CPS)
estimates_lwcontrol <- synthdid(formula_2, CPS)
estimates_3control <- synthdid(formula_3, CPS)

expect_equal(1, 1)
})

test_that("synthdid wrapper gives same results as synthdid_estimate", {
data(CPS)
# Controlling for hours
formula <- urate ~ min_wage | state + year | ~ hours
# Estimate using wrapper function
estimates_wrap <- synthdid(formula, CPS)

# Estimate using synthdid_estimate directly
# Create panel matrix
setup <- panel.matrices(CPS,
unit = "state",
time = "year",
outcome = "urate",
treatment = "min_wage")

# Create control array
covariate_df <- reshape(CPS[, c("state", "year", "hours")],
direction = "wide", idvar = "state", timevar = "year")

covariate_array <- array(as.matrix(covariate_df[, -1]), c(50, 40, 1))
estimates_direct <- synthdid_estimate(setup$Y, setup$N0, setup$T0,
X = covariate_array)

expect_equal(as.numeric(estimates_wrap$urate), as.numeric(estimates_direct))
})


test_that("estimating SC and DID works with synthdid wrapper", {
data(CPS)
formula <- c(urate, hours) ~ min_wage | state + year | ~ log_wage
estimates_sc <- synthdid(formula, CPS, method = sc_estimate)
estimates_did <- synthdid(formula, CPS, method = did_estimate)
estimates_synthdid <- synthdid(formula, CPS, method = synthdid_estimate)

expect_equal(1, 1)
})


test_that("plotting doesn't error with (i) dates as colnames (ii) spaghetti units", {
data(california_prop99)

Expand Down Expand Up @@ -168,3 +224,6 @@ test_that("treated effect shifts correctly with scalar shifts to the 4 blocks",
}
}
})