Skip to content

Commit

Permalink
ipw estimator work almost done
Browse files Browse the repository at this point in the history
  • Loading branch information
BERENZ committed Feb 19, 2025
1 parent 878834e commit b4eee20
Show file tree
Hide file tree
Showing 9 changed files with 406 additions and 348 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ nonprobsvy News and Updates
- documentation improved.
- switching completely to snake_case.
- extensive cleaning of the code.
- more unit-tests added.

### Documentation

Expand Down
49 changes: 49 additions & 0 deletions inst/tinytest/_code_for_all_.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
## testing bootstrap
set.seed(2024)

# Load required data
data(admin)
data(jvs)

# Create objects ----------------------------------------------------------

# Create survey design object
jvs_svy <- svydesign(
ids = ~1,
weights = ~weight,
strata = ~size + nace + region,
data = jvs
)

N <- sum(weights(jvs_svy))
pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
pop_means <- pop_totals[-1]/N


# simulated data (Kim & Yang 2019) ----------------------------------------------------------

kim2019_N <- 1e5 ## 1000000
n <- 500
x1 <- rnorm(n = kim2019_N, mean = 1, sd = 1)
x2 <- rexp(n = kim2019_N, rate = 1)
epsilon <- rnorm(n = kim2019_N)
y1 <- 1 + x1 + x2 + epsilon
y2 <- 0.5*(x1 - 0.5)^2 + x2 + epsilon
p1 <- exp(x2)/(1+exp(x2))
p2 <- exp(-0.5+0.5*(x2-2)^2)/(1+exp(-0.5+0.5*(x2-2)^2))
flag_bd1 <- rbinom(n = kim2019_N, size = 1, prob = p1)
flag_srs <- as.numeric(1:kim2019_N %in% sample(1:kim2019_N, size = n))
base_w_srs <- kim2019_N/n
population <- data.frame(x1,x2,y1,y2,p1,p2,base_w_srs, flag_bd1, flag_srs)
base_w_bd <- kim2019_N/sum(population$flag_bd1)

kim2019_sample_prob <- svydesign(ids= ~1, weights = ~ base_w_srs, data = subset(population, flag_srs == 1))
kim2019_sample_nonprob <- subset(population, flag_bd1 == 1)
kim2019_y_true <- c(mean(y1), mean(y2))
kim2019_totals <- colSums(model.matrix(~ x1 + x2, population))


# simulated high-dim data (Yang 2020) -------------------------------------



16 changes: 1 addition & 15 deletions inst/tinytest/test_check_balance.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,4 @@
# Load required data
data(admin)
data(jvs)


# Create objects ----------------------------------------------------------

# Create survey design object
expect_silent(
jvs_svy <- svydesign(
ids = ~1,
weights = ~weight,
strata = ~size + nace + region,
data = jvs
))
source("_code_for_all_.R")

# Create standard IPW estimator
expect_silent(ipw_est1 <- nonprob(
Expand Down
56 changes: 56 additions & 0 deletions inst/tinytest/test_ci_coverage.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
source("_code_for_all_.R")


# unit-level data ---------------------------------------------------------

### ipw mle ---------------------------------------------------------------------

ipw_mle <- nonprob(
selection = ~x1 + x2,
target = ~y1 + y2,
svydesign = kim2019_sample_prob,
data = kim2019_sample_nonprob,
method_selection = "logit")

expect_equal(
ipw_mle$confidence_interval$lower_bound < kim2019_y_true &
ipw_mle$confidence_interval$upper_bound > kim2019_y_true,
c(TRUE, TRUE)
)


### ipw gee ---------------------------------------------------------------------

ipw_gee <- nonprob(
selection = ~x1 + x2,
target = ~y1 + y2,
svydesign = kim2019_sample_prob,
data = kim2019_sample_nonprob,
method_selection = "logit",
control_selection = control_sel(est_method = "gee"))

expect_equal(
ipw_gee$confidence_interval$lower_bound < kim2019_y_true &
ipw_gee$confidence_interval$upper_bound > kim2019_y_true,
c(TRUE, TRUE)
)


# pop level data ----------------------------------------------------------

### ipw mle (is gee) ---------------------------------------------------------------------

ipw_mle <- nonprob(
selection = ~x1 + x2,
target = ~y1 + y2,
pop_total = kim2019_totals,
data = kim2019_sample_nonprob,
method_selection = "logit")

expect_equal(
ipw_mle$confidence_interval$lower_bound < kim2019_y_true &
ipw_mle$confidence_interval$upper_bound > kim2019_y_true,
c(TRUE, TRUE)
)


21 changes: 1 addition & 20 deletions inst/tinytest/test_ipw.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,4 @@
# Load test data
data(jvs)
data(admin)


# create objects ----------------------------------------------------------

# Setup survey design
expect_silent(
jvs_svy <- svydesign(
ids = ~1,
weights = ~weight,
strata = ~size + nace + region,
data = jvs)
)


N <- sum(weights(jvs_svy))
pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
pop_means <- pop_totals[-1]/N
source("_code_for_all_.R")

# population data only ----------------------------------------------------------------

Expand Down
27 changes: 3 additions & 24 deletions inst/tinytest/test_ipw_boot.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,4 @@
## testing bootstrap
set.seed(2024)

# Load required data
data(admin)
data(jvs)

# Create objects ----------------------------------------------------------

# Create survey design object
expect_silent(
jvs_svy <- svydesign(
ids = ~1,
weights = ~weight,
strata = ~size + nace + region,
data = jvs
))

N <- sum(weights(jvs_svy))
pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
pop_means <- pop_totals[-1]/N

source("_code_for_all_.R")

# single-core bootstrap ---------------------------------------------------

Expand All @@ -35,14 +14,14 @@ expect_silent(nonprob(
control_inference = control_inf(var_method = "bootstrap", num_boot = 2)
))

expect_silent(nonprob(
expect_silent(suppressWarnings(nonprob(
selection = ~region + private + nace + size,
target = ~single_shift,
pop_totals = pop_totals,
data = admin,
method_selection = "logit",
control_inference = control_inf(var_method = "bootstrap", num_boot = 2)
))
)))

### calibrated IPW estimator --------------------------------------------------

Expand Down
40 changes: 39 additions & 1 deletion inst/tinytest/test_nonprob.R
Original file line number Diff line number Diff line change
@@ -1 +1,39 @@
# testing main function if works as expected

source("_code_for_all_.R")

# check parameters --------------------------------------------------------

expect_error(
nonprob(data = admin)
)

expect_error(
nonprob(data = admin,
selection = ~ region)
)

expect_error(
nonprob(data = admin,
selection = ~ region,
target = ~ single_shift)
)

expect_error(
nonprob(data = admin,
outcome = single_shift ~ region,
target = ~ single_shift)
)

expect_error(
nonprob(data = admin,
outcome = single_shift ~ region,
target = ~ single_shift,
pop_means = pop_means)
)

expect_error(
nonprob(data = admin,
outcome = single_shift ~ region,
target = ~ single_shift,
pop_size = N)
)
15 changes: 1 addition & 14 deletions inst/tinytest/test_nonprob_unit_level.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
# Load test data
data(jvs)
data(admin)
source("_code_for_all_.R")


# create objects ----------------------------------------------------------

# Setup survey design
expect_silent(
jvs_svy <- svydesign(
ids = ~1,
weights = ~weight,
strata = ~size + nace + region,
data = jvs)
)

# IPW estimator
expect_silent(
ipw_result <- nonprob(
Expand Down
Loading

0 comments on commit b4eee20

Please sign in to comment.