ipw estimator work almost done

ncn-foreigners · Feb 19, 2025 · b4eee20 · b4eee20
1 parent 878834e
commit b4eee20
Show file tree

Hide file tree

Showing 9 changed files with 406 additions and 348 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -53,6 +53,7 @@ nonprobsvy News and Updates
 -   documentation improved.
 -   switching completely to snake_case.
 -   extensive cleaning of the code.
+-   more unit-tests added.
 
 ### Documentation
 

diff --git a/inst/tinytest/_code_for_all_.R b/inst/tinytest/_code_for_all_.R
@@ -0,0 +1,49 @@
+## testing bootstrap
+set.seed(2024)
+
+# Load required data
+data(admin)
+data(jvs)
+
+# Create objects ----------------------------------------------------------
+
+# Create survey design object
+jvs_svy <- svydesign(
+    ids = ~1,
+    weights = ~weight,
+    strata = ~size + nace + region,
+    data = jvs
+)
+
+N <- sum(weights(jvs_svy))
+pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
+pop_means <- pop_totals[-1]/N
+
+
+# simulated data (Kim & Yang 2019) ----------------------------------------------------------
+
+kim2019_N <- 1e5 ## 1000000
+n <- 500
+x1 <- rnorm(n = kim2019_N, mean = 1, sd = 1)
+x2 <- rexp(n = kim2019_N, rate = 1)
+epsilon <- rnorm(n = kim2019_N)
+y1 <- 1 + x1 + x2 + epsilon
+y2 <- 0.5*(x1 - 0.5)^2 + x2 + epsilon
+p1 <- exp(x2)/(1+exp(x2))
+p2 <- exp(-0.5+0.5*(x2-2)^2)/(1+exp(-0.5+0.5*(x2-2)^2))
+flag_bd1 <- rbinom(n = kim2019_N, size = 1, prob = p1)
+flag_srs <- as.numeric(1:kim2019_N %in% sample(1:kim2019_N, size = n))
+base_w_srs <- kim2019_N/n
+population <- data.frame(x1,x2,y1,y2,p1,p2,base_w_srs, flag_bd1, flag_srs)
+base_w_bd <- kim2019_N/sum(population$flag_bd1)
+
+kim2019_sample_prob <- svydesign(ids= ~1, weights = ~ base_w_srs, data = subset(population, flag_srs == 1))
+kim2019_sample_nonprob <- subset(population, flag_bd1 == 1)
+kim2019_y_true <- c(mean(y1), mean(y2))
+kim2019_totals <- colSums(model.matrix(~ x1 + x2, population))
+
+
+# simulated high-dim data (Yang 2020) -------------------------------------
+
+
+
diff --git a/inst/tinytest/test_check_balance.R b/inst/tinytest/test_check_balance.R
@@ -1,18 +1,4 @@
-# Load required data
-data(admin)
-data(jvs)
-
-
-# Create objects ----------------------------------------------------------
-
-# Create survey design object
-expect_silent(
-  jvs_svy <- svydesign(
-  ids = ~1,
-  weights = ~weight,
-  strata = ~size + nace + region,
-  data = jvs
-))
+source("_code_for_all_.R")
 
 # Create standard IPW estimator
 expect_silent(ipw_est1 <- nonprob(

diff --git a/inst/tinytest/test_ci_coverage.R b/inst/tinytest/test_ci_coverage.R
@@ -0,0 +1,56 @@
+source("_code_for_all_.R")
+
+
+# unit-level data ---------------------------------------------------------
+
+### ipw mle ---------------------------------------------------------------------
+
+ipw_mle <- nonprob(
+  selection = ~x1 + x2,
+  target = ~y1 + y2,
+  svydesign = kim2019_sample_prob,
+  data = kim2019_sample_nonprob,
+  method_selection = "logit")
+
+expect_equal(
+  ipw_mle$confidence_interval$lower_bound < kim2019_y_true &
+  ipw_mle$confidence_interval$upper_bound > kim2019_y_true,
+  c(TRUE, TRUE)
+)
+
+
+### ipw gee ---------------------------------------------------------------------
+
+ipw_gee <- nonprob(
+  selection = ~x1 + x2,
+  target = ~y1 + y2,
+  svydesign = kim2019_sample_prob,
+  data = kim2019_sample_nonprob,
+  method_selection = "logit",
+  control_selection = control_sel(est_method = "gee"))
+
+expect_equal(
+  ipw_gee$confidence_interval$lower_bound < kim2019_y_true &
+  ipw_gee$confidence_interval$upper_bound > kim2019_y_true,
+  c(TRUE, TRUE)
+)
+
+
+# pop level data ----------------------------------------------------------
+
+### ipw mle (is gee) ---------------------------------------------------------------------
+
+ipw_mle <- nonprob(
+  selection = ~x1 + x2,
+  target = ~y1 + y2,
+  pop_total = kim2019_totals,
+  data = kim2019_sample_nonprob,
+  method_selection = "logit")
+
+expect_equal(
+  ipw_mle$confidence_interval$lower_bound < kim2019_y_true &
+  ipw_mle$confidence_interval$upper_bound > kim2019_y_true,
+  c(TRUE, TRUE)
+)
+
+
diff --git a/inst/tinytest/test_ipw.R b/inst/tinytest/test_ipw.R
@@ -1,23 +1,4 @@
-# Load test data
-data(jvs)
-data(admin)
-
-
-# create objects ----------------------------------------------------------
-
-# Setup survey design
-expect_silent(
-  jvs_svy <- svydesign(
-    ids = ~1,
-    weights = ~weight,
-    strata = ~size + nace + region,
-    data = jvs)
-)
-
-
-N <- sum(weights(jvs_svy))
-pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
-pop_means <- pop_totals[-1]/N
+source("_code_for_all_.R")
 
 # population data only ----------------------------------------------------------------
 

diff --git a/inst/tinytest/test_ipw_boot.R b/inst/tinytest/test_ipw_boot.R
@@ -1,25 +1,4 @@
-## testing bootstrap
-set.seed(2024)
-
-# Load required data
-data(admin)
-data(jvs)
-
-# Create objects ----------------------------------------------------------
-
-# Create survey design object
-expect_silent(
-  jvs_svy <- svydesign(
-    ids = ~1,
-    weights = ~weight,
-    strata = ~size + nace + region,
-    data = jvs
-  ))
-
-N <- sum(weights(jvs_svy))
-pop_totals <- colSums(model.matrix(~region + private + nace + size, jvs)*jvs$weight)
-pop_means <- pop_totals[-1]/N
-
+source("_code_for_all_.R")
 
 # single-core bootstrap ---------------------------------------------------
 
@@ -35,14 +14,14 @@ expect_silent(nonprob(
   control_inference = control_inf(var_method = "bootstrap", num_boot = 2)
 ))
 
-expect_silent(nonprob(
+expect_silent(suppressWarnings(nonprob(
   selection = ~region + private + nace + size,
   target = ~single_shift,
   pop_totals = pop_totals,
   data = admin,
   method_selection = "logit",
   control_inference = control_inf(var_method = "bootstrap", num_boot = 2)
-))
+)))
 
 ### calibrated IPW estimator --------------------------------------------------
 

diff --git a/inst/tinytest/test_nonprob.R b/inst/tinytest/test_nonprob.R
@@ -1 +1,39 @@
-# testing main function if works as expected
+
+source("_code_for_all_.R")
+
+# check parameters --------------------------------------------------------
+
+expect_error(
+  nonprob(data = admin)
+)
+
+expect_error(
+  nonprob(data = admin,
+          selection = ~ region)
+)
+
+expect_error(
+  nonprob(data = admin,
+          selection = ~ region,
+          target = ~ single_shift)
+)
+
+expect_error(
+  nonprob(data = admin,
+          outcome = single_shift ~ region,
+          target = ~ single_shift)
+)
+
+expect_error(
+  nonprob(data = admin,
+          outcome = single_shift ~ region,
+          target = ~ single_shift,
+          pop_means = pop_means)
+)
+
+expect_error(
+  nonprob(data = admin,
+          outcome = single_shift ~ region,
+          target = ~ single_shift,
+          pop_size = N)
+)
diff --git a/inst/tinytest/test_nonprob_unit_level.R b/inst/tinytest/test_nonprob_unit_level.R
@@ -1,19 +1,6 @@
-# Load test data
-data(jvs)
-data(admin)
+source("_code_for_all_.R")
 
 
-# create objects ----------------------------------------------------------
-
-# Setup survey design
-expect_silent(
-  jvs_svy <- svydesign(
-  ids = ~1,
-  weights = ~weight,
-  strata = ~size + nace + region,
-  data = jvs)
-)
-
 # IPW estimator
 expect_silent(
 ipw_result <- nonprob(